deploy-k8s.sh 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914
  1. #!/usr/bin/env bash
  2. # Claude Code Hub - Kubernetes / k3s One-Click Deployment
  3. # 兼容 k3s 与标准 Kubernetes (EKS/GKE/AKS/self-hosted)
  4. # 详见: docs/k8s-deployment.md
  5. set -euo pipefail
  6. ###############################################################################
  7. # Colors (在非 TTY 或 NO_COLOR 环境自动降级)
  8. ###############################################################################
  9. if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
  10. RED=$'\033[0;31m'
  11. GREEN=$'\033[0;32m'
  12. YELLOW=$'\033[1;33m'
  13. BLUE=$'\033[0;34m'
  14. CYAN=$'\033[0;36m'
  15. NC=$'\033[0m'
  16. else
  17. RED=""
  18. GREEN=""
  19. YELLOW=""
  20. BLUE=""
  21. CYAN=""
  22. NC=""
  23. fi
  24. ###############################################################################
  25. # Script metadata
  26. ###############################################################################
  27. VERSION="1.0.0"
  28. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  29. REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
  30. SOURCE_MANIFEST_DIR="$REPO_ROOT/deploy/k8s"
  31. ###############################################################################
  32. # Logging
  33. ###############################################################################
  34. log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
  35. log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
  36. log_warning() { echo -e "${YELLOW}[WARNING]${NC} $1"; }
  37. log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }
  38. # 跨平台 base64 decode (macOS BSD 旧版只认 -D)
  39. b64d() {
  40. if base64 -d </dev/null >/dev/null 2>&1; then
  41. base64 -d
  42. elif base64 -D </dev/null >/dev/null 2>&1; then
  43. base64 -D
  44. else
  45. openssl base64 -d
  46. fi
  47. }
  48. ###############################################################################
  49. # Defaults
  50. ###############################################################################
  51. DEFAULT_NAMESPACE="claude-code-hub"
  52. DEFAULT_IMAGE="ghcr.io/ding113/claude-code-hub:latest"
  53. DEFAULT_REPLICAS=2
  54. DEFAULT_HPA_MIN=2
  55. DEFAULT_HPA_MAX=6
  56. DEFAULT_PG_SIZE="50Gi"
  57. DEFAULT_REDIS_SIZE="10Gi"
  58. DEFAULT_TIMEZONE="Asia/Shanghai"
  59. ###############################################################################
  60. # CLI argument variables
  61. ###############################################################################
  62. NAMESPACE_ARG=""
  63. IMAGE_ARG=""
  64. BRANCH_ARG=""
  65. TOKEN_ARG=""
  66. REPLICAS_ARG=""
  67. HPA_MIN_ARG=""
  68. HPA_MAX_ARG=""
  69. STORAGE_CLASS_ARG=""
  70. PG_SIZE_ARG=""
  71. REDIS_SIZE_ARG=""
  72. TIMEZONE_ARG=""
  73. INGRESS_HOST_ARG=""
  74. INGRESS_CLASS_ARG=""
  75. DISABLE_INGRESS=false
  76. DISABLE_NETWORKPOLICY=false
  77. DEPLOY_DIR_ARG=""
  78. KUBE_CONTEXT_ARG=""
  79. INSTALL_K3S=false
  80. INSTALL_CCH=false
  81. FORCE_NEW=false
  82. DRY_RENDER=false
  83. NON_INTERACTIVE=false
  84. ###############################################################################
  85. # Runtime state
  86. ###############################################################################
  87. NAMESPACE=""
  88. APP_IMAGE=""
  89. APP_REPLICAS=""
  90. APP_HPA_MIN=""
  91. APP_HPA_MAX=""
  92. STORAGE_CLASS=""
  93. PG_STORAGE_SIZE=""
  94. REDIS_STORAGE_SIZE=""
  95. TIMEZONE=""
  96. INGRESS_HOST=""
  97. INGRESS_CLASS=""
  98. INGRESS_VARIANT="" # standard | traefik | nodeport
  99. APP_SERVICE_TYPE="" # ClusterIP | NodePort
  100. DEPLOY_DIR=""
  101. RUNTIME="" # k3s | kubectl
  102. RUNTIME_OVERRIDE="${RUNTIME_OVERRIDE:-}"
  103. KUBECTL=""
  104. UPDATE_MODE=false
  105. ADMIN_TOKEN=""
  106. PG_PASSWORD=""
  107. REDIS_PASSWORD=""
###############################################################################
# Help
###############################################################################
# Print the usage text to stdout.
# The heredoc delimiter is unquoted, so ${VERSION}, $0 and the ${DEFAULT_*}
# values are expanded when the text is printed.
show_help() {
cat << EOF
Claude Code Hub - K8s/k3s One-Click Deployment Script v${VERSION}
Usage: $0 [OPTIONS]
Cluster:
-n, --namespace <ns> K8s namespace (default: ${DEFAULT_NAMESPACE})
--kube-context <ctx> kubectl context (default: current)
--install-k3s 本机无集群时自动安装 k3s (需要 sudo)
Application:
-i, --image <ref> 应用镜像 (default: ${DEFAULT_IMAGE})
-b, --branch <name> 分支捷径 main→:latest / dev→:dev
-t, --admin-token <token> 自定义 ADMIN_TOKEN (default: auto-generated)
--replicas <n> Deployment 基线副本数 (default: ${DEFAULT_REPLICAS})
--hpa-min <n> HPA 最小副本 (default: ${DEFAULT_HPA_MIN})
--hpa-max <n> HPA 最大副本 (default: ${DEFAULT_HPA_MAX})
--timezone <tz> 容器时区 (default: ${DEFAULT_TIMEZONE})
Storage:
--storage-class <name> PVC storageClassName (default: 自动探测)
--pg-size <size> PostgreSQL PVC 大小 (default: ${DEFAULT_PG_SIZE})
--redis-size <size> Redis PVC 大小 (default: ${DEFAULT_REDIS_SIZE})
Ingress:
--ingress-host <host> 启用 Ingress 并绑定域名
--ingress-class <cls> Ingress className (default: 自动探测)
--disable-ingress 跳过 Ingress,使用 NodePort
--disable-networkpolicy 跳过 NetworkPolicy (Ingress Controller 不在标准 ns 时需要)
Deployment:
-d, --deploy-dir <path> manifest + cch 安装目录 (default: auto)
--force-new 删除已有 namespace 后强制重装 (会提示)
--install-cch 把 cch 软链接到 /usr/local/bin/cch (需 sudo)
--dry-render 只渲染 manifest 不 apply (用于审阅)
Misc:
-y, --yes 非交互模式 (用默认值)
-h, --help 显示帮助
--version 显示版本号
Examples:
# 最简,交互式
$0
# 非交互,纯默认
$0 -y
# 部署 dev 分支,自定义命名空间与域名
$0 -b dev -n my-hub --ingress-host hub.example.com -y
# 标准 K8s,指定 storage class
$0 --storage-class standard -y
# 仅渲染 manifest 不应用 (用于离线审阅)
$0 --dry-render --deploy-dir /tmp/cch-k8s -y
For more information: https://github.com/ding113/claude-code-hub
EOF
}
  159. ###############################################################################
  160. # Arg parsing
  161. ###############################################################################
  162. parse_args() {
  163. while [[ $# -gt 0 ]]; do
  164. case "$1" in
  165. -n|--namespace) NAMESPACE_ARG="$2"; shift 2 ;;
  166. --kube-context) KUBE_CONTEXT_ARG="$2"; shift 2 ;;
  167. --install-k3s) INSTALL_K3S=true; shift ;;
  168. -i|--image) IMAGE_ARG="$2"; shift 2 ;;
  169. -b|--branch) BRANCH_ARG="$2"; shift 2 ;;
  170. -t|--admin-token) TOKEN_ARG="$2"; shift 2 ;;
  171. --replicas) REPLICAS_ARG="$2"; shift 2 ;;
  172. --hpa-min) HPA_MIN_ARG="$2"; shift 2 ;;
  173. --hpa-max) HPA_MAX_ARG="$2"; shift 2 ;;
  174. --timezone) TIMEZONE_ARG="$2"; shift 2 ;;
  175. --storage-class) STORAGE_CLASS_ARG="$2"; shift 2 ;;
  176. --pg-size) PG_SIZE_ARG="$2"; shift 2 ;;
  177. --redis-size) REDIS_SIZE_ARG="$2"; shift 2 ;;
  178. --ingress-host) INGRESS_HOST_ARG="$2"; shift 2 ;;
  179. --ingress-class) INGRESS_CLASS_ARG="$2"; shift 2 ;;
  180. --disable-ingress) DISABLE_INGRESS=true; shift ;;
  181. --disable-networkpolicy) DISABLE_NETWORKPOLICY=true; shift ;;
  182. -d|--deploy-dir) DEPLOY_DIR_ARG="$2"; shift 2 ;;
  183. --force-new) FORCE_NEW=true; shift ;;
  184. --install-cch) INSTALL_CCH=true; shift ;;
  185. --dry-render) DRY_RENDER=true; shift ;;
  186. -y|--yes) NON_INTERACTIVE=true; shift ;;
  187. -h|--help) show_help; exit 0 ;;
  188. --version) echo "deploy-k8s.sh v${VERSION}"; exit 0 ;;
  189. *) log_error "Unknown argument: $1"; show_help; exit 1 ;;
  190. esac
  191. done
  192. }
  193. ###############################################################################
  194. # Banner
  195. ###############################################################################
  196. print_header() {
  197. echo -e "${BLUE}"
  198. echo "+=================================================================+"
  199. echo "| |"
  200. echo "| Claude Code Hub - K8s / k3s One-Click Deployment |"
  201. echo "| Version ${VERSION} |"
  202. echo "| |"
  203. echo "+=================================================================+"
  204. echo -e "${NC}"
  205. }
  206. ###############################################################################
  207. # OS & runtime detection
  208. ###############################################################################
  209. detect_os() {
  210. local os_type
  211. case "$OSTYPE" in
  212. linux*) os_type="linux" ;;
  213. darwin*) os_type="macos" ;;
  214. *) log_error "Unsupported OS: $OSTYPE"; exit 1 ;;
  215. esac
  216. log_info "Detected OS: $os_type"
  217. # Default deploy dir depends on permissions
  218. if [[ -z "$DEPLOY_DIR_ARG" ]]; then
  219. if [[ $EUID -eq 0 ]]; then
  220. DEPLOY_DIR="/opt/claude-code-hub"
  221. else
  222. DEPLOY_DIR="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
  223. fi
  224. else
  225. DEPLOY_DIR="$DEPLOY_DIR_ARG"
  226. fi
  227. log_info "Deploy directory: $DEPLOY_DIR"
  228. }
  229. detect_runtime() {
  230. # 显式覆盖
  231. if [[ -n "$RUNTIME_OVERRIDE" ]]; then
  232. RUNTIME="$RUNTIME_OVERRIDE"
  233. KUBECTL="kubectl"
  234. if [[ -n "$KUBE_CONTEXT_ARG" ]]; then
  235. KUBECTL="kubectl --context=$KUBE_CONTEXT_ARG"
  236. fi
  237. log_info "Runtime: $RUNTIME (override)"
  238. return
  239. fi
  240. # 如果用户显式指定了 kube-context,则直接用 kubectl
  241. if [[ -n "$KUBE_CONTEXT_ARG" ]]; then
  242. if ! command -v kubectl &>/dev/null; then
  243. log_error "kubectl 不可用,无法使用 --kube-context"
  244. exit 1
  245. fi
  246. RUNTIME="kubectl"
  247. KUBECTL="kubectl --context=$KUBE_CONTEXT_ARG"
  248. log_info "Runtime: kubectl (context=$KUBE_CONTEXT_ARG)"
  249. return
  250. fi
  251. # 标准 kubectl 优先
  252. if command -v kubectl &>/dev/null; then
  253. if kubectl cluster-info &>/dev/null; then
  254. RUNTIME="kubectl"
  255. KUBECTL="kubectl"
  256. # 探测当前集群是否是 k3s (观察节点 kubelet version 或 rancher 标识)
  257. if kubectl get nodes -o jsonpath='{.items[*].status.nodeInfo.kubeletVersion}' 2>/dev/null | grep -q 'k3s'; then
  258. RUNTIME="k3s"
  259. log_info "Runtime: k3s (via kubectl)"
  260. else
  261. log_info "Runtime: standard Kubernetes (via kubectl)"
  262. fi
  263. return
  264. fi
  265. fi
  266. # 没有 kubectl 但本机有 k3s
  267. if command -v k3s &>/dev/null; then
  268. RUNTIME="k3s"
  269. KUBECTL="sudo k3s kubectl"
  270. log_info "Runtime: k3s (via \`k3s kubectl\`)"
  271. return
  272. fi
  273. # 什么都没有
  274. if [[ "$INSTALL_K3S" == true ]]; then
  275. install_k3s
  276. return
  277. fi
  278. log_error "未检测到可用的 K8s 集群 (缺少 kubectl 或 k3s)。"
  279. log_info "可选方案:"
  280. log_info " 1. 安装 kubectl 并确保 ~/.kube/config 指向可用集群"
  281. log_info " 2. 传入 --install-k3s 让本脚本为你安装 k3s (单机场景)"
  282. exit 1
  283. }
  284. install_k3s() {
  285. if [[ "$NON_INTERACTIVE" != true ]]; then
  286. echo ""
  287. log_warning "即将在本机安装 k3s (官方脚本,curl | sh),这会修改系统服务。"
  288. log_warning "生产环境请先审阅 https://get.k3s.io 返回的脚本内容后再执行。"
  289. read -p "继续?(y/N) " -n 1 -r confirm
  290. echo ""
  291. if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
  292. log_error "已取消"; exit 1
  293. fi
  294. fi
  295. log_info "Installing k3s via official installer..."
  296. if ! curl -fsSL https://get.k3s.io | sh -; then
  297. log_error "k3s 安装失败"; exit 1
  298. fi
  299. # 让 kubectl 可以读取 k3s config
  300. if [[ -r /etc/rancher/k3s/k3s.yaml ]]; then
  301. export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
  302. fi
  303. if command -v kubectl &>/dev/null; then
  304. KUBECTL="kubectl"
  305. else
  306. KUBECTL="sudo k3s kubectl"
  307. fi
  308. RUNTIME="k3s"
  309. log_success "k3s installed"
  310. # 等 API 就绪
  311. local i=0
  312. until $KUBECTL get nodes &>/dev/null; do
  313. if [[ $i -ge 30 ]]; then log_error "k3s API 长时间不可达"; exit 1; fi
  314. sleep 2; i=$((i+1))
  315. done
  316. }
  317. ###############################################################################
  318. # Preflight
  319. ###############################################################################
  320. preflight_checks() {
  321. if ! $KUBECTL get nodes &>/dev/null; then
  322. log_error "kubectl 无法连接集群。请检查 kubeconfig / context。"
  323. exit 1
  324. fi
  325. local node_count
  326. node_count=$($KUBECTL get nodes --no-headers 2>/dev/null | wc -l)
  327. log_info "Cluster reachable. Node count: $node_count"
  328. # 一些基础权限试探 (create ns 的权限)
  329. if ! $KUBECTL auth can-i create namespace &>/dev/null; then
  330. log_warning "当前用户可能无 create namespace 权限,如部署失败请用 cluster-admin 重试"
  331. fi
  332. }
  333. ###############################################################################
  334. # 应用配置合并 (CLI > 默认)
  335. ###############################################################################
  336. resolve_config() {
  337. NAMESPACE="${NAMESPACE_ARG:-$DEFAULT_NAMESPACE}"
  338. # 分支捷径
  339. if [[ -n "$BRANCH_ARG" ]]; then
  340. case "$BRANCH_ARG" in
  341. main|master) APP_IMAGE="ghcr.io/ding113/claude-code-hub:latest" ;;
  342. dev) APP_IMAGE="ghcr.io/ding113/claude-code-hub:dev" ;;
  343. *) log_error "Unknown branch: $BRANCH_ARG (expected: main|dev)"; exit 1 ;;
  344. esac
  345. fi
  346. APP_IMAGE="${IMAGE_ARG:-${APP_IMAGE:-$DEFAULT_IMAGE}}"
  347. APP_REPLICAS="${REPLICAS_ARG:-$DEFAULT_REPLICAS}"
  348. APP_HPA_MIN="${HPA_MIN_ARG:-$DEFAULT_HPA_MIN}"
  349. APP_HPA_MAX="${HPA_MAX_ARG:-$DEFAULT_HPA_MAX}"
  350. PG_STORAGE_SIZE="${PG_SIZE_ARG:-$DEFAULT_PG_SIZE}"
  351. REDIS_STORAGE_SIZE="${REDIS_SIZE_ARG:-$DEFAULT_REDIS_SIZE}"
  352. TIMEZONE="${TIMEZONE_ARG:-$DEFAULT_TIMEZONE}"
  353. INGRESS_HOST="${INGRESS_HOST_ARG:-}"
  354. # 校验
  355. if ! [[ "$APP_REPLICAS" =~ ^[0-9]+$ ]] || [[ "$APP_REPLICAS" -lt 1 ]]; then
  356. log_error "--replicas 必须是正整数: $APP_REPLICAS"; exit 1
  357. fi
  358. if ! [[ "$APP_HPA_MIN" =~ ^[0-9]+$ ]] || [[ "$APP_HPA_MIN" -lt 1 ]]; then
  359. log_error "--hpa-min 必须是正整数: $APP_HPA_MIN"; exit 1
  360. fi
  361. if ! [[ "$APP_HPA_MAX" =~ ^[0-9]+$ ]] || [[ "$APP_HPA_MAX" -lt 1 ]]; then
  362. log_error "--hpa-max 必须是正整数: $APP_HPA_MAX"; exit 1
  363. fi
  364. if [[ "$APP_HPA_MIN" -gt "$APP_HPA_MAX" ]]; then
  365. log_error "--hpa-min ($APP_HPA_MIN) 不能大于 --hpa-max ($APP_HPA_MAX)"; exit 1
  366. fi
  367. log_info "Namespace: $NAMESPACE"
  368. log_info "App image: $APP_IMAGE"
  369. log_info "Replicas: $APP_REPLICAS (HPA: $APP_HPA_MIN-$APP_HPA_MAX)"
  370. log_info "PG storage: $PG_STORAGE_SIZE"
  371. log_info "Redis storage: $REDIS_STORAGE_SIZE"
  372. log_info "Timezone: $TIMEZONE"
  373. if [[ "$APP_REPLICAS" -gt 1 ]]; then
  374. log_info "AUTO_MIGRATE 由 PostgreSQL advisory lock 串行化,首次多副本启动会排队等待迁移完成"
  375. fi
  376. }
  377. detect_storage_class() {
  378. if [[ -n "$STORAGE_CLASS_ARG" ]]; then
  379. STORAGE_CLASS="$STORAGE_CLASS_ARG"
  380. log_info "Storage class (user): $STORAGE_CLASS"
  381. return
  382. fi
  383. # k3s → local-path
  384. if [[ "$RUNTIME" == "k3s" ]] && $KUBECTL get sc local-path &>/dev/null; then
  385. STORAGE_CLASS="local-path"
  386. log_info "Storage class (k3s default): local-path"
  387. return
  388. fi
  389. # 尝试找默认 StorageClass
  390. local default_sc
  391. default_sc=$($KUBECTL get sc -o jsonpath='{range .items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")]}{.metadata.name}{"\n"}{end}' 2>/dev/null | head -1)
  392. if [[ -z "$default_sc" ]]; then
  393. default_sc=$($KUBECTL get sc -o jsonpath='{range .items[?(@.metadata.annotations.storageclass\.beta\.kubernetes\.io/is-default-class=="true")]}{.metadata.name}{"\n"}{end}' 2>/dev/null | head -1)
  394. fi
  395. if [[ -n "$default_sc" ]]; then
  396. STORAGE_CLASS="$default_sc"
  397. log_info "Storage class (cluster default): $default_sc"
  398. return
  399. fi
  400. # 找不到默认,用空串让集群自行决定
  401. STORAGE_CLASS=""
  402. log_warning "未检测到默认 StorageClass。PVC 将使用集群默认设置,可能无法自动绑定卷"
  403. log_info "如有需要,请传入 --storage-class <name> 指定"
  404. }
  405. detect_ingress_variant() {
  406. if [[ "$DISABLE_INGRESS" == true ]]; then
  407. INGRESS_VARIANT="nodeport"
  408. APP_SERVICE_TYPE="NodePort"
  409. log_info "Ingress: disabled (Service=NodePort)"
  410. return
  411. fi
  412. if [[ -z "$INGRESS_HOST" ]]; then
  413. INGRESS_VARIANT="nodeport"
  414. APP_SERVICE_TYPE="NodePort"
  415. log_warning "未指定 --ingress-host,将使用 NodePort 暴露"
  416. return
  417. fi
  418. # 检测 Traefik CRD
  419. if $KUBECTL get crd ingressroutes.traefik.io &>/dev/null; then
  420. INGRESS_VARIANT="traefik"
  421. APP_SERVICE_TYPE="ClusterIP"
  422. log_info "Ingress: Traefik IngressRoute (host=$INGRESS_HOST)"
  423. return
  424. fi
  425. # 标准 Ingress
  426. if $KUBECTL api-resources 2>/dev/null | grep -q '^ingresses.*networking.k8s.io'; then
  427. INGRESS_CLASS="${INGRESS_CLASS_ARG:-}"
  428. if [[ -z "$INGRESS_CLASS" ]]; then
  429. # 查找 IngressClass
  430. local first_ic
  431. first_ic=$($KUBECTL get ingressclass -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
  432. if [[ -n "$first_ic" ]]; then
  433. INGRESS_CLASS="$first_ic"
  434. else
  435. INGRESS_CLASS="nginx"
  436. log_warning "集群无 IngressClass,默认填 nginx。请通过 --ingress-class 显式指定"
  437. fi
  438. fi
  439. INGRESS_VARIANT="standard"
  440. APP_SERVICE_TYPE="ClusterIP"
  441. log_info "Ingress: standard (className=$INGRESS_CLASS, host=$INGRESS_HOST)"
  442. return
  443. fi
  444. # 都不可用
  445. INGRESS_VARIANT="nodeport"
  446. APP_SERVICE_TYPE="NodePort"
  447. log_warning "集群不支持 Ingress,回落到 NodePort"
  448. }
  449. ###############################################################################
  450. # Existing deployment detection
  451. ###############################################################################
  452. force_new_reset_existing_namespace() {
  453. if ! $KUBECTL get namespace "$NAMESPACE" &>/dev/null; then
  454. log_info "--force-new 已启用,但 namespace=$NAMESPACE 当前不存在,将按新装模式继续"
  455. return
  456. fi
  457. if [[ "$NON_INTERACTIVE" != true ]]; then
  458. echo ""
  459. log_warning "--force-new 将删除 namespace=$NAMESPACE 并重建所有资源"
  460. log_warning "这会清空 Deployment / StatefulSet / Secret / PVC,现有数据不会保留"
  461. read -p "输入 yes 继续: " confirm
  462. echo ""
  463. if [[ "$confirm" != "yes" ]]; then
  464. log_error "已取消"
  465. exit 1
  466. fi
  467. else
  468. log_warning "--force-new 已启用: 删除 namespace=$NAMESPACE 并重建所有资源"
  469. fi
  470. log_warning "PV 是否真正释放取决于 StorageClass reclaimPolicy; 若为 Retain,旧 PV 会进入 Released,需手动清理"
  471. log_info "删除旧 namespace: $NAMESPACE"
  472. if ! $KUBECTL delete namespace "$NAMESPACE" --timeout=180s >/dev/null; then
  473. log_error "删除 namespace 失败: $NAMESPACE"
  474. exit 1
  475. fi
  476. log_success "旧部署已清理,将按新装模式继续"
  477. }
  478. detect_existing_deployment() {
  479. if [[ "$FORCE_NEW" == true ]]; then
  480. force_new_reset_existing_namespace
  481. UPDATE_MODE=false
  482. return
  483. fi
  484. if $KUBECTL get namespace "$NAMESPACE" &>/dev/null && { \
  485. $KUBECTL -n "$NAMESPACE" get deployment claude-code-hub &>/dev/null || \
  486. $KUBECTL -n "$NAMESPACE" get statefulset postgres &>/dev/null || \
  487. $KUBECTL -n "$NAMESPACE" get statefulset redis &>/dev/null || \
  488. $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets &>/dev/null; \
  489. }; then
  490. UPDATE_MODE=true
  491. log_info "检测到已有安装痕迹(namespace=$NAMESPACE),进入升级模式"
  492. else
  493. UPDATE_MODE=false
  494. log_info "未检测到已有部署,进入新装模式"
  495. fi
  496. }
  497. ###############################################################################
  498. # Secret generation
  499. ###############################################################################
  500. generate_random() {
  501. local length="${1:-32}"
  502. if command -v openssl &>/dev/null; then
  503. openssl rand -base64 48 | tr -d '=/+' | head -c "$length"
  504. else
  505. tr -dc 'A-Za-z0-9' < /dev/urandom | head -c "$length"
  506. fi
  507. }
  508. prepare_secret_values() {
  509. if [[ "$UPDATE_MODE" == true ]] && \
  510. $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets &>/dev/null; then
  511. log_info "升级模式:复用已有 Secret 中的密码"
  512. PG_PASSWORD=$($KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
  513. -o jsonpath='{.data.pg-password}' | b64d)
  514. REDIS_PASSWORD=$($KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
  515. -o jsonpath='{.data.redis-password}' | b64d)
  516. if [[ -n "$TOKEN_ARG" ]]; then
  517. ADMIN_TOKEN="$TOKEN_ARG"
  518. log_info "使用 CLI 传入的 admin-token 覆盖"
  519. else
  520. ADMIN_TOKEN=$($KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
  521. -o jsonpath='{.data.admin-token}' | b64d)
  522. fi
  523. else
  524. log_info "生成随机凭据..."
  525. PG_PASSWORD=$(generate_random 40)
  526. REDIS_PASSWORD=$(generate_random 40)
  527. ADMIN_TOKEN="${TOKEN_ARG:-$(generate_random 48)}"
  528. fi
  529. }
  530. apply_secret() {
  531. local dsn redis_url tmp
  532. dsn="postgresql://claude_code_hub:${PG_PASSWORD}@postgres:5432/claude_code_hub"
  533. redis_url="redis://:${REDIS_PASSWORD}@redis:6379/0"
  534. # 避免把凭据写进命令行参数 (ps / /proc/<pid>/cmdline 会暴露 --from-literal=...)
  535. # 改为先把密码落到 600 权限的临时文件,通过 --from-file 引用,再渲染 YAML 并 apply
  536. tmp=$(mktemp -d)
  537. chmod 700 "$tmp"
  538. trap 'rm -rf "$tmp"' EXIT
  539. printf '%s' "$PG_PASSWORD" > "$tmp/pg-password"; chmod 600 "$tmp/pg-password"
  540. printf '%s' "$REDIS_PASSWORD" > "$tmp/redis-password"; chmod 600 "$tmp/redis-password"
  541. printf '%s' "$ADMIN_TOKEN" > "$tmp/admin-token"; chmod 600 "$tmp/admin-token"
  542. printf '%s' "$dsn" > "$tmp/dsn"; chmod 600 "$tmp/dsn"
  543. printf '%s' "$redis_url" > "$tmp/redis-url"; chmod 600 "$tmp/redis-url"
  544. $KUBECTL -n "$NAMESPACE" create secret generic claude-code-hub-secrets \
  545. --from-file=pg-password="$tmp/pg-password" \
  546. --from-file=redis-password="$tmp/redis-password" \
  547. --from-file=admin-token="$tmp/admin-token" \
  548. --from-file=dsn="$tmp/dsn" \
  549. --from-file=redis-url="$tmp/redis-url" \
  550. --dry-run=client -o yaml | $KUBECTL apply -f -
  551. rm -rf "$tmp"
  552. trap - EXIT
  553. log_success "Secret claude-code-hub-secrets applied"
  554. }
  555. ###############################################################################
  556. # Manifest rendering
  557. ###############################################################################
  558. render_manifests() {
  559. if [[ ! -d "$SOURCE_MANIFEST_DIR" ]]; then
  560. log_error "未找到源 manifest 目录: $SOURCE_MANIFEST_DIR"
  561. exit 1
  562. fi
  563. local target="$DEPLOY_DIR/k8s"
  564. mkdir -p "$target"
  565. log_info "复制 manifest 模板 -> $target"
  566. # 使用 `src/.` + trailing slash 写法,兼容 BSD (macOS) 与 GNU cp。
  567. # `cp -RT` 是 GNU 专有选项,BSD cp 会报错。
  568. cp -R "$SOURCE_MANIFEST_DIR/." "$target/"
  569. log_info "渲染占位符..."
  570. # 处理 storageClass 的特殊情况:空串时整行删除
  571. local sc_line_action
  572. if [[ -z "$STORAGE_CLASS" ]]; then
  573. sc_line_action="delete"
  574. else
  575. sc_line_action="replace"
  576. fi
  577. # 对所有 yaml 做占位符替换
  578. local f
  579. while IFS= read -r -d '' f; do
  580. # 跳过 README
  581. [[ "$f" == *"README.md" ]] && continue
  582. # 用 python 一次性渲染 (避免 sed 引号/特殊字符问题)
  583. python3 - "$f" "$NAMESPACE" "$APP_IMAGE" "$APP_REPLICAS" \
  584. "$APP_HPA_MIN" "$APP_HPA_MAX" "$STORAGE_CLASS" \
  585. "$PG_STORAGE_SIZE" "$REDIS_STORAGE_SIZE" \
  586. "$TIMEZONE" "$INGRESS_HOST" "$INGRESS_CLASS" \
  587. "$APP_SERVICE_TYPE" "$sc_line_action" <<'PY'
  588. import sys, re
  589. path = sys.argv[1]
  590. ns, image, replicas, hpa_min, hpa_max, sc, pg_size, redis_size, tz, ing_host, ing_class, svc_type, sc_action = sys.argv[2:]
  591. with open(path) as f:
  592. text = f.read()
  593. if sc_action == "delete":
  594. # 删除包含 storageClassName: {{STORAGE_CLASS}} 的整行
  595. text = "\n".join(l for l in text.splitlines() if "{{STORAGE_CLASS}}" not in l) + ("\n" if text.endswith("\n") else "")
  596. repl = {
  597. "{{NAMESPACE}}": ns,
  598. "{{APP_IMAGE}}": image,
  599. "{{APP_REPLICAS}}": replicas,
  600. "{{APP_HPA_MIN}}": hpa_min,
  601. "{{APP_HPA_MAX}}": hpa_max,
  602. "{{STORAGE_CLASS}}": sc,
  603. "{{PG_STORAGE_SIZE}}": pg_size,
  604. "{{REDIS_STORAGE_SIZE}}": redis_size,
  605. "{{TIMEZONE}}": tz,
  606. "{{INGRESS_HOST}}": ing_host,
  607. "{{INGRESS_CLASS}}": ing_class,
  608. "{{APP_SERVICE_TYPE}}": svc_type,
  609. }
  610. for k, v in repl.items():
  611. text = text.replace(k, v)
  612. with open(path, "w") as f:
  613. f.write(text)
  614. PY
  615. done < <(find "$target" -type f -name '*.yaml' -print0)
  616. log_success "Manifest 渲染完成: $target"
  617. }
  618. ###############################################################################
  619. # Apply
  620. ###############################################################################
  621. kube_apply() {
  622. local f="$1"
  623. if [[ ! -f "$f" ]]; then
  624. log_warning "跳过 (文件不存在): $f"
  625. return
  626. fi
  627. $KUBECTL apply -f "$f"
  628. }
  629. apply_manifests() {
  630. local base="$DEPLOY_DIR/k8s"
  631. log_info "应用 manifest (按依赖顺序)..."
  632. kube_apply "$base/namespace.yaml"
  633. apply_secret
  634. # NetworkPolicy (可选,失败不致命 — 集群可能不启用 NP)
  635. # 注意:默认 app/networkpolicy.yaml 仅放行 namespace 标签为
  636. # kube-system / ingress-nginx / traefik 的 Ingress Controller。
  637. # 若你的 Ingress Controller 位于其他 namespace,传 --disable-networkpolicy
  638. # 并改用自定义 NP,或者编辑 deploy/k8s/app/networkpolicy.yaml
  639. if [[ "$DISABLE_NETWORKPOLICY" == true ]]; then
  640. log_info "已跳过 NetworkPolicy (--disable-networkpolicy)"
  641. else
  642. kube_apply "$base/postgres/networkpolicy.yaml" || log_warning "postgres networkpolicy 应用失败,忽略"
  643. kube_apply "$base/redis/networkpolicy.yaml" || log_warning "redis networkpolicy 应用失败,忽略"
  644. if [[ "$INGRESS_VARIANT" == "nodeport" ]]; then
  645. log_warning "NodePort 模式下跳过 app NetworkPolicy,避免阻断外部访问"
  646. else
  647. kube_apply "$base/app/networkpolicy.yaml" || log_warning "app networkpolicy 应用失败,忽略"
  648. fi
  649. fi
  650. # DB & Cache
  651. kube_apply "$base/postgres/service.yaml"
  652. kube_apply "$base/postgres/statefulset.yaml"
  653. kube_apply "$base/redis/service.yaml"
  654. kube_apply "$base/redis/statefulset.yaml"
  655. log_info "等待 Postgres / Redis 就绪 (最长 5 分钟)..."
  656. if ! $KUBECTL -n "$NAMESPACE" rollout status statefulset/postgres --timeout=300s; then
  657. log_error "Postgres StatefulSet 未就绪,请检查 PVC / StorageClass / 节点资源"
  658. log_info " kubectl -n $NAMESPACE describe pod -l app=postgres"
  659. exit 1
  660. fi
  661. if ! $KUBECTL -n "$NAMESPACE" rollout status statefulset/redis --timeout=300s; then
  662. log_error "Redis StatefulSet 未就绪,请检查 PVC / StorageClass"
  663. exit 1
  664. fi
  665. # 已移除独立的 migration Job (deploy/k8s/jobs/ 目录不再存在):
  666. # 1. 应用启动时 instrumentation.ts 会自动执行 drizzle migrations (AUTO_MIGRATE=true 默认开)
  667. # 2. Job 需要 devDependency drizzle-kit,在 standalone 运行时镜像里不可用
  668. # 3. 避免 Job 与应用 AUTO_MIGRATE 的并发迁移竞态
  669. # App
  670. kube_apply "$base/app/deployment.yaml"
  671. kube_apply "$base/app/service.yaml"
  672. kube_apply "$base/app/hpa.yaml"
  673. kube_apply "$base/app/pdb.yaml"
  674. log_info "等待 App 滚动更新完成 (最长 10 分钟)..."
  675. if ! $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=600s; then
  676. log_error "App Deployment 滚动未能在 10 分钟内完成"
  677. log_info "诊断建议:"
  678. log_info " kubectl -n $NAMESPACE describe deployment claude-code-hub"
  679. log_info " kubectl -n $NAMESPACE logs deploy/claude-code-hub --tail=100"
  680. if [[ "$UPDATE_MODE" == true ]]; then
  681. log_warning "升级模式失败,执行 rollout undo 回滚..."
  682. $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub || true
  683. $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=300s || true
  684. fi
  685. exit 1
  686. fi
  687. # Ingress
  688. case "$INGRESS_VARIANT" in
  689. standard) kube_apply "$base/ingress/ingress.yaml" ;;
  690. traefik) kube_apply "$base/ingress/traefik-ingressroute.yaml" ;;
  691. nodeport) log_info "Ingress variant=nodeport,跳过 ingress manifest" ;;
  692. esac
  693. log_success "所有 manifest 已应用"
  694. }
  695. ###############################################################################
  696. # Post-install
  697. ###############################################################################
  698. install_cch_cli_if_requested() {
  699. if [[ "$INSTALL_CCH" != true ]]; then return; fi
  700. local src="$SCRIPT_DIR/cch"
  701. local dst="/usr/local/bin/cch"
  702. if [[ ! -x "$src" ]]; then
  703. log_warning "$src 不存在或不可执行,跳过 cch 安装"
  704. return
  705. fi
  706. log_info "将 cch 软链到 $dst (需要 sudo)"
  707. if sudo ln -sf "$src" "$dst"; then
  708. log_success "cch 已安装: $(which cch)"
  709. else
  710. log_warning "cch 软链失败,可手动: sudo ln -sf $src $dst"
  711. fi
  712. }
  713. write_cch_config() {
  714. # 写一份配置供 cch 读取 (namespace / image / deploy-dir)
  715. local cfg_dir="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
  716. mkdir -p "$cfg_dir"
  717. # 使用 POSIX 可移植的时间戳格式,避免 GNU 专有的 `date -Iseconds`
  718. local ts
  719. ts=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
  720. cat > "$cfg_dir/config" <<EOF
  721. # Auto-generated by deploy-k8s.sh v${VERSION} at $ts
  722. CCH_NAMESPACE="$NAMESPACE"
  723. CCH_IMAGE="$APP_IMAGE"
  724. CCH_DEPLOY_DIR="$DEPLOY_DIR"
  725. CCH_RUNTIME="$RUNTIME"
  726. CCH_INGRESS_HOST="$INGRESS_HOST"
  727. CCH_INGRESS_VARIANT="$INGRESS_VARIANT"
  728. EOF
  729. log_info "cch 配置已写入: $cfg_dir/config"
  730. }
  731. get_node_ip() {
  732. local node_ip
  733. node_ip=$($KUBECTL get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || echo "")
  734. if [[ -z "$node_ip" ]]; then
  735. node_ip=$($KUBECTL get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || echo "")
  736. fi
  737. echo "${node_ip:-<your-node-ip>}"
  738. }
  739. print_success_message() {
  740. local url admin_note
  741. case "$INGRESS_VARIANT" in
  742. standard|traefik)
  743. url="http://$INGRESS_HOST"
  744. ;;
  745. nodeport)
  746. local np node_ip
  747. np=$($KUBECTL -n "$NAMESPACE" get svc claude-code-hub \
  748. -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "")
  749. node_ip=$(get_node_ip)
  750. if [[ -n "$np" ]]; then
  751. url="http://${node_ip}:${np}"
  752. else
  753. url="(kubectl port-forward svc/claude-code-hub -n $NAMESPACE 13500:80)"
  754. fi
  755. ;;
  756. esac
  757. echo ""
  758. echo -e "${GREEN}+================================================================+${NC}"
  759. echo -e "${GREEN}| |${NC}"
  760. if [[ "$UPDATE_MODE" == true ]]; then
  761. echo -e "${GREEN}| Claude Code Hub Upgrade Complete! |${NC}"
  762. else
  763. echo -e "${GREEN}| Claude Code Hub Deployment Complete! |${NC}"
  764. fi
  765. echo -e "${GREEN}| |${NC}"
  766. echo -e "${GREEN}+================================================================+${NC}"
  767. echo ""
  768. echo -e "${BLUE}Access URL:${NC} ${GREEN}$url${NC}"
  769. echo -e "${BLUE}Namespace:${NC} $NAMESPACE"
  770. echo -e "${BLUE}Image:${NC} $APP_IMAGE"
  771. echo ""
  772. if [[ "$UPDATE_MODE" == false ]]; then
  773. echo -e "${BLUE}Admin Token (保管好):${NC}"
  774. echo -e " ${YELLOW}${ADMIN_TOKEN}${NC}"
  775. echo ""
  776. fi
  777. echo -e "${BLUE}常用命令 (cch):${NC}"
  778. echo -e " cch status # 查看 Pod / HPA / 资源"
  779. echo -e " cch logs # 查看日志"
  780. echo -e " cch update # 拉新镜像 + 滚动更新"
  781. echo -e " cch backup # 备份 PostgreSQL"
  782. echo -e " cch info # 展示访问地址与 Admin Token"
  783. echo ""
  784. if [[ "$INSTALL_CCH" != true ]]; then
  785. echo -e "${YELLOW}提示:${NC} cch CLI 未安装到 PATH。你可以:"
  786. echo -e " bash scripts/deploy-k8s.sh --install-cch # 软链到 /usr/local/bin/cch"
  787. echo -e " 或直接: bash scripts/cch status"
  788. fi
  789. echo ""
  790. if [[ "$UPDATE_MODE" == false ]]; then
  791. echo -e "${RED}IMPORTANT:${NC} 请妥善保存 Admin Token,丢失后只能通过集群 Secret 找回"
  792. fi
  793. echo ""
  794. }
  795. ###############################################################################
  796. # Main
  797. ###############################################################################
  798. main() {
  799. parse_args "$@"
  800. print_header
  801. detect_os
  802. if [[ "$DRY_RENDER" == true ]]; then
  803. # 离线模式:不探测集群,用用户传入或默认值渲染 manifest
  804. log_info "Dry-render mode: 跳过集群探测"
  805. RUNTIME="${RUNTIME_OVERRIDE:-kubectl}"
  806. resolve_config
  807. STORAGE_CLASS="${STORAGE_CLASS_ARG:-local-path}"
  808. log_info "Storage class (dry-render): $STORAGE_CLASS"
  809. if [[ "$DISABLE_INGRESS" == true ]] || [[ -z "$INGRESS_HOST" ]]; then
  810. INGRESS_VARIANT="nodeport"
  811. APP_SERVICE_TYPE="NodePort"
  812. else
  813. INGRESS_VARIANT="standard"
  814. APP_SERVICE_TYPE="ClusterIP"
  815. INGRESS_CLASS="${INGRESS_CLASS_ARG:-nginx}"
  816. fi
  817. UPDATE_MODE=false
  818. render_manifests
  819. log_success "Dry render 完成,manifest 位于: $DEPLOY_DIR/k8s"
  820. log_info "可用于审阅: kubectl apply --dry-run=client -R -f $DEPLOY_DIR/k8s/"
  821. exit 0
  822. fi
  823. detect_runtime
  824. preflight_checks
  825. resolve_config
  826. detect_existing_deployment
  827. detect_storage_class
  828. detect_ingress_variant
  829. prepare_secret_values
  830. render_manifests
  831. apply_manifests
  832. install_cch_cli_if_requested
  833. write_cch_config
  834. print_success_message
  835. }
  836. main "$@"