deploy-k8s.sh 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948
  1. #!/usr/bin/env bash
  2. # Claude Code Hub - Kubernetes / k3s One-Click Deployment
  3. # 兼容 k3s 与标准 Kubernetes (EKS/GKE/AKS/self-hosted)
  4. # 详见: docs/k8s-deployment.md
  set -euo pipefail

  ###############################################################################
  # Terminal colors. They stay empty unless stdout is a TTY and NO_COLOR is
  # unset or empty, so piped or redirected output carries no escape sequences.
  ###############################################################################
  RED="" GREEN="" YELLOW="" BLUE="" CYAN="" NC=""
  if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
    RED=$'\033[0;31m'
    GREEN=$'\033[0;32m'
    YELLOW=$'\033[1;33m'
    BLUE=$'\033[0;34m'
    CYAN=$'\033[0;36m'
    NC=$'\033[0m'
  fi
  ###############################################################################
  # Script metadata: version string plus paths derived from where this script
  # lives (the manifests are expected under <parent of script dir>/deploy/k8s).
  ###############################################################################
  VERSION='1.0.0'
  SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
  REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
  SOURCE_MANIFEST_DIR="${REPO_ROOT}/deploy/k8s"
  ###############################################################################
  # Logging helpers. Each prints a colored level tag followed by the message in
  # $1; backslash escapes in the message are interpreted (echo -e). Only
  # log_error writes to stderr, the others write to stdout.
  ###############################################################################
  log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
  }

  log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
  }

  log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
  }

  log_error() {
    >&2 echo -e "${RED}[ERROR]${NC} $1"
  }
  # Returns 0 when "$1" resolves via `command -v`, non-zero otherwise.
  # Prints nothing.
  has_command() {
    command -v "$1" &>/dev/null
  }
  # Cross-platform base64 decode: stdin -> stdout.
  # Probes which decode flag the local `base64` accepts (-d first, then -D,
  # which older macOS/BSD builds require) and falls back to `openssl base64 -d`.
  # The probe reads /dev/null, so the caller's stdin is left untouched.
  b64d() {
    local decode_flag
    for decode_flag in -d -D; do
      if base64 "$decode_flag" </dev/null >/dev/null 2>&1; then
        base64 "$decode_flag"
        return
      fi
    done
    openssl base64 -d
  }
  ###############################################################################
  # Defaults applied when the matching CLI option is not supplied
  ###############################################################################
  DEFAULT_NAMESPACE='claude-code-hub'
  # The k8s deployment follows the image published from the main branch by
  # default; the :dev tag is only selected when -b dev is passed explicitly.
  DEFAULT_IMAGE='ghcr.io/ding113/claude-code-hub:latest'
  DEFAULT_REPLICAS='2'
  DEFAULT_HPA_MIN='2'
  DEFAULT_HPA_MAX='6'
  DEFAULT_PG_SIZE='50Gi'
  DEFAULT_REDIS_SIZE='10Gi'
  DEFAULT_TIMEZONE='Asia/Shanghai'
  ###############################################################################
  # CLI argument variables (filled in by parse_args)
  ###############################################################################
  # Options that take a value; an empty string means "not provided".
  NAMESPACE_ARG='' KUBE_CONTEXT_ARG=''
  IMAGE_ARG='' BRANCH_ARG='' TOKEN_ARG=''
  REPLICAS_ARG='' HPA_MIN_ARG='' HPA_MAX_ARG=''
  STORAGE_CLASS_ARG='' PG_SIZE_ARG='' REDIS_SIZE_ARG=''
  TIMEZONE_ARG=''
  INGRESS_HOST_ARG='' INGRESS_CLASS_ARG=''
  DEPLOY_DIR_ARG=''
  # Boolean switches; they hold the literal strings "true" / "false".
  DISABLE_INGRESS=false DISABLE_NETWORKPOLICY=false
  INSTALL_K3S=false INSTALL_CCH=false
  FORCE_NEW=false DRY_RENDER=false NON_INTERACTIVE=false
  ###############################################################################
  # Runtime state (resolved while the script runs)
  ###############################################################################
  NAMESPACE='' APP_IMAGE=''
  APP_REPLICAS='' APP_HPA_MIN='' APP_HPA_MAX=''
  STORAGE_CLASS='' PG_STORAGE_SIZE='' REDIS_STORAGE_SIZE=''
  TIMEZONE=''
  INGRESS_HOST='' INGRESS_CLASS=''
  INGRESS_VARIANT=''   # standard | traefik | nodeport
  APP_SERVICE_TYPE=''  # ClusterIP | NodePort
  DEPLOY_DIR=''
  RUNTIME=''           # k3s | kubectl
  # May be preset through the environment; defaults to empty.
  RUNTIME_OVERRIDE="${RUNTIME_OVERRIDE:-}"
  KUBECTL=''
  UPDATE_MODE=false
  ADMIN_TOKEN='' PG_PASSWORD='' REDIS_PASSWORD=''
  ###############################################################################
  # Help
  ###############################################################################
  # Prints the usage text to stdout. The heredoc is unquoted so the version,
  # the invocation name and the DEFAULT_* values are expanded into the text.
  show_help() {
    local prog="$0"
    cat <<EOF
Claude Code Hub - K8s/k3s One-Click Deployment Script v${VERSION}
Usage: ${prog} [OPTIONS]
Cluster:
-n, --namespace <ns> K8s namespace (default: ${DEFAULT_NAMESPACE})
--kube-context <ctx> kubectl context (default: current)
--install-k3s 本机无集群时自动安装 k3s (需要 sudo)
Application:
-i, --image <ref> 应用镜像 (default: ${DEFAULT_IMAGE})
-b, --branch <name> 分支捷径 默认 main→:latest / dev→:dev
-t, --admin-token <token> 自定义 ADMIN_TOKEN (default: auto-generated)
--replicas <n> Deployment 基线副本数 (default: ${DEFAULT_REPLICAS})
--hpa-min <n> HPA 最小副本 (default: ${DEFAULT_HPA_MIN})
--hpa-max <n> HPA 最大副本 (default: ${DEFAULT_HPA_MAX})
--timezone <tz> 容器时区 (default: ${DEFAULT_TIMEZONE})
Storage:
--storage-class <name> PVC storageClassName (default: 自动探测)
--pg-size <size> PostgreSQL PVC 大小 (default: ${DEFAULT_PG_SIZE})
--redis-size <size> Redis PVC 大小 (default: ${DEFAULT_REDIS_SIZE})
Ingress:
--ingress-host <host> 启用 Ingress 并绑定域名
--ingress-class <cls> Ingress className (default: 自动探测)
--disable-ingress 跳过 Ingress,使用 NodePort
--disable-networkpolicy 跳过 NetworkPolicy (Ingress Controller 不在标准 ns 时需要)
Deployment:
-d, --deploy-dir <path> manifest + cch 安装目录 (default: auto)
--force-new 删除已有 namespace 后强制重装 (会提示)
--install-cch 把 cch 软链接到 /usr/local/bin/cch (需 sudo)
--dry-render 只渲染 manifest 不 apply (用于审阅)
Misc:
-y, --yes 非交互模式 (用默认值)
-h, --help 显示帮助
--version 显示版本号
Examples:
# 最简,交互式
${prog}
# 非交互,纯默认
${prog} -y
# 部署 dev 分支,自定义命名空间与域名
${prog} -b dev -n my-hub --ingress-host hub.example.com -y
# 标准 K8s,指定 storage class
${prog} --storage-class standard -y
# 仅渲染 manifest 不应用 (用于离线审阅)
${prog} --dry-render --deploy-dir /tmp/cch-k8s -y
For more information: https://github.com/ding113/claude-code-hub
EOF
  }
  ###############################################################################
  # Arg parsing
  ###############################################################################
  # Parses the command line into the *_ARG / flag globals.
  # Arguments: the script's positional parameters ("$@").
  # Exits 0 after --help / --version; exits 1 on an unknown option or when an
  # option that needs a value is the last word on the command line.
  parse_args() {
    while [[ $# -gt 0 ]]; do
      # Value-taking options: make sure the value is really there. Without this
      # guard "$2" aborts under `set -u` with an "unbound variable" message that
      # says nothing about which option was incomplete.
      case "$1" in
        -n|--namespace|--kube-context|-i|--image|-b|--branch|-t|--admin-token|\
        --replicas|--hpa-min|--hpa-max|--timezone|--storage-class|--pg-size|\
        --redis-size|--ingress-host|--ingress-class|-d|--deploy-dir)
          if [[ $# -lt 2 ]]; then
            log_error "Option $1 requires a value"
            exit 1
          fi
          ;;
      esac

      case "$1" in
        -n|--namespace) NAMESPACE_ARG="$2"; shift 2 ;;
        --kube-context) KUBE_CONTEXT_ARG="$2"; shift 2 ;;
        --install-k3s) INSTALL_K3S=true; shift ;;
        -i|--image) IMAGE_ARG="$2"; shift 2 ;;
        -b|--branch) BRANCH_ARG="$2"; shift 2 ;;
        -t|--admin-token) TOKEN_ARG="$2"; shift 2 ;;
        --replicas) REPLICAS_ARG="$2"; shift 2 ;;
        --hpa-min) HPA_MIN_ARG="$2"; shift 2 ;;
        --hpa-max) HPA_MAX_ARG="$2"; shift 2 ;;
        --timezone) TIMEZONE_ARG="$2"; shift 2 ;;
        --storage-class) STORAGE_CLASS_ARG="$2"; shift 2 ;;
        --pg-size) PG_SIZE_ARG="$2"; shift 2 ;;
        --redis-size) REDIS_SIZE_ARG="$2"; shift 2 ;;
        --ingress-host) INGRESS_HOST_ARG="$2"; shift 2 ;;
        --ingress-class) INGRESS_CLASS_ARG="$2"; shift 2 ;;
        --disable-ingress) DISABLE_INGRESS=true; shift ;;
        --disable-networkpolicy) DISABLE_NETWORKPOLICY=true; shift ;;
        -d|--deploy-dir) DEPLOY_DIR_ARG="$2"; shift 2 ;;
        --force-new) FORCE_NEW=true; shift ;;
        --install-cch) INSTALL_CCH=true; shift ;;
        --dry-render) DRY_RENDER=true; shift ;;
        -y|--yes) NON_INTERACTIVE=true; shift ;;
        -h|--help) show_help; exit 0 ;;
        --version) echo "deploy-k8s.sh v${VERSION}"; exit 0 ;;
        *) log_error "Unknown argument: $1"; show_help; exit 1 ;;
      esac
    done
  }
  ###############################################################################
  # Banner
  ###############################################################################
  # Prints the boxed title banner (with the script version) to stdout, wrapped
  # in the BLUE / NC color codes.
  print_header() {
    local border="+=================================================================+"
    local spacer="| |"
    echo -e "${BLUE}"
    printf '%s\n' \
      "$border" \
      "$spacer" \
      "| Claude Code Hub - K8s / k3s One-Click Deployment |" \
      "| Version ${VERSION} |" \
      "$spacer" \
      "$border"
    echo -e "${NC}"
  }
  ###############################################################################
  # OS & runtime detection
  ###############################################################################
  # Verifies the host OS is Linux or macOS (exits 1 otherwise) and resolves
  # DEPLOY_DIR: the -d/--deploy-dir value if given, else /opt/claude-code-hub
  # when running as root, else <XDG config home>/cch.
  detect_os() {
    local platform
    if [[ "$OSTYPE" == linux* ]]; then
      platform="linux"
    elif [[ "$OSTYPE" == darwin* ]]; then
      platform="macos"
    else
      log_error "Unsupported OS: $OSTYPE"
      exit 1
    fi
    log_info "Detected OS: $platform"

    # The default deploy directory depends on the privileges we run with.
    if [[ -n "$DEPLOY_DIR_ARG" ]]; then
      DEPLOY_DIR="$DEPLOY_DIR_ARG"
    elif [[ $EUID -eq 0 ]]; then
      DEPLOY_DIR="/opt/claude-code-hub"
    else
      DEPLOY_DIR="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
    fi
    log_info "Deploy directory: $DEPLOY_DIR"
  }
  # Decides how to talk to the cluster and records the result in RUNTIME and
  # KUBECTL (KUBECTL may hold several words, e.g. "sudo k3s kubectl").
  # Order of precedence: RUNTIME_OVERRIDE, --kube-context, a kubectl that can
  # reach a cluster, a local k3s binary, then (with --install-k3s) a fresh k3s
  # install. Exits 1 when none of these applies.
  detect_runtime() {
    # 1) Explicit override.
    if [[ -n "$RUNTIME_OVERRIDE" ]]; then
      RUNTIME="$RUNTIME_OVERRIDE"
      KUBECTL="kubectl${KUBE_CONTEXT_ARG:+ --context=$KUBE_CONTEXT_ARG}"
      log_info "Runtime: $RUNTIME (override)"
      return
    fi

    # 2) An explicit kube-context means plain kubectl is used directly.
    if [[ -n "$KUBE_CONTEXT_ARG" ]]; then
      command -v kubectl >/dev/null 2>&1 || {
        log_error "kubectl 不可用,无法使用 --kube-context"
        exit 1
      }
      RUNTIME="kubectl"
      KUBECTL="kubectl --context=$KUBE_CONTEXT_ARG"
      log_info "Runtime: kubectl (context=$KUBE_CONTEXT_ARG)"
      return
    fi

    # 3) Standard kubectl takes priority when it can reach a cluster.
    if command -v kubectl >/dev/null 2>&1 && kubectl cluster-info >/dev/null 2>&1; then
      # Treat the cluster as k3s when any node's kubelet version mentions it.
      local node_versions
      node_versions="$(kubectl get nodes -o jsonpath='{.items[*].status.nodeInfo.kubeletVersion}' 2>/dev/null || echo "")"
      KUBECTL="kubectl"
      case "$node_versions" in
        *k3s*)
          RUNTIME="k3s"
          log_info "Runtime: k3s (via kubectl)"
          ;;
        *)
          RUNTIME="kubectl"
          log_info "Runtime: standard Kubernetes (via kubectl)"
          ;;
      esac
      return
    fi

    # 4) No usable kubectl, but a k3s binary is installed locally.
    if command -v k3s >/dev/null 2>&1; then
      RUNTIME="k3s"
      KUBECTL="sudo k3s kubectl"
      log_info "Runtime: k3s (via \`k3s kubectl\`)"
      return
    fi

    # 5) Nothing found: install k3s if the user asked for it.
    if [[ "$INSTALL_K3S" == true ]]; then
      install_k3s
      return
    fi

    log_error "未检测到可用的 K8s 集群 (缺少 kubectl 或 k3s)。"
    log_info "可选方案:"
    log_info " 1. 安装 kubectl 并确保 ~/.kube/config 指向可用集群"
    log_info " 2. 传入 --install-k3s 让本脚本为你安装 k3s (单机场景)"
    exit 1
  }
  # Installs k3s with the upstream installer (https://get.k3s.io piped to sh),
  # then sets RUNTIME / KUBECTL and waits for the API to answer.
  # In interactive mode the user must confirm with y/Y first. Exits 1 when the
  # user declines, the installer fails, or the API never becomes reachable.
  install_k3s() {
    if [[ "$NON_INTERACTIVE" != true ]]; then
      echo ""
      log_warning "即将在本机安装 k3s (官方脚本,curl | sh),这会修改系统服务。"
      log_warning "生产环境请先审阅 https://get.k3s.io 返回的脚本内容后再执行。"
      read -p "继续?(y/N) " -n 1 -r confirm
      echo ""
      case "$confirm" in
        [Yy]) ;;
        *)
          log_error "已取消"
          exit 1
          ;;
      esac
    fi

    log_info "Installing k3s via official installer..."
    curl -fsSL https://get.k3s.io | sh - || {
      log_error "k3s 安装失败"
      exit 1
    }

    # Let kubectl pick up the k3s kubeconfig when it is readable.
    if [[ -r /etc/rancher/k3s/k3s.yaml ]]; then
      export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
    fi

    KUBECTL="sudo k3s kubectl"
    if command -v kubectl >/dev/null 2>&1; then
      KUBECTL="kubectl"
    fi
    RUNTIME="k3s"
    log_success "k3s installed"

    # Poll every 2 seconds until the API answers; give up after 30 retries.
    local attempts=0
    while ! $KUBECTL get nodes >/dev/null 2>&1; do
      if [[ $attempts -ge 30 ]]; then
        log_error "k3s API 长时间不可达"
        exit 1
      fi
      sleep 2
      attempts=$((attempts + 1))
    done
  }
  ###############################################################################
  # Preflight
  ###############################################################################
  # Confirms the cluster is reachable through $KUBECTL (exits 1 otherwise),
  # logs the node count, and warns when the current user may not be allowed to
  # create namespaces.
  preflight_checks() {
    if ! $KUBECTL get nodes &>/dev/null; then
      log_error "kubectl 无法连接集群。请检查 kubeconfig / context。"
      exit 1
    fi
    local node_count
    node_count=$($KUBECTL get nodes --no-headers 2>/dev/null | wc -l)
    # BSD wc (macOS) left-pads the count with blanks; strip all whitespace so
    # the log line reads the same on every platform.
    node_count="${node_count//[[:space:]]/}"
    log_info "Cluster reachable. Node count: $node_count"
    # Basic permission probe (ability to create a namespace).
    if ! $KUBECTL auth can-i create namespace &>/dev/null; then
      log_warning "当前用户可能无 create namespace 权限,如部署失败请用 cluster-admin 重试"
    fi
  }
  ###############################################################################
  # Application configuration merge (CLI value > built-in default)
  ###############################################################################

  # Returns 0 when "$1" is a decimal integer >= 1. The explicit 10# base keeps
  # zero-padded input such as "08" from being parsed as (invalid) octal.
  _cch_is_positive_int() {
    [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 >= 1 ))
  }

  # Resolves the effective settings from the *_ARG values and the DEFAULT_*
  # fallbacks into NAMESPACE, APP_IMAGE, APP_REPLICAS, APP_HPA_MIN/MAX,
  # PG_STORAGE_SIZE, REDIS_STORAGE_SIZE, TIMEZONE and INGRESS_HOST, then logs
  # them. Numeric settings are validated and normalized to plain decimal.
  # Exits 1 on an unknown branch name, a non-positive number, or hpa-min above
  # hpa-max.
  resolve_config() {
    NAMESPACE="${NAMESPACE_ARG:-$DEFAULT_NAMESPACE}"
    # Branch shortcut: when not given, the main/latest default stays in effect.
    if [[ -n "$BRANCH_ARG" ]]; then
      case "$BRANCH_ARG" in
        main|master) APP_IMAGE="ghcr.io/ding113/claude-code-hub:latest" ;;
        dev) APP_IMAGE="ghcr.io/ding113/claude-code-hub:dev" ;;
        *) log_error "Unknown branch: $BRANCH_ARG (expected: main|dev)"; exit 1 ;;
      esac
    fi
    # An explicit --image wins over the branch shortcut.
    APP_IMAGE="${IMAGE_ARG:-${APP_IMAGE:-$DEFAULT_IMAGE}}"
    APP_REPLICAS="${REPLICAS_ARG:-$DEFAULT_REPLICAS}"
    APP_HPA_MIN="${HPA_MIN_ARG:-$DEFAULT_HPA_MIN}"
    APP_HPA_MAX="${HPA_MAX_ARG:-$DEFAULT_HPA_MAX}"
    PG_STORAGE_SIZE="${PG_SIZE_ARG:-$DEFAULT_PG_SIZE}"
    REDIS_STORAGE_SIZE="${REDIS_SIZE_ARG:-$DEFAULT_REDIS_SIZE}"
    TIMEZONE="${TIMEZONE_ARG:-$DEFAULT_TIMEZONE}"
    INGRESS_HOST="${INGRESS_HOST_ARG:-}"

    # Validation
    if ! _cch_is_positive_int "$APP_REPLICAS"; then
      log_error "--replicas 必须是正整数: $APP_REPLICAS"; exit 1
    fi
    if ! _cch_is_positive_int "$APP_HPA_MIN"; then
      log_error "--hpa-min 必须是正整数: $APP_HPA_MIN"; exit 1
    fi
    if ! _cch_is_positive_int "$APP_HPA_MAX"; then
      log_error "--hpa-max 必须是正整数: $APP_HPA_MAX"; exit 1
    fi
    # Drop leading zeros so later comparisons and the rendered manifests see a
    # canonical decimal number ("08" -> "8").
    APP_REPLICAS=$((10#$APP_REPLICAS))
    APP_HPA_MIN=$((10#$APP_HPA_MIN))
    APP_HPA_MAX=$((10#$APP_HPA_MAX))
    if [[ "$APP_HPA_MIN" -gt "$APP_HPA_MAX" ]]; then
      log_error "--hpa-min ($APP_HPA_MIN) 不能大于 --hpa-max ($APP_HPA_MAX)"; exit 1
    fi

    log_info "Namespace: $NAMESPACE"
    log_info "App image: $APP_IMAGE"
    log_info "Replicas: $APP_REPLICAS (HPA: $APP_HPA_MIN-$APP_HPA_MAX)"
    log_info "PG storage: $PG_STORAGE_SIZE"
    log_info "Redis storage: $REDIS_STORAGE_SIZE"
    log_info "Timezone: $TIMEZONE"
    if [[ "$APP_REPLICAS" -gt 1 ]]; then
      log_info "AUTO_MIGRATE 由 PostgreSQL advisory lock 串行化,首次多副本启动会排队等待迁移完成"
    fi
  }
  # Chooses the StorageClass written into the PVC templates and stores it in
  # STORAGE_CLASS. Order: --storage-class, k3s' local-path (when present), the
  # class annotated as cluster default (GA annotation first, then the legacy
  # beta annotation). When nothing is found STORAGE_CLASS stays empty and a
  # warning is logged.
  detect_storage_class() {
    if [[ -n "$STORAGE_CLASS_ARG" ]]; then
      STORAGE_CLASS="$STORAGE_CLASS_ARG"
      log_info "Storage class (user): $STORAGE_CLASS"
      return
    fi

    # k3s -> local-path
    if [[ "$RUNTIME" == "k3s" ]] && $KUBECTL get sc local-path &>/dev/null; then
      STORAGE_CLASS="local-path"
      log_info "Storage class (k3s default): local-path"
      return
    fi

    # Look for the class flagged as default; the dots of each annotation prefix
    # are pre-escaped for the jsonpath filter.
    local default_sc="" annotation query
    for annotation in 'storageclass\.kubernetes\.io' 'storageclass\.beta\.kubernetes\.io'; do
      query='{range .items[?(@.metadata.annotations.'"$annotation"'/is-default-class=="true")]}{.metadata.name}{"\n"}{end}'
      if default_sc=$($KUBECTL get sc -o jsonpath="$query" 2>/dev/null); then
        # Several classes may be flagged; keep only the first line.
        default_sc="${default_sc%%$'\n'*}"
      else
        default_sc=""
      fi
      if [[ -n "$default_sc" ]]; then
        break
      fi
    done

    if [[ -n "$default_sc" ]]; then
      STORAGE_CLASS="$default_sc"
      log_info "Storage class (cluster default): $default_sc"
      return
    fi

    # No default found: leave it empty so the cluster decides.
    STORAGE_CLASS=""
    log_warning "未检测到默认 StorageClass。PVC 将使用集群默认设置,可能无法自动绑定卷"
    log_info "如有需要,请传入 --storage-class <name> 指定"
  }
  # Decides how the app is exposed and sets INGRESS_VARIANT
  # (nodeport | traefik | standard), APP_SERVICE_TYPE (NodePort | ClusterIP)
  # and, for the standard variant, INGRESS_CLASS.
  detect_ingress_variant() {
    if [[ "$DISABLE_INGRESS" == true ]]; then
      INGRESS_VARIANT="nodeport"
      APP_SERVICE_TYPE="NodePort"
      log_info "Ingress: disabled (Service=NodePort)"
      return
    fi

    if [[ -z "$INGRESS_HOST" ]]; then
      INGRESS_VARIANT="nodeport"
      APP_SERVICE_TYPE="NodePort"
      log_warning "未指定 --ingress-host,将使用 NodePort 暴露"
      return
    fi

    # Traefik is selected when its IngressRoute CRD is installed.
    if $KUBECTL get crd ingressroutes.traefik.io >/dev/null 2>&1; then
      INGRESS_VARIANT="traefik"
      APP_SERVICE_TYPE="ClusterIP"
      log_info "Ingress: Traefik IngressRoute (host=$INGRESS_HOST)"
      return
    fi

    # Standard networking.k8s.io Ingress.
    local networking_resources
    networking_resources="$($KUBECTL api-resources --api-group=networking.k8s.io -o name 2>/dev/null || echo "")"
    case "$networking_resources" in
      *"ingresses.networking.k8s.io"*)
        INGRESS_CLASS="${INGRESS_CLASS_ARG:-}"
        if [[ -z "$INGRESS_CLASS" ]]; then
          # No class given: take the first IngressClass, else fall back to nginx.
          local detected_class
          detected_class=$($KUBECTL get ingressclass -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
          if [[ -z "$detected_class" ]]; then
            INGRESS_CLASS="nginx"
            log_warning "集群无 IngressClass,默认填 nginx。请通过 --ingress-class 显式指定"
          else
            INGRESS_CLASS="$detected_class"
          fi
        fi
        INGRESS_VARIANT="standard"
        APP_SERVICE_TYPE="ClusterIP"
        log_info "Ingress: standard (className=$INGRESS_CLASS, host=$INGRESS_HOST)"
        return
        ;;
    esac

    # Neither flavour is available.
    INGRESS_VARIANT="nodeport"
    APP_SERVICE_TYPE="NodePort"
    log_warning "集群不支持 Ingress,回落到 NodePort"
  }
  ###############################################################################
  # Existing deployment detection
  ###############################################################################
  # Implements --force-new: deletes namespace $NAMESPACE (180s timeout) so the
  # run continues as a fresh install. Returns without deleting when the
  # namespace does not exist. In interactive mode the user must type "yes";
  # exits 1 when they decline or when the deletion fails.
  force_new_reset_existing_namespace() {
    if ! $KUBECTL get namespace "$NAMESPACE" >/dev/null 2>&1; then
      log_info "--force-new 已启用,但 namespace=$NAMESPACE 当前不存在,将按新装模式继续"
      return
    fi

    if [[ "$NON_INTERACTIVE" == true ]]; then
      log_warning "--force-new 已启用: 删除 namespace=$NAMESPACE 并重建所有资源"
    else
      echo ""
      log_warning "--force-new 将删除 namespace=$NAMESPACE 并重建所有资源"
      log_warning "这会清空 Deployment / StatefulSet / Secret / PVC,现有数据不会保留"
      read -p "输入 yes 继续: " confirm
      echo ""
      if [[ "$confirm" != "yes" ]]; then
        log_error "已取消"
        exit 1
      fi
    fi

    log_warning "PV 是否真正释放取决于 StorageClass reclaimPolicy; 若为 Retain,旧 PV 会进入 Released,需手动清理"
    log_info "删除旧 namespace: $NAMESPACE"
    $KUBECTL delete namespace "$NAMESPACE" --timeout=180s >/dev/null || {
      log_error "删除 namespace 失败: $NAMESPACE"
      exit 1
    }
    log_success "旧部署已清理,将按新装模式继续"
  }
  # Sets UPDATE_MODE. With --force-new the namespace is reset first and the run
  # is always a fresh install. Otherwise upgrade mode is chosen when the
  # namespace exists and holds at least one known resource (app Deployment,
  # postgres/redis StatefulSet, or the credentials Secret).
  detect_existing_deployment() {
    if [[ "$FORCE_NEW" == true ]]; then
      force_new_reset_existing_namespace
      UPDATE_MODE=false
      return
    fi

    local found=false marker
    if $KUBECTL get namespace "$NAMESPACE" >/dev/null 2>&1; then
      # Each marker is "<kind>/<name>"; the first one that exists decides.
      for marker in \
        deployment/claude-code-hub \
        statefulset/postgres \
        statefulset/redis \
        secret/claude-code-hub-secrets; do
        if $KUBECTL -n "$NAMESPACE" get "${marker%%/*}" "${marker#*/}" >/dev/null 2>&1; then
          found=true
          break
        fi
      done
    fi

    if [[ "$found" == true ]]; then
      UPDATE_MODE=true
      log_info "检测到已有安装痕迹(namespace=$NAMESPACE),进入升级模式"
    else
      UPDATE_MODE=false
      log_info "未检测到已有部署,进入新装模式"
    fi
  }
  ###############################################################################
  # Secret generation
  ###############################################################################
  # Prints a random string of exactly $1 characters (default 32) to stdout
  # without a trailing newline. Uses `openssl rand` when openssl is installed
  # and /dev/urandom otherwise. Returns 1 when the random source fails.
  generate_random() {
    local length="${1:-32}"
    local buffer=""
    local piece=""
    local use_openssl=false
    if has_command openssl; then
      use_openssl=true
    fi

    # Keep appending chunks until there is enough material to cut from.
    while [[ "${#buffer}" -lt "$length" ]]; do
      if [[ "$use_openssl" == true ]]; then
        if ! piece=$(openssl rand -base64 48 | tr -d '=/+'); then
          log_error "使用 openssl 生成随机串失败"
          return 1
        fi
      else
        if ! piece=$(LC_ALL=C dd if=/dev/urandom bs=256 count=1 status=none | tr -dc 'A-Za-z0-9'); then
          log_error "从 /dev/urandom 生成随机串失败"
          return 1
        fi
      fi
      buffer+="$piece"
    done
    printf '%s' "${buffer:0:length}"
  }
  # Prints the decoded value of key "$1" from the existing
  # claude-code-hub-secrets Secret in $NAMESPACE to stdout.
  _cch_read_secret_key() {
    $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
      -o jsonpath="{.data.$1}" | b64d
  }

  # Fills PG_PASSWORD, REDIS_PASSWORD and ADMIN_TOKEN.
  # Upgrade mode with an existing Secret: reuses the stored values (a CLI
  # --admin-token still overrides the stored token) and exits 1 if any of them
  # comes back empty, so a damaged Secret is never overwritten with blank
  # credentials. Otherwise: generates fresh random values, using the CLI token
  # when one was given.
  prepare_secret_values() {
    if [[ "$UPDATE_MODE" == true ]] && \
      $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets &>/dev/null; then
      log_info "升级模式:复用已有 Secret 中的密码"
      PG_PASSWORD=$(_cch_read_secret_key pg-password)
      REDIS_PASSWORD=$(_cch_read_secret_key redis-password)
      if [[ -n "$TOKEN_ARG" ]]; then
        ADMIN_TOKEN="$TOKEN_ARG"
        log_info "使用 CLI 传入的 admin-token 覆盖"
      else
        ADMIN_TOKEN=$(_cch_read_secret_key admin-token)
      fi

      # A missing key decodes to an empty string; applying that would replace
      # the live credentials (and the DSN built from them) with blanks.
      local missing=""
      if [[ -z "$PG_PASSWORD" ]]; then missing+=" pg-password"; fi
      if [[ -z "$REDIS_PASSWORD" ]]; then missing+=" redis-password"; fi
      if [[ -z "$ADMIN_TOKEN" ]]; then missing+=" admin-token"; fi
      if [[ -n "$missing" ]]; then
        log_error "已有 Secret claude-code-hub-secrets 中以下字段缺失或为空:${missing}"
        log_error "拒绝用空凭据覆盖现有 Secret;请修复该 Secret 或使用 --force-new 重装"
        exit 1
      fi
    else
      log_info "生成随机凭据..."
      PG_PASSWORD=$(generate_random 40)
      REDIS_PASSWORD=$(generate_random 40)
      ADMIN_TOKEN="${TOKEN_ARG:-$(generate_random 48)}"
    fi
  }
  # Creates or updates the claude-code-hub-secrets Secret in $NAMESPACE with
  # the keys pg-password, redis-password, admin-token, dsn and redis-url.
  # The values never appear on a command line (ps and /proc/<pid>/cmdline
  # would expose --from-literal=...): each one is written to a mode-600 file
  # in a private temp dir and passed with --from-file; the rendered YAML is
  # then piped into `kubectl apply`.
  apply_secret() {
    local workdir idx
    local pg_dsn="postgresql://claude_code_hub:${PG_PASSWORD}@postgres:5432/claude_code_hub"
    local cache_url="redis://:${REDIS_PASSWORD}@redis:6379/0"

    workdir=$(mktemp -d)
    chmod 700 "$workdir"
    # Remove the temp dir even if the script exits part-way through.
    trap 'rm -rf "$workdir"' EXIT

    # Parallel lists: Secret key (also the file name) and its value.
    local keys=(pg-password redis-password admin-token dsn redis-url)
    local values=("$PG_PASSWORD" "$REDIS_PASSWORD" "$ADMIN_TOKEN" "$pg_dsn" "$cache_url")
    local from_file_args=()
    for idx in "${!keys[@]}"; do
      printf '%s' "${values[idx]}" > "$workdir/${keys[idx]}"
      chmod 600 "$workdir/${keys[idx]}"
      from_file_args+=("--from-file=${keys[idx]}=$workdir/${keys[idx]}")
    done

    $KUBECTL -n "$NAMESPACE" create secret generic claude-code-hub-secrets \
      "${from_file_args[@]}" \
      --dry-run=client -o yaml | $KUBECTL apply -f -

    rm -rf "$workdir"
    trap - EXIT
    log_success "Secret claude-code-hub-secrets applied"
  }
  ###############################################################################
  # Manifest rendering
  ###############################################################################
  # Copies the manifest templates from SOURCE_MANIFEST_DIR to $DEPLOY_DIR/k8s
  # and substitutes the {{PLACEHOLDER}} tokens in every *.yaml file in place.
  # When STORAGE_CLASS is empty, every line containing {{STORAGE_CLASS}} is
  # removed instead of being rendered with an empty value.
  # Exits 1 when the template directory or python3 is missing.
  render_manifests() {
    if [[ ! -d "$SOURCE_MANIFEST_DIR" ]]; then
      log_error "未找到源 manifest 目录: $SOURCE_MANIFEST_DIR"
      exit 1
    fi
    # Check up front so a missing interpreter gives a clear message rather
    # than a "command not found" from inside the rendering loop.
    if ! command -v python3 >/dev/null 2>&1; then
      log_error "渲染 manifest 需要 python3,但未在 PATH 中找到"
      exit 1
    fi

    local target="$DEPLOY_DIR/k8s"
    mkdir -p "$target"
    log_info "复制 manifest 模板 -> $target"
    # `src/.` plus a trailing slash on the target works with both BSD (macOS)
    # and GNU cp; `cp -RT` is GNU-only and fails on BSD cp.
    cp -R "$SOURCE_MANIFEST_DIR/." "$target/"
    log_info "渲染占位符..."

    # Empty storage class: drop the whole storageClassName line.
    local sc_line_action
    if [[ -z "$STORAGE_CLASS" ]]; then
      sc_line_action="delete"
    else
      sc_line_action="replace"
    fi

    # Substitute placeholders in every yaml file.
    local f
    while IFS= read -r -d '' f; do
      # Skip README files.
      [[ "$f" == *"README.md" ]] && continue
      # Rendering is done in Python to avoid sed quoting / special-character
      # pitfalls in the substituted values.
      python3 - "$f" "$NAMESPACE" "$APP_IMAGE" "$APP_REPLICAS" \
        "$APP_HPA_MIN" "$APP_HPA_MAX" "$STORAGE_CLASS" \
        "$PG_STORAGE_SIZE" "$REDIS_STORAGE_SIZE" \
        "$TIMEZONE" "$INGRESS_HOST" "$INGRESS_CLASS" \
        "$APP_SERVICE_TYPE" "$sc_line_action" <<'PY'
import sys

path = sys.argv[1]
(ns, image, replicas, hpa_min, hpa_max, sc, pg_size, redis_size,
 tz, ing_host, ing_class, svc_type, sc_action) = sys.argv[2:]

# Manifests are UTF-8 text (they may carry non-ASCII comments); read and write
# them with an explicit encoding so the result does not depend on the locale.
with open(path, encoding="utf-8") as f:
    text = f.read()

if sc_action == "delete":
    # Drop every line that carries the storage-class placeholder.
    kept = [line for line in text.splitlines() if "{{STORAGE_CLASS}}" not in line]
    text = "\n".join(kept) + ("\n" if text.endswith("\n") else "")

repl = {
    "{{NAMESPACE}}": ns,
    "{{APP_IMAGE}}": image,
    "{{APP_REPLICAS}}": replicas,
    "{{APP_HPA_MIN}}": hpa_min,
    "{{APP_HPA_MAX}}": hpa_max,
    "{{STORAGE_CLASS}}": sc,
    "{{PG_STORAGE_SIZE}}": pg_size,
    "{{REDIS_STORAGE_SIZE}}": redis_size,
    "{{TIMEZONE}}": tz,
    "{{INGRESS_HOST}}": ing_host,
    "{{INGRESS_CLASS}}": ing_class,
    "{{APP_SERVICE_TYPE}}": svc_type,
}
for k, v in repl.items():
    text = text.replace(k, v)

with open(path, "w", encoding="utf-8") as f:
    f.write(text)
PY
    done < <(find "$target" -type f -name '*.yaml' -print0)
    log_success "Manifest 渲染完成: $target"
  }
  ###############################################################################
  # Apply
  ###############################################################################
  # Runs `$KUBECTL apply -f` on the manifest file in $1. A path that is not a
  # regular file is skipped with a warning (status 0); otherwise the status of
  # the apply command is returned.
  kube_apply() {
    local manifest="$1"
    if [[ -f "$manifest" ]]; then
      $KUBECTL apply -f "$manifest"
    else
      log_warning "跳过 (文件不存在): $manifest"
    fi
  }
  # Applies the rendered manifests under $DEPLOY_DIR/k8s in dependency order:
  # namespace, Secret, NetworkPolicies (optional), Postgres and Redis (waiting
  # for both StatefulSets), the app (waiting for its rollout), then the
  # ingress object for the selected variant. Exits 1 when a StatefulSet or the
  # app rollout does not become ready; in upgrade mode a failed app rollout is
  # rolled back with `rollout undo` before exiting.
  apply_manifests() {
    local root="$DEPLOY_DIR/k8s"
    local item
    log_info "应用 manifest (按依赖顺序)..."
    kube_apply "$root/namespace.yaml"
    apply_secret

    # NetworkPolicy is optional and its failure is not fatal (the cluster may
    # not enforce NetworkPolicy at all).
    # Note: the stock app/networkpolicy.yaml only admits Ingress Controllers
    # from namespaces labelled kube-system / ingress-nginx / traefik. If yours
    # lives elsewhere, pass --disable-networkpolicy and supply a custom policy,
    # or edit deploy/k8s/app/networkpolicy.yaml.
    if [[ "$DISABLE_NETWORKPOLICY" != true ]]; then
      for item in postgres redis; do
        kube_apply "$root/$item/networkpolicy.yaml" || log_warning "$item networkpolicy 应用失败,忽略"
      done
      if [[ "$INGRESS_VARIANT" != "nodeport" ]]; then
        kube_apply "$root/app/networkpolicy.yaml" || log_warning "app networkpolicy 应用失败,忽略"
      else
        log_warning "NodePort 模式下跳过 app NetworkPolicy,避免阻断外部访问"
      fi
    else
      log_info "已跳过 NetworkPolicy (--disable-networkpolicy)"
    fi

    # Database and cache.
    for item in postgres/service postgres/statefulset redis/service redis/statefulset; do
      kube_apply "$root/$item.yaml"
    done

    log_info "等待 Postgres / Redis 就绪 (最长 5 分钟)..."
    $KUBECTL -n "$NAMESPACE" rollout status statefulset/postgres --timeout=300s || {
      log_error "Postgres StatefulSet 未就绪,请检查 PVC / StorageClass / 节点资源"
      log_info " kubectl -n $NAMESPACE describe pod -l app=postgres"
      exit 1
    }
    $KUBECTL -n "$NAMESPACE" rollout status statefulset/redis --timeout=300s || {
      log_error "Redis StatefulSet 未就绪,请检查 PVC / StorageClass"
      exit 1
    }

    # The standalone migration Job was removed (deploy/k8s/jobs/ no longer
    # exists):
    #   1. On startup instrumentation.ts runs the drizzle migrations itself
    #      (AUTO_MIGRATE=true is the default).
    #   2. The Job needed the devDependency drizzle-kit, which is not available
    #      in the standalone runtime image.
    #   3. It avoids a migration race between the Job and the app's
    #      AUTO_MIGRATE.

    # Application.
    for item in deployment service hpa pdb; do
      kube_apply "$root/app/$item.yaml"
    done

    log_info "等待 App 滚动更新完成 (最长 10 分钟)..."
    $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=600s || {
      log_error "App Deployment 滚动未能在 10 分钟内完成"
      log_info "诊断建议:"
      log_info " kubectl -n $NAMESPACE describe deployment claude-code-hub"
      log_info " kubectl -n $NAMESPACE logs deploy/claude-code-hub --tail=100"
      if [[ "$UPDATE_MODE" == true ]]; then
        log_warning "升级模式失败,执行 rollout undo 回滚..."
        $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub || true
        $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=300s || true
      fi
      exit 1
    }

    # Ingress.
    if [[ "$INGRESS_VARIANT" == "standard" ]]; then
      kube_apply "$root/ingress/ingress.yaml"
    elif [[ "$INGRESS_VARIANT" == "traefik" ]]; then
      kube_apply "$root/ingress/traefik-ingressroute.yaml"
    elif [[ "$INGRESS_VARIANT" == "nodeport" ]]; then
      log_info "Ingress variant=nodeport,跳过 ingress manifest"
    fi
    log_success "所有 manifest 已应用"
  }
  ###############################################################################
  # Post-install
  ###############################################################################
  # With --install-cch, symlinks $SCRIPT_DIR/cch to /usr/local/bin/cch via
  # sudo. Does nothing when the flag is off; a missing or non-executable source
  # file and a failed symlink only produce warnings.
  install_cch_cli_if_requested() {
    if [[ "$INSTALL_CCH" != true ]]; then return; fi
    local src="$SCRIPT_DIR/cch"
    local dst="/usr/local/bin/cch"
    if [[ ! -x "$src" ]]; then
      log_warning "$src 不存在或不可执行,跳过 cch 安装"
      return
    fi
    log_info "将 cch 软链到 $dst (需要 sudo)"
    if sudo ln -sf "$src" "$dst"; then
      # Report where cch resolves on PATH. `command -v` is a builtin, unlike
      # `which`; if /usr/local/bin is not on PATH, fall back to the link path
      # so the message never ends in an empty location.
      local resolved
      resolved="$(command -v cch 2>/dev/null || true)"
      log_success "cch 已安装: ${resolved:-$dst}"
    else
      log_warning "cch 软链失败,可手动: sudo ln -sf $src $dst"
    fi
  }
  #######################################
  # Write the settings file read by the cch helper
  # (namespace / image / deploy dir / runtime / ingress details).
  # Globals (read):
  #   XDG_CONFIG_HOME, HOME, VERSION, NAMESPACE, APP_IMAGE, DEPLOY_DIR,
  #   RUNTIME, INGRESS_HOST, INGRESS_VARIANT
  # Outputs:
  #   Overwrites ${XDG_CONFIG_HOME:-$HOME/.config}/cch/config and logs its path.
  #######################################
  write_cch_config() {
    local conf_dir="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
    local conf_file="$conf_dir/config"
    mkdir -p "$conf_dir"

    # Portable UTC timestamp; avoids the GNU-only `date -Iseconds`.
    local stamp
    stamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')

    # One redirect for the whole group so the file is truncated exactly once.
    {
      printf '# Auto-generated by deploy-k8s.sh v%s at %s\n' "$VERSION" "$stamp"
      printf 'CCH_NAMESPACE="%s"\n' "$NAMESPACE"
      printf 'CCH_IMAGE="%s"\n' "$APP_IMAGE"
      printf 'CCH_DEPLOY_DIR="%s"\n' "$DEPLOY_DIR"
      printf 'CCH_RUNTIME="%s"\n' "$RUNTIME"
      printf 'CCH_INGRESS_HOST="%s"\n' "$INGRESS_HOST"
      printf 'CCH_INGRESS_VARIANT="%s"\n' "$INGRESS_VARIANT"
    } > "$conf_file"

    log_info "cch 配置已写入: $conf_dir/config"
  }
  #######################################
  # Print an address for the first node returned by `kubectl get nodes`.
  # Tries the ExternalIP address first, then InternalIP; when neither query
  # yields anything (or kubectl fails) prints the "<your-node-ip>" placeholder.
  # Globals:
  #   KUBECTL (read) - kubectl command; expanded unquoted, as elsewhere in
  #                    this file, so it may consist of several words
  # Outputs:
  #   The address (or placeholder) on stdout.
  #######################################
  get_node_ip() {
    local addr_type
    local addr=""

    for addr_type in ExternalIP InternalIP; do
      addr=$($KUBECTL get nodes -o jsonpath="{.items[0].status.addresses[?(@.type==\"${addr_type}\")].address}" 2>/dev/null || echo "")
      if [[ -n "$addr" ]]; then
        break
      fi
    done

    echo "${addr:-<your-node-ip>}"
  }
  #######################################
  # Print the final deployment / upgrade summary: banner, access URL,
  # namespace, image, admin token (fresh installs only) and cch usage hints.
  # Globals (read):
  #   INGRESS_VARIANT, INGRESS_HOST, KUBECTL, NAMESPACE, APP_IMAGE,
  #   UPDATE_MODE, ADMIN_TOKEN, INSTALL_CCH,
  #   GREEN / BLUE / YELLOW / RED / NC (color escape sequences)
  # Outputs:
  #   Human-readable summary on stdout.
  #######################################
  print_success_message() {
    # Initialised so an unexpected INGRESS_VARIANT yields an empty URL
    # instead of referencing an unset variable.
    local url=""
    case "$INGRESS_VARIANT" in
      standard|traefik)
        url="http://$INGRESS_HOST"
        ;;
      nodeport)
        local np node_ip
        np=$($KUBECTL -n "$NAMESPACE" get svc claude-code-hub \
          -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "")
        node_ip=$(get_node_ip)
        if [[ -n "$np" ]]; then
          url="http://${node_ip}:${np}"
        else
          # No nodePort could be read: suggest a port-forward instead.
          url="(kubectl port-forward svc/claude-code-hub -n $NAMESPACE 13500:80)"
        fi
        ;;
    esac

    # Build the banner rows from the border width so the box always lines up.
    local border="+================================================================+"
    local title="Claude Code Hub Deployment Complete!"
    if [[ "$UPDATE_MODE" == true ]]; then
      title="Claude Code Hub Upgrade Complete!"
    fi
    local inner=$(( ${#border} - 2 ))
    local pad_left=$(( (inner - ${#title}) / 2 ))
    local pad_right=$(( inner - ${#title} - pad_left ))
    local blank_row title_row
    printf -v blank_row '|%*s|' "$inner" ''
    printf -v title_row '|%*s%s%*s|' "$pad_left" '' "$title" "$pad_right" ''

    echo ""
    echo -e "${GREEN}${border}${NC}"
    echo -e "${GREEN}${blank_row}${NC}"
    echo -e "${GREEN}${title_row}${NC}"
    echo -e "${GREEN}${blank_row}${NC}"
    echo -e "${GREEN}${border}${NC}"
    echo ""

    # Dynamic values go through %s so backslashes in them are printed
    # literally; only the color codes are escape-expanded (%b).
    printf '%bAccess URL:%b %b%s%b\n' "$BLUE" "$NC" "$GREEN" "$url" "$NC"
    printf '%bNamespace:%b %s\n' "$BLUE" "$NC" "$NAMESPACE"
    printf '%bImage:%b %s\n' "$BLUE" "$NC" "$APP_IMAGE"
    echo ""

    if [[ "$UPDATE_MODE" == false ]]; then
      echo -e "${BLUE}Admin Token (保管好):${NC}"
      printf ' %b%s%b\n' "$YELLOW" "$ADMIN_TOKEN" "$NC"
      echo ""
    fi

    echo -e "${BLUE}常用命令 (cch):${NC}"
    echo -e " cch status # 查看 Pod / HPA / 资源"
    echo -e " cch logs # 查看日志"
    echo -e " cch update # 拉新镜像 + 滚动更新"
    echo -e " cch backup # 备份 PostgreSQL"
    echo -e " cch info # 展示访问地址与 Admin Token"
    echo ""

    if [[ "$INSTALL_CCH" != true ]]; then
      echo -e "${YELLOW}提示:${NC} cch CLI 未安装到 PATH。你可以:"
      echo -e " bash scripts/deploy-k8s.sh --install-cch # 软链到 /usr/local/bin/cch"
      echo -e " 或直接: bash scripts/cch status"
    fi
    echo ""

    if [[ "$UPDATE_MODE" == false ]]; then
      echo -e "${RED}IMPORTANT:${NC} 请妥善保存 Admin Token,丢失后只能通过集群 Secret 找回"
    fi
    echo ""
  }
  826. ###############################################################################
  827. # Main
  828. ###############################################################################
  #######################################
  # Script entry point.
  # Arguments:
  #   "$@" - forwarded unchanged to parse_args
  # Flow:
  #   1. Parse arguments, print the header, detect the OS.
  #   2. If DRY_RENDER is "true": render manifests offline from user-supplied
  #      or default values, then exit 0 without touching a cluster.
  #   3. Otherwise run the cluster pipeline in the order listed below.
  # Globals (written in dry-render mode):
  #   RUNTIME, STORAGE_CLASS, INGRESS_VARIANT, APP_SERVICE_TYPE,
  #   INGRESS_CLASS (standard variant only), UPDATE_MODE
  #######################################
  main() {
    parse_args "$@"
    print_header
    detect_os

    if [[ "$DRY_RENDER" == true ]]; then
      # Offline mode: no cluster probing; render from arguments or defaults.
      log_info "Dry-render mode: 跳过集群探测"
      RUNTIME="${RUNTIME_OVERRIDE:-kubectl}"
      resolve_config
      STORAGE_CLASS="${STORAGE_CLASS_ARG:-local-path}"
      log_info "Storage class (dry-render): $STORAGE_CLASS"

      # An ingress is rendered only when it is not disabled AND a host is set.
      if [[ "$DISABLE_INGRESS" != true && -n "$INGRESS_HOST" ]]; then
        INGRESS_VARIANT="standard"
        APP_SERVICE_TYPE="ClusterIP"
        INGRESS_CLASS="${INGRESS_CLASS_ARG:-nginx}"
      else
        INGRESS_VARIANT="nodeport"
        APP_SERVICE_TYPE="NodePort"
      fi

      UPDATE_MODE=false
      render_manifests
      log_success "Dry render 完成,manifest 位于: $DEPLOY_DIR/k8s"
      log_info "可用于审阅: kubectl apply --dry-run=client -R -f $DEPLOY_DIR/k8s/"
      exit 0
    fi

    # Cluster pipeline; the order of these steps is significant.
    local pipeline_step
    for pipeline_step in \
      detect_runtime \
      preflight_checks \
      resolve_config \
      detect_existing_deployment \
      detect_storage_class \
      detect_ingress_variant \
      prepare_secret_values \
      render_manifests \
      apply_manifests \
      install_cch_cli_if_requested \
      write_cch_config \
      print_success_message; do
      "$pipeline_step"
    done
  }
  # Invoke main with the script's arguments unless DEPLOY_K8S_SOURCE_ONLY is
  # exactly "1", in which case the file only defines its functions.
  [[ "${DEPLOY_K8S_SOURCE_ONLY:-0}" == "1" ]] || main "$@"