cch 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. #!/usr/bin/env bash
  2. # cch - Claude Code Hub Kubernetes Management CLI
  3. # 兼容 k3s 与标准 Kubernetes,通过 env / ~/.config/cch/config 可覆盖默认值
  4. # Reference: docs/k8s-deployment.md
  5. set -euo pipefail
  6. VERSION="1.0.0"
  7. ###############################################################################
  8. # Colors (非 TTY / NO_COLOR 自动禁用)
  9. ###############################################################################
  10. if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then
  11. RED=$'\033[0;31m'
  12. GREEN=$'\033[0;32m'
  13. YELLOW=$'\033[1;33m'
  14. CYAN=$'\033[0;36m'
  15. NC=$'\033[0m'
  16. else
  17. RED=""; GREEN=""; YELLOW=""; CYAN=""; NC=""
  18. fi
  19. info() { echo -e "${CYAN}[INFO]${NC} $*"; }
  20. ok() { echo -e "${GREEN}[OK]${NC} $*"; }
  21. warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
  22. err() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
  23. # 跨平台 base64 decode:GNU coreutils 用 -d,旧版 macOS BSD 只认 -D
  24. b64d() {
  25. if base64 -d </dev/null >/dev/null 2>&1; then
  26. base64 -d
  27. elif base64 -D </dev/null >/dev/null 2>&1; then
  28. base64 -D
  29. else
  30. # 兜底:openssl 几乎所有平台都有
  31. openssl base64 -d
  32. fi
  33. }
  34. ###############################################################################
  35. # 配置解析 (优先级: env > config file > 默认)
  36. ###############################################################################
  37. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  38. REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
  39. parse_config_value() {
  40. local raw="$1"
  41. raw="${raw#"${raw%%[![:space:]]*}"}"
  42. raw="${raw%"${raw##*[![:space:]]}"}"
  43. if [[ "$raw" =~ ^\"(.*)\"$ ]]; then
  44. printf '%s' "${BASH_REMATCH[1]}"
  45. return
  46. fi
  47. if [[ "$raw" =~ ^\'(.*)\'$ ]]; then
  48. printf '%s' "${BASH_REMATCH[1]}"
  49. return
  50. fi
  51. printf '%s' "$raw"
  52. }
  53. load_config_file() {
  54. [[ -r "$CCH_CONFIG_FILE" ]] || return 0
  55. local line key raw value
  56. while IFS= read -r line || [[ -n "$line" ]]; do
  57. [[ "$line" =~ ^[[:space:]]*# ]] && continue
  58. [[ "$line" =~ ^[[:space:]]*$ ]] && continue
  59. if [[ ! "$line" =~ ^[[:space:]]*([A-Z0-9_]+)[[:space:]]*=(.*)$ ]]; then
  60. warn "跳过无法解析的配置行: $line"
  61. continue
  62. fi
  63. key="${BASH_REMATCH[1]}"
  64. raw="${BASH_REMATCH[2]}"
  65. case "$key" in
  66. CCH_NAMESPACE|CCH_IMAGE|CCH_DEPLOY_DIR|CCH_RUNTIME|CCH_INGRESS_HOST|CCH_INGRESS_VARIANT|CCH_BACKUP_DIR|CCH_BACKUP_KEEP)
  67. if [[ -z "${!key:-}" ]]; then
  68. value="$(parse_config_value "$raw")"
  69. printf -v "$key" '%s' "$value"
  70. fi
  71. ;;
  72. esac
  73. done < "$CCH_CONFIG_FILE"
  74. }
  75. # 1. 加载可选配置文件
  76. CCH_CONFIG_FILE="${CCH_CONFIG_FILE:-${XDG_CONFIG_HOME:-$HOME/.config}/cch/config}"
  77. load_config_file
  78. # 2. 合并默认值
  79. NAMESPACE="${CCH_NAMESPACE:-claude-code-hub}"
  80. IMAGE="${CCH_IMAGE:-ghcr.io/ding113/claude-code-hub:latest}"
  81. RUNTIME_OVERRIDE="${CCH_RUNTIME:-}"
  82. INGRESS_HOST="${CCH_INGRESS_HOST:-}"
  83. INGRESS_VARIANT="${CCH_INGRESS_VARIANT:-}"
  84. # Manifest 目录查找顺序
  85. resolve_deploy_dir() {
  86. if [[ -n "${CCH_DEPLOY_DIR:-}" ]] && [[ -d "$CCH_DEPLOY_DIR/k8s" ]]; then
  87. echo "$CCH_DEPLOY_DIR"; return
  88. fi
  89. local cfg_default="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
  90. if [[ -d "$cfg_default/k8s" ]]; then echo "$cfg_default"; return; fi
  91. if [[ -d "/opt/claude-code-hub/k8s" ]]; then echo "/opt/claude-code-hub"; return; fi
  92. if [[ -d "$REPO_ROOT/deploy/k8s" ]]; then echo "$REPO_ROOT"; return; fi
  93. if [[ -d "$HOME/claude-code-hub-k8s/k8s" ]]; then echo "$HOME/claude-code-hub-k8s"; return; fi
  94. echo ""
  95. }
  96. DEPLOY_DIR="$(resolve_deploy_dir)"
  97. ###############################################################################
  98. # Runtime 探测 (决定是否使用 k3s ctr 等 k3s-only 命令)
  99. ###############################################################################
  100. KUBECTL="kubectl"
  101. RUNTIME=""
  102. detect_runtime() {
  103. if [[ -n "$RUNTIME_OVERRIDE" ]]; then
  104. RUNTIME="$RUNTIME_OVERRIDE"
  105. # 即使 override,也要确保 KUBECTL 能真的运行:无 kubectl 时退到 sudo k3s kubectl
  106. if ! command -v kubectl &>/dev/null; then
  107. if [[ "$RUNTIME" == "k3s" ]] && command -v k3s &>/dev/null; then
  108. KUBECTL="sudo k3s kubectl"
  109. else
  110. err "RUNTIME_OVERRIDE=$RUNTIME 指定,但本机既无 kubectl 也无 k3s 可用"
  111. return 1
  112. fi
  113. fi
  114. elif command -v kubectl &>/dev/null && kubectl cluster-info &>/dev/null; then
  115. local kubelet_versions
  116. kubelet_versions="$(kubectl get nodes -o jsonpath='{.items[*].status.nodeInfo.kubeletVersion}' 2>/dev/null || echo "")"
  117. if [[ "$kubelet_versions" == *"k3s"* ]]; then
  118. RUNTIME="k3s"
  119. else
  120. RUNTIME="kubectl"
  121. fi
  122. elif command -v k3s &>/dev/null; then
  123. KUBECTL="sudo k3s kubectl"
  124. RUNTIME="k3s"
  125. else
  126. err "未检测到 kubectl 或 k3s。请安装 kubectl 并确保 kubeconfig 可用"
  127. return 1
  128. fi
  129. }
  130. require_cluster() {
  131. detect_runtime
  132. if ! $KUBECTL get ns "$NAMESPACE" &>/dev/null; then
  133. err "命名空间 $NAMESPACE 不存在。请先运行 deploy-k8s.sh,或检查 CCH_NAMESPACE 配置"
  134. exit 1
  135. fi
  136. }
  137. ###############################################################################
  138. # Helpers
  139. ###############################################################################
  140. # 解析应用访问地址,用于 health check
  141. resolve_access_url() {
  142. if [[ -n "$INGRESS_HOST" ]]; then
  143. echo "http://$INGRESS_HOST"
  144. return
  145. fi
  146. # 尝试从 Service 拿 NodePort
  147. local np node_ip
  148. np=$($KUBECTL -n "$NAMESPACE" get svc claude-code-hub -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "")
  149. node_ip=$($KUBECTL get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null)
  150. if [[ -z "$node_ip" ]]; then
  151. node_ip=$($KUBECTL get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null)
  152. fi
  153. if [[ -n "$np" ]] && [[ -n "$node_ip" ]]; then
  154. echo "http://${node_ip}:${np}"
  155. return
  156. fi
  157. echo ""
  158. }
  159. # 在集群内通过 exec 做健康检查,避免依赖集群外网络
  160. health_check_in_pod() {
  161. local pod
  162. pod=$($KUBECTL -n "$NAMESPACE" get pods -l app=claude-code-hub \
  163. -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
  164. if [[ -z "$pod" ]]; then
  165. warn "没有找到 app Pod"
  166. return 1
  167. fi
  168. local status
  169. status=$($KUBECTL -n "$NAMESPACE" exec "$pod" -- \
  170. node -e "fetch('http://127.0.0.1:3000/api/health/ready').then(r=>r.json()).then(j=>{console.log(j.status);process.exit(j.components?.database?.status==='up'?0:1)}).catch(()=>process.exit(1))" 2>/dev/null) || true
  171. if [[ "$status" == "healthy" ]] || [[ "$status" == "degraded" ]]; then
  172. ok "Health check (in-pod): $status"
  173. return 0
  174. fi
  175. warn "Health check not healthy: ${status:-unknown}"
  176. return 1
  177. }
  178. wait_for_deployment_rollout() {
  179. local timeout="$1"
  180. local stage="$2"
  181. if ! $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout="$timeout"; then
  182. err "$stage 未在 $timeout 内完成"
  183. err "排查命令: kubectl -n $NAMESPACE describe deployment/claude-code-hub"
  184. err "排查命令: kubectl -n $NAMESPACE logs deploy/claude-code-hub --tail=100"
  185. return 1
  186. fi
  187. }
  188. restore_update_scaling() {
  189. local target_replicas="$1"
  190. local min_replicas="$2"
  191. if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
  192. $KUBECTL -n "$NAMESPACE" patch hpa claude-code-hub --type merge \
  193. -p "{\"spec\":{\"minReplicas\":$min_replicas}}" >/dev/null || true
  194. fi
  195. $KUBECTL -n "$NAMESPACE" scale deployment/claude-code-hub --replicas="$target_replicas" >/dev/null || true
  196. }
  197. resolve_k3s_image_digest() {
  198. local image="$1"
  199. sudo k3s ctr images ls 2>/dev/null | awk -v img="$image" 'found==0 && $1==img { print $3; found=1 }'
  200. }
  201. restart_k3s_rollout_with_image() {
  202. local reason="$1"
  203. local image="$2"
  204. warn "$reason"
  205. if ! $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub app="$image" >/dev/null; then
  206. err "set image 失败,未能应用目标镜像: $image"
  207. return 1
  208. fi
  209. $KUBECTL -n "$NAMESPACE" rollout restart deployment/claude-code-hub >/dev/null
  210. }
  211. build_image_ref_with_digest() {
  212. local image="$1"
  213. local image_digest="$2"
  214. local image_without_digest="${image%@*}"
  215. local last_segment
  216. if [[ "$image_without_digest" == "$image" ]]; then
  217. last_segment="${image##*/}"
  218. if [[ "$last_segment" == *:* ]]; then
  219. image_without_digest="${image%:*}"
  220. fi
  221. fi
  222. printf '%s@%s' "$image_without_digest" "$image_digest"
  223. }
  224. update_k3s_image_by_digest_or_restart() {
  225. local image="$1"
  226. local image_digest="" image_by_digest
  227. # 这里不能在 awk 命中第一条后直接 exit:
  228. # 在 set -euo pipefail 下,上游 ctr 仍继续写 pipe 时会收到 SIGPIPE(141),
  229. # 进而让整个命令替换失败并静默退出升级流程。
  230. if image_digest=$(resolve_k3s_image_digest "$image"); then
  231. if [[ -n "$image_digest" ]] && [[ "${image_digest#sha256:}" != "$image_digest" ]]; then
  232. image_by_digest="$(build_image_ref_with_digest "$image" "$image_digest")"
  233. info " digest: $image_digest"
  234. $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub app="$image_by_digest" >/dev/null
  235. else
  236. restart_k3s_rollout_with_image "未解析到可用 digest,回落到 rollout restart" "$image"
  237. fi
  238. else
  239. restart_k3s_rollout_with_image "k3s ctr images ls 失败,回落到 rollout restart" "$image"
  240. fi
  241. }
  242. ###############################################################################
  243. # Commands
  244. ###############################################################################
  245. cmd_update() {
  246. require_cluster
  247. local TIMESTAMP
  248. TIMESTAMP=$(date +%Y%m%d%H%M%S)
  249. echo -e "${CYAN}=========================================${NC}"
  250. echo -e "${CYAN} Claude Code Hub Upgrade - $TIMESTAMP${NC}"
  251. echo -e "${CYAN}=========================================${NC}"
  252. echo ""
  253. # Step 1: Backup
  254. info "Step 1/6: Backing up database..."
  255. if cmd_backup; then
  256. ok "Database backup complete"
  257. else
  258. # 非交互场景下 (无 TTY / CCH_NONINTERACTIVE=1) 直接放弃,避免自动化任务卡死
  259. if [[ "${CCH_NONINTERACTIVE:-0}" == "1" ]] || [[ ! -t 0 ]]; then
  260. err "Backup 失败,且当前非交互式环境 — 中止升级以保护数据"
  261. err "请先人工处理 (磁盘、权限、连接数),或设置 CCH_NONINTERACTIVE=0 并在 TTY 下重试"
  262. exit 1
  263. fi
  264. warn "Backup failed, continue without backup?"
  265. # read -t 60:60 秒内无输入则退出,避免 CI 卡死
  266. if ! read -t 60 -p "输入 yes 继续 (默认 60s 超时后中止): " answer; then
  267. echo ""
  268. err "超时,中止升级"; exit 1
  269. fi
  270. if [[ "$answer" != "yes" ]]; then
  271. err "已中止"; exit 1
  272. fi
  273. fi
  274. echo ""
  275. # Step 2: (k3s only) pre-pull image
  276. info "Step 2/6: Preparing image..."
  277. if [[ "$RUNTIME" == "k3s" ]]; then
  278. if sudo k3s ctr images pull "$IMAGE" >/dev/null 2>&1; then
  279. ok "Image pre-pulled via k3s ctr"
  280. else
  281. warn "k3s ctr pull 失败,依赖 Always imagePullPolicy"
  282. fi
  283. else
  284. info "Standard k8s: 依赖 imagePullPolicy=Always 在 rollout 时拉取"
  285. fi
  286. echo ""
  287. # Step 3: Scale down to 1 for migration
  288. local CURRENT_REPLICAS MIN_REPLICAS
  289. CURRENT_REPLICAS=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.status.currentReplicas}' 2>/dev/null || echo "2")
  290. MIN_REPLICAS=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.spec.minReplicas}' 2>/dev/null || echo "2")
  291. [[ -z "$CURRENT_REPLICAS" || "$CURRENT_REPLICAS" == "null" ]] && CURRENT_REPLICAS=2
  292. [[ -z "$MIN_REPLICAS" || "$MIN_REPLICAS" == "null" ]] && MIN_REPLICAS=2
  293. info "Step 3/6: Scaling down to 1 replica for migration (was $CURRENT_REPLICAS)..."
  294. if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
  295. $KUBECTL -n "$NAMESPACE" patch hpa claude-code-hub --type merge -p '{"spec":{"minReplicas":1}}' >/dev/null
  296. fi
  297. $KUBECTL -n "$NAMESPACE" scale deployment/claude-code-hub --replicas=1 >/dev/null
  298. if ! wait_for_deployment_rollout 180s "缩容到 1 副本"; then
  299. restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
  300. exit 1
  301. fi
  302. ok "Scaled to 1 replica"
  303. echo ""
  304. # Step 4: Update image + migrate
  305. info "Step 4/6: Updating image on single instance (auto-migration)..."
  306. if [[ "$RUNTIME" == "k3s" ]]; then
  307. # k3s: 用 digest 固定,避免 tag 相同导致 no-op rollout
  308. if ! update_k3s_image_by_digest_or_restart "$IMAGE"; then
  309. restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
  310. exit 1
  311. fi
  312. else
  313. # 标准 k8s: set image 到目标 tag,触发 rollout;相同 tag 时强制 restart 拿最新 digest
  314. local CURRENT_IMAGE
  315. CURRENT_IMAGE=$($KUBECTL -n "$NAMESPACE" get deployment/claude-code-hub \
  316. -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
  317. if [[ "$CURRENT_IMAGE" == "$IMAGE" ]]; then
  318. info "镜像 tag 未变 ($IMAGE),执行 rollout restart 重新拉取"
  319. $KUBECTL -n "$NAMESPACE" rollout restart deployment/claude-code-hub >/dev/null
  320. else
  321. $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub app="$IMAGE" >/dev/null
  322. fi
  323. fi
  324. if ! wait_for_deployment_rollout 600s "镜像更新 rollout"; then
  325. err "新镜像 rollout 失败,正在回滚..."
  326. $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub >/dev/null || true
  327. restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
  328. wait_for_deployment_rollout 300s "回滚后的 deployment" || true
  329. exit 1
  330. fi
  331. sleep 3
  332. if health_check_in_pod; then
  333. ok "Migration + startup OK"
  334. else
  335. err "DB 未通过健康检查,正在回滚..."
  336. $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub >/dev/null
  337. restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
  338. exit 1
  339. fi
  340. echo ""
  341. # Step 5: Scale back
  342. local desired_replicas="$CURRENT_REPLICAS"
  343. if [[ "$desired_replicas" -lt "$MIN_REPLICAS" ]]; then
  344. desired_replicas="$MIN_REPLICAS"
  345. fi
  346. info "Step 5/6: Scaling back to $desired_replicas replicas..."
  347. restore_update_scaling "$desired_replicas" "$MIN_REPLICAS"
  348. if ! wait_for_deployment_rollout 300s "恢复副本"; then
  349. err "副本恢复失败,当前 deployment 可能仍停留在单副本"
  350. exit 1
  351. fi
  352. ok "Running with $desired_replicas replicas"
  353. echo ""
  354. # Step 6: Final health check
  355. info "Step 6/6: Final health check..."
  356. sleep 3
  357. if health_check_in_pod; then
  358. ok "Upgrade complete"
  359. else
  360. warn "Upgrade done but health check failed. Check: cch logs"
  361. fi
  362. echo ""
  363. $KUBECTL -n "$NAMESPACE" get pods -o wide
  364. }
  365. cmd_status() {
  366. require_cluster
  367. echo -e "${CYAN}Pods:${NC}"
  368. $KUBECTL -n "$NAMESPACE" get pods -o wide
  369. echo ""
  370. echo -e "${CYAN}HPA:${NC}"
  371. $KUBECTL -n "$NAMESPACE" get hpa 2>/dev/null || echo "(no HPA)"
  372. echo ""
  373. echo -e "${CYAN}Resources (top):${NC}"
  374. $KUBECTL -n "$NAMESPACE" top pods 2>/dev/null || warn "metrics-server 未就绪,跳过 top"
  375. }
  376. cmd_logs() {
  377. require_cluster
  378. local TAIL="100"
  379. if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
  380. TAIL="$1"
  381. shift
  382. fi
  383. $KUBECTL -n "$NAMESPACE" logs deploy/claude-code-hub --all-containers --tail="$TAIL" "$@"
  384. }
  385. cmd_follow() {
  386. require_cluster
  387. $KUBECTL -n "$NAMESPACE" logs -f deploy/claude-code-hub --all-containers --tail=50
  388. }
  389. cmd_restart() {
  390. require_cluster
  391. info "Rolling restart..."
  392. $KUBECTL -n "$NAMESPACE" rollout restart deployment/claude-code-hub
  393. $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=300s
  394. ok "Restart complete"
  395. }
  396. cmd_rollback() {
  397. require_cluster
  398. warn "Rolling back to previous revision..."
  399. $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub
  400. $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout=300s
  401. ok "Rollback complete"
  402. }
  403. cmd_scale() {
  404. require_cluster
  405. local N="${1:-}"
  406. if [[ -z "$N" ]]; then err "Usage: cch scale <replicas>"; exit 1; fi
  407. if ! [[ "$N" =~ ^[0-9]+$ ]] || [[ "$N" -lt 1 ]]; then
  408. err "replicas 必须是正整数: $N"; exit 1
  409. fi
  410. if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
  411. local hpa_min hpa_max
  412. hpa_min=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.spec.minReplicas}' 2>/dev/null || echo "")
  413. hpa_max=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.spec.maxReplicas}' 2>/dev/null || echo "")
  414. [[ -z "$hpa_min" || "$hpa_min" == "null" ]] && hpa_min=1
  415. [[ -z "$hpa_max" || "$hpa_max" == "null" ]] && hpa_max=0
  416. if [[ "$N" -lt "$hpa_min" ]]; then
  417. err "HPA minReplicas=$hpa_min 阻止缩到 $N。请先调整 HPA 或重新运行 deploy-k8s.sh 传入匹配的 --hpa-min"
  418. exit 1
  419. fi
  420. if [[ "$hpa_max" -gt 0 ]] && [[ "$N" -gt "$hpa_max" ]]; then
  421. err "HPA maxReplicas=$hpa_max 阻止扩到 $N。请先调整 HPA 或重新运行 deploy-k8s.sh 传入匹配的 --hpa-max"
  422. exit 1
  423. fi
  424. fi
  425. $KUBECTL -n "$NAMESPACE" scale deployment/claude-code-hub --replicas="$N"
  426. info "Scaled to $N replicas"
  427. }
  428. cmd_backup() {
  429. require_cluster
  430. local backup_dir="${CCH_BACKUP_DIR:-$HOME/backups/claude-code-hub}"
  431. mkdir -p "$backup_dir"
  432. local ts file
  433. ts=$(date +%Y%m%d_%H%M%S)
  434. file="$backup_dir/claude_code_hub_${ts}.sql.gz"
  435. info "Backing up PostgreSQL -> $file"
  436. if ! $KUBECTL -n "$NAMESPACE" exec sts/postgres -- \
  437. pg_dump -U claude_code_hub -d claude_code_hub --no-owner --no-privileges \
  438. | gzip > "$file"; then
  439. err "备份失败"; rm -f "$file"; return 1
  440. fi
  441. local size
  442. size=$(du -h "$file" 2>/dev/null | cut -f1)
  443. ok "Backup complete: $file ($size)"
  444. # 保留最近 30 份 (BSD xargs 无 -r,用 while read 代替)
  445. local keep="${CCH_BACKUP_KEEP:-30}"
  446. if ! [[ "$keep" =~ ^[0-9]+$ ]] || [[ "$keep" -lt 1 ]]; then
  447. warn "CCH_BACKUP_KEEP 必须是正整数,当前值: $keep; 使用默认值 30"
  448. keep=30
  449. fi
  450. # shellcheck disable=SC2012
  451. ls -t "$backup_dir"/claude_code_hub_*.sql.gz 2>/dev/null \
  452. | tail -n +"$((keep+1))" \
  453. | while IFS= read -r old; do rm -f "$old"; done
  454. }
  455. cmd_env() {
  456. require_cluster
  457. if command -v python3 &>/dev/null; then
  458. $KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
  459. -o jsonpath='{.spec.template.spec.containers[0].env}' | python3 -m json.tool
  460. else
  461. $KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
  462. -o jsonpath='{.spec.template.spec.containers[0].env}'
  463. echo ""
  464. fi
  465. }
  466. cmd_secret() {
  467. require_cluster
  468. local KEY="${1:-admin-token}"
  469. $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
  470. -o jsonpath="{.data.$KEY}" | b64d
  471. echo ""
  472. }
  473. cmd_shell() {
  474. require_cluster
  475. $KUBECTL -n "$NAMESPACE" exec -it deploy/claude-code-hub -- sh
  476. }
  477. cmd_dbshell() {
  478. require_cluster
  479. $KUBECTL -n "$NAMESPACE" exec -it sts/postgres -- \
  480. psql -U claude_code_hub -d claude_code_hub
  481. }
  482. cmd_info() {
  483. require_cluster
  484. local url token img digest
  485. url=$(resolve_access_url)
  486. token=$($KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
  487. -o jsonpath='{.data.admin-token}' 2>/dev/null | b64d)
  488. img=$($KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
  489. -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null)
  490. digest=$($KUBECTL -n "$NAMESPACE" get pods -l app=claude-code-hub \
  491. -o jsonpath='{.items[0].status.containerStatuses[0].imageID}' 2>/dev/null)
  492. echo -e "${CYAN}Namespace:${NC} $NAMESPACE"
  493. echo -e "${CYAN}Runtime:${NC} $RUNTIME"
  494. echo -e "${CYAN}Image (desired):${NC} $img"
  495. echo -e "${CYAN}Image (running):${NC} $digest"
  496. if [[ -n "$url" ]]; then
  497. echo -e "${CYAN}Access URL:${NC} $url"
  498. else
  499. echo -e "${CYAN}Access URL:${NC} (no ingress/nodeport detected; use 'kubectl port-forward')"
  500. fi
  501. echo -e "${CYAN}Admin token:${NC} ${YELLOW}${token}${NC}"
  502. echo ""
  503. $KUBECTL -n "$NAMESPACE" get deployment,statefulset,hpa,svc,ingress 2>/dev/null || true
  504. }
  505. cmd_version() {
  506. echo "cch v${VERSION}"
  507. echo " runtime : ${RUNTIME:-(not detected)}"
  508. echo " namespace : $NAMESPACE"
  509. echo " image : $IMAGE"
  510. echo " deploy-dir : ${DEPLOY_DIR:-(not found)}"
  511. echo " config-file : $CCH_CONFIG_FILE"
  512. }
  513. cmd_doctor() {
  514. echo -e "${CYAN}cch doctor${NC}"
  515. local pass=0 fail=0 warn_n=0
  516. check_pass() { ok "$1"; pass=$((pass+1)); }
  517. check_warn() { warn "$1"; warn_n=$((warn_n+1)); }
  518. check_fail() { err "$1"; fail=$((fail+1)); }
  519. if detect_runtime; then
  520. check_pass "Runtime detected (runtime=$RUNTIME, kubectl=$KUBECTL)"
  521. else
  522. check_warn "运行时探测失败,将继续尝试默认 kubectl 诊断"
  523. fi
  524. # kubectl
  525. if command -v kubectl &>/dev/null; then
  526. check_pass "kubectl installed: $(kubectl version --client -o jsonpath='{.clientVersion.gitVersion}' 2>/dev/null)"
  527. elif [[ "$RUNTIME" == "k3s" ]] && command -v k3s &>/dev/null; then
  528. check_warn "kubectl 未安装,将使用 sudo k3s kubectl"
  529. else
  530. check_fail "kubectl 未安装"
  531. fi
  532. # Cluster reachable
  533. if $KUBECTL cluster-info &>/dev/null; then
  534. check_pass "Cluster reachable (runtime=$RUNTIME)"
  535. else
  536. check_fail "无法连接集群。请检查 kubeconfig / context"
  537. echo "Summary: $pass passed, $warn_n warnings, $fail failures"; return
  538. fi
  539. # Namespace
  540. if $KUBECTL get ns "$NAMESPACE" &>/dev/null; then
  541. check_pass "Namespace $NAMESPACE exists"
  542. else
  543. check_fail "Namespace $NAMESPACE 不存在 — 请先运行 deploy-k8s.sh"
  544. fi
  545. # Secret
  546. if $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets &>/dev/null; then
  547. check_pass "Secret claude-code-hub-secrets 存在"
  548. else
  549. check_fail "Secret 缺失"
  550. fi
  551. # Postgres / Redis / App
  552. for comp in postgres redis; do
  553. local rs
  554. rs=$($KUBECTL -n "$NAMESPACE" get sts "$comp" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "")
  555. if [[ "$rs" == "1" ]]; then check_pass "$comp StatefulSet ready"
  556. else check_fail "$comp 未就绪 (ready=$rs)"; fi
  557. done
  558. local app_ready
  559. app_ready=$($KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
  560. -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
  561. if [[ "$app_ready" -gt 0 ]]; then
  562. check_pass "App ready replicas: $app_ready"
  563. else
  564. check_fail "App 没有就绪 Pod"
  565. fi
  566. # HPA
  567. if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
  568. check_pass "HPA configured"
  569. else
  570. check_warn "HPA 不存在 (非必需)"
  571. fi
  572. # Ingress
  573. if $KUBECTL -n "$NAMESPACE" get ingress claude-code-hub &>/dev/null || \
  574. $KUBECTL -n "$NAMESPACE" get ingressroute claude-code-hub &>/dev/null 2>&1; then
  575. check_pass "Ingress resource present"
  576. else
  577. check_warn "未检测到 Ingress (如使用 NodePort 可忽略)"
  578. fi
  579. # StorageClass
  580. if [[ "$RUNTIME" == "k3s" ]] && $KUBECTL get sc local-path &>/dev/null; then
  581. check_pass "StorageClass local-path (k3s default)"
  582. fi
  583. # In-pod health
  584. if health_check_in_pod &>/dev/null; then
  585. check_pass "In-pod health check"
  586. else
  587. check_warn "In-pod health check failed (服务可能启动中)"
  588. fi
  589. echo ""
  590. echo "Summary: ${GREEN}$pass passed${NC}, ${YELLOW}$warn_n warnings${NC}, ${RED}$fail failures${NC}"
  591. }
  592. cmd_install() {
  593. # 快捷路径:仅当用户在仓库内运行时有用。复制 manifest 到 deploy-dir 并提示跑 deploy-k8s.sh
  594. if [[ -x "$SCRIPT_DIR/deploy-k8s.sh" ]]; then
  595. info "转交给 scripts/deploy-k8s.sh (推荐使用完整的安装流程)"
  596. exec bash "$SCRIPT_DIR/deploy-k8s.sh" "$@"
  597. fi
  598. err "scripts/deploy-k8s.sh 未找到。请在仓库内运行或手动调用"
  599. exit 1
  600. }
  601. cmd_uninstall() {
  602. detect_runtime
  603. # 非交互场景必须显式通过 CCH_CONFIRM_UNINSTALL=<namespace> 授权,避免误删
  604. if [[ ! -t 0 ]]; then
  605. if [[ "${CCH_CONFIRM_UNINSTALL:-}" != "$NAMESPACE" ]]; then
  606. err "非交互环境检测到。要 uninstall 必须显式设置:"
  607. err " CCH_CONFIRM_UNINSTALL=$NAMESPACE cch uninstall"
  608. exit 1
  609. fi
  610. info "已通过 CCH_CONFIRM_UNINSTALL 授权,继续卸载"
  611. else
  612. echo -e "${RED}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!${NC}"
  613. echo -e "${RED} 即将删除 namespace: $NAMESPACE${NC}"
  614. echo -e "${RED} 这会永久删除所有 Pod、PVC(数据库数据)、Secret${NC}"
  615. echo -e "${RED}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!${NC}"
  616. if ! read -t 120 -p "确认?输入 namespace 名称 '$NAMESPACE' 继续: " input; then
  617. echo ""; info "超时,已取消"; exit 0
  618. fi
  619. if [[ "$input" != "$NAMESPACE" ]]; then
  620. info "已取消"; exit 0
  621. fi
  622. fi
  623. $KUBECTL delete namespace "$NAMESPACE" --timeout=180s
  624. ok "Namespace $NAMESPACE 已删除"
  625. info "manifest 目录 ($DEPLOY_DIR) 未删除。如需清理: rm -rf $DEPLOY_DIR"
  626. }
###############################################################################
# Help
###############################################################################
# Print the usage text to stdout. The here-document delimiter is unquoted, so
# ${VERSION} is expanded when the text is printed.
cmd_help() {
  cat <<EOF
cch - Claude Code Hub Kubernetes Management CLI v${VERSION}
Usage: cch <command> [args]
Lifecycle:
update Pull latest image, auto-migrate DB, rolling deploy (带回滚)
restart Rolling restart (no image change)
rollback Rollback to previous deployment revision
scale <n> Scale app to n replicas
Install / Teardown:
install [opts] 调用 scripts/deploy-k8s.sh (透传参数)
uninstall Delete namespace + PVCs (破坏性,带二次确认)
Observe:
status Show pods, HPA, resource usage
logs [n] [args] Show last n log lines (default 100), or pass through kubectl log flags
follow Tail logs in real-time
env Show app environment variables (JSON)
info 展示访问 URL、Admin Token、镜像 digest
doctor 诊断 (kubectl / 集群 / 资源 / 健康)
version 显示版本与当前配置
Data:
backup Backup PostgreSQL (gzip, 保留最近 30 份)
secret [key] Show secret value (default: admin-token)
dbshell Open psql shell
shell Open sh in app pod
Config:
环境变量(或 ~/.config/cch/config):
CCH_NAMESPACE K8s namespace (default: claude-code-hub)
CCH_IMAGE 应用镜像 (default: ghcr.io/ding113/claude-code-hub:latest)
CCH_DEPLOY_DIR manifest 目录 (default: 自动查找)
CCH_RUNTIME 覆盖 runtime: k3s | kubectl
CCH_INGRESS_HOST Ingress 域名,用于访问 URL 解析
CCH_BACKUP_DIR 备份目录 (default: ~/backups/claude-code-hub)
CCH_BACKUP_KEEP 保留数量 (default: 30)
NO_COLOR 禁用彩色输出
Examples:
cch status
cch logs 500
cch update
CCH_NAMESPACE=staging cch status
cch install -y # 一键部署
cch backup
cch info
cch doctor
EOF
}
  676. ###############################################################################
  677. # Dispatch
  678. ###############################################################################
  679. if [[ "${CCH_SOURCE_ONLY:-0}" != "1" ]]; then
  680. case "${1:-help}" in
  681. update) shift; cmd_update "$@" ;;
  682. status) shift; cmd_status "$@" ;;
  683. logs) shift; cmd_logs "$@" ;;
  684. follow) cmd_follow ;;
  685. restart) cmd_restart ;;
  686. rollback) cmd_rollback ;;
  687. backup) cmd_backup ;;
  688. scale) shift; cmd_scale "$@" ;;
  689. env) cmd_env ;;
  690. secret) shift; cmd_secret "$@" ;;
  691. shell) cmd_shell ;;
  692. dbshell) cmd_dbshell ;;
  693. info) cmd_info ;;
  694. version|--version|-v) cmd_version ;;
  695. doctor) cmd_doctor ;;
  696. install) shift; cmd_install "$@" ;;
  697. uninstall) cmd_uninstall ;;
  698. help|--help|-h|*) cmd_help ;;
  699. esac
  700. fi