#!/usr/bin/env bash
# cch - Claude Code Hub Kubernetes Management CLI
# Compatible with both k3s and standard Kubernetes; defaults can be overridden
# via environment variables or ~/.config/cch/config
# Reference: docs/k8s-deployment.md
  set -euo pipefail

  VERSION="1.0.0"

  ###############################################################################
  # Colors (disabled automatically when stdout is not a TTY or NO_COLOR is set)
  ###############################################################################
  # Start from the colorless defaults, then switch colors on only when allowed.
  RED="" GREEN="" YELLOW="" CYAN="" NC=""
  if [[ -z "${NO_COLOR:-}" && -t 1 ]]; then
    RED=$'\033[0;31m'
    GREEN=$'\033[0;32m'
    YELLOW=$'\033[1;33m'
    CYAN=$'\033[0;36m'
    NC=$'\033[0m'
  fi
  # Logging helpers: print a colored level tag followed by all arguments joined
  # by spaces. info/ok/warn write to stdout, err writes to stderr.
  # Messages are emitted verbatim with printf '%s', so backslashes inside
  # interpolated data (for example an echoed config line) are never expanded
  # as escape sequences. The color variables already hold real ESC bytes.
  info() { printf '%s\n' "${CYAN}[INFO]${NC} $*"; }
  ok() { printf '%s\n' "${GREEN}[OK]${NC} $*"; }
  warn() { printf '%s\n' "${YELLOW}[WARN]${NC} $*"; }
  err() { printf '%s\n' "${RED}[ERROR]${NC} $*" >&2; }
  # Portable base64 decode (stdin -> stdout).
  # GNU coreutils accepts -d, while older macOS/BSD builds only accept -D.
  b64d() {
    local flag
    for flag in -d -D; do
      # Probe with empty input so an unsupported flag is detected before any
      # real data is consumed from stdin.
      if base64 "$flag" </dev/null >/dev/null 2>&1; then
        base64 "$flag"
        return
      fi
    done
    # Fallback: openssl is available on almost every platform.
    openssl base64 -d
  }
  ###############################################################################
  # Configuration resolution (precedence: env > config file > built-in default)
  ###############################################################################
  # SCRIPT_DIR: absolute directory containing this script.
  # REPO_ROOT:  its parent directory (presumably the repository root when the
  #             script lives in a scripts/ folder - confirm against the layout).
  # CDPATH is cleared for the cd calls: when a relative directory is resolved
  # through CDPATH, cd prints the new directory to stdout, which would corrupt
  # the captured path. "--" protects against paths that start with a dash.
  SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
  REPO_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)"
  # Normalize a raw config value: trim surrounding whitespace, then remove one
  # pair of matching outer quotes (double or single) when present.
  # Arguments: $1 - raw text found to the right of '='
  # Outputs:   the cleaned value on stdout, without a trailing newline
  parse_config_value() {
    local text="$1"
    local lead trail
    lead="${text%%[![:space:]]*}"
    text="${text#"$lead"}"
    trail="${text##*[![:space:]]}"
    text="${text%"$trail"}"
    case "$text" in
      \"*\" | \'*\')
        # The value starts and ends with the same quote character (length >= 2):
        # print what lies between them.
        printf '%s' "${text:1:${#text}-2}"
        ;;
      *)
        printf '%s' "$text"
        ;;
    esac
  }
  # Load KEY=VALUE settings from $CCH_CONFIG_FILE into shell variables.
  # Only the whitelisted CCH_* keys are honored, and a key is assigned only when
  # the variable is currently unset or empty, so the environment takes precedence.
  # Comment lines and blank lines are skipped; unparsable lines produce a warning.
  # Globals: CCH_CONFIG_FILE (read), whitelisted CCH_* variables (written)
  # Returns: 0, also when the file is missing or unreadable
  load_config_file() {
    if [[ ! -r "$CCH_CONFIG_FILE" ]]; then
      return 0
    fi
    # Regexes are kept in variables so '#' and '$' need no special quoting.
    local skip_re='^[[:space:]]*(#|$)'
    local kv_re='^[[:space:]]*([A-Z0-9_]+)[[:space:]]*=(.*)$'
    local entry name rhs parsed
    # "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
    while IFS= read -r entry || [[ -n "$entry" ]]; do
      if [[ "$entry" =~ $skip_re ]]; then
        continue
      fi
      if ! [[ "$entry" =~ $kv_re ]]; then
        warn "跳过无法解析的配置行: $entry"
        continue
      fi
      name="${BASH_REMATCH[1]}"
      rhs="${BASH_REMATCH[2]}"
      case "$name" in
        CCH_NAMESPACE | CCH_IMAGE | CCH_DEPLOY_DIR | CCH_RUNTIME | CCH_INGRESS_HOST | CCH_INGRESS_VARIANT | CCH_BACKUP_DIR | CCH_BACKUP_KEEP)
          # A non-empty value already present (e.g. from the environment) wins.
          if [[ -n "${!name:-}" ]]; then
            continue
          fi
          parsed="$(parse_config_value "$rhs")"
          printf -v "$name" '%s' "$parsed"
          ;;
      esac
    done < "$CCH_CONFIG_FILE"
  }
  # 1. Load the optional config file (its path can be overridden via
  #    CCH_CONFIG_FILE; otherwise it lives under the XDG config directory).
  : "${CCH_CONFIG_FILE:=${XDG_CONFIG_HOME:-$HOME/.config}/cch/config}"
  load_config_file

  # 2. Merge with the built-in defaults.
  NAMESPACE="${CCH_NAMESPACE:-claude-code-hub}"
  IMAGE="${CCH_IMAGE:-ghcr.io/ding113/claude-code-hub:latest}"
  RUNTIME_OVERRIDE="${CCH_RUNTIME:-}"
  INGRESS_HOST="${CCH_INGRESS_HOST:-}"
  INGRESS_VARIANT="${CCH_INGRESS_VARIANT:-}"
  # Manifest directory lookup order. The first candidate that exists wins:
  #   1. $CCH_DEPLOY_DIR            (must contain k8s/)
  #   2. <XDG config dir>/cch       (must contain k8s/)
  #   3. /opt/claude-code-hub       (must contain k8s/)
  #   4. $REPO_ROOT                 (must contain deploy/k8s/)
  #   5. $HOME/claude-code-hub-k8s  (must contain k8s/)
  # Outputs: the chosen directory on stdout, or an empty line when none matches.
  resolve_deploy_dir() {
    local override="${CCH_DEPLOY_DIR:-}"
    [[ -n "$override" && -d "$override/k8s" ]] && { echo "$override"; return; }

    local xdg_cch="${XDG_CONFIG_HOME:-$HOME/.config}/cch"
    [[ -d "$xdg_cch/k8s" ]] && { echo "$xdg_cch"; return; }

    [[ -d "/opt/claude-code-hub/k8s" ]] && { echo "/opt/claude-code-hub"; return; }

    [[ -d "$REPO_ROOT/deploy/k8s" ]] && { echo "$REPO_ROOT"; return; }

    [[ -d "$HOME/claude-code-hub-k8s/k8s" ]] && { echo "$HOME/claude-code-hub-k8s"; return; }

    echo ""
  }

  DEPLOY_DIR="$(resolve_deploy_dir)"
  ###############################################################################
  # Runtime detection (decides whether k3s-only commands such as k3s ctr are used)
  ###############################################################################
  KUBECTL="kubectl"
  RUNTIME=""

  # Decide which runtime is in use and which kubectl command line to run.
  # Globals: RUNTIME_OVERRIDE (read), RUNTIME and KUBECTL (written)
  # Returns: 0 on success, 1 when no usable kubectl/k3s binary is found
  detect_runtime() {
    local has_kubectl=false has_k3s=false
    if command -v kubectl &>/dev/null; then has_kubectl=true; fi
    if command -v k3s &>/dev/null; then has_k3s=true; fi

    # An explicit override is trusted as-is, but KUBECTL must still be runnable:
    # without a kubectl binary, fall back to "sudo k3s kubectl" for k3s.
    if [[ -n "$RUNTIME_OVERRIDE" ]]; then
      RUNTIME="$RUNTIME_OVERRIDE"
      if $has_kubectl; then
        return 0
      fi
      if [[ "$RUNTIME" == "k3s" ]] && $has_k3s; then
        KUBECTL="sudo k3s kubectl"
        return 0
      fi
      err "RUNTIME_OVERRIDE=$RUNTIME 指定,但本机既无 kubectl 也无 k3s 可用"
      return 1
    fi

    # kubectl is present and can reach a cluster: classify the cluster by
    # looking for "k3s" in the reported kubelet versions.
    if $has_kubectl && kubectl cluster-info &>/dev/null; then
      RUNTIME="kubectl"
      if kubectl get nodes -o jsonpath='{.items[*].status.nodeInfo.kubeletVersion}' 2>/dev/null | grep -q 'k3s'; then
        RUNTIME="k3s"
      fi
      return 0
    fi

    # No working kubectl, but a k3s binary exists: use its bundled kubectl.
    if $has_k3s; then
      KUBECTL="sudo k3s kubectl"
      RUNTIME="k3s"
      return 0
    fi

    err "未检测到 kubectl 或 k3s。请安装 kubectl 并确保 kubeconfig 可用"
    return 1
  }
  # Detect the runtime and make sure the target namespace exists.
  # Exits the script with status 1 when the namespace cannot be read.
  require_cluster() {
    detect_runtime
    # $KUBECTL is intentionally unquoted: it may hold "sudo k3s kubectl".
    $KUBECTL get ns "$NAMESPACE" &>/dev/null && return 0
    err "命名空间 $NAMESPACE 不存在。请先运行 deploy-k8s.sh,或检查 CCH_NAMESPACE 配置"
    exit 1
  }
  ###############################################################################
  # Helpers
  ###############################################################################
  # Work out the URL under which the application can be reached.
  # Prefers the configured ingress host; otherwise combines the first node's
  # address (ExternalIP, falling back to InternalIP) with the Service NodePort.
  # Outputs: the URL on stdout, or an empty line when nothing can be determined.
  resolve_access_url() {
    if [[ -n "$INGRESS_HOST" ]]; then
      echo "http://$INGRESS_HOST"
      return
    fi

    # Try to read the NodePort from the Service.
    local port addr="" addr_type
    port=$($KUBECTL -n "$NAMESPACE" get svc claude-code-hub -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "")

    # The InternalIP is only queried when no ExternalIP was reported.
    for addr_type in ExternalIP InternalIP; do
      addr=$($KUBECTL get nodes -o jsonpath="{.items[0].status.addresses[?(@.type==\"$addr_type\")].address}" 2>/dev/null)
      if [[ -n "$addr" ]]; then
        break
      fi
    done

    if [[ -z "$port" || -z "$addr" ]]; then
      echo ""
      return
    fi
    echo "http://${addr}:${port}"
  }
  # Run the health check from inside the cluster via "kubectl exec", so it does
  # not depend on network access from outside the cluster.
  # The first pod labelled app=claude-code-hub is asked to fetch its own
  # /api/health/ready endpoint; only the printed status string is evaluated
  # (the exit code of the node process is deliberately ignored).
  # Returns: 0 when the status is "healthy" or "degraded", 1 otherwise
  health_check_in_pod() {
    local target_pod
    target_pod=$($KUBECTL -n "$NAMESPACE" get pods -l app=claude-code-hub \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
    if [[ -z "$target_pod" ]]; then
      warn "没有找到 app Pod"
      return 1
    fi

    local probe_js="fetch('http://127.0.0.1:3000/api/health/ready').then(r=>r.json()).then(j=>{console.log(j.status);process.exit(j.components?.database?.status==='up'?0:1)}).catch(()=>process.exit(1))"
    local reported
    reported=$($KUBECTL -n "$NAMESPACE" exec "$target_pod" -- \
      node -e "$probe_js" 2>/dev/null) || true

    case "$reported" in
      healthy | degraded)
        ok "Health check (in-pod): $reported"
        return 0
        ;;
    esac
    warn "Health check not healthy: ${reported:-unknown}"
    return 1
  }
  # Block until the claude-code-hub deployment finishes rolling out.
  # Arguments: $1 - timeout passed to kubectl (e.g. 180s)
  #            $2 - human-readable stage name used in the error message
  # Returns:   0 when the rollout completed in time, 1 otherwise (after
  #            printing troubleshooting commands to stderr via err)
  wait_for_deployment_rollout() {
    local limit="$1" label="$2"
    $KUBECTL -n "$NAMESPACE" rollout status deployment/claude-code-hub --timeout="$limit" && return 0
    err "$label 未在 $limit 内完成"
    err "排查命令: kubectl -n $NAMESPACE describe deployment/claude-code-hub"
    err "排查命令: kubectl -n $NAMESPACE logs deploy/claude-code-hub --tail=100"
    return 1
  }
  # Best-effort restore of the scaling settings changed during an update.
  # Arguments: $1 - replica count to scale the deployment back to
  #            $2 - minReplicas value to write back to the HPA (when one exists)
  # Failures of the patch/scale calls are ignored on purpose so that callers
  # can use this on their error paths.
  restore_update_scaling() {
    local replicas="$1" hpa_floor="$2"
    local -a in_ns=(-n "$NAMESPACE")
    if $KUBECTL "${in_ns[@]}" get hpa claude-code-hub &>/dev/null; then
      local patch
      printf -v patch '{"spec":{"minReplicas":%s}}' "$hpa_floor"
      $KUBECTL "${in_ns[@]}" patch hpa claude-code-hub --type merge \
        -p "$patch" >/dev/null || true
    fi
    $KUBECTL "${in_ns[@]}" scale deployment/claude-code-hub --replicas="$replicas" >/dev/null || true
  }
  ###############################################################################
  # Commands
  ###############################################################################
  # Upgrade the application using a single-replica migration window:
  #   1. back up the database (abort in non-interactive mode if that fails)
  #   2. pre-pull the image on k3s
  #   3. scale down to one replica (lowering HPA minReplicas when an HPA exists)
  #   4. switch the image / restart, wait for the rollout, run the health check
  #   5. scale back to the previous replica count
  #   6. final health check
  # Once step 3 has changed the cluster, every failure path restores the
  # previous scaling settings before exiting, so an aborted upgrade does not
  # leave the deployment pinned to a single replica.
  # Globals: NAMESPACE, IMAGE, RUNTIME, KUBECTL, CCH_NONINTERACTIVE (read)
  # Exits:   1 on any unrecoverable failure
  cmd_update() {
    require_cluster
    local TIMESTAMP
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    echo -e "${CYAN}=========================================${NC}"
    echo -e "${CYAN} Claude Code Hub Upgrade - $TIMESTAMP${NC}"
    echo -e "${CYAN}=========================================${NC}"
    echo ""

    # Step 1: Backup
    info "Step 1/6: Backing up database..."
    if cmd_backup; then
      ok "Database backup complete"
    else
      # Non-interactive runs (no TTY / CCH_NONINTERACTIVE=1) give up right away
      # so automated jobs never hang on a prompt.
      if [[ "${CCH_NONINTERACTIVE:-0}" == "1" ]] || [[ ! -t 0 ]]; then
        err "Backup 失败,且当前非交互式环境 — 中止升级以保护数据"
        err "请先人工处理 (磁盘、权限、连接数),或设置 CCH_NONINTERACTIVE=0 并在 TTY 下重试"
        exit 1
      fi
      warn "Backup failed, continue without backup?"
      # read -t 60: abort when nothing is typed within 60 seconds.
      local answer=""
      if ! read -r -t 60 -p "输入 yes 继续 (默认 60s 超时后中止): " answer; then
        echo ""
        err "超时,中止升级"
        exit 1
      fi
      if [[ "$answer" != "yes" ]]; then
        err "已中止"
        exit 1
      fi
    fi
    echo ""

    # Step 2: (k3s only) pre-pull image
    info "Step 2/6: Preparing image..."
    if [[ "$RUNTIME" == "k3s" ]]; then
      if sudo k3s ctr images pull "$IMAGE" >/dev/null 2>&1; then
        ok "Image pre-pulled via k3s ctr"
      else
        warn "k3s ctr pull 失败,依赖 Always imagePullPolicy"
      fi
    else
      info "Standard k8s: 依赖 imagePullPolicy=Always 在 rollout 时拉取"
    fi
    echo ""

    # Step 3: Scale down to 1 for migration
    # Remember the current scaling. The replica count comes from the HPA status
    # when available, otherwise from the deployment spec, and only as a last
    # resort from the historical default of 2.
    local CURRENT_REPLICAS="" MIN_REPLICAS="" has_hpa=0
    if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
      has_hpa=1
      CURRENT_REPLICAS=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub \
        -o jsonpath='{.status.currentReplicas}' 2>/dev/null || echo "")
      MIN_REPLICAS=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub \
        -o jsonpath='{.spec.minReplicas}' 2>/dev/null || echo "")
    fi
    if ! [[ "$CURRENT_REPLICAS" =~ ^[0-9]+$ ]]; then
      CURRENT_REPLICAS=$($KUBECTL -n "$NAMESPACE" get deployment/claude-code-hub \
        -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "")
    fi
    [[ "$CURRENT_REPLICAS" =~ ^[0-9]+$ ]] || CURRENT_REPLICAS=2
    [[ "$MIN_REPLICAS" =~ ^[0-9]+$ ]] || MIN_REPLICAS=2

    info "Step 3/6: Scaling down to 1 replica for migration (was $CURRENT_REPLICAS)..."
    if ((has_hpa)); then
      if ! $KUBECTL -n "$NAMESPACE" patch hpa claude-code-hub --type merge \
        -p '{"spec":{"minReplicas":1}}' >/dev/null; then
        # Nothing has been modified yet, so there is nothing to restore.
        err "Failed to lower HPA minReplicas; aborting before any change"
        exit 1
      fi
    fi
    if ! $KUBECTL -n "$NAMESPACE" scale deployment/claude-code-hub --replicas=1 >/dev/null; then
      err "Failed to scale the deployment down; restoring previous scaling"
      restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
      exit 1
    fi
    if ! wait_for_deployment_rollout 180s "缩容到 1 副本"; then
      restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
      exit 1
    fi
    ok "Scaled to 1 replica"
    echo ""

    # Step 4: Update image + migrate
    info "Step 4/6: Updating image on single instance (auto-migration)..."
    local apply_ok=1
    if [[ "$RUNTIME" == "k3s" ]]; then
      # k3s: pin by digest so an unchanged tag does not result in a no-op rollout.
      local IMAGE_DIGEST IMAGE_REPO IMAGE_BY_DIGEST
      # "|| IMAGE_DIGEST=" keeps a failing ctr/awk pipeline from aborting the
      # script (set -e + pipefail) while the deployment is scaled down.
      IMAGE_DIGEST=$(sudo k3s ctr images ls 2>/dev/null \
        | awk -v img="$IMAGE" '$1==img {print $3; exit}') || IMAGE_DIGEST=""
      if [[ -n "$IMAGE_DIGEST" ]] && [[ "${IMAGE_DIGEST#sha256:}" != "$IMAGE_DIGEST" ]]; then
        # Derive the repository part: drop an existing digest, then drop the tag
        # only when the last path component has one (keeps registry:port intact).
        IMAGE_REPO="${IMAGE%%@*}"
        if [[ "${IMAGE_REPO##*/}" == *:* ]]; then
          IMAGE_REPO="${IMAGE_REPO%:*}"
        fi
        IMAGE_BY_DIGEST="${IMAGE_REPO}@${IMAGE_DIGEST}"
        info " digest: $IMAGE_DIGEST"
        $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub \
          app="$IMAGE_BY_DIGEST" >/dev/null || apply_ok=0
      else
        warn "无法解析 digest,回落到 rollout restart"
        $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub app="$IMAGE" >/dev/null || true
        $KUBECTL -n "$NAMESPACE" rollout restart deployment/claude-code-hub >/dev/null || apply_ok=0
      fi
    else
      # Standard k8s: set the image to the target tag to trigger a rollout; when
      # the tag is unchanged, force a restart so the latest digest is pulled.
      local CURRENT_IMAGE
      CURRENT_IMAGE=$($KUBECTL -n "$NAMESPACE" get deployment/claude-code-hub \
        -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
      if [[ "$CURRENT_IMAGE" == "$IMAGE" ]]; then
        info "镜像 tag 未变 ($IMAGE),执行 rollout restart 重新拉取"
        $KUBECTL -n "$NAMESPACE" rollout restart deployment/claude-code-hub >/dev/null || apply_ok=0
      else
        $KUBECTL -n "$NAMESPACE" set image deployment/claude-code-hub app="$IMAGE" >/dev/null || apply_ok=0
      fi
    fi
    if ((!apply_ok)); then
      err "Failed to apply image $IMAGE to the deployment; restoring previous scaling"
      restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
      exit 1
    fi

    if ! wait_for_deployment_rollout 600s "镜像更新 rollout"; then
      err "新镜像 rollout 失败,正在回滚..."
      $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub >/dev/null || true
      restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
      wait_for_deployment_rollout 300s "回滚后的 deployment" || true
      exit 1
    fi
    sleep 3
    if health_check_in_pod; then
      ok "Migration + startup OK"
    else
      err "DB 未通过健康检查,正在回滚..."
      # "|| true": a failing undo must not prevent the scaling restore below.
      $KUBECTL -n "$NAMESPACE" rollout undo deployment/claude-code-hub >/dev/null || true
      restore_update_scaling "$CURRENT_REPLICAS" "$MIN_REPLICAS"
      wait_for_deployment_rollout 300s "回滚后的 deployment" || true
      exit 1
    fi
    echo ""

    # Step 5: Scale back (never below the HPA minimum)
    local desired_replicas="$CURRENT_REPLICAS"
    if ((desired_replicas < MIN_REPLICAS)); then
      desired_replicas="$MIN_REPLICAS"
    fi
    info "Step 5/6: Scaling back to $desired_replicas replicas..."
    restore_update_scaling "$desired_replicas" "$MIN_REPLICAS"
    if ! wait_for_deployment_rollout 300s "恢复副本"; then
      err "副本恢复失败,当前 deployment 可能仍停留在单副本"
      exit 1
    fi
    ok "Running with $desired_replicas replicas"
    echo ""

    # Step 6: Final health check
    info "Step 6/6: Final health check..."
    sleep 3
    if health_check_in_pod; then
      ok "Upgrade complete"
    else
      warn "Upgrade done but health check failed. Check: cch logs"
    fi
    echo ""
    $KUBECTL -n "$NAMESPACE" get pods -o wide
  }
  # Show pods, HPA state and (when metrics are available) resource usage.
  cmd_status() {
    require_cluster
    local -a in_ns=(-n "$NAMESPACE")

    printf '%s\n' "${CYAN}Pods:${NC}"
    $KUBECTL "${in_ns[@]}" get pods -o wide
    printf '\n'

    printf '%s\n' "${CYAN}HPA:${NC}"
    $KUBECTL "${in_ns[@]}" get hpa 2>/dev/null || echo "(no HPA)"
    printf '\n'

    printf '%s\n' "${CYAN}Resources (top):${NC}"
    # "kubectl top" failing is treated as metrics-server not being ready.
    $KUBECTL "${in_ns[@]}" top pods 2>/dev/null || warn "metrics-server 未就绪,跳过 top"
  }
  # Print recent application logs.
  # Arguments: $1 - optional line count (digits only, default 100)
  #            remaining arguments are passed through to "kubectl logs"
  cmd_logs() {
    require_cluster
    local lines="100"
    case "${1:-}" in
      '' | *[!0-9]*)
        # No leading count given: every argument belongs to kubectl.
        ;;
      *)
        lines="$1"
        shift
        ;;
    esac
    $KUBECTL -n "$NAMESPACE" logs deploy/claude-code-hub --all-containers --tail="$lines" "$@"
  }
  # Stream application logs in real time, starting with the last 50 lines.
  cmd_follow() {
    require_cluster
    local -a follow_args=(logs -f deploy/claude-code-hub --all-containers --tail=50)
    $KUBECTL -n "$NAMESPACE" "${follow_args[@]}"
  }
  # Rolling restart of the deployment (image unchanged); waits up to 300s for
  # the rollout to finish.
  cmd_restart() {
    require_cluster
    info "Rolling restart..."
    local -a rollout=(-n "$NAMESPACE" rollout)
    local target="deployment/claude-code-hub"
    $KUBECTL "${rollout[@]}" restart "$target"
    $KUBECTL "${rollout[@]}" status "$target" --timeout=300s
    ok "Restart complete"
  }
  # Roll the deployment back to its previous revision; waits up to 300s for
  # the rollout to finish.
  cmd_rollback() {
    require_cluster
    warn "Rolling back to previous revision..."
    local -a rollout=(-n "$NAMESPACE" rollout)
    local target="deployment/claude-code-hub"
    $KUBECTL "${rollout[@]}" undo "$target"
    $KUBECTL "${rollout[@]}" status "$target" --timeout=300s
    ok "Rollback complete"
  }
  # Scale the application deployment to a fixed number of replicas.
  # Arguments: $1 - desired replica count (positive integer)
  # The argument is validated before the cluster is contacted, and normalized
  # to base 10 so values with leading zeros (e.g. 08) are not parsed as invalid
  # octal numbers by the arithmetic comparisons below.
  # When an HPA exists, a count outside its [minReplicas, maxReplicas] range is
  # rejected.
  # Exits: 1 on invalid input or when the HPA bounds forbid the request
  cmd_scale() {
    local requested="${1:-}"
    if [[ -z "$requested" ]]; then
      err "Usage: cch scale <replicas>"
      exit 1
    fi
    if ! [[ "$requested" =~ ^[0-9]+$ ]] || ((10#$requested < 1)); then
      err "replicas 必须是正整数: $requested"
      exit 1
    fi
    local N=$((10#$requested))

    require_cluster

    if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
      local hpa_min hpa_max
      hpa_min=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.spec.minReplicas}' 2>/dev/null || echo "")
      hpa_max=$($KUBECTL -n "$NAMESPACE" get hpa claude-code-hub -o jsonpath='{.spec.maxReplicas}' 2>/dev/null || echo "")
      # Unreadable bounds fall back to "min 1" and "no upper limit" (0).
      [[ "$hpa_min" =~ ^[0-9]+$ ]] || hpa_min=1
      [[ "$hpa_max" =~ ^[0-9]+$ ]] || hpa_max=0
      if ((N < 10#$hpa_min)); then
        err "HPA minReplicas=$hpa_min 阻止缩到 $N。请先调整 HPA 或重新运行 deploy-k8s.sh 传入匹配的 --hpa-min"
        exit 1
      fi
      if ((10#$hpa_max > 0 && N > 10#$hpa_max)); then
        err "HPA maxReplicas=$hpa_max 阻止扩到 $N。请先调整 HPA 或重新运行 deploy-k8s.sh 传入匹配的 --hpa-max"
        exit 1
      fi
    fi

    $KUBECTL -n "$NAMESPACE" scale deployment/claude-code-hub --replicas="$N"
    info "Scaled to $N replicas"
  }
  # Dump the PostgreSQL database from the postgres StatefulSet into a gzip file
  # and prune old backups.
  # Globals: CCH_BACKUP_DIR  - target directory (default ~/backups/claude-code-hub)
  #          CCH_BACKUP_KEEP - number of backups to keep (default 30)
  # The dump is first written to "<file>.partial" and renamed only after the
  # whole pipeline succeeded, so an interrupted or failed run never leaves a
  # truncated file that looks like a valid backup.
  # Returns: 0 on success, 1 when the directory or the dump could not be created
  cmd_backup() {
    require_cluster
    local backup_dir="${CCH_BACKUP_DIR:-$HOME/backups/claude-code-hub}"
    if ! mkdir -p "$backup_dir"; then
      err "Cannot create backup directory: $backup_dir"
      return 1
    fi

    local ts file partial
    ts=$(date +%Y%m%d_%H%M%S)
    file="$backup_dir/claude_code_hub_${ts}.sql.gz"
    partial="$file.partial"
    info "Backing up PostgreSQL -> $file"
    # The subshell enables pipefail locally so a failing pg_dump is detected
    # even when the caller's shell options differ.
    if ! (
      set -o pipefail
      $KUBECTL -n "$NAMESPACE" exec sts/postgres -- \
        pg_dump -U claude_code_hub -d claude_code_hub --no-owner --no-privileges \
        | gzip > "$partial"
    ); then
      err "备份失败"
      rm -f "$partial"
      return 1
    fi
    if ! mv -f "$partial" "$file"; then
      err "备份失败"
      rm -f "$partial"
      return 1
    fi

    local size
    size=$(du -h "$file" 2>/dev/null | cut -f1) || size=""
    ok "Backup complete: $file ($size)"

    # Keep only the newest $keep backups (BSD xargs has no -r, hence the
    # while-read loop). 10# forces base 10 so "08" is not read as octal.
    local keep="${CCH_BACKUP_KEEP:-30}"
    if ! [[ "$keep" =~ ^[0-9]+$ ]] || ((10#$keep < 1)); then
      warn "CCH_BACKUP_KEEP 必须是正整数,当前值: $keep; 使用默认值 30"
      keep=30
    fi
    keep=$((10#$keep))
    # shellcheck disable=SC2012
    ls -t "$backup_dir"/claude_code_hub_*.sql.gz 2>/dev/null \
      | tail -n +"$((keep + 1))" \
      | while IFS= read -r old; do rm -f "$old"; done
    # The backup itself succeeded; pruning problems do not change the result.
    return 0
  }
  # Print the environment variables of the first container in the deployment
  # as JSON; pretty-printed through python3 when it is installed.
  cmd_env() {
    require_cluster
    local -a env_query=(
      -n "$NAMESPACE" get deployment claude-code-hub
      -o jsonpath='{.spec.template.spec.containers[0].env}'
    )
    if ! command -v python3 &>/dev/null; then
      # Raw output has no trailing newline, so add one.
      $KUBECTL "${env_query[@]}"
      echo ""
      return
    fi
    $KUBECTL "${env_query[@]}" | python3 -m json.tool
  }
  # Print the decoded value of one key of the claude-code-hub-secrets Secret.
  # Arguments: $1 - key name inside .data (default: admin-token)
  # kubectl JSONPath treats '.' as a path separator, so dots inside the key
  # (e.g. tls.crt) are escaped as '\.' to address the key literally.
  cmd_secret() {
    require_cluster
    local KEY="${1:-admin-token}"
    local escaped_key="${KEY//./\\.}"
    $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
      -o jsonpath="{.data.$escaped_key}" | b64d
    # The decoded value carries no trailing newline of its own.
    echo ""
  }
  # Open an interactive sh session in a pod of the application deployment.
  cmd_shell() {
    require_cluster
    local -a exec_args=(exec -it deploy/claude-code-hub -- sh)
    $KUBECTL -n "$NAMESPACE" "${exec_args[@]}"
  }
  # Open an interactive psql session inside the postgres StatefulSet.
  cmd_dbshell() {
    require_cluster
    local -a psql_cmd=(psql -U claude_code_hub -d claude_code_hub)
    $KUBECTL -n "$NAMESPACE" exec -it sts/postgres -- "${psql_cmd[@]}"
  }
  # Show a summary of the installation: namespace, runtime, desired and running
  # image, access URL, admin token and the main Kubernetes resources.
  # Each lookup is allowed to fail on its own: under "set -e"/pipefail a failed
  # command substitution would otherwise end the script silently (stderr is
  # suppressed), e.g. when the secret is missing or no pod exists yet.
  # Values are printed verbatim with printf so backslashes in the token are not
  # interpreted as escape sequences.
  cmd_info() {
    require_cluster
    local url token img digest
    url=$(resolve_access_url) || url=""
    token=$($KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets \
      -o jsonpath='{.data.admin-token}' 2>/dev/null | b64d) || token=""
    img=$($KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
      -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null) || img=""
    digest=$($KUBECTL -n "$NAMESPACE" get pods -l app=claude-code-hub \
      -o jsonpath='{.items[0].status.containerStatuses[0].imageID}' 2>/dev/null) || digest=""

    printf '%s\n' "${CYAN}Namespace:${NC} $NAMESPACE"
    printf '%s\n' "${CYAN}Runtime:${NC} $RUNTIME"
    printf '%s\n' "${CYAN}Image (desired):${NC} ${img:-(unavailable)}"
    printf '%s\n' "${CYAN}Image (running):${NC} ${digest:-(unavailable)}"
    if [[ -n "$url" ]]; then
      printf '%s\n' "${CYAN}Access URL:${NC} $url"
    else
      printf '%s\n' "${CYAN}Access URL:${NC} (no ingress/nodeport detected; use 'kubectl port-forward')"
    fi
    printf '%s\n' "${CYAN}Admin token:${NC} ${YELLOW}${token:-(unavailable)}${NC}"
    echo ""
    # Some resource kinds may be absent in the cluster; that is not an error.
    $KUBECTL -n "$NAMESPACE" get deployment,statefulset,hpa,svc,ingress 2>/dev/null || true
  }
  # Print the CLI version together with the effective configuration values.
  cmd_version() {
    printf '%s\n' \
      "cch v${VERSION}" \
      " runtime : ${RUNTIME:-(not detected)}" \
      " namespace : $NAMESPACE" \
      " image : $IMAGE" \
      " deploy-dir : ${DEPLOY_DIR:-(not found)}" \
      " config-file : $CCH_CONFIG_FILE"
  }
  # Run a series of diagnostics (runtime, kubectl, cluster, namespace, secret,
  # StatefulSets, app deployment, HPA, ingress, storage class, in-pod health)
  # and print a pass/warn/fail summary.
  # A StatefulSet counts as ready when its readyReplicas equals its (non-zero)
  # spec.replicas, so multi-replica sets are judged correctly too.
  # Stops early with a summary when the cluster cannot be reached.
  cmd_doctor() {
    echo -e "${CYAN}cch doctor${NC}"
    local pass=0 fail=0 warn_n=0
    # The helpers update the counters above through Bash dynamic scoping.
    check_pass() { ok "$1"; pass=$((pass + 1)); }
    check_warn() { warn "$1"; warn_n=$((warn_n + 1)); }
    check_fail() { err "$1"; fail=$((fail + 1)); }

    if detect_runtime; then
      check_pass "Runtime detected (runtime=$RUNTIME, kubectl=$KUBECTL)"
    else
      check_warn "运行时探测失败,将继续尝试默认 kubectl 诊断"
    fi

    # kubectl
    if command -v kubectl &>/dev/null; then
      check_pass "kubectl installed: $(kubectl version --client -o yaml 2>/dev/null | awk '/gitVersion/{print $2; exit}')"
    elif [[ "$RUNTIME" == "k3s" ]] && command -v k3s &>/dev/null; then
      check_warn "kubectl 未安装,将使用 sudo k3s kubectl"
    else
      check_fail "kubectl 未安装"
    fi

    # Cluster reachable
    if $KUBECTL cluster-info &>/dev/null; then
      check_pass "Cluster reachable (runtime=$RUNTIME)"
    else
      check_fail "无法连接集群。请检查 kubeconfig / context"
      echo "Summary: $pass passed, $warn_n warnings, $fail failures"
      return
    fi

    # Namespace
    if $KUBECTL get ns "$NAMESPACE" &>/dev/null; then
      check_pass "Namespace $NAMESPACE exists"
    else
      check_fail "Namespace $NAMESPACE 不存在 — 请先运行 deploy-k8s.sh"
    fi

    # Secret
    if $KUBECTL -n "$NAMESPACE" get secret claude-code-hub-secrets &>/dev/null; then
      check_pass "Secret claude-code-hub-secrets 存在"
    else
      check_fail "Secret 缺失"
    fi

    # Postgres / Redis: compare ready replicas with the desired replica count.
    local comp sts_state sts_ready sts_want
    for comp in postgres redis; do
      sts_state=$($KUBECTL -n "$NAMESPACE" get sts "$comp" \
        -o jsonpath='{.status.readyReplicas}/{.spec.replicas}' 2>/dev/null || echo "")
      sts_ready="${sts_state%%/*}"
      sts_want=""
      if [[ "$sts_state" == */* ]]; then
        sts_want="${sts_state#*/}"
      fi
      if [[ "$sts_want" =~ ^[1-9][0-9]*$ ]] && [[ "$sts_ready" == "$sts_want" ]]; then
        check_pass "$comp StatefulSet ready"
      else
        check_fail "$comp 未就绪 (ready=${sts_ready:-0}/${sts_want:-?})"
      fi
    done

    # App
    local app_ready
    app_ready=$($KUBECTL -n "$NAMESPACE" get deployment claude-code-hub \
      -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")
    # readyReplicas is absent (empty output) while no pod is ready.
    if [[ "$app_ready" =~ ^[0-9]+$ ]] && ((10#$app_ready > 0)); then
      check_pass "App ready replicas: $app_ready"
    else
      check_fail "App 没有就绪 Pod"
    fi

    # HPA
    if $KUBECTL -n "$NAMESPACE" get hpa claude-code-hub &>/dev/null; then
      check_pass "HPA configured"
    else
      check_warn "HPA 不存在 (非必需)"
    fi

    # Ingress (standard Ingress or an IngressRoute resource)
    if $KUBECTL -n "$NAMESPACE" get ingress claude-code-hub &>/dev/null \
      || $KUBECTL -n "$NAMESPACE" get ingressroute claude-code-hub &>/dev/null; then
      check_pass "Ingress resource present"
    else
      check_warn "未检测到 Ingress (如使用 NodePort 可忽略)"
    fi

    # StorageClass
    if [[ "$RUNTIME" == "k3s" ]] && $KUBECTL get sc local-path &>/dev/null; then
      check_pass "StorageClass local-path (k3s default)"
    fi

    # In-pod health
    if health_check_in_pod &>/dev/null; then
      check_pass "In-pod health check"
    else
      check_warn "In-pod health check failed (服务可能启动中)"
    fi

    echo ""
    echo "Summary: ${GREEN}$pass passed${NC}, ${YELLOW}$warn_n warnings${NC}, ${RED}$fail failures${NC}"
  }
  # Shortcut that hands over to deploy-k8s.sh located next to this script,
  # forwarding all arguments. Only useful when the script is run from a
  # checkout that contains the installer.
  # Exits: 1 when the installer is missing or not executable
  cmd_install() {
    local installer="$SCRIPT_DIR/deploy-k8s.sh"
    if [[ ! -x "$installer" ]]; then
      err "scripts/deploy-k8s.sh 未找到。请在仓库内运行或手动调用"
      exit 1
    fi
    info "转交给 scripts/deploy-k8s.sh (推荐使用完整的安装流程)"
    # exec replaces the current process, so nothing after this line runs.
    exec bash "$installer" "$@"
  }
  # Delete the whole namespace (pods, PVCs with the database data, secrets).
  # Interactive runs must type the namespace name to confirm (120s timeout).
  # Non-interactive runs must authorize explicitly by setting
  # CCH_CONFIRM_UNINSTALL=<namespace>, to avoid accidental deletion.
  # The manifest cleanup hint is only printed when a manifest directory was
  # actually resolved, so no "rm -rf" with an empty path is ever suggested.
  # Exits: 1 when confirmation is missing in non-interactive mode or the
  #        delete fails, 0 when the user cancels
  cmd_uninstall() {
    detect_runtime
    local input=""
    if [[ ! -t 0 ]]; then
      if [[ "${CCH_CONFIRM_UNINSTALL:-}" != "$NAMESPACE" ]]; then
        err "非交互环境检测到。要 uninstall 必须显式设置:"
        err " CCH_CONFIRM_UNINSTALL=$NAMESPACE cch uninstall"
        exit 1
      fi
      info "已通过 CCH_CONFIRM_UNINSTALL 授权,继续卸载"
    else
      echo -e "${RED}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!${NC}"
      echo -e "${RED} 即将删除 namespace: $NAMESPACE${NC}"
      echo -e "${RED} 这会永久删除所有 Pod、PVC(数据库数据)、Secret${NC}"
      echo -e "${RED}!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!${NC}"
      if ! read -r -t 120 -p "确认?输入 namespace 名称 '$NAMESPACE' 继续: " input; then
        echo ""
        info "超时,已取消"
        exit 0
      fi
      if [[ "$input" != "$NAMESPACE" ]]; then
        info "已取消"
        exit 0
      fi
    fi

    if ! $KUBECTL delete namespace "$NAMESPACE" --timeout=180s; then
      err "Failed to delete namespace $NAMESPACE"
      exit 1
    fi
    ok "Namespace $NAMESPACE 已删除"
    if [[ -n "${DEPLOY_DIR:-}" ]]; then
      info "manifest 目录 ($DEPLOY_DIR) 未删除。如需清理: rm -rf $DEPLOY_DIR"
    fi
  }
  ###############################################################################
  # Help
  ###############################################################################
  # Print the usage text. The here-document is unquoted so ${VERSION} expands;
  # its lines and terminator must stay at column 0.
  cmd_help() {
    cat <<HELP_TEXT
cch - Claude Code Hub Kubernetes Management CLI v${VERSION}
Usage: cch <command> [args]
Lifecycle:
update Pull latest image, auto-migrate DB, rolling deploy (带回滚)
restart Rolling restart (no image change)
rollback Rollback to previous deployment revision
scale <n> Scale app to n replicas
Install / Teardown:
install [opts] 调用 scripts/deploy-k8s.sh (透传参数)
uninstall Delete namespace + PVCs (破坏性,带二次确认)
Observe:
status Show pods, HPA, resource usage
logs [n] [args] Show last n log lines (default 100), or pass through kubectl log flags
follow Tail logs in real-time
env Show app environment variables (JSON)
info 展示访问 URL、Admin Token、镜像 digest
doctor 诊断 (kubectl / 集群 / 资源 / 健康)
version 显示版本与当前配置
Data:
backup Backup PostgreSQL (gzip, 保留最近 30 份)
secret [key] Show secret value (default: admin-token)
dbshell Open psql shell
shell Open sh in app pod
Config:
环境变量(或 ~/.config/cch/config):
CCH_NAMESPACE K8s namespace (default: claude-code-hub)
CCH_IMAGE 应用镜像 (default: ghcr.io/ding113/claude-code-hub:latest)
CCH_DEPLOY_DIR manifest 目录 (default: 自动查找)
CCH_RUNTIME 覆盖 runtime: k3s | kubectl
CCH_INGRESS_HOST Ingress 域名,用于访问 URL 解析
CCH_BACKUP_DIR 备份目录 (default: ~/backups/claude-code-hub)
CCH_BACKUP_KEEP 保留数量 (default: 30)
NO_COLOR 禁用彩色输出
Examples:
cch status
cch logs 500
cch update
CCH_NAMESPACE=staging cch status
cch install -y # 一键部署
cch backup
cch info
cch doctor
HELP_TEXT
  }
  ###############################################################################
  # Dispatch
  ###############################################################################
  # Route the first CLI argument to the matching cmd_* function.
  # With no argument the help text is shown. An unknown command is reported on
  # stderr together with the help text and ends with exit status 1, so a
  # mistyped command in automation does not pass silently as success.
  main() {
    local subcommand="${1:-help}"
    if (($# > 0)); then
      shift
    fi
    case "$subcommand" in
      update) cmd_update "$@" ;;
      status) cmd_status "$@" ;;
      logs) cmd_logs "$@" ;;
      follow) cmd_follow ;;
      restart) cmd_restart ;;
      rollback) cmd_rollback ;;
      backup) cmd_backup ;;
      scale) cmd_scale "$@" ;;
      env) cmd_env ;;
      secret) cmd_secret "$@" ;;
      shell) cmd_shell ;;
      dbshell) cmd_dbshell ;;
      info) cmd_info ;;
      version | --version | -v) cmd_version ;;
      doctor) cmd_doctor ;;
      install) cmd_install "$@" ;;
      uninstall) cmd_uninstall ;;
      help | --help | -h) cmd_help ;;
      *)
        err "Unknown command: $subcommand"
        cmd_help >&2
        exit 1
        ;;
    esac
  }

  main "$@"