#!/usr/bin/env bash
# deploy_crm_only.sh — deploy sub2api-cn-relay-manager (the CRM control plane)
# as a single process on remote43. Does not start the sub2api host, PG, or
# Redis containers.
#
# Reuses the env-rendering helpers from
# scripts/deploy/remote43_patched_stack_lib.sh.
#
# Required settings (environment or scripts/deploy/.env.deploy):
#   KEY     path to the SSH private key
#   REMOTE  ssh destination of the target host
# Optional: STACK_NAME, CRM_PORT, CRM_BINARY, LOCAL_*, REMOTE_ROOT,
# REMOTE_REPO_ROOT, crm_admin_token/username/password, DRY_RUN=1.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
DEPLOY_ENV_FILE="${DEPLOY_ENV_FILE:-$ROOT_DIR/scripts/deploy/.env.deploy}"
if [[ -f "$DEPLOY_ENV_FILE" ]]; then
  # Export everything the env file assigns.
  set -a
  # shellcheck disable=SC1090
  source "$DEPLOY_ENV_FILE"
  set +a
fi

# shellcheck disable=SC1091
source "$ROOT_DIR/scripts/deploy/remote43_patched_stack_lib.sh"

KEY="${KEY:-}"
REMOTE="${REMOTE:-}"
STACK_NAME="${STACK_NAME:-crm-only-$(date +%Y%m%d)}"
CRM_PORT="${CRM_PORT:-18190}"
CRM_BINARY="${CRM_BINARY:-$ROOT_DIR/server}"
LOCAL_REPO_BUNDLE="${LOCAL_REPO_BUNDLE:-/tmp/${STACK_NAME}-repo.bundle}"
LOCAL_OPERATOR_ENV_FILE="${LOCAL_OPERATOR_ENV_FILE:-/tmp/${STACK_NAME}.env}"
LOCAL_TUNNEL_SCRIPT="${LOCAL_TUNNEL_SCRIPT:-/tmp/${STACK_NAME}.tunnel.sh}"
LOCAL_DEPLOY_DIR="${LOCAL_DEPLOY_DIR:-/tmp/${STACK_NAME}-stage}"
REMOTE_ROOT="${REMOTE_ROOT:-/home/ubuntu/${STACK_NAME}_${CRM_PORT}}"
REMOTE_REPO_ROOT="${REMOTE_REPO_ROOT:-/home/ubuntu/sub2api-cn-relay-manager-git-current}"
REMOTE_REPO_BUNDLE="$REMOTE_ROOT/sub2api-cn-relay-manager.bundle"
REMOTE_CRM_ENV_FILE="$REMOTE_ROOT/.env.crm"
REMOTE_BOOTSTRAP_FILE="$REMOTE_ROOT/bootstrap.sh"
REMOTE_CRM_BINARY="$REMOTE_ROOT/sub2api-cn-relay-manager-server"
REMOTE_CRM_DB_FILE="$REMOTE_ROOT/sub2api-cn-relay-manager.db"
REMOTE_CRM_PID_FILE="$REMOTE_ROOT/crm.pid"
REMOTE_CRM_LOG_FILE="$REMOTE_ROOT/crm.log"
crm_admin_token="${crm_admin_token:-$(remote43_random_hex 24)}"
crm_admin_username="${crm_admin_username:-admin}"
crm_admin_password="${crm_admin_password:-$crm_admin_token}"
DRY_RUN="${DRY_RUN:-0}"

# Print a message to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort unless every command named in the arguments is available on PATH.
# Arguments: one or more command names.
require_cmd() {
  local cmd
  for cmd in "$@"; do
    command -v "$cmd" >/dev/null 2>&1 || die "missing command: $cmd"
  done
}

# Run the given command, or only print it (shell-quoted) when DRY_RUN=1.
run_cmd() {
  if [[ "$DRY_RUN" == "1" ]]; then
    printf "DRY_RUN:"
    printf " %q" "$@"
    printf "\n"
    return 0
  fi
  "$@"
}

# Run a command string on $REMOTE over ssh, authenticating with $KEY.
# Host-key checking is disabled, as in the original script.
ssh_remote() {
  run_cmd ssh -i "$KEY" -o StrictHostKeyChecking=no "$REMOTE" "$@"
}

# scp wrapper using the same key and host-key policy as ssh_remote.
scp_remote() {
  run_cmd scp -i "$KEY" -o StrictHostKeyChecking=no "$@"
}

# Write a helper script to $LOCAL_TUNNEL_SCRIPT.
# NOTE(review): only the first line of this function survived extraction of
# this file (`cat > "$LOCAL_TUNNEL_SCRIPT" <<...`); the heredoc body was lost.
# The content below is a minimal reconstruction (an SSH local port-forward to
# the CRM port) — restore the real body from version control.
write_local_tunnel_script() {
  {
    printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail'
    printf 'exec ssh -i %q -o StrictHostKeyChecking=no -N -L %q:127.0.0.1:%q %q\n' \
      "$KEY" "$CRM_PORT" "$CRM_PORT" "$REMOTE"
  } > "$LOCAL_TUNNEL_SCRIPT"
  chmod +x "$LOCAL_TUNNEL_SCRIPT"
}

# Write the operator-facing env file to $LOCAL_OPERATOR_ENV_FILE.
# NOTE(review): this function is called by main but its definition was lost
# entirely when this file was extracted. The key names below are guesses —
# restore the real body from version control before relying on this file.
write_operator_env() {
  (
    # The file carries admin credentials; keep a newly created file private.
    umask 077
    {
      printf 'CRM_BASE=%q\n' "http://127.0.0.1:${CRM_PORT}"
      printf 'CRM_ADMIN_TOKEN=%q\n' "$crm_admin_token"
      printf 'CRM_ADMIN_USERNAME=%q\n' "$crm_admin_username"
      printf 'CRM_ADMIN_PASSWORD=%q\n' "$crm_admin_password"
    } > "$LOCAL_OPERATOR_ENV_FILE"
  )
}

# Print the remote bootstrap script to stdout.
#
# The script starts with `export` lines carrying shell-quoted deployment
# values, followed by a static body that syncs the repo bundle, stops any
# running CRM, starts the new binary and verifies it.
#
# The exports are emitted directly instead of substituting __PLACEHOLDER__
# tokens with `sed -i`, which is not portable to BSD sed and breaks on values
# containing `&` or `|`.
# NOTE(review): the head of this function was lost in extraction; the mapping
# of each export to its REMOTE_* variable is inferred from the placeholder
# names and from the paths main uploads to — confirm.
render_crm_only_bootstrap() {
  printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail' ''
  printf 'export REMOTE_ROOT=%q\n' "$REMOTE_ROOT"
  printf 'export CRM_ENV_FILE=%q\n' "$REMOTE_CRM_ENV_FILE"
  printf 'export CRM_BINARY=%q\n' "$REMOTE_CRM_BINARY"
  printf 'export CRM_DB_FILE=%q\n' "$REMOTE_CRM_DB_FILE"
  printf 'export CRM_PID_FILE=%q\n' "$REMOTE_CRM_PID_FILE"
  printf 'export CRM_LOG_FILE=%q\n' "$REMOTE_CRM_LOG_FILE"
  printf 'export REMOTE_REPO_ROOT=%q\n' "$REMOTE_REPO_ROOT"
  printf 'export REMOTE_REPO_BUNDLE=%q\n' "$REMOTE_REPO_BUNDLE"
  printf 'export CRM_PORT=%q\n' "$CRM_PORT"
  printf '\n'
  cat <<'BOOTSTRAP_EOF'
mkdir -p "$REMOTE_ROOT" "$(dirname "$REMOTE_REPO_ROOT")"
chmod 755 "$CRM_BINARY"

if [[ -f "$REMOTE_REPO_BUNDLE" ]]; then
  if [[ -d "$REMOTE_REPO_ROOT/.git" ]]; then
    git -C "$REMOTE_REPO_ROOT" fetch "$REMOTE_REPO_BUNDLE" main
    git -C "$REMOTE_REPO_ROOT" reset --hard FETCH_HEAD
  else
    rm -rf "$REMOTE_REPO_ROOT"
    git clone "$REMOTE_REPO_BUNDLE" "$REMOTE_REPO_ROOT"
    git -C "$REMOTE_REPO_ROOT" checkout main
  fi
  git -C "$REMOTE_REPO_ROOT" config user.name "Remote43 CRM"
  git -C "$REMOTE_REPO_ROOT" config user.email "remote43-crm@tksea.top"
fi

# 非破坏性热更新:先确认旧进程退出,再启动新进程
# 禁止删除DB:生产数据必须保留

# 改进的停止逻辑:不仅按 PID 文件,还按进程名和端口清理
echo "Stopping any existing CRM processes..."

# 1. 按 PID 文件停止(如果存在)
if [[ -f "$CRM_PID_FILE" ]]; then
  OLD_PID="$(cat "$CRM_PID_FILE")"
  if kill -0 "$OLD_PID" >/dev/null 2>&1; then
    echo "Stopping PID from pidfile: $OLD_PID"
    kill "$OLD_PID" >/dev/null 2>&1 || true
    for i in {1..20}; do
      if ! kill -0 "$OLD_PID" >/dev/null 2>&1; then break; fi
      sleep 0.5
    done
    if kill -0 "$OLD_PID" >/dev/null 2>&1; then
      kill -9 "$OLD_PID" >/dev/null 2>&1 || true
      sleep 1
    fi
  fi
  rm -f "$CRM_PID_FILE"
fi

# 2. 按进程名停止任何残留的 CRM 进程
for pattern in 'sub2api.*crm' 'sub2api.*relay-manager'; do
  for pid in $(pgrep -f "$pattern" 2>/dev/null); do
    # pgrep -f matches full command lines, so it can return this script or
    # the shell that launched it (e.g. when REMOTE_ROOT contains "sub2api").
    # Never kill ourselves.
    if [[ "$pid" == "$$" || "$pid" == "$PPID" ]]; then
      continue
    fi
    echo "Stopping process by pattern ($pattern): $pid"
    kill "$pid" 2>/dev/null || true
    sleep 0.5
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid" 2>/dev/null || true
    fi
  done
done

# 3. 强制释放端口(如有必要)
if command -v fuser >/dev/null 2>&1; then
  fuser -k "${CRM_PORT}/tcp" 2>/dev/null || true
fi

# 清理日志但不碰DB
rm -f "$CRM_LOG_FILE"

# 验证端口未被占用
for i in {1..10}; do
  if ! ss -tlnp 2>/dev/null | grep -q ":$CRM_PORT " && \
     ! netstat -tlnp 2>/dev/null | grep -q ":$CRM_PORT "; then
    break
  fi
  echo "Waiting for port $CRM_PORT to be released... (attempt $i/10)"
  sleep 1
done

if ss -tlnp 2>/dev/null | grep -q ":$CRM_PORT " || netstat -tlnp 2>/dev/null | grep -q ":$CRM_PORT "; then
  echo "ERROR: Port $CRM_PORT is still in use after cleanup. Cannot start new CRM." >&2
  ss -tlnp 2>/dev/null | grep ":$CRM_PORT " || netstat -tlnp 2>/dev/null | grep ":$CRM_PORT "
  exit 1
fi

echo "Port $CRM_PORT is free. Starting new CRM..."

# 使用更可靠的方式启动(优先systemd,回退nohup)
if command -v systemctl >/dev/null 2>&1 && [[ -f /etc/systemd/system/sub2api-crm.service ]]; then
  systemctl restart sub2api-crm || exit 1
  # The pid file was removed above and the checks below read it, so record
  # the service's main PID here as well.
  MAIN_PID_LINE="$(systemctl show -p MainPID sub2api-crm)"
  echo "${MAIN_PID_LINE#MainPID=}" > "$CRM_PID_FILE"
else
  nohup bash -lc 'set -a; source "$CRM_ENV_FILE"; set +a; exec "$CRM_BINARY"' >"$CRM_LOG_FILE" 2>&1 &
  echo $! > "$CRM_PID_FILE"
fi

python3 - "$CRM_PORT" "$CRM_PID_FILE" <<'PY'
import subprocess, sys, time, os

port = sys.argv[1]
pid_file = sys.argv[2]

# 1. 等待 healthz
healthz_url = f"http://127.0.0.1:{port}/healthz"
for i in range(30):
    r = subprocess.run(["curl", "-fsS", healthz_url], text=True, capture_output=True)
    if r.returncode == 0 and r.stdout.strip() == "ok":
        print(f"Health check passed on attempt {i+1}")
        break
    time.sleep(1)
else:
    raise SystemExit(f"crm healthz did not become ready on {healthz_url}")

# 2. 验证二进制不是 deleted 状态
with open(pid_file) as f:
    pid = f.read().strip()
exe_link = f"/proc/{pid}/exe"
if os.path.islink(exe_link):
    target = os.readlink(exe_link)
    if "deleted" in target:
        raise SystemExit(f"ERROR: Binary shows (deleted): {target}")
    print(f"Binary OK: {target}")

# 3. 验证 portal session 路由(新版本应有此路由)
session_url = f"http://127.0.0.1:{port}/api/portal/session/state"
r = subprocess.run(["curl", "-fsS", session_url], text=True, capture_output=True)
if r.returncode == 0:
    print(f"Portal session route OK: {r.stdout.strip()}")
elif r.returncode == 22 and "404" in r.stderr:
    raise SystemExit(f"ERROR: Portal session route returns 404 - may be running old version")
else:
    print(f"Warning: Portal session route check failed: {r.stderr}")

raise SystemExit(0)
PY

# 部署验证完成
echo "=== Deployment Verification ==="
NEW_PID=$(cat "$CRM_PID_FILE")
echo "New CRM PID: $NEW_PID"
ls -la "/proc/$NEW_PID/exe" 2>/dev/null | grep -v deleted && echo "Binary state: OK (not deleted)" || echo "WARNING: Binary may be deleted"

printf "crm_base=http://127.0.0.1:%s\n" "$CRM_PORT"
printf "crm_pid_file=%s\n" "$CRM_PID_FILE"
printf "crm_log=%s\n" "$CRM_LOG_FILE"
printf "remote_repo_root=%s\n" "$REMOTE_REPO_ROOT"
BOOTSTRAP_EOF
}

# Entry point: validate prerequisites, build the local artifacts (repo
# bundle, env files, bootstrap script), stop the old CRM on the remote,
# upload everything, run the bootstrap once, then print a verification
# report. The report is informational and does not change the exit status.
main() {
  require_cmd bash curl git python3 ssh scp
  [[ -n "$KEY" ]] || die "KEY is required; copy scripts/deploy/.env.deploy.example to scripts/deploy/.env.deploy and fill it"
  [[ -n "$REMOTE" ]] || die "REMOTE is required; copy scripts/deploy/.env.deploy.example to scripts/deploy/.env.deploy and fill it"
  remote43_require_file "$KEY" "ssh key"
  remote43_require_file "$CRM_BINARY" "crm server binary"

  rm -f "$LOCAL_REPO_BUNDLE"
  git -C "$ROOT_DIR" bundle create "$LOCAL_REPO_BUNDLE" main

  write_local_tunnel_script
  write_operator_env

  local crm_env_file bootstrap_file
  crm_env_file="$(mktemp)"
  bootstrap_file="$(mktemp)"
  # Expand the paths now: these are locals and are out of scope when the
  # EXIT trap finally fires.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q ' "$crm_env_file" "$bootstrap_file")" EXIT

  render_remote43_crm_env \
    "$CRM_PORT" \
    "file:${REMOTE_CRM_DB_FILE}?_foreign_keys=on&_busy_timeout=5000" \
    "$crm_admin_token" \
    "$REMOTE_REPO_ROOT" \
    "$crm_admin_username" \
    "$crm_admin_password" > "$crm_env_file"
  render_crm_only_bootstrap > "$bootstrap_file"
  chmod +x "$bootstrap_file"

  mkdir -p "$LOCAL_DEPLOY_DIR"
  cp "$crm_env_file" "$LOCAL_DEPLOY_DIR/.env.crm"
  cp "$bootstrap_file" "$LOCAL_DEPLOY_DIR/bootstrap.sh"

  # Shell-quoted values interpolated into the remote pre-clean command.
  local root_q pid_file_q log_file_q binary_q port_q port_pat_q
  root_q="$(printf '%q' "$REMOTE_ROOT")"
  pid_file_q="$(printf '%q' "$REMOTE_CRM_PID_FILE")"
  log_file_q="$(printf '%q' "$REMOTE_CRM_LOG_FILE")"
  binary_q="$(printf '%q' "$REMOTE_CRM_BINARY")"
  port_q="$(printf '%q' "$CRM_PORT")"
  port_pat_q="$(printf '%q' ":$CRM_PORT")"

  # Stop the old CRM and remove the old binary before uploading the new one.
  # The remote shell's own command line contains the pgrep patterns below,
  # so the kill loop must skip \$\$ and \$PPID or it terminates itself.
  ssh_remote "mkdir -p ${root_q}
# 改进的停止逻辑:不仅按 PID 文件,还按进程名和端口清理
echo 'Stopping any existing CRM processes...'
# 1. 按 PID 文件停止(如果存在)
if [[ -f ${pid_file_q} ]]; then
  OLDPID=\$(cat ${pid_file_q})
  if kill -0 \$OLDPID 2>/dev/null; then
    echo \"Stopping PID from pidfile: \$OLDPID\"
    kill \$OLDPID 2>/dev/null || true
    for i in {1..20}; do
      if ! kill -0 \$OLDPID 2>/dev/null; then break; fi
      sleep 0.5
    done
    if kill -0 \$OLDPID 2>/dev/null; then kill -9 \$OLDPID 2>/dev/null || true; sleep 1; fi
  fi
  rm -f ${pid_file_q}
fi
# 2. 按进程名停止任何残留的 CRM 进程
for pattern in 'sub2api.*crm' 'sub2api.*relay-manager'; do
  for pid in \$(pgrep -f \"\$pattern\" 2>/dev/null); do
    if [[ \$pid == \$\$ || \$pid == \$PPID ]]; then continue; fi
    echo \"Stopping process by pattern (\$pattern): \$pid\"
    kill \$pid 2>/dev/null || true
    sleep 0.5
    if kill -0 \$pid 2>/dev/null; then kill -9 \$pid 2>/dev/null || true; fi
  done
done
# 3. 强制释放端口
fuser -k ${port_q}/tcp 2>/dev/null || true
# 4. 验证端口释放
for i in {1..5}; do
  if ! ss -tlnp 2>/dev/null | grep -q '${port_pat_q}' && \\
     ! netstat -tlnp 2>/dev/null | grep -q '${port_pat_q}'; then
    break
  fi
  echo \"Waiting for port release... (\$i/5)\"
  sleep 1
done
# 禁止删除DB:rm -f DB_FILE 已被移除
rm -f ${log_file_q} ${binary_q}"

  scp_remote "$CRM_BINARY" "$REMOTE:$REMOTE_CRM_BINARY"
  scp_remote "$LOCAL_REPO_BUNDLE" "$REMOTE:$REMOTE_REPO_BUNDLE"
  scp_remote "$crm_env_file" "$REMOTE:$REMOTE_CRM_ENV_FILE"
  scp_remote "$bootstrap_file" "$REMOTE:$REMOTE_BOOTSTRAP_FILE"

  # Run the bootstrap exactly once; a second run would stop and restart the
  # CRM that the first run just started.
  ssh_remote "bash $(printf '%q' "$REMOTE_BOOTSTRAP_FILE")"

  echo ""
  echo "=== Post-Deployment Verification ==="

  # Give the service a moment to settle.
  sleep 3

  # 1. healthz must answer exactly "ok".
  printf '%s' "1. Health check: "
  if ssh_remote "curl -fsS http://127.0.0.1:${CRM_PORT}/healthz 2>/dev/null" | grep -q "^ok$"; then
    echo "[PASS]"
  else
    echo "[FAIL]"
  fi

  # 2. Portal session route. `|| true` keeps a failed curl/ssh from aborting
  # the script under `set -e` before the result can be reported.
  printf '%s' "2. Portal session route: "
  local session_result
  session_result="$(ssh_remote "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:${CRM_PORT}/api/portal/session/state 2>/dev/null")" || true
  if [[ "$session_result" == "200" ]]; then
    echo "[PASS] Returned 200 - new version"
  elif [[ "$session_result" == "404" ]]; then
    echo "[WARN] Returned 404 - may be old version"
  else
    echo "[UNKNOWN] Returned $session_result"
  fi

  # 3. Binary state. Use readlink: plain `ls /proc/PID/exe` prints only the
  # path and never shows the "(deleted)" marker of the link target.
  printf '%s' "3. Binary state check: "
  local pid_val binary_link
  pid_val="$(ssh_remote "cat $(printf '%q' "$REMOTE_CRM_PID_FILE") 2>/dev/null")" || true
  binary_link=""
  if [[ "$pid_val" =~ ^[0-9]+$ ]]; then
    binary_link="$(ssh_remote "readlink /proc/${pid_val}/exe 2>/dev/null")" || true
  fi
  if [[ -z "$binary_link" ]]; then
    echo "[WARN] Cannot check binary state"
  elif [[ "$binary_link" == *deleted* ]]; then
    echo "[FAIL] Binary shows deleted"
  else
    echo "[OK] Binary not deleted"
  fi
  echo ""
}

main "$@"