Files
sub2api-cn-relay-manager/scripts/deploy/deploy_crm_only.sh
phamnazage-jpg 47ced19c7b fix(deploy): production CRM deployment improvements
- Fix deploy_crm_only.sh: non-destructive hot reload
  - Enhanced stop logic with pgrep + fuser for port release
  - Added 3-layer verification (process/control/user)
  - Check /proc/$pid/exe for (deleted) marker
  - Never delete DB

- Fix portal script contracts: crm_session → crm_subject
  - deploy_tksea_portal.sh: use $cookie_crm_subject
  - test_tksea_portal_assets.sh: assert crm_subject exists
  - nginx.example.conf: updated trusted subject header

- Add systemd service management
  - sub2api-crm.service.template
  - install_crm_systemd.sh
  - verify_crm_deployment.sh

Update docs/plans/2026-06-04-next-version-plan.md with deployment findings.
2026-06-10 15:44:45 +08:00

384 lines
13 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# deploy_crm_only.sh — deploy sub2api-cn-relay-manager (the CRM control plane)
# to remote43 as a single process. It does not start the sub2api host, PG or
# Redis containers.
#
# Reuses the env rendering helpers from
# scripts/deploy/remote43_patched_stack_lib.sh.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Optional operator overrides: every variable assigned in the file is exported
# because it is sourced between `set -a` and `set +a`.
: "${DEPLOY_ENV_FILE:=$ROOT_DIR/scripts/deploy/.env.deploy}"
if [[ -f "$DEPLOY_ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  . "$DEPLOY_ENV_FILE"
  set +a
fi

# shellcheck disable=SC1091
. "$ROOT_DIR/scripts/deploy/remote43_patched_stack_lib.sh"

# --- Connection settings (validated as non-empty later, in main) -------------
: "${KEY:=}"
: "${REMOTE:=}"

# --- Stack identity ----------------------------------------------------------
# Kept as a plain assignment inside `if` so that a failing `date` still aborts
# the script under `set -e`.
if [[ -z "${STACK_NAME:-}" ]]; then
  STACK_NAME="crm-only-$(date +%Y%m%d)"
fi
: "${CRM_PORT:=18190}"
: "${CRM_BINARY:=$ROOT_DIR/server}"

# --- Local artifacts ---------------------------------------------------------
: "${LOCAL_REPO_BUNDLE:=/tmp/${STACK_NAME}-repo.bundle}"
: "${LOCAL_OPERATOR_ENV_FILE:=/tmp/${STACK_NAME}.env}"
: "${LOCAL_TUNNEL_SCRIPT:=/tmp/${STACK_NAME}.tunnel.sh}"
: "${LOCAL_DEPLOY_DIR:=/tmp/${STACK_NAME}-stage}"

# --- Remote layout -----------------------------------------------------------
: "${REMOTE_ROOT:=/home/ubuntu/${STACK_NAME}_${CRM_PORT}}"
: "${REMOTE_REPO_ROOT:=/home/ubuntu/sub2api-cn-relay-manager-git-current}"
# The paths below are always derived from REMOTE_ROOT and cannot be overridden.
REMOTE_REPO_BUNDLE="$REMOTE_ROOT/sub2api-cn-relay-manager.bundle"
REMOTE_CRM_ENV_FILE="$REMOTE_ROOT/.env.crm"
REMOTE_BOOTSTRAP_FILE="$REMOTE_ROOT/bootstrap.sh"
REMOTE_CRM_BINARY="$REMOTE_ROOT/sub2api-cn-relay-manager-server"
REMOTE_CRM_DB_FILE="$REMOTE_ROOT/sub2api-cn-relay-manager.db"
REMOTE_CRM_PID_FILE="$REMOTE_ROOT/crm.pid"
REMOTE_CRM_LOG_FILE="$REMOTE_ROOT/crm.log"

# --- Admin credentials -------------------------------------------------------
# A token is generated only when none was supplied; the password falls back to
# the token. The `if` form keeps a failing generator fatal under `set -e`.
if [[ -z "${crm_admin_token:-}" ]]; then
  crm_admin_token="$(remote43_random_hex 24)"
fi
: "${crm_admin_username:=admin}"
: "${crm_admin_password:=$crm_admin_token}"

# DRY_RUN=1 makes run_cmd print commands instead of executing them.
: "${DRY_RUN:=0}"
# die MESSAGE... — print the arguments (joined by spaces) on stderr and exit
# the shell with status 1.
# printf is used instead of echo so that a message such as "-n" or "-e ..." is
# printed verbatim rather than being parsed as an echo option.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}
# require_cmd NAME... — abort via die unless every NAME resolves with
# `command -v`. Previously only the first argument was checked, so a call
# listing several tools silently skipped all but the first one.
require_cmd() {
  local cmd
  for cmd in "$@"; do
    command -v "$cmd" >/dev/null 2>&1 || die "missing command: $cmd"
  done
}
# run_cmd CMD [ARG...] — execute the command, or, when DRY_RUN is "1", print
# it shell-quoted on stdout (prefixed with "DRY_RUN:") and return 0 without
# running anything.
run_cmd() {
  if [[ "$DRY_RUN" != "1" ]]; then
    "$@"
    return
  fi
  printf 'DRY_RUN:'
  printf ' %q' "$@"
  printf '\n'
  return 0
}
# ssh_remote ARG... — run ssh against $REMOTE with identity file $KEY and host
# key checking disabled, passing ARG... through; goes via run_cmd so DRY_RUN
# is honoured.
ssh_remote() {
  local -a ssh_opts=(-i "$KEY" -o StrictHostKeyChecking=no)
  run_cmd ssh "${ssh_opts[@]}" "$REMOTE" "$@"
}
# scp_remote ARG... — run scp with identity file $KEY and host key checking
# disabled, passing ARG... (sources and destination) through; goes via run_cmd
# so DRY_RUN is honoured.
scp_remote() {
  local -a scp_opts=(-i "$KEY" -o StrictHostKeyChecking=no)
  run_cmd scp "${scp_opts[@]}" "$@"
}
# write_local_tunnel_script — write an executable helper to
# $LOCAL_TUNNEL_SCRIPT that opens an SSH tunnel forwarding local $CRM_PORT to
# 127.0.0.1:$CRM_PORT on $REMOTE. $KEY and $REMOTE are embedded shell-quoted.
write_local_tunnel_script() {
  local key_q remote_q forward_spec
  printf -v key_q '%q' "$KEY"
  printf -v remote_q '%q' "$REMOTE"
  forward_spec="${CRM_PORT}:127.0.0.1:${CRM_PORT}"
  {
    printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail'
    # Each of these lines ends in " \" so the ssh invocation continues.
    printf '%s \\\n' \
      'exec ssh -N' \
      "-L ${forward_spec}" \
      "-i ${key_q}" \
      '-o StrictHostKeyChecking=no'
    printf '%s\n' "$remote_q"
  } > "$LOCAL_TUNNEL_SCRIPT"
  chmod +x "$LOCAL_TUNNEL_SCRIPT"
}
# write_operator_env — write the operator env file ($LOCAL_OPERATOR_ENV_FILE)
# through remote43_write_env_file and leave it with mode 600.
#
# The file carries the CRM admin token and password, so it must not be
# readable by other users even briefly. The helper therefore runs under
# umask 077 (in a subshell, so the caller's umask is untouched), and a file
# left over from an earlier run is tightened before it is rewritten. The final
# chmod is kept in case the helper replaces the file.
write_operator_env() {
  if [[ -e "$LOCAL_OPERATOR_ENV_FILE" ]]; then
    chmod 600 "$LOCAL_OPERATOR_ENV_FILE"
  fi
  (
    umask 077
    remote43_write_env_file "$LOCAL_OPERATOR_ENV_FILE" \
      CRM_BASE "http://127.0.0.1:${CRM_PORT}" \
      REMOTE_CRM_BASE "http://127.0.0.1:${CRM_PORT}" \
      REMOTE_ROOT "$REMOTE_ROOT" \
      REMOTE_CRM_ENV_FILE "$REMOTE_CRM_ENV_FILE" \
      REMOTE_REPO_ROOT "$REMOTE_REPO_ROOT" \
      KEY "$KEY" \
      REMOTE "$REMOTE" \
      crm_admin_token "$crm_admin_token" \
      crm_admin_username "$crm_admin_username" \
      crm_admin_password "$crm_admin_password" \
      CRM_PORT "$CRM_PORT"
  )
  chmod 600 "$LOCAL_OPERATOR_ENV_FILE"
}
# render_crm_only_bootstrap — print the remote bootstrap script on stdout.
#
# Globals read: REMOTE_ROOT, REMOTE_CRM_ENV_FILE, REMOTE_CRM_BINARY,
#   REMOTE_CRM_DB_FILE, REMOTE_CRM_PID_FILE, REMOTE_CRM_LOG_FILE,
#   REMOTE_REPO_ROOT, REMOTE_REPO_BUNDLE, CRM_PORT.
# Outputs: a bash script that starts with `export NAME=<shell-quoted value>`
#   lines for those settings, followed by a fixed body.
#
# The export lines are produced directly with printf %q. Substituting the
# quoted values into a template with sed would re-interpret the backslashes
# and '&' that %q emits (sed replacement syntax), corrupting any path that
# needs quoting. It would also need a temp file and non-portable `sed -i`.
#
# The generated body, when run on the remote host:
#   1. creates directories, chmods the binary, syncs the repo from the bundle;
#   2. stops the old CRM (pid file, then command-line patterns, then fuser on
#      the port) and removes the old log; the DB file is never removed;
#   3. refuses to continue if the port is still bound;
#   4. starts the CRM via systemd when /etc/systemd/system/sub2api-crm.service
#      exists, otherwise via nohup, and records the PID in the pid file;
#   5. verifies /healthz, the running executable, and the portal session route.
render_crm_only_bootstrap() {
  printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail'
  printf 'export %s=%q\n' \
    REMOTE_ROOT "$REMOTE_ROOT" \
    CRM_ENV_FILE "$REMOTE_CRM_ENV_FILE" \
    CRM_BINARY "$REMOTE_CRM_BINARY" \
    CRM_DB_FILE "$REMOTE_CRM_DB_FILE" \
    CRM_PID_FILE "$REMOTE_CRM_PID_FILE" \
    CRM_LOG_FILE "$REMOTE_CRM_LOG_FILE" \
    REMOTE_REPO_ROOT "$REMOTE_REPO_ROOT" \
    REMOTE_REPO_BUNDLE "$REMOTE_REPO_BUNDLE" \
    CRM_PORT "$CRM_PORT"
  cat <<'BOOTSTRAP_EOF'
mkdir -p "$REMOTE_ROOT" "$(dirname "$REMOTE_REPO_ROOT")"
chmod 755 "$CRM_BINARY"

if [[ -f "$REMOTE_REPO_BUNDLE" ]]; then
  if [[ -d "$REMOTE_REPO_ROOT/.git" ]]; then
    git -C "$REMOTE_REPO_ROOT" fetch "$REMOTE_REPO_BUNDLE" main
    git -C "$REMOTE_REPO_ROOT" reset --hard FETCH_HEAD
  else
    rm -rf "$REMOTE_REPO_ROOT"
    git clone "$REMOTE_REPO_BUNDLE" "$REMOTE_REPO_ROOT"
    git -C "$REMOTE_REPO_ROOT" checkout main
  fi
  git -C "$REMOTE_REPO_ROOT" config user.name "Remote43 CRM"
  git -C "$REMOTE_REPO_ROOT" config user.email "remote43-crm@tksea.top"
fi

# Print every listening-socket line that mentions CRM_PORT (empty when free).
# grep runs without -q so it consumes all input; with `set -o pipefail` an
# early-exiting `grep -q` could make the pipeline fail and the port look free.
port_listeners() {
  { ss -tlnp 2>/dev/null || true; netstat -tlnp 2>/dev/null || true; } \
    | grep ":$CRM_PORT " || true
}

# Non-destructive hot update: make sure the old process is gone before the new
# one is started. Never delete the DB; production data must be kept.
# Stopping goes beyond the pid file: leftovers are also found by command-line
# pattern and by port.
echo "Stopping any existing CRM processes..."

# 1. Stop the PID recorded in the pid file, if there is one.
if [[ -f "$CRM_PID_FILE" ]]; then
  OLD_PID="$(cat "$CRM_PID_FILE")"
  if kill -0 "$OLD_PID" >/dev/null 2>&1; then
    echo "Stopping PID from pidfile: $OLD_PID"
    kill "$OLD_PID" >/dev/null 2>&1 || true
    # Allow up to ~10s for a graceful exit before escalating to SIGKILL.
    for _ in {1..20}; do
      if ! kill -0 "$OLD_PID" >/dev/null 2>&1; then break; fi
      sleep 0.5
    done
    if kill -0 "$OLD_PID" >/dev/null 2>&1; then
      kill -9 "$OLD_PID" >/dev/null 2>&1 || true
      sleep 1
    fi
  fi
  rm -f "$CRM_PID_FILE"
fi

# 2. Stop any leftover CRM process found by command-line pattern.
for pattern in 'sub2api.*crm' 'sub2api.*relay-manager'; do
  for pid in $(pgrep -f "$pattern" 2>/dev/null); do
    # pgrep -f matches full command lines, which can include this script's
    # own path; never signal this shell or the shell that launched it.
    if [[ "$pid" == "$$" || "$pid" == "$PPID" ]]; then continue; fi
    echo "Stopping process by pattern ($pattern): $pid"
    kill "$pid" 2>/dev/null || true
    sleep 0.5
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid" 2>/dev/null || true
    fi
  done
done

# 3. Force-release the port if fuser is available.
if command -v fuser >/dev/null 2>&1; then
  fuser -k "${CRM_PORT}/tcp" 2>/dev/null || true
fi

# Clear the old log; the DB is deliberately left alone.
rm -f "$CRM_LOG_FILE"

# Wait (up to 10s) for the port to be released.
for i in {1..10}; do
  if [[ -z "$(port_listeners)" ]]; then break; fi
  echo "Waiting for port $CRM_PORT to be released... (attempt $i/10)"
  sleep 1
done
if [[ -n "$(port_listeners)" ]]; then
  echo "ERROR: Port $CRM_PORT is still in use after cleanup. Cannot start new CRM." >&2
  port_listeners
  exit 1
fi
echo "Port $CRM_PORT is free. Starting new CRM..."

# Start: prefer systemd when the unit is installed, otherwise fall back to nohup.
if command -v systemctl >/dev/null 2>&1 && [[ -f /etc/systemd/system/sub2api-crm.service ]]; then
  systemctl restart sub2api-crm || exit 1
  # The pid file was removed above and systemd does not recreate it, but the
  # verification below reads it: record the unit's main PID.
  MAIN_PID_LINE="$(systemctl show --property=MainPID sub2api-crm)"
  printf '%s\n' "${MAIN_PID_LINE#MainPID=}" > "$CRM_PID_FILE"
else
  nohup bash -lc 'set -a; source "$CRM_ENV_FILE"; set +a; exec "$CRM_BINARY"' >"$CRM_LOG_FILE" 2>&1 &
  echo $! > "$CRM_PID_FILE"
fi

python3 - "$CRM_PORT" "$CRM_PID_FILE" <<'PY'
import subprocess, sys, time, os

port = sys.argv[1]
pid_file = sys.argv[2]

# 1. Wait for healthz (up to 30 attempts, one second apart).
healthz_url = f"http://127.0.0.1:{port}/healthz"
for i in range(30):
    r = subprocess.run(["curl", "-fsS", healthz_url], text=True, capture_output=True)
    if r.returncode == 0 and r.stdout.strip() == "ok":
        print(f"Health check passed on attempt {i+1}")
        break
    time.sleep(1)
else:
    raise SystemExit(f"crm healthz did not become ready on {healthz_url}")

# 2. Verify the running binary is not in the "(deleted)" state.
with open(pid_file) as f:
    pid = f.read().strip()
exe_link = f"/proc/{pid}/exe"
if os.path.islink(exe_link):
    target = os.readlink(exe_link)
    if "deleted" in target:
        raise SystemExit(f"ERROR: Binary shows (deleted): {target}")
    print(f"Binary OK: {target}")

# 3. Verify the portal session route (a new version should expose it).
session_url = f"http://127.0.0.1:{port}/api/portal/session/state"
r = subprocess.run(["curl", "-fsS", session_url], text=True, capture_output=True)
if r.returncode == 0:
    print(f"Portal session route OK: {r.stdout.strip()}")
elif r.returncode == 22 and "404" in r.stderr:
    raise SystemExit(f"ERROR: Portal session route returns 404 - may be running old version")
else:
    print(f"Warning: Portal session route check failed: {r.stderr}")
raise SystemExit(0)
PY

# Deployment verification summary.
echo "=== Deployment Verification ==="
NEW_PID=$(cat "$CRM_PID_FILE")
echo "New CRM PID: $NEW_PID"
# Explicit if/else: with `a && b || c` the warning would also print whenever
# the success echo itself failed.
EXE_TARGET="$(readlink "/proc/$NEW_PID/exe" 2>/dev/null || true)"
if [[ -n "$EXE_TARGET" && "$EXE_TARGET" != *deleted* ]]; then
  echo "Binary state: OK (not deleted)"
else
  echo "WARNING: Binary may be deleted"
fi
printf "crm_base=http://127.0.0.1:%s\n" "$CRM_PORT"
printf "crm_pid_file=%s\n" "$CRM_PID_FILE"
printf "crm_log=%s\n" "$CRM_LOG_FILE"
printf "remote_repo_root=%s\n" "$REMOTE_REPO_ROOT"
BOOTSTRAP_EOF
}
# main — build the local artifacts, push them to $REMOTE, run the bootstrap
# there, then print a three-item post-deployment report.
#
# Steps, in order:
#   1. check required tools, $KEY, $REMOTE and the local CRM binary;
#   2. create the git bundle of `main`, the tunnel script and the operator env;
#   3. render the CRM env file and bootstrap script into temp files (removed
#      by an EXIT trap) and copy them into $LOCAL_DEPLOY_DIR;
#   4. on the remote: stop the running CRM, then remove the old log and
#      binary; the DB file is never removed;
#   5. upload binary, bundle, env file and bootstrap; run the bootstrap once;
#   6. report healthz, portal session route and running-binary state.
# The report in step 6 is informational: it prints PASS/FAIL/WARN lines and
# does not change the exit status.
main() {
  local cmd
  # One name per call, so every tool is verified even if require_cmd only
  # inspects its first argument.
  for cmd in bash curl git python3 ssh scp; do
    require_cmd "$cmd"
  done
  [[ -n "$KEY" ]] || die "KEY is required; copy scripts/deploy/.env.deploy.example to scripts/deploy/.env.deploy and fill it"
  [[ -n "$REMOTE" ]] || die "REMOTE is required; copy scripts/deploy/.env.deploy.example to scripts/deploy/.env.deploy and fill it"
  remote43_require_file "$KEY" "ssh key"
  remote43_require_file "$CRM_BINARY" "crm server binary"

  rm -f "$LOCAL_REPO_BUNDLE"
  git -C "$ROOT_DIR" bundle create "$LOCAL_REPO_BUNDLE" main
  write_local_tunnel_script
  write_operator_env

  local crm_env_file bootstrap_file
  crm_env_file="$(mktemp)"
  bootstrap_file="$(mktemp)"
  # Paths are expanded now on purpose: the locals are out of scope by the time
  # the EXIT trap fires.
  # shellcheck disable=SC2064
  trap "rm -f \"$crm_env_file\" \"$bootstrap_file\"" EXIT

  render_remote43_crm_env \
    "$CRM_PORT" \
    "file:${REMOTE_CRM_DB_FILE}?_foreign_keys=on&_busy_timeout=5000" \
    "$crm_admin_token" \
    "$REMOTE_REPO_ROOT" \
    "$crm_admin_username" \
    "$crm_admin_password" > "$crm_env_file"
  render_crm_only_bootstrap > "$bootstrap_file"
  chmod +x "$bootstrap_file"

  mkdir -p "$LOCAL_DEPLOY_DIR"
  cp "$crm_env_file" "$LOCAL_DEPLOY_DIR/.env.crm"
  cp "$bootstrap_file" "$LOCAL_DEPLOY_DIR/bootstrap.sh"

  # Shell-quoted copies of everything interpolated into remote command lines.
  local remote_root_q pid_file_q log_file_q binary_q bootstrap_q port_q
  printf -v remote_root_q '%q' "$REMOTE_ROOT"
  printf -v pid_file_q '%q' "$REMOTE_CRM_PID_FILE"
  printf -v log_file_q '%q' "$REMOTE_CRM_LOG_FILE"
  printf -v binary_q '%q' "$REMOTE_CRM_BINARY"
  printf -v bootstrap_q '%q' "$REMOTE_BOOTSTRAP_FILE"
  printf -v port_q '%q' "$CRM_PORT"

  # Remote pre-stop. The *_q values are expanded locally; every \$ below is
  # evaluated by the remote shell.
  ssh_remote "mkdir -p ${remote_root_q}
# Stop logic: by pid file, then by command-line pattern, then by port.
echo 'Stopping any existing CRM processes...'
# 1. Stop the PID recorded in the pid file, if there is one.
if [[ -f ${pid_file_q} ]]; then
  OLDPID=\$(cat ${pid_file_q})
  if kill -0 \$OLDPID 2>/dev/null; then
    echo \"Stopping PID from pidfile: \$OLDPID\"
    kill \$OLDPID 2>/dev/null || true
    for i in {1..20}; do
      if ! kill -0 \$OLDPID 2>/dev/null; then break; fi
      sleep 0.5
    done
    if kill -0 \$OLDPID 2>/dev/null; then kill -9 \$OLDPID 2>/dev/null || true; sleep 1; fi
  fi
  rm -f ${pid_file_q}
fi
# 2. Stop any leftover CRM process found by command-line pattern.
for pattern in 'sub2api.*crm' 'sub2api.*relay-manager'; do
  for pid in \$(pgrep -f \"\$pattern\" 2>/dev/null); do
    # This whole script text is the command line of the shell running it, so
    # pgrep -f matches that shell as well; skip it instead of killing ourselves.
    if [[ \$pid == \$\$ ]]; then continue; fi
    echo \"Stopping process by pattern (\$pattern): \$pid\"
    kill \$pid 2>/dev/null || true
    sleep 0.5
    if kill -0 \$pid 2>/dev/null; then kill -9 \$pid 2>/dev/null || true; fi
  done
done
# 3. Force-release the port.
fuser -k ${port_q}/tcp 2>/dev/null || true
# 4. Wait for the port to be released.
for i in {1..5}; do
  if ! ss -tlnp 2>/dev/null | grep -q ':${port_q} ' && \\
     ! netstat -tlnp 2>/dev/null | grep -q ':${port_q} '; then
    break
  fi
  echo \"Waiting for port release... (\$i/5)\"
  sleep 1
done
# Never delete the DB: only the old log and binary are removed.
rm -f ${log_file_q} ${binary_q}"

  scp_remote "$CRM_BINARY" "$REMOTE:$REMOTE_CRM_BINARY"
  scp_remote "$LOCAL_REPO_BUNDLE" "$REMOTE:$REMOTE_REPO_BUNDLE"
  scp_remote "$crm_env_file" "$REMOTE:$REMOTE_CRM_ENV_FILE"
  scp_remote "$bootstrap_file" "$REMOTE:$REMOTE_BOOTSTRAP_FILE"
  # Run the bootstrap exactly once; a second run would stop and restart the
  # service that the first run had just started and verified.
  ssh_remote "bash ${bootstrap_q}"

  echo ""
  echo "=== Post-Deployment Verification ==="
  # Give the service a moment to settle.
  sleep 3

  # 1. healthz must answer exactly "ok".
  printf '%s' "1. Health check: "
  if ssh_remote "curl -fsS http://127.0.0.1:${CRM_PORT}/healthz 2>/dev/null" | grep -q "^ok$"; then
    echo "[PASS]"
  else
    echo "[FAIL]"
  fi

  # 2. Portal session route. `|| true` keeps a failing remote curl from
  #    aborting the report under `set -e`.
  local session_result=""
  printf '%s' "2. Portal session route: "
  session_result="$(ssh_remote "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:${CRM_PORT}/api/portal/session/state 2>/dev/null")" || true
  if [[ "$session_result" == "200" ]]; then
    echo "[PASS] Returned 200 - new version"
  elif [[ "$session_result" == "404" ]]; then
    echo "[WARN] Returned 404 - may be old version"
  else
    echo "[UNKNOWN] Returned $session_result"
  fi

  # 3. Running binary state. readlink prints the link target, which is where
  #    the kernel's "(deleted)" marker appears; a plain `ls` of the link prints
  #    only the path and can never show it.
  local pid_val="" binary_link=""
  printf '%s' "3. Binary state check: "
  pid_val="$(ssh_remote "cat ${pid_file_q} 2>/dev/null")" || true
  if [[ "$pid_val" =~ ^[0-9]+$ ]]; then
    binary_link="$(ssh_remote "readlink /proc/${pid_val}/exe 2>/dev/null")" || true
    if [[ -z "$binary_link" ]]; then
      echo "[WARN] Cannot check binary state"
    elif [[ "$binary_link" == *deleted* ]]; then
      echo "[FAIL] Binary shows deleted"
    else
      echo "[OK] Binary not deleted"
    fi
  else
    echo "[WARN] Cannot check binary state"
  fi
  echo ""
}
main "$@"