Files

362 lines
14 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# Phase 6 acceptance gate. Runs a fixed sequence of checks and reports each
# outcome through helpers (pass / fail / warn / check_shell / finish_phase)
# that are not defined in this file - presumably they come from the sourced
# verify_common.sh.
set -euo pipefail
# Directory containing this script; used to locate the helper libraries.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/verify_common.sh"
. "$SCRIPT_DIR/secret_gate_lib.sh"
# Database connection string; a non-empty DATABASE_URL from the environment wins.
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
# Where the API server binary is built and where its output is captured.
SERVER_BIN="/tmp/llm_phase6_server"
SERVER_LOG="/tmp/llm_phase6_server.log"
# Port for the test server; empty unless PHASE6_PORT is set, in which case
# reserve_server_port leaves it alone.
SERVER_PORT="${PHASE6_PORT:-}"
# PID of the background server; stays empty until start_server launches it.
SERVER_PID=""
# Token handed to the server at launch and sent as the Bearer token on API calls.
API_AUTH_TOKEN="${API_AUTH_TOKEN:-phase6-local-token}"
# First recorded root cause (written by set_root_cause_once); "none" = nothing recorded.
ROOT_CAUSE_CLASS="none"
ROOT_CAUSE_SOURCE="none"
ROOT_CAUSE_SUMMARY="none"
# Release verdict; overwritten by set_release_semantics.
RELEASE_SEMANTICS_CLASS="release-ready"
RELEASE_SEMANTICS_GATE="phase6"
RELEASE_SEMANTICS_POLICY="release-allowed"
# First recorded blocker switch (written by set_blocker_switch_once).
BLOCKER_SWITCH_CLASS="none"
BLOCKER_SWITCH_OLD="none"
BLOCKER_SWITCH_NEW="none"
# Stop the background server if it is still running and delete the temporary
# server binary. Registered as the EXIT trap so it runs on every exit path.
cleanup() {
  local pid="${SERVER_PID:-}"
  if [ -n "$pid" ]; then
    if kill -0 "$pid" >/dev/null 2>&1; then
      # Best effort: the process may vanish between the probe and the signal.
      kill "$pid" >/dev/null 2>&1 || true
      wait "$pid" >/dev/null 2>&1 || true
    fi
  fi
  rm -f "$SERVER_BIN"
}
trap cleanup EXIT
# Succeeds when a TCP connection to 127.0.0.1:<port> can be opened.
# Arguments: $1 - port to probe
# The probe runs in a subshell so the /dev/tcp descriptor is closed again
# immediately; all output, including the redirection error, is discarded.
port_in_use() {
  local port="$1"
  local status=0
  (printf '\n' >"/dev/tcp/127.0.0.1/${port}") >/dev/null 2>&1 || status=$?
  return "$status"
}
# Pick the port the test server will listen on and store it in SERVER_PORT.
# Globals:  SERVER_PORT (read/written)
# Returns:  0 when SERVER_PORT is set on return, 1 when every candidate in
#           18080..18120 is already in use.
reserve_server_port() {
  # A port supplied up front is taken as-is; it is not probed.
  if [ -n "${SERVER_PORT:-}" ]; then
    return 0
  fi
  # The loop variable is local so it does not leak into the caller's scope.
  local candidate
  for ((candidate = 18080; candidate <= 18120; candidate++)); do
    if ! port_in_use "$candidate"; then
      SERVER_PORT="$candidate"
      return 0
    fi
  done
  return 1
}
# Launch the server binary in the background and wait for /health to answer.
# Globals:  DB_URL, SERVER_PORT, API_AUTH_TOKEN, SERVER_BIN, SERVER_LOG (read);
#           SERVER_PID (written)
# Returns:  0 once /health responds with "status":"ok"; 1 if the process dies
#           or 20 polls (0.5s apart) pass without a healthy answer.
start_server() {
  local health_url="http://127.0.0.1:${SERVER_PORT}/health"
  local attempt

  DATABASE_URL="$DB_URL" PORT="$SERVER_PORT" API_AUTH_TOKEN="$API_AUTH_TOKEN" \
    "$SERVER_BIN" >"$SERVER_LOG" 2>&1 &
  SERVER_PID=$!

  for ((attempt = 0; attempt < 20; attempt++)); do
    # The server already exited: polling further cannot succeed.
    kill -0 "$SERVER_PID" >/dev/null 2>&1 || return 1
    if curl -fsS "$health_url" >/tmp/llm_phase6_health.out 2>/tmp/llm_phase6_health.err; then
      if grep -q '"status":"ok"' /tmp/llm_phase6_health.out; then
        return 0
      fi
    fi
    sleep 0.5
  done
  return 1
}
# Filter: print the last stdin line that has at least one field (blank and
# whitespace-only lines are skipped). Prints an empty line when none exists.
last_nonempty_line() {
  awk '
    NF > 0 { latest = $0 }
    END { print latest }
  '
}
# Filter: print the last stdin line that is neither blank nor exactly
# "exit status <N>". Prints an empty line when no such line exists.
last_meaningful_failure_line() {
  awk '
    NF == 0 { next }
    /^exit status [0-9]+$/ { next }
    { latest = $0 }
    END { print latest }
  '
}
# Print the value of the first whitespace-separated "<name>=<value>" field
# found in the payload.
# Arguments: $1 - metric name
#            $2 - text to search (may span several lines)
# Outputs:   the text between the first and second "=" of the matching field;
#            nothing when no field matches.
extract_window_metric() {
  local name="$1"
  local payload="$2"
  # The payload is supplied as a here-string rather than through a pipe: awk
  # stops at the first hit, and with `pipefail` a producer that is still
  # writing would be cut off and make the whole function report failure.
  awk -v key="$name" '
    $0 ~ key"=" {
      for (i = 1; i <= NF; i++) {
        split($i, parts, "=")
        if (parts[1] == key) {
          print parts[2]
          exit
        }
      }
    }
  ' <<<"$payload"
}
# Classify a window-audit report by which failure counters are non-zero.
# Arguments: $1 - audit output containing "<counter>=<n>" fields
# Outputs:   "precondition_missing_only", "external_provider_failure_only",
#            or "mixed" for every other combination.
classify_window_failure() {
  local payload="$1"
  local precondition_missing external_provider_failure collector_runtime_failure unknown_failure
  local metric value

  # Read each counter from the payload; a counter that is absent counts as 0.
  for metric in precondition_missing external_provider_failure collector_runtime_failure unknown_failure; do
    value="$(extract_window_metric "$metric" "$payload")"
    printf -v "$metric" '%s' "${value:-0}"
  done

  if [ "$precondition_missing" -gt 0 ] \
    && [ "$external_provider_failure" -eq 0 ] \
    && [ "$collector_runtime_failure" -eq 0 ] \
    && [ "$unknown_failure" -eq 0 ]; then
    printf '%s\n' "precondition_missing_only"
    return
  fi

  if [ "$external_provider_failure" -gt 0 ] \
    && [ "$precondition_missing" -eq 0 ] \
    && [ "$collector_runtime_failure" -eq 0 ] \
    && [ "$unknown_failure" -eq 0 ]; then
    printf '%s\n' "external_provider_failure_only"
    return
  fi

  printf '%s\n' "mixed"
}
# Record the root cause of the run; only the first call has any effect.
# Arguments: $1 - class, $2 - source, $3 - summary
# Globals:   ROOT_CAUSE_CLASS / ROOT_CAUSE_SOURCE / ROOT_CAUSE_SUMMARY (written
#            while ROOT_CAUSE_CLASS is still "none")
set_root_cause_once() {
  local class="$1" source="$2" summary="$3"
  # A root cause is already on record: keep it.
  [ "$ROOT_CAUSE_CLASS" = "none" ] || return 0
  ROOT_CAUSE_CLASS="$class"
  ROOT_CAUSE_SOURCE="$source"
  ROOT_CAUSE_SUMMARY="$summary"
}
# Overwrite the release verdict; unlike the *_once setters, every call wins.
# Arguments: $1 - class, $2 - gate, $3 - policy
# Globals:   RELEASE_SEMANTICS_CLASS / _GATE / _POLICY (written)
set_release_semantics() {
  local class="$1" gate="$2" policy="$3"
  RELEASE_SEMANTICS_CLASS="$class"
  RELEASE_SEMANTICS_GATE="$gate"
  RELEASE_SEMANTICS_POLICY="$policy"
}
# Record a blocker switch; only the first call has any effect.
# Arguments: $1 - class, $2 - old blocker, $3 - new blocker
# Globals:   BLOCKER_SWITCH_CLASS / _OLD / _NEW (written while
#            BLOCKER_SWITCH_CLASS is still "none")
set_blocker_switch_once() {
  local class="$1" old="$2" new="$3"
  if [ "$BLOCKER_SWITCH_CLASS" = "none" ]; then
    BLOCKER_SWITCH_CLASS="$class"
    BLOCKER_SWITCH_OLD="$old"
    BLOCKER_SWITCH_NEW="$new"
  fi
}
# Classify a live-pipeline failure from one line of its output.
# Arguments: $1 - failure line (matched case-insensitively for ASCII letters)
# Outputs:   "precondition_missing", "external_provider_failure" or
#            "primary_pipeline_failure" (the fallback).
# Precondition markers are checked first, so they win when a line carries
# markers of both kinds.
classify_live_run_failure() {
  local live_tail="$1"
  local normalized
  normalized="$(printf '%s' "$live_tail" | tr '[:upper:]' '[:lower:]')"
  case "$normalized" in
    # PostgreSQL puts the quoted identifier in the middle of these messages
    # (role "x" does not exist / relation "x" does not exist), so the quoted
    # form is matched in addition to the bare phrases.
    *"api key"* | *"database_url"* | *"must provide"* | *"未设置"* | \
    *"permission denied"* | \
    *"role does not exist"* | *'role "'*'" does not exist'* | \
    *"relation does not exist"* | *'relation "'*'" does not exist'*)
      printf '%s\n' "precondition_missing"
      ;;
    *"signature_guard"* | \
    *"unexpected status 403"* | *"unexpected status 502"* | \
    *"unexpected status 503"* | *"unexpected status 504"* | \
    *"no pricing cards found"* | *"no model rows parsed"* | \
    *"no model overview cards parsed"* | \
    *"context deadline exceeded"* | *"client.timeout"* | *"i/o timeout"* | \
    *"tls handshake timeout"* | *"transport closed"* | \
    *"connection reset"* | *"connection refused"* | *"no such host"*)
      printf '%s\n' "external_provider_failure"
      ;;
    *)
      printf '%s\n' "primary_pipeline_failure"
      ;;
  esac
}
# Name the pricing provider mentioned in a live-pipeline failure line.
# Arguments: $1 - failure line (matched case-insensitively for ASCII letters)
# Outputs:   vertex_pricing, cloudflare_pricing, perplexity_pricing,
#            xfyun_pricing, or unknown_external_provider when none matches.
# Providers are tested in that order; vertex is recognised only through its
# "import_"-prefixed name.
classify_live_run_provider() {
  local live_tail="$1"
  local lowered
  local provider='unknown_external_provider'
  lowered="$(printf '%s' "$live_tail" | tr '[:upper:]' '[:lower:]')"
  if [[ "$lowered" == *"import_vertex_pricing"* ]]; then
    provider='vertex_pricing'
  elif [[ "$lowered" == *"import_cloudflare_pricing"* || "$lowered" == *"cloudflare_pricing"* ]]; then
    provider='cloudflare_pricing'
  elif [[ "$lowered" == *"import_perplexity_pricing"* || "$lowered" == *"perplexity_pricing"* ]]; then
    provider='perplexity_pricing'
  elif [[ "$lowered" == *"import_xfyun_pricing"* || "$lowered" == *"xfyun_pricing"* ]]; then
    provider='xfyun_pricing'
  fi
  printf '%s\n' "$provider"
}
# Run scripts/run_real_pipeline.sh and report the outcome as a gate.
# The combined output is saved to /tmp/llm_phase6_live_pipeline.out. On
# failure the last meaningful output line is classified, the root cause is
# recorded (first recorder wins) and a FAIL result is reported.
# Globals:  written indirectly through set_root_cause_once
run_live_pipeline_gate() {
  local live_output live_rc live_tail
  # Scratch values kept local so they do not leak into the script's globals.
  local live_failure_class live_provider

  # The pipeline is expected to fail sometimes; capture its status instead of
  # letting errexit abort the script.
  set +e
  live_output="$(bash scripts/run_real_pipeline.sh 2>&1)"
  live_rc=$?
  set -e
  printf '%s\n' "$live_output" >/tmp/llm_phase6_live_pipeline.out
  live_tail="$(printf '%s\n' "$live_output" | last_meaningful_failure_line)"

  if [ "$live_rc" -eq 0 ]; then
    pass "live_run_result=PASS 主链路真实采集并输出今日日报"
    return
  fi

  live_failure_class="$(classify_live_run_failure "${live_tail:-}")"
  case "$live_failure_class" in
    precondition_missing)
      set_root_cause_once "precondition_missing" "live_run" "主链路因前置条件缺失未执行"
      fail "live_run_result=FAIL 主链路因前置条件缺失未执行 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
      ;;
    external_provider_failure)
      # Attribute the failure to a specific provider when the line names one.
      live_provider="$(classify_live_run_provider "${live_tail:-}")"
      set_root_cause_once "external_provider_failure" "live_run:${live_provider}" "外部文档站/价格页异常阻断主链路"
      fail "live_run_result=FAIL 外部文档站/价格页异常阻断主链路 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
      ;;
    *)
      set_root_cause_once "primary_pipeline_failure" "live_run" "主链路真实采集失败"
      fail "live_run_result=FAIL 主链路真实采集失败 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
      ;;
  esac
}
# Run scripts/verify_importer_smoke.sh and report the outcome as a gate.
# The script's combined output is echoed to stdout and saved to
# /tmp/llm_phase6_importer_smoke.out.
# Returns: 0 when the smoke script succeeded, 1 otherwise (after recording the
#          root cause and reporting a FAIL result).
run_importer_smoke_gate() {
  local output status failure_line

  # Capture the status instead of letting errexit abort the script.
  set +e
  output="$(bash scripts/verify_importer_smoke.sh 2>&1)"
  status=$?
  set -e

  printf '%s\n' "$output"
  printf '%s\n' "$output" >/tmp/llm_phase6_importer_smoke.out

  if [ "$status" -ne 0 ]; then
    failure_line="$(printf '%s\n' "$output" | last_meaningful_failure_line)"
    set_root_cause_once "importer_smoke_gate_failure" "importer_smoke_gate" "新增导入器 smoke gate 未通过"
    fail "importer_smoke_gate_result=FAIL 新增导入器 smoke gate 未通过 (${failure_line:-see /tmp/llm_phase6_importer_smoke.out})"
    return 1
  fi

  pass "importer_smoke_gate_result=PASS 新增导入器 smoke gate 通过"
  return 0
}
# Audit the last 7 collector runs against a 95% success-rate threshold and
# translate the outcome into a gate result plus release semantics.
# Globals:  DB_URL (read); release semantics and root cause are written
#           through set_release_semantics / set_root_cause_once.
# Outcomes when the audit script fails, by classify_window_failure result:
#   precondition_missing_only      -> PASS, release allowed with warning
#   external_provider_failure_only -> WARN, release allowed with warning
#   anything else                  -> FAIL, release blocked
# The key=value fields in each message are separated from the surrounding
# text by a space so they can be picked out as whole tokens.
run_window_gate() {
  local collector_window_output collector_window_rc window_failure_class

  # Capture the status instead of letting errexit abort the script.
  set +e
  collector_window_output="$(bash scripts/collector_stats_window_audit.sh --db "$DB_URL" --limit 7 --assert-success-rate 95 2>&1)"
  collector_window_rc=$?
  set -e
  # printf rather than echo: the output is arbitrary text.
  printf '%s\n' "$collector_window_output"

  if [ "$collector_window_rc" -eq 0 ]; then
    set_release_semantics "release-ready" "window_gate" "release-allowed"
    pass "window_gate_result=PASS 最近 7 次采集成功率达到 95%已输出分类摘要 stability_label=stable-window"
    return
  fi

  window_failure_class="$(classify_window_failure "$collector_window_output")"
  if [ "$window_failure_class" = "precondition_missing_only" ]; then
    set_release_semantics "precondition-degraded" "window_gate" "release-allowed-with-warning"
    # NOTE(review): this message still says the rate reached 95% although the
    # audit assertion failed - confirm the wording is intended for this PASS.
    pass "window_gate_result=PASS 最近 7 次采集成功率达到 95%环境纪律问题precondition_missing_only调度环境缺 OPENROUTER_API_KEY非系统缺陷 stability_label=precondition-only-window"
  elif [ "$window_failure_class" = "external_provider_failure_only" ]; then
    set_release_semantics "degraded-external-provider" "window_gate" "release-allowed-with-warning"
    set_root_cause_once "external_provider_failure_only" "window_gate" "最近 7 次采集窗口仅被外部依赖失败拖低"
    warn "window_gate_result=WARN 最近 7 次采集成功率未达 95%仅外部文档站失败external_provider_failure_only需要 release 语义降级而非误判为 collector bug stability_label=recovered-external-incident"
  else
    set_release_semantics "release-blocked" "window_gate" "release-blocked"
    set_root_cause_once "mixed_window_failure" "window_gate" "最近 7 次采集窗口存在混合失败"
    # This branch is only reached when the audit failed, so the message must
    # say the threshold was missed.
    fail "window_gate_result=FAIL 最近 7 次采集成功率未达 95% window_failure_class=${window_failure_class} stability_label=unstable-window"
  fi
}
echo "=== Phase 6 综合验收检查 ==="
# Prerequisite gates. check_shell is not defined in this file; each call hands
# it a label and a command string.
check_shell "Phase 1~5 总门禁通过" "bash scripts/verify_pre_phase6.sh"
check_shell "全仓 Go 测试通过" "go test ./..."
check_shell "脚本级采集器单测通过" "bash scripts/test.sh"
# The live pipeline only runs when the importer smoke gate passed. Otherwise it
# is reported as skipped and the blocker switch records
# importer_smoke_gate -> live_run.
if run_importer_smoke_gate; then
run_live_pipeline_gate
else
set_blocker_switch_once "global-blocker-shift" "importer_smoke_gate" "live_run"
warn "live_run_result=SKIPPED 因 importer_smoke_gate_result=FAIL"
fi
# No blocker switch yet, but a root cause was recorded: record a switch from the
# smoke gate to whatever produced that root cause.
# NOTE(review): the grep reads the file written by run_importer_smoke_gate,
# which holds the smoke script's own output rather than the PASS line this
# script reports - confirm the smoke script prints this marker itself.
if [ "$BLOCKER_SWITCH_CLASS" = "none" ] && [ "$ROOT_CAUSE_CLASS" != "none" ] && grep -q 'importer_smoke_gate_result=PASS' /tmp/llm_phase6_importer_smoke.out 2>/dev/null; then
set_blocker_switch_once "global-blocker-shift" "importer_smoke_gate" "$ROOT_CAUSE_SOURCE"
fi
# Compile-only check of the server (the binary is discarded).
check_shell "API Server 可构建" "go build -o /dev/null ./cmd/server"
# DB_URL is spliced into the command string inside single quotes.
check_shell "健康检查脚本通过" "DATABASE_URL='$DB_URL' bash healthcheck.sh"
# Paths here are relative, so this relies on the current working directory.
check_shell "源码与环境文件未包含明显硬编码密钥" "source scripts/secret_gate_lib.sh && secret_scan_paths . cmd internal frontend/src scripts .github/workflows && secret_env_files .dockerignore"
run_window_gate
# Build the server binary, start it on a local port and probe the two
# authenticated API endpoints. Build and start failures are recorded as the
# root cause (first recorder wins) and reported with whatever diagnostics are
# available.
if go build -o "$SERVER_BIN" ./cmd/server >/tmp/llm_phase6_server_build.out 2>/tmp/llm_phase6_server_build.err; then
  if reserve_server_port && start_server; then
    pass "API /health 可用"

    # --- /api/v1/models: status code, latency and payload shape ---
    # A curl failure must not abort the script; its status is inspected below.
    set +e
    api_metrics="$(curl -sS -H "Authorization: Bearer ${API_AUTH_TOKEN}" -o /tmp/llm_phase6_models.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/models")"
    api_rc=$?
    set -e
    if [ "$api_rc" -eq 0 ]; then
      api_code="$(printf '%s' "$api_metrics" | awk '{print $1}')"
      api_time="$(printf '%s' "$api_metrics" | awk '{print $2}')"
      if [ "$api_code" = "200" ]; then
        pass "API /api/v1/models 返回 200"
      else
        fail "API /api/v1/models 返回异常状态 (HTTP ${api_code:-unknown})"
      fi
      # The timing is passed as data (-v) instead of being spliced into the awk
      # program; anything that is not a plain decimal number counts as too slow.
      if awk -v elapsed="$api_time" 'BEGIN { exit !(elapsed ~ /^[0-9]+([.][0-9]+)?$/ && elapsed + 0 < 0.5) }'; then
        pass "API 响应 < 500ms (当前: ${api_time}s)"
      else
        fail "API 响应 >= 500ms (当前: ${api_time}s)"
      fi
      if grep -q '"data"' /tmp/llm_phase6_models.json; then
        pass "API 返回模型数据载荷"
      else
        fail "API 返回体缺少 data 字段"
      fi
    else
      fail "API /api/v1/models 请求失败"
    fi

    # --- /api/v1/subscription-plans: status code and payload shape ---
    set +e
    plan_metrics="$(curl -sS -H "Authorization: Bearer ${API_AUTH_TOKEN}" -o /tmp/llm_phase6_subscription_plans.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/subscription-plans")"
    plan_rc=$?
    set -e
    if [ "$plan_rc" -eq 0 ]; then
      plan_code="$(printf '%s' "$plan_metrics" | awk '{print $1}')"
      if [ "$plan_code" = "200" ]; then
        pass "API /api/v1/subscription-plans 返回 200"
      else
        fail "API /api/v1/subscription-plans 返回异常状态 (HTTP ${plan_code:-unknown})"
      fi
      if grep -q '"data"' /tmp/llm_phase6_subscription_plans.json; then
        pass "API 返回套餐数据载荷"
      else
        fail "套餐 API 返回体缺少 data 字段"
      fi
    else
      fail "API /api/v1/subscription-plans 请求失败"
    fi

    # NOTE(review): these two summary lines are only printed when the server
    # started, while the ROOT_CAUSE line at the end of the script is printed
    # unconditionally - confirm that difference is intended.
    printf 'RELEASE_SEMANTICS class=%s gate=%s policy=%s\n' "$RELEASE_SEMANTICS_CLASS" "$RELEASE_SEMANTICS_GATE" "$RELEASE_SEMANTICS_POLICY"
    printf 'BLOCKER_SWITCH class=%s old=%s new=%s\n' "$BLOCKER_SWITCH_CLASS" "$BLOCKER_SWITCH_OLD" "$BLOCKER_SWITCH_NEW"
  else
    # Only quote the log when this run actually launched the server. If no port
    # could be reserved the log is either missing (reading it would abort the
    # script under errexit + pipefail before the failure is reported) or left
    # over from an earlier run.
    details=""
    if [ -n "${SERVER_PID:-}" ] && [ -r "$SERVER_LOG" ]; then
      details="$(tr '\n' ' ' <"$SERVER_LOG" | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')" || details=""
    fi
    set_root_cause_once "api_server_start_failure" "api_server" "API Server 启动失败"
    fail "API Server 启动失败 (${details:-no server log})"
  fi
else
  # Same guard for the build log: report the failure even if it is unreadable.
  details=""
  if [ -r /tmp/llm_phase6_server_build.err ]; then
    details="$(tr '\n' ' ' </tmp/llm_phase6_server_build.err | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')" || details=""
  fi
  set_root_cause_once "api_server_build_failure" "api_server" "API Server 构建失败"
  fail "API Server 构建失败 (${details:-unknown build error})"
fi
# Remaining gates: the performance document exists, the frontend test command
# succeeds (its output goes to /tmp files), and the secret gate's own test passes.
check_shell "Phase 6 性能文档存在" "test -f docs/PERFORMANCE_TEST.md"
check_shell "前端已具备测试入口" "cd frontend && npm run test -- --run >/tmp/llm_phase6_frontend_test.out 2>/tmp/llm_phase6_frontend_test.err"
check_shell "secret gate 独立测试通过" "bash scripts/secret_gate_test.sh"
# Always print the recorded root cause ("none" when nothing was recorded).
printf 'ROOT_CAUSE class=%s source=%s summary=%s\n' "$ROOT_CAUSE_CLASS" "$ROOT_CAUSE_SOURCE" "$ROOT_CAUSE_SUMMARY"
# finish_phase is not defined in this file; presumably it reports the overall
# result collected by pass/fail/warn - confirm in verify_common.sh.
finish_phase