Files
llm-intelligence/scripts/run_daily.sh
phamnazage-jpg d5d18e987e feat(pipeline): wire collectors into real pipeline gates
Wire the new subscription and official pricing collectors into the daily, real, and intel pipeline entrypoints.

This commit also upgrades Phase 6 verification with recent-window collector classification so gate failures distinguish preconditions from true runtime or provider issues.
2026-05-15 22:37:06 +08:00

430 lines
17 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# run_daily.sh - daily data collection and report generation pipeline
# Sprint 3: full scheduling script (collect -> quality check -> report -> archive -> notify)
set -euo pipefail

PROJECT_DIR="/home/long/project/llm-intelligence"
source "${PROJECT_DIR}/scripts/report_utils.sh"

# Optional env files. .env is sourced after .env.local, so a plain assignment
# in .env overrides the same variable set by .env.local.
for env_name in .env.local .env; do
  if [[ -f "${PROJECT_DIR}/${env_name}" ]]; then
    # shellcheck disable=SC1090,SC1091
    source "${PROJECT_DIR}/${env_name}"
  fi
done
unset env_name

# Database connection: honour DATABASE_URL when set, otherwise use the local socket.
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
DATABASE_URL="$DB_URL"
export DATABASE_URL

# Run identity, log location and optional alert webhook.
REPORT_DATE="$(report_date_value)"
LOG_FILE="/tmp/llm_hub_daily_${REPORT_DATE}.log"
: "${FEISHU_WEBHOOK:=}"

# State produced by the collection stages.
MODEL_COUNT=""
FETCH_OUT="${PROJECT_DIR}/models.json"
FETCH_TOTAL="0"

# Stage and source inventories recorded in the runtime audit summary.
pipeline_stages=(
  openrouter multi_source official_imports daily_signal_snapshot daily_report
)
pipeline_sources=(
  openrouter moonshot deepseek openai zhipu baidu bytedance
  aliyun_subscription baidu_subscription ctyun_subscription bytedance_subscription
  huawei_package zhipu_coding_plan minimax_subscription
  cucloud_catalog mobile_cloud_catalog
  youdao_pricing platform360_pricing siliconflow_pricing ppio_pricing ucloud_pricing
  cloudflare_pricing perplexity_pricing vertex_pricing bedrock_pricing azure_openai_pricing
  catalog_seed_verification
)
PIPELINE_STAGE_SET="$(IFS=,; printf '%s' "${pipeline_stages[*]}")"
PIPELINE_SOURCE_SET="$(IFS=,; printf '%s' "${pipeline_sources[*]}")"
unset pipeline_stages pipeline_sources

# Audit accumulators, updated as stages run.
PIPELINE_FAILED_SOURCE_SET="none"
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"
PIPELINE_AUDIT_SUMMARY=""
# Logging helper: prints a timestamped line to stdout and appends it to LOG_FILE.
# Arguments: $1 - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}
# Flatten a summary file into a single line on stdout: newlines become spaces,
# whitespace runs collapse to one space, and one leading/trailing space is trimmed.
# Arguments: $1 - path to the summary file
# Outputs:   the flattened text; nothing when the file does not exist
normalize_summary_file() {
  local summary_path="$1"
  [[ -f "$summary_path" ]] || return 0
  tr '\n' ' ' < "$summary_path" \
    | sed -e 's/[[:space:]]\+/ /g' -e 's/^ //' -e 's/ $//'
}
# Print the value of the last "failed_source_keys=<value>" token on each line of
# the given summary text (the value ends at the next space).
# Arguments: $1 - summary text
# Outputs:   the extracted value; nothing when the token is absent
extract_failed_source_keys() {
  local audit_text="$1"
  sed -n 's/.*failed_source_keys=\([^ ]*\).*/\1/p' <<< "$audit_text"
}
# Append failed source keys to the global PIPELINE_FAILED_SOURCE_SET.
# An empty argument or the literal "none" is ignored; the placeholder value
# "none" in the global is replaced rather than appended to.
# Globals:   PIPELINE_FAILED_SOURCE_SET (read, written)
# Arguments: $1 - comma-separated source keys
merge_failed_source_keys() {
  local incoming="$1"
  case "$incoming" in
    "" | none) return 0 ;;
  esac
  if [[ "$PIPELINE_FAILED_SOURCE_SET" == "none" ]]; then
    PIPELINE_FAILED_SOURCE_SET="$incoming"
  else
    PIPELINE_FAILED_SOURCE_SET+=",${incoming}"
  fi
}
# Rebuild PIPELINE_AUDIT_SUMMARY from the current stage/source/failure globals.
# Globals: PIPELINE_STAGE_SET, PIPELINE_SOURCE_SET, PIPELINE_FAILED_SOURCE_SET,
#          FETCH_TOTAL, MULTI_SOURCE_AUDIT (read); PIPELINE_AUDIT_SUMMARY (written)
refresh_pipeline_audit() {
  printf -v PIPELINE_AUDIT_SUMMARY \
    'runtime_audit stage_set=%s selected_source_keys=%s failed_source_keys=%s openrouter_total=%s %s' \
    "$PIPELINE_STAGE_SET" \
    "$PIPELINE_SOURCE_SET" \
    "$PIPELINE_FAILED_SOURCE_SET" \
    "${FETCH_TOTAL:-0}" \
    "$MULTI_SOURCE_AUDIT"
}
# Error handling: log the failure, publish the fallback report, record the
# failed run, optionally alert, then terminate the script with status 1.
# Globals:   REPORT_DATE, DB_URL, MODEL_COUNT, PIPELINE_AUDIT_SUMMARY,
#            FEISHU_WEBHOOK, LOG_FILE (read)
# Arguments: $1 - human-readable failure reason
error_exit() {
  local reason="$1"
  local output_path=""

  log "❌ 错误: ${reason}"
  refresh_pipeline_audit

  # Degrade: fall back to a copy of yesterday's report.
  fallback_report
  if [[ -f "$(report_markdown_path "$REPORT_DATE")" ]]; then
    output_path="$(report_markdown_path "$REPORT_DATE")"
  fi

  # State tracking is best-effort; its output goes to the log file only.
  track_report_state \
    "$DB_URL" "$REPORT_DATE" "failed" "${MODEL_COUNT:-}" \
    "$PIPELINE_AUDIT_SUMMARY" "$output_path" "$reason" \
    "scheduled" "cron" "true" >> "$LOG_FILE" 2>&1 || true

  # Send the alert only when a webhook is configured.
  if [[ -n "$FEISHU_WEBHOOK" ]]; then
    send_alert "$reason"
  fi
  exit 1
}
# Build the initial audit summary so PIPELINE_AUDIT_SUMMARY is populated
# (with failed_source_keys still at its starting value) before any stage runs.
refresh_pipeline_audit
# Degraded mode: publish a copy of the previous day's report for REPORT_DATE,
# with dates rewritten and the Markdown title prefixed by "[数据延迟]".
#
# This runs inside the error path (error_exit), so copy/edit failures are
# logged and swallowed instead of tripping `set -e`; otherwise the script
# would die before the failed run is recorded and the alert is sent.
#
# Globals:   PROJECT_DIR, REPORT_DATE, LOG_FILE (read)
# Arguments: none
# Outputs:   progress messages via log; archive helper output appended to LOG_FILE
# Returns:   0, including when nothing could be copied
fallback_report() {
  local yesterday yesterday_md today_md yesterday_html today_html
  local html_copy_ok=true

  # "Yesterday" is the day before REPORT_DATE, not before the wall clock, so
  # the right file is picked when the two differ. -u avoids DST edge cases.
  # Falls back to the wall clock if REPORT_DATE is not parseable by date(1).
  if ! yesterday=$(date -u -d "${REPORT_DATE} -1 day" +%Y-%m-%d 2> /dev/null); then
    if ! yesterday=$(date -d "yesterday" +%Y-%m-%d); then
      log "⚠️ 无法计算昨日日期,跳过降级报告"
      return 0
    fi
  fi

  yesterday_md="${PROJECT_DIR}/$(report_markdown_path "$yesterday")"
  today_md="${PROJECT_DIR}/$(report_markdown_path "$REPORT_DATE")"
  yesterday_html="${PROJECT_DIR}/$(report_html_path "$yesterday")"
  today_html="${PROJECT_DIR}/$(report_html_path "$REPORT_DATE")"

  if [[ ! -f "$yesterday_md" ]]; then
    log "⚠️ 无昨日报告可供复制"
    return 0
  fi

  # Markdown copy: rewrite the date everywhere, then tag the first line.
  if ! {
    cp "$yesterday_md" "$today_md" \
      && sed -i "s/${yesterday}/${REPORT_DATE}/g" "$today_md" \
      && sed -i "1s/^/# [数据延迟] /" "$today_md"
  }; then
    log "⚠️ 复制昨日 Markdown 报告失败"
    return 0
  fi

  # HTML copy is optional; a failure here must not discard the Markdown copy.
  if [[ -f "$yesterday_html" ]]; then
    if ! {
      cp "$yesterday_html" "$today_html" \
        && sed -i "s/${yesterday}/${REPORT_DATE}/g" "$today_html"
    }; then
      html_copy_ok=false
      log "⚠️ 复制昨日 HTML 报告失败"
    fi
  fi

  # Archive only when both artifacts exist and the HTML copy did not fail.
  if [[ "$html_copy_ok" == true && -f "$today_md" && -f "$today_html" ]]; then
    archive_report_artifacts "$REPORT_DATE" >> "$LOG_FILE" 2>&1 || true
  fi

  log "⚠️ 已复制昨日报告并标记[数据延迟]"
  return 0
}
# Escape a string for embedding inside a JSON string literal
# (backslash, double quote, newline, carriage return, tab).
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
_alert_json_escape() {
  local text="$1"
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Send a Feishu webhook alert describing the pipeline failure.
# Interpolated values are JSON-escaped so quotes or backslashes in the message
# cannot produce an invalid payload. The request is time-limited so a stalled
# webhook cannot hang the run. Delivery is best-effort: failure is only logged.
# Globals:   REPORT_DATE, LOG_FILE, FEISHU_WEBHOOK (read)
# Arguments: $1 - failure message
send_alert() {
  local msg="$1"
  local payload
  printf -v payload \
    '{"msg_type":"text","content":{"text":"🚨 LLM Hub 日报失败\\n日期: %s\\n错误: %s\\n请检查日志: %s"}}' \
    "$(_alert_json_escape "$REPORT_DATE")" \
    "$(_alert_json_escape "$msg")" \
    "$(_alert_json_escape "$LOG_FILE")"
  if curl -s --max-time 15 -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$FEISHU_WEBHOOK" > /dev/null; then
    log "📢 飞书告警已发送"
  else
    log "⚠️ 飞书告警发送失败"
  fi
}
# Main flow
log "🚀 开始每日流水线: ${REPORT_DATE}"
# Route a failed cd through error_exit so it is logged and alerted like any
# other stage failure instead of dying silently under `set -e`.
cd "$PROJECT_DIR" || error_exit "无法进入项目目录: ${PROJECT_DIR}"

# 1. Data collection
log "1⃣ 数据采集..."
if ! go run scripts/fetch_openrouter.go -strict-real -out "$FETCH_OUT" >> "$LOG_FILE" 2>&1; then
  merge_failed_source_keys "openrouter"
  error_exit "数据采集失败"
fi

# Read the "total" field from the fetch output. A parse failure (missing file,
# invalid JSON, unexpected shape) goes through error_exit rather than letting
# `set -e` kill the script without fallback report, state tracking or alert.
if ! FETCH_TOTAL=$(
  python3 - "$FETCH_OUT" 2>> "$LOG_FILE" << 'PY'
import json, sys
path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as f:
    data = json.load(f)
print(int(data.get("total", 0)))
PY
); then
  FETCH_TOTAL="0"
  merge_failed_source_keys "openrouter"
  error_exit "采集结果解析失败: ${FETCH_OUT}"
fi

# Validate before comparing: `[ x -lt 10 ]` on a non-integer returns status 2,
# which an `if` would treat as "not less than 10" and let the run continue.
if [[ ! "$FETCH_TOTAL" =~ ^[0-9]+$ ]] || [ "$FETCH_TOTAL" -lt 10 ]; then
  merge_failed_source_keys "openrouter"
  error_exit "本次采集结果异常: total=${FETCH_TOTAL:-0} < 10"
fi
refresh_pipeline_audit
log "✅ 数据采集完成"
# 1.5 Supplementary multi-source sync
log "1 多源补充同步..."
MULTI_SOURCE_OUTPUT="$(mktemp)"

# Run the collector once, remembering its exit status; stdout is the summary,
# stderr goes straight to the log.
multi_source_status=0
go run scripts/fetch_multi_source.go --sources moonshot,deepseek,openai \
  > "$MULTI_SOURCE_OUTPUT" 2>> "$LOG_FILE" || multi_source_status=$?
MULTI_SOURCE_SUMMARY="$(normalize_summary_file "$MULTI_SOURCE_OUTPUT")"

if ((multi_source_status != 0)); then
  # Failure: use the reported failed keys when a summary was produced,
  # otherwise blame every requested source.
  if [[ -n "$MULTI_SOURCE_SUMMARY" ]]; then
    MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY}"
    merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
  else
    MULTI_SOURCE_AUDIT="multi_source_audit=stage_failed"
    merge_failed_source_keys "moonshot,deepseek,openai"
  fi
  cat "$MULTI_SOURCE_OUTPUT" >> "$LOG_FILE"
  rm -f "$MULTI_SOURCE_OUTPUT"
  error_exit "多源补充同步失败"
fi

# Success: the summary may still list per-source failures.
MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY:-none}"
merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
refresh_pipeline_audit
cat "$MULTI_SOURCE_OUTPUT" >> "$LOG_FILE"
rm -f "$MULTI_SOURCE_OUTPUT"
# Run one official import/verification stage with `go run -tags llm_script`.
# On failure, records the source key and aborts the pipeline via error_exit.
# Arguments: $1 - source key to record on failure
#            $2 - failure message passed to error_exit
#            $3... - Go source files making up the program
run_official_import() {
  local source_key="$1"
  local failure_message="$2"
  shift 2
  if ! go run -tags llm_script "$@" >> "$LOG_FILE" 2>&1; then
    merge_failed_source_keys "$source_key"
    error_exit "$failure_message"
  fi
}

# Official vendor data imports.
run_official_import "zhipu" "智谱官方导入失败" \
  scripts/import_zhipu_data.go
run_official_import "official_seed_export" "官方种子导出失败" \
  scripts/export_official_seed_json.go
run_official_import "baidu" "百度官方导入失败" \
  scripts/import_phase2_data.go
run_official_import "bytedance" "字节官方导入失败" \
  scripts/import_bytedance_data.go

# Subscription / package collectors.
run_official_import "aliyun_subscription" "阿里云套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/aliyun_subscription_lib.go \
  scripts/import_aliyun_subscription.go
run_official_import "baidu_subscription" "百度套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/baidu_subscription_lib.go \
  scripts/import_baidu_subscription.go
run_official_import "ctyun_subscription" "天翼云套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/ctyun_subscription_lib.go \
  scripts/import_ctyun_subscription.go
run_official_import "bytedance_subscription" "火山方舟套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/bytedance_subscription_lib.go \
  scripts/import_bytedance_subscription.go
run_official_import "huawei_package" "华为云套餐包导入失败" \
  scripts/subscription_import_common.go \
  scripts/huawei_package_lib.go \
  scripts/import_huawei_package.go
run_official_import "zhipu_coding_plan" "智谱 Coding Plan 导入失败" \
  scripts/subscription_import_common.go \
  scripts/zhipu_coding_plan_lib.go \
  scripts/import_zhipu_coding_plan.go
run_official_import "minimax_subscription" "MiniMax Token Plan 导入失败" \
  scripts/subscription_import_common.go \
  scripts/minimax_subscription_lib.go \
  scripts/import_minimax_subscription.go

# Catalog verification.
run_official_import "cucloud_catalog" "联通云目录校验失败" \
  scripts/subscription_import_common.go \
  scripts/catalog_verification_common.go \
  scripts/import_cucloud_catalog.go
run_official_import "mobile_cloud_catalog" "移动云目录校验失败" \
  scripts/subscription_import_common.go \
  scripts/catalog_verification_common.go \
  scripts/import_mobile_cloud_catalog.go

# Official pricing collectors.
run_official_import "youdao_pricing" "网易有道价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/youdao_pricing_lib.go \
  scripts/import_youdao_pricing.go
run_official_import "platform360_pricing" "360 智脑价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/platform360_pricing_lib.go \
  scripts/import_360_pricing.go
run_official_import "siliconflow_pricing" "硅基流动价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/siliconflow_pricing_lib.go \
  scripts/import_siliconflow_pricing.go
run_official_import "ppio_pricing" "PPIO 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/ppio_pricing_lib.go \
  scripts/import_ppio_pricing.go
run_official_import "ucloud_pricing" "UCloud 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/ucloud_pricing_lib.go \
  scripts/import_ucloud_pricing.go

# Cloudflare: signature guard first, then the import itself.
run_official_import "cloudflare_pricing_signature" "Cloudflare Workers AI 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/cloudflare_pricing_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/cloudflare_pricing_signature_guard_lib.go \
  scripts/cloudflare_pricing_import_runner.go \
  scripts/cloudflare_pricing_lib.go \
  scripts/cloudflare_pricing_signature_guard.go
run_official_import "cloudflare_pricing" "Cloudflare Workers AI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/cloudflare_pricing_snapshot_lib.go \
  scripts/cloudflare_pricing_import_runner.go \
  scripts/cloudflare_pricing_lib.go \
  scripts/import_cloudflare_pricing.go

# Perplexity: signature guard first, then the import itself.
run_official_import "perplexity_pricing_signature" "Perplexity API 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/perplexity_pricing_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/perplexity_pricing_signature_guard_lib.go \
  scripts/perplexity_pricing_import_runner.go \
  scripts/perplexity_pricing_lib.go \
  scripts/perplexity_pricing_signature_guard.go
run_official_import "perplexity_pricing" "Perplexity API 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/perplexity_pricing_snapshot_lib.go \
  scripts/perplexity_pricing_import_runner.go \
  scripts/perplexity_pricing_lib.go \
  scripts/import_perplexity_pricing.go

# Vertex AI: signature guard first, then the import itself.
run_official_import "vertex_pricing_signature" "Vertex AI 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/vertex_pricing_snapshot_lib.go \
  scripts/vertex_pricing_signature_guard_lib.go \
  scripts/vertex_pricing_import_runner.go \
  scripts/vertex_pricing_lib.go \
  scripts/vertex_pricing_signature_guard.go
run_official_import "vertex_pricing" "Vertex AI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/vertex_pricing_snapshot_lib.go \
  scripts/vertex_pricing_import_runner.go \
  scripts/vertex_pricing_lib.go \
  scripts/import_vertex_pricing.go

run_official_import "bedrock_pricing" "Amazon Bedrock 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/bedrock_pricing_lib.go \
  scripts/import_bedrock_pricing.go
run_official_import "azure_openai_pricing" "Azure OpenAI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/azure_openai_pricing_lib.go \
  scripts/import_azure_openai_pricing.go

# Catalog-level verification of official entry points.
run_official_import "catalog_seed_verification" "目录级官方入口核验失败" \
  scripts/subscription_import_common.go \
  scripts/import_catalog_seed_verification.go

# Materialize the daily signals; this stage additionally receives the current
# audit summary through the SIGNAL_SOURCE_AUDIT environment variable.
if ! SIGNAL_SOURCE_AUDIT="$PIPELINE_AUDIT_SUMMARY" go run -tags llm_script \
  scripts/materialize_daily_signals.go >> "$LOG_FILE" 2>&1; then
  merge_failed_source_keys "daily_signal_snapshot"
  error_exit "每日关键信号物化失败"
fi
refresh_pipeline_audit
log "✅ 多源补充同步完成"
# 2. Data quality check
log "2⃣ 数据质量检查..."
# Query failures are sent through error_exit (with psql's stderr kept in the
# log) instead of letting `set -e`/pipefail end the script silently.
if ! MODEL_COUNT=$(psql "$DB_URL" -t -c "SELECT COUNT(*) FROM models WHERE deleted_at IS NULL" 2>> "$LOG_FILE"); then
  MODEL_COUNT=""
  error_exit "模型数量查询失败"
fi
MODEL_COUNT="${MODEL_COUNT//[[:space:]]/}"
# Reject empty/non-numeric output explicitly: `[ "" -lt 10 ]` returns status 2,
# which an `if` would treat as "check passed".
if [[ ! "$MODEL_COUNT" =~ ^[0-9]+$ ]]; then
  invalid_model_count="$MODEL_COUNT"
  MODEL_COUNT=""
  error_exit "模型数量查询结果无效: '${invalid_model_count}'"
fi
if [ "$MODEL_COUNT" -lt 10 ]; then
  error_exit "模型数量不足: ${MODEL_COUNT} < 10"
fi
log "✅ 数据质量检查通过 (模型数: ${MODEL_COUNT})"
# 3. Generate the daily report
log "3⃣ 生成日报..."
export DATABASE_URL="$DB_URL"
# Run metadata and the audit summary are handed to the generator via its environment.
if REPORT_RUN_KIND="scheduled" \
  REPORT_TRIGGER_SOURCE="cron" \
  REPORT_IS_OFFICIAL_DAILY="true" \
  REPORT_RUNTIME_AUDIT="$PIPELINE_AUDIT_SUMMARY" \
  go run \
  scripts/generate_daily_report.go \
  scripts/official_import_signature_audit_query_lib.go >> "$LOG_FILE" 2>&1; then
  log "✅ 日报生成完成"
else
  error_exit "日报生成失败"
fi
# 4. Verify the archive: both the Markdown and the HTML archive files must exist.
log "4⃣ 校验归档..."
if [[ -f "$(report_archive_markdown_path "$REPORT_DATE")" && -f "$(report_archive_html_path "$REPORT_DATE")" ]]; then
  log "✅ 归档完成"
else
  error_exit "日报归档失败"
fi
# 5. Verify run records: each table must hold at least one 'generated' row
# for REPORT_DATE (awk turns the count into an exit status).
log "5⃣ 校验运行记录..."
for run_table in daily_report report_runs; do
  if ! psql "$DB_URL" -Atqc "select count(*) from ${run_table} where report_date = DATE '${REPORT_DATE}' and status = 'generated';" \
    | awk '{ exit !($1 >= 1) }'; then
    error_exit "${run_table} 未写入 generated 记录"
  fi
done
log "✅ 日报记录更新完成"

# Final summary with the generated artifact paths.
log "🎉 每日流水线全部完成!"
log "📄 Markdown: $(report_markdown_path "$REPORT_DATE")"
log "🌐 HTML: $(report_html_path "$REPORT_DATE")"
exit 0