Files
llm-intelligence/scripts/run_daily.sh

438 lines
17 KiB
Bash
Raw Normal View History

#!/bin/bash
# run_daily.sh - daily data-collection and report-generation pipeline.
# Sprint 3: complete scheduling script
# (collect -> quality check -> report generation -> archive -> notify).
set -euo pipefail

PROJECT_DIR="/home/long/project/llm-intelligence"

# Shared helpers. The report_* / track_report_state / archive_report_artifacts
# functions used below are not defined in this script, so they presumably come
# from this file -- confirm against report_utils.sh.
# shellcheck disable=SC1091
source "${PROJECT_DIR}/scripts/report_utils.sh"

# Optional environment files. .env.local is read first and .env second, so a
# plain assignment in .env takes precedence when both define the same variable.
if [[ -f "${PROJECT_DIR}/.env.local" ]]; then
  # shellcheck disable=SC1091
  . "${PROJECT_DIR}/.env.local"
fi
if [[ -f "${PROJECT_DIR}/.env" ]]; then
  # shellcheck disable=SC1091
  . "${PROJECT_DIR}/.env"
fi

# Database connection string: keep DATABASE_URL from the environment when it is
# set and non-empty, otherwise use the local socket default. The value is
# exported for child processes and mirrored into DB_URL for this script.
: "${DATABASE_URL:=host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
export DATABASE_URL
DB_URL="${DATABASE_URL}"

REPORT_DATE="$(report_date_value)"
LOG_FILE="/tmp/llm_hub_daily_${REPORT_DATE}.log"

# Feishu webhook URL; an empty value disables alerting in error_exit.
FEISHU_WEBHOOK="${FEISHU_WEBHOOK:-}"

# State filled in as the pipeline progresses.
MODEL_COUNT=
FETCH_OUT="${PROJECT_DIR}/models.json"
FETCH_TOTAL=0

# Audit bookkeeping: stage names, the source keys covered by this run, the
# source keys that failed so far, and the multi-source stage summary.
PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_signal_snapshot,daily_report"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance"
PIPELINE_SOURCE_SET+=",aliyun_subscription,baidu_subscription,ctyun_subscription,bytedance_subscription"
PIPELINE_SOURCE_SET+=",huawei_package,zhipu_coding_plan,minimax_subscription"
PIPELINE_SOURCE_SET+=",cucloud_catalog,mobile_cloud_catalog"
PIPELINE_SOURCE_SET+=",youdao_pricing,platform360_pricing,siliconflow_pricing,ppio_pricing"
PIPELINE_SOURCE_SET+=",ucloud_pricing,coreshub_pricing,cloudflare_pricing,perplexity_pricing"
PIPELINE_SOURCE_SET+=",vertex_pricing,bedrock_pricing,azure_openai_pricing,catalog_seed_verification"
PIPELINE_FAILED_SOURCE_SET="none"
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"
PIPELINE_AUDIT_SUMMARY=""
# Logging helper: prints a timestamped line to stdout and appends the same
# line to LOG_FILE.
#   $1 - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}
# Flattens a summary file onto one line: newlines become spaces, runs of
# whitespace collapse to a single space, and one leading/trailing space is
# stripped. Prints nothing when the file does not exist.
#   $1 - path of the file to flatten
normalize_summary_file() {
  local summary_file="$1"
  [[ -f "$summary_file" ]] || return 0
  tr '\n' ' ' < "$summary_file" \
    | sed -E 's/[[:space:]]+/ /g; s/^ //; s/ $//'
}
# Prints the value that follows "failed_source_keys=" (up to the next space)
# in the given summary text; prints nothing when the marker is absent. When a
# line contains the marker more than once, the greedy match selects the last
# occurrence.
#   $1 - summary text
extract_failed_source_keys() {
  local audit_text="$1"
  sed -n -E 's/.*failed_source_keys=([^ ]*).*/\1/p' <<< "$audit_text"
}
# Adds source keys to PIPELINE_FAILED_SOURCE_SET. An empty argument or the
# literal "none" leaves the set untouched; the first real value replaces the
# "none" placeholder and later values are appended after a comma.
#   $1 - comma-separated source keys
merge_failed_source_keys() {
  local incoming="$1"
  case "$incoming" in
    "" | none)
      return 0
      ;;
  esac
  if [[ "$PIPELINE_FAILED_SOURCE_SET" == "none" ]]; then
    PIPELINE_FAILED_SOURCE_SET="$incoming"
  else
    PIPELINE_FAILED_SOURCE_SET="${PIPELINE_FAILED_SOURCE_SET},${incoming}"
  fi
}
# Rebuilds PIPELINE_AUDIT_SUMMARY from the current pipeline state: stage set,
# selected source keys, failed source keys, the OpenRouter total (0 when
# FETCH_TOTAL is unset or empty) and the multi-source audit fragment.
refresh_pipeline_audit() {
  printf -v PIPELINE_AUDIT_SUMMARY \
    'runtime_audit stage_set=%s selected_source_keys=%s failed_source_keys=%s openrouter_total=%s %s' \
    "${PIPELINE_STAGE_SET}" \
    "${PIPELINE_SOURCE_SET}" \
    "${PIPELINE_FAILED_SOURCE_SET}" \
    "${FETCH_TOTAL:-0}" \
    "${MULTI_SOURCE_AUDIT}"
}
# Fatal error handler. Logs the failure, refreshes the audit summary, tries to
# publish the fallback report, records a "failed" run through
# track_report_state, sends a Feishu alert when FEISHU_WEBHOOK is set, and
# finally exits with status 1.
#   $1 - failure reason (logged, stored with the run record, sent in the alert)
# Globals read: REPORT_DATE, DB_URL, MODEL_COUNT, PIPELINE_AUDIT_SUMMARY,
#               FEISHU_WEBHOOK, LOG_FILE
error_exit() {
  local reason="$1"
  local output_path=""
  local candidate=""

  # A logging problem must not stop the rest of the failure handling.
  log "❌ 错误: ${reason}" || true
  refresh_pipeline_audit

  # Degrade: copy yesterday's report. The script runs under `set -e`, so a
  # bare call would terminate the script as soon as any command inside the
  # fallback failed (cp, sed, ...), and the run record and alert below would
  # never happen. Running it as part of an || list keeps it best-effort.
  fallback_report || log "⚠️ 降级报告生成失败" || true

  # Report the markdown path only when the file actually exists.
  candidate="$(report_markdown_path "$REPORT_DATE")" || candidate=""
  if [[ -n "$candidate" && -f "$candidate" ]]; then
    output_path="$candidate"
  fi

  # Best-effort run record; its output is appended to the log file.
  track_report_state "$DB_URL" "$REPORT_DATE" "failed" "${MODEL_COUNT:-}" "$PIPELINE_AUDIT_SUMMARY" "$output_path" "$reason" "scheduled" "cron" "true" >> "$LOG_FILE" 2>&1 || true

  # Send the alert only when a webhook is configured.
  if [[ -n "$FEISHU_WEBHOOK" ]]; then
    send_alert "$reason" || true
  fi
  exit 1
}
# Populate PIPELINE_AUDIT_SUMMARY from the initial defaults so it is already
# filled in before the main flow starts.
refresh_pipeline_audit
# Degraded mode: publish a copy of the previous day's report under today's
# date. The markdown copy has the old date replaced by REPORT_DATE and gets a
# "[数据延迟]" (data delayed) heading prefix on its first line; the HTML copy,
# when one exists, only gets the date replacement. When both of today's files
# exist afterwards they are handed to archive_report_artifacts (best-effort).
# Note: cp overwrites any file already present at today's paths.
# Globals read: PROJECT_DIR, REPORT_DATE, LOG_FILE
fallback_report() {
  local previous_day previous_md today_md previous_html today_html

  # Anchor "yesterday" to REPORT_DATE instead of the wall clock, so the source
  # file is the day before the report being produced even when REPORT_DATE is
  # not the system's current date. Fall back to the wall clock if REPORT_DATE
  # is not something `date -d` can parse.
  previous_day="$(date -d "${REPORT_DATE} -1 day" +%Y-%m-%d 2>/dev/null)" \
    || previous_day="$(date -d "yesterday" +%Y-%m-%d)"

  previous_md="${PROJECT_DIR}/$(report_markdown_path "$previous_day")"
  today_md="${PROJECT_DIR}/$(report_markdown_path "$REPORT_DATE")"
  previous_html="${PROJECT_DIR}/$(report_html_path "$previous_day")"
  today_html="${PROJECT_DIR}/$(report_html_path "$REPORT_DATE")"

  if [ -f "$previous_md" ]; then
    cp "$previous_md" "$today_md"
    sed -i "s/${previous_day}/${REPORT_DATE}/g" "$today_md"
    # Prefix the first line with the delay marker.
    sed -i "1s/^/# [数据延迟] /" "$today_md"
    if [ -f "$previous_html" ]; then
      cp "$previous_html" "$today_html"
      sed -i "s/${previous_day}/${REPORT_DATE}/g" "$today_html"
    fi
    if [ -f "$today_md" ] && [ -f "$today_html" ]; then
      archive_report_artifacts "$REPORT_DATE" >> "$LOG_FILE" 2>&1 || true
    fi
    log "⚠️ 已复制昨日报告并标记[数据延迟]"
  else
    log "⚠️ 无昨日报告可供复制"
  fi
}
# Sends a Feishu text alert about a failed daily run. Best-effort: a curl
# failure is logged but never propagated.
#   $1 - error message to include in the alert
# Globals read: REPORT_DATE, LOG_FILE, FEISHU_WEBHOOK
send_alert() {
  local msg="$1"
  local escaped_msg="$msg"
  local payload

  # Escape the message for use inside a JSON string. Without this a quote,
  # backslash or line break in the message yields an invalid payload.
  # Backslashes are handled first so later escapes are not doubled.
  escaped_msg=${escaped_msg//\\/\\\\}
  escaped_msg=${escaped_msg//\"/\\\"}
  escaped_msg=${escaped_msg//$'\n'/\\n}
  escaped_msg=${escaped_msg//$'\r'/\\r}
  escaped_msg=${escaped_msg//$'\t'/\\t}

  payload="{\"msg_type\":\"text\",\"content\":{\"text\":\"🚨 LLM Hub 日报失败\\n日期: ${REPORT_DATE}\\n错误: ${escaped_msg}\\n请检查日志: ${LOG_FILE}\"}}"

  # Only claim the alert was sent when curl reports success.
  if curl -s -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$FEISHU_WEBHOOK" > /dev/null; then
    log "📢 飞书告警已发送"
  else
    log "⚠️ 飞书告警发送失败"
  fi
}
# Main flow
log "🚀 开始每日流水线: ${REPORT_DATE}"
cd "$PROJECT_DIR"

# 1. Data collection
log "1⃣ 数据采集..."
if ! go run scripts/fetch_openrouter.go -strict-real -out "$FETCH_OUT" >> "$LOG_FILE" 2>&1; then
  merge_failed_source_keys "openrouter"
  error_exit "数据采集失败"
fi

# Read the "total" field from the fetched JSON. The assignment is checked
# explicitly: as a bare statement under `set -e`, a parse failure (missing or
# malformed file) would end the script without the fallback report, the failed
# run record or the alert that error_exit provides.
if ! FETCH_TOTAL=$(
  python3 - "$FETCH_OUT" <<'PY'
import json, sys
path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as f:
    data = json.load(f)
print(int(data.get("total", 0)))
PY
); then
  FETCH_TOTAL="0"
  merge_failed_source_keys "openrouter"
  error_exit "采集结果解析失败: ${FETCH_OUT}"
fi

# Reject anything that is not a non-negative integer of at least 10. The
# numeric guard matters because `[ x -lt 10 ]` errors out on non-numeric input
# and an `if` treats that error as "condition false", letting bad data pass.
if [[ ! "${FETCH_TOTAL:-0}" =~ ^[0-9]+$ ]] || (( 10#${FETCH_TOTAL:-0} < 10 )); then
  merge_failed_source_keys "openrouter"
  error_exit "本次采集结果异常: total=${FETCH_TOTAL:-0} < 10"
fi
refresh_pipeline_audit
log "✅ 数据采集完成"
# 1.5 Supplementary multi-source sync
log "1 多源补充同步..."
MULTI_SOURCE_OUTPUT="$(mktemp)"

# Run the fetcher with stdout captured in the temp file and stderr appended to
# the log; remember the exit status instead of branching immediately.
multi_source_status=0
go run scripts/fetch_multi_source.go --sources moonshot,deepseek,openai \
  > "$MULTI_SOURCE_OUTPUT" 2>> "$LOG_FILE" || multi_source_status=$?

# The one-line summary is needed on both the failure and the success path.
MULTI_SOURCE_SUMMARY="$(normalize_summary_file "$MULTI_SOURCE_OUTPUT")"

if (( multi_source_status != 0 )); then
  if [[ -z "$MULTI_SOURCE_SUMMARY" ]]; then
    # No output at all: mark the whole stage and every requested source failed.
    MULTI_SOURCE_AUDIT="multi_source_audit=stage_failed"
    merge_failed_source_keys "moonshot,deepseek,openai"
  else
    # Partial output: trust the failed_source_keys reported by the fetcher.
    MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY}"
    merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
  fi
  cat "$MULTI_SOURCE_OUTPUT" >> "$LOG_FILE"
  rm -f "$MULTI_SOURCE_OUTPUT"
  error_exit "多源补充同步失败"
fi

# Success: record the summary (or "none"), merge any failed keys it reports,
# then move the raw output into the log and drop the temp file.
MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY:-none}"
merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
refresh_pipeline_audit
cat "$MULTI_SOURCE_OUTPUT" >> "$LOG_FILE"
rm -f "$MULTI_SOURCE_OUTPUT"
# Runs one `go run -tags llm_script` step with all output appended to the log.
# On failure the source key is added to the failed set and the pipeline is
# aborted through error_exit.
#   $1 - source key recorded on failure
#   $2 - failure message passed to error_exit
#   $3... - Go source files handed to `go run`, in order
run_llm_script() {
  local source_key="$1"
  local failure_message="$2"
  shift 2
  if ! go run -tags llm_script "$@" >> "$LOG_FILE" 2>&1; then
    merge_failed_source_keys "$source_key"
    error_exit "$failure_message"
  fi
}

# Official vendor imports and seed export.
run_llm_script "zhipu" "智谱官方导入失败" \
  scripts/import_zhipu_data.go
run_llm_script "official_seed_export" "官方种子导出失败" \
  scripts/export_official_seed_json.go
run_llm_script "baidu" "百度官方导入失败" \
  scripts/import_phase2_data.go
run_llm_script "bytedance" "字节官方导入失败" \
  scripts/import_bytedance_data.go

# Subscription / package imports.
run_llm_script "aliyun_subscription" "阿里云套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/aliyun_subscription_lib.go \
  scripts/import_aliyun_subscription.go
run_llm_script "baidu_subscription" "百度套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/baidu_subscription_lib.go \
  scripts/import_baidu_subscription.go
run_llm_script "ctyun_subscription" "天翼云套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/ctyun_subscription_lib.go \
  scripts/import_ctyun_subscription.go
run_llm_script "bytedance_subscription" "火山方舟套餐导入失败" \
  scripts/subscription_import_common.go \
  scripts/bytedance_subscription_lib.go \
  scripts/import_bytedance_subscription.go
run_llm_script "huawei_package" "华为云套餐包导入失败" \
  scripts/subscription_import_common.go \
  scripts/huawei_package_lib.go \
  scripts/import_huawei_package.go
run_llm_script "zhipu_coding_plan" "智谱 Coding Plan 导入失败" \
  scripts/subscription_import_common.go \
  scripts/zhipu_coding_plan_lib.go \
  scripts/import_zhipu_coding_plan.go
run_llm_script "minimax_subscription" "MiniMax Token Plan 导入失败" \
  scripts/subscription_import_common.go \
  scripts/minimax_subscription_lib.go \
  scripts/import_minimax_subscription.go

# Catalog verification.
run_llm_script "cucloud_catalog" "联通云目录校验失败" \
  scripts/subscription_import_common.go \
  scripts/catalog_verification_common.go \
  scripts/import_cucloud_catalog.go
run_llm_script "mobile_cloud_catalog" "移动云目录校验失败" \
  scripts/subscription_import_common.go \
  scripts/catalog_verification_common.go \
  scripts/import_mobile_cloud_catalog.go

# Pricing imports.
run_llm_script "youdao_pricing" "网易有道价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/youdao_pricing_lib.go \
  scripts/import_youdao_pricing.go
run_llm_script "platform360_pricing" "360 智脑价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/platform360_pricing_lib.go \
  scripts/import_360_pricing.go
run_llm_script "siliconflow_pricing" "硅基流动价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/siliconflow_pricing_lib.go \
  scripts/import_siliconflow_pricing.go
run_llm_script "ppio_pricing" "PPIO 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/ppio_pricing_lib.go \
  scripts/import_ppio_pricing.go
run_llm_script "ucloud_pricing" "UCloud 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/ucloud_pricing_lib.go \
  scripts/import_ucloud_pricing.go
run_llm_script "coreshub_pricing" "CoresHub 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/coreshub_pricing_lib.go \
  scripts/import_coreshub_pricing.go

# Cloudflare: signature guard runs before the import itself.
run_llm_script "cloudflare_pricing_signature" "Cloudflare Workers AI 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/cloudflare_pricing_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/cloudflare_pricing_signature_guard_lib.go \
  scripts/cloudflare_pricing_import_runner.go \
  scripts/cloudflare_pricing_lib.go \
  scripts/cloudflare_pricing_signature_guard.go
run_llm_script "cloudflare_pricing" "Cloudflare Workers AI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/cloudflare_pricing_snapshot_lib.go \
  scripts/cloudflare_pricing_import_runner.go \
  scripts/cloudflare_pricing_lib.go \
  scripts/import_cloudflare_pricing.go

# Perplexity: signature guard, then import.
run_llm_script "perplexity_pricing_signature" "Perplexity API 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/perplexity_pricing_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/perplexity_pricing_signature_guard_lib.go \
  scripts/perplexity_pricing_import_runner.go \
  scripts/perplexity_pricing_lib.go \
  scripts/perplexity_pricing_signature_guard.go
run_llm_script "perplexity_pricing" "Perplexity API 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/perplexity_pricing_snapshot_lib.go \
  scripts/perplexity_pricing_import_runner.go \
  scripts/perplexity_pricing_lib.go \
  scripts/import_perplexity_pricing.go

# Vertex AI: signature guard, then import.
run_llm_script "vertex_pricing_signature" "Vertex AI 价格页结构签名漂移" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/pricing_markdown_snapshot_lib.go \
  scripts/signature_guard_common.go \
  scripts/official_import_signature_audit_lib.go \
  scripts/vertex_pricing_snapshot_lib.go \
  scripts/vertex_pricing_signature_guard_lib.go \
  scripts/vertex_pricing_import_runner.go \
  scripts/vertex_pricing_lib.go \
  scripts/vertex_pricing_signature_guard.go
run_llm_script "vertex_pricing" "Vertex AI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/vertex_pricing_snapshot_lib.go \
  scripts/vertex_pricing_import_runner.go \
  scripts/vertex_pricing_lib.go \
  scripts/import_vertex_pricing.go

run_llm_script "bedrock_pricing" "Amazon Bedrock 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/bedrock_pricing_lib.go \
  scripts/import_bedrock_pricing.go
run_llm_script "azure_openai_pricing" "Azure OpenAI 价格导入失败" \
  scripts/subscription_import_common.go \
  scripts/official_pricing_import_common.go \
  scripts/azure_openai_pricing_lib.go \
  scripts/import_azure_openai_pricing.go
run_llm_script "catalog_seed_verification" "目录级官方入口核验失败" \
  scripts/subscription_import_common.go \
  scripts/import_catalog_seed_verification.go

# Daily signal materialization. Kept inline because the audit summary is
# passed through the environment of this single `go run` process only.
if ! SIGNAL_SOURCE_AUDIT="$PIPELINE_AUDIT_SUMMARY" go run -tags llm_script \
  scripts/materialize_daily_signals.go >> "$LOG_FILE" 2>&1; then
  merge_failed_source_keys "daily_signal_snapshot"
  error_exit "每日关键信号物化失败"
fi
refresh_pipeline_audit
log "✅ 多源补充同步完成"
# 2. Data quality check
log "2⃣ 数据质量检查..."

# Count the non-deleted models. The query is checked explicitly for two
# reasons: with `set -e` and pipefail a failing psql would end the script
# without going through error_exit, and an empty or non-numeric result would
# make `[ "$MODEL_COUNT" -lt 10 ]` error out, which an `if` treats as "false",
# so the quality gate would pass without any valid count.
if ! MODEL_COUNT="$(psql "$DB_URL" -t -c "SELECT COUNT(*) FROM models WHERE deleted_at IS NULL" 2>/dev/null | tr -d ' ')" \
  || [[ ! "$MODEL_COUNT" =~ ^[0-9]+$ ]]; then
  # Do not hand an unusable value to the failed-run record.
  MODEL_COUNT=""
  error_exit "模型数量查询失败"
fi
if (( 10#$MODEL_COUNT < 10 )); then
  error_exit "模型数量不足: ${MODEL_COUNT} < 10"
fi
log "✅ 数据质量检查通过 (模型数: ${MODEL_COUNT})"
# 3. Generate the daily report
log "3⃣ 生成日报..."
export DATABASE_URL="$DB_URL"
# Run metadata and the audit summary are passed to the generator through its
# environment; all generator output goes to the log file.
if ! REPORT_RUN_KIND="scheduled" \
  REPORT_TRIGGER_SOURCE="cron" \
  REPORT_IS_OFFICIAL_DAILY="true" \
  REPORT_RUNTIME_AUDIT="$PIPELINE_AUDIT_SUMMARY" \
  go run \
  scripts/generate_daily_report.go \
  scripts/official_import_signature_audit_query_lib.go \
  >> "$LOG_FILE" 2>&1; then
  error_exit "日报生成失败"
fi
log "✅ 日报生成完成"

# 4. Verify the archive: both the markdown and the HTML archive copies must
# exist for the report date.
log "4⃣ 校验归档..."
if ! { [[ -f "$(report_archive_markdown_path "$REPORT_DATE")" ]] \
  && [[ -f "$(report_archive_html_path "$REPORT_DATE")" ]]; }; then
  error_exit "日报归档失败"
fi
log "✅ 归档完成"
# 5. Verify run records: each table must hold at least one row for the report
# date with status 'generated'. awk turns the printed count into an exit code.
log "5⃣ 校验运行记录..."
for record_table in daily_report report_runs; do
  if ! psql "$DB_URL" -Atqc "select count(*) from ${record_table} where report_date = DATE '${REPORT_DATE}' and status = 'generated';" \
    | awk '{ exit !($1 >= 1) }'; then
    error_exit "${record_table} 未写入 generated 记录"
  fi
done
log "✅ 日报记录更新完成"

# Final summary with the locations of the generated artifacts.
log "🎉 每日流水线全部完成!"
log "📄 Markdown: $(report_markdown_path "$REPORT_DATE")"
log "🌐 HTML: $(report_html_path "$REPORT_DATE")"
exit 0