#!/usr/bin/env bash
#
# Real-data collection pipeline. In order, this script runs the migration
# script, the OpenRouter fetch, the multi-source sync, the official imports and
# the daily report generator, then checks that the report artifacts and
# database rows exist.
#
# Required environment (may also be supplied by .env.local / .env, which are
# sourced from the repository root when present):
#   DATABASE_URL        passed to psql and to scripts/fetch_openrouter.go (-db)
#   OPENROUTER_API_KEY  passed to scripts/fetch_openrouter.go (-api-key)
set -euo pipefail

# Repository root: the parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Shared helpers. report_date_value, report_markdown_path,
# report_archive_markdown_path, report_archive_html_path and track_report_state
# are called later but not defined in this file (presumably they live here).
# shellcheck disable=SC1091
. "$ROOT_DIR/scripts/report_utils.sh"

# Every relative path below (./scripts/*.go, .env files) is resolved from the
# repository root.
cd "$ROOT_DIR"

# NOTE(review): .env is sourced after .env.local, so a plain assignment in .env
# overrides the same variable coming from .env.local. Confirm this precedence
# is the intended one.
if [[ -f ".env.local" ]]; then
  # shellcheck disable=SC1091
  source ".env.local"
fi
if [[ -f ".env" ]]; then
  # shellcheck disable=SC1091
  source ".env"
fi

# Fail fast, before any stage runs, when required configuration is missing.
if [[ -z "${DATABASE_URL:-}" ]]; then
  echo "DATABASE_URL 未设置" >&2
  exit 1
fi
if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
  echo "OPENROUTER_API_KEY 未设置,无法执行真实采集" >&2
  exit 1
fi
# Report date key for this run, as produced by report_date_value (a helper that
# is not defined in this file).
REPORT_DATE="$(report_date_value)"

# Path given to fetch_openrouter.go via -out; its "total" field is read back
# into FETCH_TOTAL after a successful fetch.
FETCH_OUT="$ROOT_DIR/models.json"
FETCH_TOTAL="0"

# Stage names and source keys reported in the runtime audit line.
PIPELINE_STAGE_SET="openrouter,multi_source,official_imports,daily_report"
PIPELINE_SOURCE_SET="openrouter,moonshot,deepseek,openai,zhipu,baidu,bytedance"

# Comma-separated keys of failed sources; the literal "none" means that no
# failure has been recorded (see merge_failed_source_keys).
PIPELINE_FAILED_SOURCE_SET="none"

# Audit fragment describing the multi-source stage; replaced once that stage
# has run.
MULTI_SOURCE_AUDIT="multi_source_audit=unavailable"

# Full audit line; rebuilt from the variables above by refresh_pipeline_audit.
PIPELINE_AUDIT_SUMMARY=""
#######################################
# Prints the contents of a file collapsed onto a single line: newlines become
# spaces, every run of whitespace becomes one space, and a single leading or
# trailing space is removed. Prints nothing when the file does not exist.
# Arguments: $1 - path of the file to read
# Outputs:   the normalized text on stdout
# Returns:   0 when the file is missing, otherwise the pipeline status
#######################################
normalize_summary_file() {
  local path="$1"
  if [[ ! -f "$path" ]]; then
    return 0
  fi
  # "[[:space:]][[:space:]]*" rather than "[[:space:]]\+": "\+" is a GNU
  # extension to basic regular expressions and is not honoured by every sed.
  tr '\n' ' ' <"$path" | sed 's/[[:space:]][[:space:]]*/ /g; s/^ //; s/ $//'
}
#######################################
# Extracts the value of the failed_source_keys=<value> token from a summary.
# The value runs up to the next space. Because the leading ".*" is greedy, the
# last such token on a line is the one reported. Prints nothing when a line
# has no such token.
# Arguments: $1 - summary text
# Outputs:   the extracted value on stdout
#######################################
extract_failed_source_keys() {
  local summary="$1"
  printf '%s\n' "$summary" \
    | sed -n 's/.*failed_source_keys=\([^ ]*\).*/\1/p'
}
#######################################
# Adds source keys to the failed-source set.
# An empty argument or the literal "none" leaves the set untouched. Empty
# tokens and keys that are already present are skipped, so the set never holds
# duplicates.
# Globals:   PIPELINE_FAILED_SOURCE_SET (read and written); "none" means empty
# Arguments: $1 - comma-separated source keys
# Returns:   0
#######################################
merge_failed_source_keys() {
  local keys="$1"
  local key
  local -a incoming=()

  if [[ -z "$keys" || "$keys" == "none" ]]; then
    return 0
  fi

  IFS=',' read -r -a incoming <<<"$keys"
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older
  # bash releases.
  for key in ${incoming[@]+"${incoming[@]}"}; do
    if [[ -z "$key" || "$key" == "none" ]]; then
      continue
    fi
    if [[ "$PIPELINE_FAILED_SOURCE_SET" == "none" ]]; then
      PIPELINE_FAILED_SOURCE_SET="$key"
    elif [[ ",${PIPELINE_FAILED_SOURCE_SET}," != *",${key},"* ]]; then
      PIPELINE_FAILED_SOURCE_SET="${PIPELINE_FAILED_SOURCE_SET},${key}"
    fi
  done
  return 0
}
#######################################
# Rebuilds the one-line runtime audit summary from the current pipeline state.
# The line is a sequence of space-separated key=value tokens, so each value
# must directly follow its "=".
# Globals:   PIPELINE_STAGE_SET, PIPELINE_SOURCE_SET,
#            PIPELINE_FAILED_SOURCE_SET, FETCH_TOTAL (defaults to 0 when unset
#            or empty), MULTI_SOURCE_AUDIT (read);
#            PIPELINE_AUDIT_SUMMARY (written)
#######################################
refresh_pipeline_audit() {
  PIPELINE_AUDIT_SUMMARY="runtime_audit stage_set=${PIPELINE_STAGE_SET} selected_source_keys=${PIPELINE_SOURCE_SET} failed_source_keys=${PIPELINE_FAILED_SOURCE_SET} openrouter_total=${FETCH_TOTAL:-0} ${MULTI_SOURCE_AUDIT}"
}
#######################################
# Records the current run as failed via track_report_state.
# The audit summary is refreshed first so the record reflects the latest
# failed-source set. If the markdown report for REPORT_DATE already exists on
# disk, its path is included; otherwise an empty path is passed.
# Recording is best effort: output of track_report_state is discarded and its
# failure is ignored, so the caller's own `exit 1` still runs.
# Globals:   DATABASE_URL, REPORT_DATE (read);
#            PIPELINE_AUDIT_SUMMARY (refreshed, then read)
# Arguments: $1 - error message to store with the failed run
#######################################
record_failure() {
  local error_message="$1"
  local output_path=""
  local candidate_path=""

  refresh_pipeline_audit

  # Resolve the path once. `|| true` keeps a failing helper from aborting the
  # script under `set -e` while a failure is being recorded.
  candidate_path="$(report_markdown_path "$REPORT_DATE")" || true
  if [[ -f "$candidate_path" ]]; then
    output_path="$candidate_path"
  fi

  track_report_state "$DATABASE_URL" "$REPORT_DATE" "failed" "" \
    "$PIPELINE_AUDIT_SUMMARY" "$output_path" "$error_message" \
    "manual" "pipeline" "false" >/dev/null 2>&1 || true
}
# Build the initial audit line, then run the migration script before any stage
# starts. A migration failure aborts the script through `set -e`; no failure
# record is written for it.
refresh_pipeline_audit
"$ROOT_DIR/scripts/apply_migration.sh"
# Stage "openrouter": fetch into the database and into $FETCH_OUT.
# NOTE(review): the API key is passed as a command-line argument and is
# therefore visible in the process list while the fetch runs; changing that
# needs support in fetch_openrouter.go.
if ! go run "./scripts/fetch_openrouter.go" \
  -api-key "$OPENROUTER_API_KEY" \
  -db "$DATABASE_URL" \
  -out "$FETCH_OUT" \
  -strict-real; then
  merge_failed_source_keys "openrouter"
  record_failure "真实采集失败"
  exit 1
fi

# Read the "total" field from the fetch output. If the file cannot be read or
# parsed, fall back to 0 so the threshold check below records the failure,
# instead of `set -e` aborting the script with no failure record.
FETCH_TOTAL=$(
  python3 - "$FETCH_OUT" <<'PY'
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as f:
    data = json.load(f)
print(int(data.get("total", 0)))
PY
) || FETCH_TOTAL="0"

# Fewer than 10 entries is treated as an abnormal fetch.
if [[ "${FETCH_TOTAL:-0}" -lt 10 ]]; then
  merge_failed_source_keys "openrouter"
  record_failure "本次采集结果异常: total=${FETCH_TOTAL:-0} < 10"
  exit 1
fi

# Publish the fetched total in the audit line.
refresh_pipeline_audit
# Stage "multi_source": sync the supplementary sources. The command's stdout
# is captured in a temp file so it can both be summarised for the audit line
# and echoed to this script's stdout afterwards.
MULTI_SOURCE_KEYS="moonshot,deepseek,openai"
MULTI_SOURCE_OUTPUT="$(mktemp)"

if ! go run "./scripts/fetch_multi_source.go" --sources "$MULTI_SOURCE_KEYS" >"$MULTI_SOURCE_OUTPUT"; then
  MULTI_SOURCE_SUMMARY="$(normalize_summary_file "$MULTI_SOURCE_OUTPUT")"
  if [[ -n "$MULTI_SOURCE_SUMMARY" ]]; then
    # The command reported something: trust its own list of failed sources.
    MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY}"
    merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
  else
    # No output at all: mark every requested source as failed.
    MULTI_SOURCE_AUDIT="multi_source_audit=stage_failed"
    merge_failed_source_keys "$MULTI_SOURCE_KEYS"
  fi
  cat "$MULTI_SOURCE_OUTPUT"
  rm -f "$MULTI_SOURCE_OUTPUT"
  record_failure "多源补充同步失败"
  exit 1
fi

# The stage succeeded overall, but its summary may still name individual
# failed sources; carry them into the audit line.
MULTI_SOURCE_SUMMARY="$(normalize_summary_file "$MULTI_SOURCE_OUTPUT")"
MULTI_SOURCE_AUDIT="multi_source_audit=${MULTI_SOURCE_SUMMARY:-none}"
merge_failed_source_keys "$(extract_failed_source_keys "$MULTI_SOURCE_SUMMARY")"
refresh_pipeline_audit
cat "$MULTI_SOURCE_OUTPUT"
rm -f "$MULTI_SOURCE_OUTPUT"
#######################################
# Runs one official-import Go script (built with the llm_script tag). On
# failure it records the failed key and the failure, then exits the whole
# script with status 1.
# Arguments: $1 - path of the Go script
#            $2 - key added to the failed-source set on failure
#            $3 - error message recorded on failure
#######################################
run_official_import() {
  local script_path="$1"
  local failed_key="$2"
  local failure_message="$3"

  if ! go run -tags llm_script "$script_path"; then
    merge_failed_source_keys "$failed_key"
    record_failure "$failure_message"
    exit 1
  fi
}

# Stage "official_imports": the steps run in this fixed order and the first
# failure stops the pipeline.
run_official_import "./scripts/import_zhipu_data.go" "zhipu" "智谱官方导入失败"
run_official_import "./scripts/export_official_seed_json.go" "official_seed_export" "官方种子导出失败"
run_official_import "./scripts/import_phase2_data.go" "baidu" "百度官方导入失败"
run_official_import "./scripts/import_bytedance_data.go" "bytedance" "字节官方导入失败"
# Stage "daily_report": generate the report. The run metadata and the current
# audit line are handed to the generator through environment variables that
# are set for this one command only.
refresh_pipeline_audit
if ! REPORT_RUN_KIND="manual" \
  REPORT_TRIGGER_SOURCE="pipeline" \
  REPORT_IS_OFFICIAL_DAILY="false" \
  REPORT_RUNTIME_AUDIT="$PIPELINE_AUDIT_SUMMARY" \
  go run "./scripts/generate_daily_report.go"; then
  record_failure "日报生成失败"
  exit 1
fi
# Verification 1: both archived report files (markdown and HTML) must exist.
if [[ ! -f "$(report_archive_markdown_path "$REPORT_DATE")" ||
  ! -f "$(report_archive_html_path "$REPORT_DATE")" ]]; then
  record_failure "日报归档缺失"
  exit 1
fi

# Verification 2: each table must hold at least one row for today with status
# 'generated'. awk turns the printed count into an exit status.
# NOTE(review): the queries filter on the database's current_date, while the
# file checks above use REPORT_DATE; confirm the two always agree.
for report_table in daily_report report_runs; do
  if ! psql "$DATABASE_URL" -Atqc "select count(*) from ${report_table} where report_date = current_date and status = 'generated';" \
    | awk '{ exit !($1 >= 1) }'; then
    record_failure "${report_table} 未写入 generated 记录"
    exit 1
  fi
done
# Final summary: print one "name|count" line per table (unaligned,
# tuples-only psql output), ordered by name. The daily_report and report_runs
# counts cover today's rows only; models and region_pricing are total counts.
psql "$DATABASE_URL" -Atqc \
  "select 'daily_report', count(*) from daily_report where report_date = current_date
   union all
   select 'models', count(*) from models
   union all
   select 'region_pricing', count(*) from region_pricing
   union all
   select 'report_runs', count(*) from report_runs where report_date = current_date
   order by 1;"