feat(intraday): add discovery and verification watch pipeline
This commit is contained in:
@@ -51,6 +51,9 @@ export OPENROUTER_API_KEY="your-api-key"
|
|||||||
export API_AUTH_TOKEN="replace-with-long-random-token"
|
export API_AUTH_TOKEN="replace-with-long-random-token"
|
||||||
# 或者:export API_BASIC_AUTH_USER="review" && export API_BASIC_AUTH_PASS="replace-with-password"
|
# 或者:export API_BASIC_AUTH_USER="review" && export API_BASIC_AUTH_PASS="replace-with-password"
|
||||||
export FEISHU_WEBHOOK="your-webhook-url" # 可选
|
export FEISHU_WEBHOOK="your-webhook-url" # 可选
|
||||||
|
export INTRADAY_DISCOVERY_SEARCH_PROVIDER="command_json" # 候选发现链路可选
|
||||||
|
export INTRADAY_DISCOVERY_LLM_PROVIDER="command_json" # 候选归纳链路可选
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -75,6 +78,10 @@ crontab -e
|
|||||||
# 日内价格追踪(推荐每 4 小时一次)
|
# 日内价格追踪(推荐每 4 小时一次)
|
||||||
0 */4 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_price_watch.sh >> /tmp/llm_hub_intraday.log 2>&1
|
0 */4 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_price_watch.sh >> /tmp/llm_hub_intraday.log 2>&1
|
||||||
|
|
||||||
|
# 日内新闻发现与验证(推荐每 2 小时一次)
|
||||||
|
0 */2 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_discovery_watch.sh >> /tmp/llm_hub_intraday_discovery.log 2>&1
|
||||||
|
|
||||||
|
|
||||||
# 真实采集 + 写库 + 报告生成的手动复跑入口
|
# 真实采集 + 写库 + 报告生成的手动复跑入口
|
||||||
cd /path/to/llm-intelligence && bash scripts/run_real_pipeline.sh
|
cd /path/to/llm-intelligence && bash scripts/run_real_pipeline.sh
|
||||||
```
|
```
|
||||||
@@ -106,6 +113,11 @@ docker-compose up -d
|
|||||||
| API_RATE_LIMIT_WINDOW_SEC | ❌ | `/api/*` 限流窗口秒数,默认 `60` |
|
| API_RATE_LIMIT_WINDOW_SEC | ❌ | `/api/*` 限流窗口秒数,默认 `60` |
|
||||||
| FEISHU_WEBHOOK | ❌ | 飞书告警 Webhook |
|
| FEISHU_WEBHOOK | ❌ | 飞书告警 Webhook |
|
||||||
| REPORT_DATE | ❌ | 手工指定日内追踪/日报日期 |
|
| REPORT_DATE | ❌ | 手工指定日内追踪/日报日期 |
|
||||||
|
| INTRADAY_DISCOVERY_SEARCH_PROVIDER / INTRADAY_DISCOVERY_LLM_PROVIDER | 条件必填 | discovery 链路 provider 类型;支持 `fixture` / `command_json` / `http_json` |
|
||||||
|
| INTRADAY_DISCOVERY_SEARCH_COMMAND / INTRADAY_DISCOVERY_LLM_COMMAND | 条件必填 | 当 provider 为 `command_json` 时执行的命令,stdout 必须输出 JSON |
|
||||||
|
| INTRADAY_DISCOVERY_SEARCH_URL / INTRADAY_DISCOVERY_LLM_URL | 条件必填 | 当 provider 为 `http_json` 时调用的接口 URL |
|
||||||
|
| INTRADAY_DISCOVERY_SEARCH_FIXTURE / INTRADAY_DISCOVERY_LLM_FIXTURE | ❌ | dry-run / 本地 fixture 输入 |
|
||||||
|
| INTRADAY_DISCOVERY_TIMEOUT_SEC | ❌ | discovery 与验证抓取超时秒数,默认 `20` |
|
||||||
| PORT | ❌ | API Server 监听端口,默认 8080 |
|
| PORT | ❌ | API Server 监听端口,默认 8080 |
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
25
README.md
25
README.md
@@ -24,6 +24,7 @@
|
|||||||
- 手工复跑使用 `scripts/run_real_pipeline.sh`,不会把产物标记成正式日报
|
- 手工复跑使用 `scripts/run_real_pipeline.sh`,不会把产物标记成正式日报
|
||||||
- 历史补跑使用 `scripts/rebuild_historical_report.sh YYYY-MM-DD`
|
- 历史补跑使用 `scripts/rebuild_historical_report.sh YYYY-MM-DD`
|
||||||
- 日内价格追踪使用 `scripts/run_intraday_price_watch.sh`,只刷新价格与信号,不生成正式日报
|
- 日内价格追踪使用 `scripts/run_intraday_price_watch.sh`,只刷新价格与信号,不生成正式日报
|
||||||
|
- 日内新闻候选发现与验证使用 `scripts/run_intraday_discovery_watch.sh`,只刷新候选池、验证轨迹与已验证信号,不生成正式日报
|
||||||
|
|
||||||
- HTTP API 当前未内建认证、授权和限流;公网暴露前必须在网关层补齐
|
- HTTP API 当前未内建认证、授权和限流;公网暴露前必须在网关层补齐
|
||||||
|
|
||||||
@@ -104,6 +105,7 @@ bash scripts/run_intel_pipeline.sh
|
|||||||
3. 平台目录核验
|
3. 平台目录核验
|
||||||
4. 每日关键信号物化到 `daily_signal_snapshot`
|
4. 每日关键信号物化到 `daily_signal_snapshot`
|
||||||
5. 日内价格追踪可由 `scripts/run_intraday_price_watch.sh` 独立执行,不生成正式日报
|
5. 日内价格追踪可由 `scripts/run_intraday_price_watch.sh` 独立执行,不生成正式日报
|
||||||
|
6. 日内新闻候选发现与验证可由 `scripts/run_intraday_discovery_watch.sh` 独立执行,不生成正式日报
|
||||||
|
|
||||||
|
|
||||||
### 正式日报调度
|
### 正式日报调度
|
||||||
@@ -125,13 +127,6 @@ bash scripts/run_daily.sh
|
|||||||
9. 失败时降级复制昨日报告并可选飞书告警
|
9. 失败时降级复制昨日报告并可选飞书告警
|
||||||
|
|
||||||
### 手工真实复跑
|
### 手工真实复跑
|
||||||
### 日内价格追踪
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bash scripts/run_intraday_price_watch.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
适用于捕捉“小米大降价”“活动窗口上线”“泄露情报”等日内价格事件。该入口只刷新价格与信号层,不写正式 `daily_report`,也不会覆盖 `latest_report` 语义。
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash scripts/run_real_pipeline.sh
|
bash scripts/run_real_pipeline.sh
|
||||||
@@ -143,6 +138,22 @@ bash scripts/run_real_pipeline.sh
|
|||||||
- `trigger_source=pipeline`
|
- `trigger_source=pipeline`
|
||||||
- `is_official_daily=false`
|
- `is_official_daily=false`
|
||||||
|
|
||||||
|
### 日内价格追踪
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/run_intraday_price_watch.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
适用于捕捉“小米大降价”“活动窗口上线”等已知入口里的结构化价格变化。该入口只刷新价格与信号层,不写正式 `daily_report`,也不会覆盖 `latest_report` 语义。
|
||||||
|
|
||||||
|
### 日内新闻发现与验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash scripts/run_intraday_discovery_watch.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
适用于搜索引擎 + LLM 高召回发现“当天可能发生的价格新闻 / 版本发布 / 活动窗口”,再通过官方页面 / 价格页 / docs 做验证。该入口只刷新候选池、验证轨迹与 `daily_signal_snapshot` 中的已验证事实,不写正式 `daily_report`,也不会覆盖 `latest_report` 语义。
|
||||||
|
|
||||||
### 历史补跑
|
### 历史补跑
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
106
db/migrations/017_intraday_news_candidates.sql
Normal file
106
db/migrations/017_intraday_news_candidates.sql
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
-- Persistence schema for intraday news candidates and their verification trail.
--
-- The migration is written to be re-runnable: tables and indexes use
-- IF NOT EXISTS, while CHECK constraints and the trigger (which have no
-- IF NOT EXISTS form) are guarded by catalog lookups inside DO blocks.
--
-- Catalog lookups are scoped to the owning table via conrelid / tgrelid.
-- Constraint and trigger names are only unique per relation, so matching on
-- the name alone could find a same-named object on another table or schema
-- and silently skip creating the one this migration needs.

-- Candidate pool: one row per discovered (not yet trusted) news event.
CREATE TABLE IF NOT EXISTS intraday_news_candidate (
    id                      BIGSERIAL PRIMARY KEY,
    candidate_date          DATE NOT NULL,
    discovered_at           TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    event_type              TEXT NOT NULL,
    provider_name           TEXT NOT NULL,
    model_name              TEXT,
    provider_country        TEXT,
    title                   TEXT NOT NULL,
    summary                 TEXT,
    candidate_urls          JSONB NOT NULL DEFAULT '[]'::jsonb,
    discovery_source        TEXT NOT NULL,
    discovery_query         TEXT,
    discovery_evidence      JSONB NOT NULL DEFAULT '{}'::jsonb,
    normalized_key          TEXT NOT NULL,
    status                  TEXT NOT NULL DEFAULT 'candidate',
    verification_confidence TEXT NOT NULL DEFAULT 'candidate',
    verification_notes      TEXT,
    created_at              TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at              TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Enumerations for the candidate lifecycle and the verification confidence.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'chk_intraday_news_candidate_status'
          AND conrelid = 'intraday_news_candidate'::regclass
    ) THEN
        ALTER TABLE intraday_news_candidate
            ADD CONSTRAINT chk_intraday_news_candidate_status
            CHECK (status IN ('candidate', 'verifying', 'verified', 'rejected', 'stale'));
    END IF;

    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'chk_intraday_news_candidate_confidence'
          AND conrelid = 'intraday_news_candidate'::regclass
    ) THEN
        ALTER TABLE intraday_news_candidate
            ADD CONSTRAINT chk_intraday_news_candidate_confidence
            CHECK (verification_confidence IN ('candidate', 'secondary_confirmed', 'official_confirmed'));
    END IF;
END
$$;

-- normalized_key is the de-duplication key, hence the UNIQUE index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_intraday_news_candidate_normalized_key
    ON intraday_news_candidate(normalized_key);
CREATE INDEX IF NOT EXISTS idx_intraday_news_candidate_date
    ON intraday_news_candidate(candidate_date DESC, discovered_at DESC);
CREATE INDEX IF NOT EXISTS idx_intraday_news_candidate_status
    ON intraday_news_candidate(status);
CREATE INDEX IF NOT EXISTS idx_intraday_news_candidate_provider_event
    ON intraday_news_candidate(provider_name, event_type, candidate_date DESC);

COMMENT ON TABLE intraday_news_candidate IS '搜索引擎与 LLM 发现的日内新闻候选池,尚未直接进入正式日报事实层';
COMMENT ON COLUMN intraday_news_candidate.candidate_urls IS '候选来源 URL 数组,按发现层输出原样保留';
COMMENT ON COLUMN intraday_news_candidate.discovery_evidence IS '发现阶段原始证据 JSONB,例如搜索命中、LLM 归纳结果';
COMMENT ON COLUMN intraday_news_candidate.normalized_key IS '同日同事件的去重键,避免重复发现候选';

-- Verification trail: one row per verification attempt against a candidate.
-- Rows are removed together with their candidate (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS intraday_news_verification (
    id              BIGSERIAL PRIMARY KEY,
    candidate_id    BIGINT NOT NULL REFERENCES intraday_news_candidate(id) ON DELETE CASCADE,
    verified_at     TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    verifier_source TEXT NOT NULL,
    verifier_url    TEXT,
    verifier_status TEXT NOT NULL,
    extracted_facts JSONB NOT NULL DEFAULT '{}'::jsonb,
    notes           TEXT,
    created_at      TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Enumeration for the outcome of a single verification attempt.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'chk_intraday_news_verification_status'
          AND conrelid = 'intraday_news_verification'::regclass
    ) THEN
        ALTER TABLE intraday_news_verification
            ADD CONSTRAINT chk_intraday_news_verification_status
            CHECK (verifier_status IN ('matched', 'contradicted', 'insufficient', 'error'));
    END IF;
END
$$;

CREATE INDEX IF NOT EXISTS idx_intraday_news_verification_candidate_verified_at
    ON intraday_news_verification(candidate_id, verified_at DESC);
CREATE INDEX IF NOT EXISTS idx_intraday_news_verification_source
    ON intraday_news_verification(verifier_source);
CREATE INDEX IF NOT EXISTS idx_intraday_news_verification_status
    ON intraday_news_verification(verifier_status);

COMMENT ON TABLE intraday_news_verification IS '日内新闻候选的验证轨迹,记录验证来源、状态和提取事实';
COMMENT ON COLUMN intraday_news_verification.extracted_facts IS '验证阶段提取出的结构化事实 JSONB';

-- BEFORE UPDATE trigger on the candidate table.
-- NOTE(review): update_updated_at_column() is not defined in this migration;
-- it is assumed to be created by an earlier one — confirm before applying to
-- an empty database.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_trigger
        WHERE tgname = 'intraday_news_candidate_updated_at'
          AND tgrelid = 'intraday_news_candidate'::regclass
    ) THEN
        CREATE TRIGGER intraday_news_candidate_updated_at
            BEFORE UPDATE ON intraday_news_candidate
            FOR EACH ROW
            EXECUTE FUNCTION update_updated_at_column();
    END IF;
END
$$;
|
||||||
@@ -27,11 +27,20 @@
|
|||||||
| `API_RATE_LIMIT_WINDOW_SEC` | 否 | `cmd/server/main.go` | `60` | `/api/*` 限流窗口长度(秒) |
|
| `API_RATE_LIMIT_WINDOW_SEC` | 否 | `cmd/server/main.go` | `60` | `/api/*` 限流窗口长度(秒) |
|
||||||
| `FEISHU_WEBHOOK` | 否 | `run_daily.sh`、`feishu_alert.sh` | 空 | 正式日报失败时发送飞书告警 |
|
| `FEISHU_WEBHOOK` | 否 | `run_daily.sh`、`feishu_alert.sh` | 空 | 正式日报失败时发送飞书告警 |
|
||||||
| `REPORT_OUTPUT_DIR` | 否 | `generate_daily_report.go` | `reports/daily` | 日报主产物输出目录 |
|
| `REPORT_OUTPUT_DIR` | 否 | `generate_daily_report.go` | `reports/daily` | 日报主产物输出目录 |
|
||||||
| `REPORT_DATE` | 否 | `generate_daily_report.go`、`rebuild_historical_report.sh`、`run_intraday_price_watch.sh` | 当天日期 | 指定日报或日内价格追踪的日期,格式 `YYYY-MM-DD` |
|
| `REPORT_DATE` | 否 | `generate_daily_report.go`、`rebuild_historical_report.sh`、`run_intraday_price_watch.sh`、`run_intraday_discovery_watch.sh` | 当天日期 | 指定日报或日内链路日期,格式 `YYYY-MM-DD` |
|
||||||
| `REPORT_RUN_KIND` | 否 | `generate_daily_report.go` | `manual` | 运行语义,如 `scheduled` / `manual` / `historical_rebuild` |
|
| `REPORT_RUN_KIND` | 否 | `generate_daily_report.go` | `manual` | 运行语义,如 `scheduled` / `manual` / `historical_rebuild` |
|
||||||
| `REPORT_TRIGGER_SOURCE` | 否 | `generate_daily_report.go`、`materialize_daily_signals.go` | `cli` | 触发来源,如 `cron` / `pipeline` / `intraday` / `rebuild_script` |
|
| `REPORT_TRIGGER_SOURCE` | 否 | `generate_daily_report.go`、`materialize_daily_signals.go` | `cli` | 触发来源,如 `cron` / `pipeline` / `intraday` / `intraday_discovery` / `rebuild_script` |
|
||||||
| `REPORT_IS_OFFICIAL_DAILY` | 否 | `generate_daily_report.go` | `false` | 是否属于正式日报产出 |
|
| `REPORT_IS_OFFICIAL_DAILY` | 否 | `generate_daily_report.go` | `false` | 是否属于正式日报产出 |
|
||||||
| `REPORT_RUNTIME_AUDIT` | 否 | `generate_daily_report.go` | 空 | 来源级运行审计摘要,通常由流水线脚本注入 |
|
| `REPORT_RUNTIME_AUDIT` | 否 | `generate_daily_report.go` | 空 | 来源级运行审计摘要,通常由流水线脚本注入 |
|
||||||
|
| `INTRADAY_DISCOVERY_SEARCH_PROVIDER` | 条件必填 | `discover_intraday_news_candidates.go`、`run_intraday_discovery_watch.sh` | 空 | 候选发现搜索 provider 类型;计划支持 `fixture` / `command_json` / `http_json` |
|
||||||
|
| `INTRADAY_DISCOVERY_SEARCH_COMMAND` | 条件必填 | `discover_intraday_news_candidates.go` | 空 | 当 `INTRADAY_DISCOVERY_SEARCH_PROVIDER=command_json` 时执行的搜索命令,stdout 必须输出 JSON 数组 |
|
||||||
|
| `INTRADAY_DISCOVERY_SEARCH_URL` | 条件必填 | `discover_intraday_news_candidates.go` | 空 | 当 `INTRADAY_DISCOVERY_SEARCH_PROVIDER=http_json` 时调用的搜索接口 URL |
|
||||||
|
| `INTRADAY_DISCOVERY_SEARCH_FIXTURE` | 否 | `discover_intraday_news_candidates.go` | 空 | 搜索 provider 样例文件,用于 dry-run / 本地测试 |
|
||||||
|
| `INTRADAY_DISCOVERY_LLM_PROVIDER` | 条件必填 | `discover_intraday_news_candidates.go`、`run_intraday_discovery_watch.sh` | 空 | 候选归纳 LLM provider 类型;计划支持 `fixture` / `command_json` / `http_json` |
|
||||||
|
| `INTRADAY_DISCOVERY_LLM_COMMAND` | 条件必填 | `discover_intraday_news_candidates.go` | 空 | 当 `INTRADAY_DISCOVERY_LLM_PROVIDER=command_json` 时执行的 LLM 命令,stdout 必须输出 JSON 数组 |
|
||||||
|
| `INTRADAY_DISCOVERY_LLM_URL` | 条件必填 | `discover_intraday_news_candidates.go` | 空 | 当 `INTRADAY_DISCOVERY_LLM_PROVIDER=http_json` 时调用的 LLM 接口 URL |
|
||||||
|
| `INTRADAY_DISCOVERY_LLM_FIXTURE` | 否 | `discover_intraday_news_candidates.go` | 空 | LLM provider 样例文件,用于 dry-run / 本地测试 |
|
||||||
|
| `INTRADAY_DISCOVERY_TIMEOUT_SEC` | 否 | `discover_intraday_news_candidates.go`、`verify_intraday_news_candidates.go` | `20` | discovery provider 与验证抓取的默认超时秒数 |
|
||||||
| `PHASE6_PORT` | 否 | `verify_phase6.sh` | 自动挑选 `18080-18120` | Phase 6 验收时临时启动 API Server 的端口 |
|
| `PHASE6_PORT` | 否 | `verify_phase6.sh` | 自动挑选 `18080-18120` | Phase 6 验收时临时启动 API Server 的端口 |
|
||||||
| `LIGHTHOUSE_PORT` | 否 | `verify_lighthouse.sh` | `4173` | Lighthouse 预览端口 |
|
| `LIGHTHOUSE_PORT` | 否 | `verify_lighthouse.sh` | `4173` | Lighthouse 预览端口 |
|
||||||
| `LIGHTHOUSE_SCORE_THRESHOLD` | 否 | `verify_lighthouse.sh` | `80` | 前端性能分数门槛 |
|
| `LIGHTHOUSE_SCORE_THRESHOLD` | 否 | `verify_lighthouse.sh` | `80` | 前端性能分数门槛 |
|
||||||
@@ -83,6 +92,23 @@ bash scripts/run_intraday_price_watch.sh
|
|||||||
- 不生成正式 HTML / Markdown 日报
|
- 不生成正式 HTML / Markdown 日报
|
||||||
- 推荐先按每 4 小时一次调度,再根据外部源稳定性决定是否收紧到每 2 小时
|
- 推荐先按每 4 小时一次调度,再根据外部源稳定性决定是否收紧到每 2 小时
|
||||||
|
|
||||||
|
### 日内候选发现与验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DATABASE_URL="postgres://app_user:***@db:5432/llm_intelligence?sslmode=disable"
|
||||||
|
export INTRADAY_DISCOVERY_SEARCH_PROVIDER="command_json"
|
||||||
|
export INTRADAY_DISCOVERY_SEARCH_COMMAND="/usr/local/bin/intraday-search --date ${REPORT_DATE:-$(date +%F)}"
|
||||||
|
export INTRADAY_DISCOVERY_LLM_PROVIDER="command_json"
|
||||||
|
export INTRADAY_DISCOVERY_LLM_COMMAND="/usr/local/bin/intraday-llm --date ${REPORT_DATE:-$(date +%F)}"
|
||||||
|
bash scripts/run_intraday_discovery_watch.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:
|
||||||
|
- 该入口只刷新候选池、验证轨迹与 `daily_signal_snapshot` 中的已验证事实
|
||||||
|
- 它不会直接写 `daily_report`,不会覆盖 `/api/v1/reports/latest` 对应的正式日报
|
||||||
|
- 搜索 / LLM provider 缺失时应明确报前置条件错误,不能伪装成“今日无新闻”
|
||||||
|
- `leak_or_rumor` 默认留在候选层,不进入正式日报事实
|
||||||
|
|
||||||
## 日报运行语义
|
## 日报运行语义
|
||||||
|
|
||||||
项目用以下字段区分正式日报、手工复跑和历史补跑:
|
项目用以下字段区分正式日报、手工复跑和历史补跑:
|
||||||
|
|||||||
@@ -59,9 +59,11 @@
|
|||||||
- 手工复跑命令已确定:`bash scripts/run_real_pipeline.sh`
|
- 手工复跑命令已确定:`bash scripts/run_real_pipeline.sh`
|
||||||
- 历史补跑命令已确定:`bash scripts/rebuild_historical_report.sh YYYY-MM-DD`
|
- 历史补跑命令已确定:`bash scripts/rebuild_historical_report.sh YYYY-MM-DD`
|
||||||
- 日内价格追踪命令已确定:`bash scripts/run_intraday_price_watch.sh`
|
- 日内价格追踪命令已确定:`bash scripts/run_intraday_price_watch.sh`
|
||||||
|
- 日内新闻发现与验证命令已确定:`bash scripts/run_intraday_discovery_watch.sh`
|
||||||
- `OPENROUTER_API_KEY` 已在正式调度环境可用
|
- `OPENROUTER_API_KEY` 已在正式调度环境可用
|
||||||
- `FEISHU_WEBHOOK` 已配置或明确不上告警
|
- `FEISHU_WEBHOOK` 已配置或明确不上告警
|
||||||
|
- 候选发现所需 search / LLM provider 已配置,缺失时会以前置条件错误失败,不会伪装成“无新闻”
|
||||||
|
|
||||||
|
|
||||||
### 安全与访问控制
|
### 安全与访问控制
|
||||||
|
|
||||||
@@ -141,6 +143,8 @@ bash scripts/run_real_pipeline.sh
|
|||||||
```
|
```
|
||||||
# 日内价格追踪(推荐)
|
# 日内价格追踪(推荐)
|
||||||
0 */4 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_price_watch.sh >> /tmp/llm_hub_intraday.log 2>&1
|
0 */4 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_price_watch.sh >> /tmp/llm_hub_intraday.log 2>&1
|
||||||
|
# 日内新闻发现与验证(推荐)
|
||||||
|
0 */2 * * * cd /path/to/llm-intelligence && bash scripts/run_intraday_discovery_watch.sh >> /tmp/llm_hub_intraday_discovery.log 2>&1
|
||||||
|
|
||||||
### 7. 线上冒烟
|
### 7. 线上冒烟
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,420 @@
|
|||||||
|
# Intraday Discovery + Verification Implementation Plan
|
||||||
|
|
||||||
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** 在不污染正式日报语义的前提下,为现有日内链路增加“搜索引擎 + 大模型候选发现层”和“官方来源验证层”,让当天的大模型价格新闻、版本发布、活动窗口能更早进入候选池,并只把已验证事实接入现有 `daily_signal_snapshot` / 日报语义链路。
|
||||||
|
|
||||||
|
**Architecture:** 保留现有 `scripts/run_intraday_price_watch.sh` 作为结构化价格事实刷新入口,不改它“只刷新价格/信号、不生成正式日报”的边界。新增一条独立的 `run_intraday_discovery_watch.sh` 发现链路:先用搜索引擎与 LLM 生成候选事件,再通过官方页面 / 价格页 / docs / 公告页做二次验证。候选与验证结果分别落入新表;只有 `official_confirmed` 的事件才允许映射进 `materialize_daily_signals.go` 的 `signalModelEvent`,并由现有 `generate_daily_report.go` 继续消费,不新造第二套日报事实系统。发现层与验证层必须通过仓库内可运行的 provider adapter 落地,不能依赖当前会话专属工具;实现上采用“命令或 HTTP provider 适配层 + fixture 测试”的方式,确保本地 cron 和 CI 环境可执行。已验证 discovery 事件接入现有事件流时必须去重:若同一 `provider + model + event_type + date` 已由 importer / 原生 loader 给出,则以原生事实为准,discovery 事件只补缺,不覆盖。
|
||||||
|
|
||||||
|
**Tech Stack:** Go 1.22、PostgreSQL、Bash、可配置搜索/LLM provider adapter、JSONB
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: 为候选发现与验证链路定义持久化结构
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `db/migrations/017_intraday_news_candidates.sql`
|
||||||
|
- Modify: `docs/CONFIGURATION.md`
|
||||||
|
- Modify: `DEPLOYMENT.md`
|
||||||
|
|
||||||
|
**Step 1: 新增候选表与验证表 migration**
|
||||||
|
|
||||||
|
创建两张表:
|
||||||
|
- `intraday_news_candidate`
|
||||||
|
- `intraday_news_verification`
|
||||||
|
|
||||||
|
候选表至少包含:
|
||||||
|
- `candidate_date`
|
||||||
|
- `event_type`
|
||||||
|
- `provider_name`
|
||||||
|
- `model_name`
|
||||||
|
- `provider_country`
|
||||||
|
- `title`
|
||||||
|
- `summary`
|
||||||
|
- `candidate_urls JSONB`
|
||||||
|
- `discovery_source`
|
||||||
|
- `discovery_query`
|
||||||
|
- `discovery_evidence JSONB`
|
||||||
|
- `normalized_key`
|
||||||
|
- `status`
|
||||||
|
- `verification_confidence`
|
||||||
|
- `verification_notes`
|
||||||
|
|
||||||
|
验证表至少包含:
|
||||||
|
- `candidate_id`
|
||||||
|
- `verifier_source`
|
||||||
|
- `verifier_url`
|
||||||
|
- `verifier_status`
|
||||||
|
- `extracted_facts JSONB`
|
||||||
|
- `notes`
|
||||||
|
|
||||||
|
约束:
|
||||||
|
- `intraday_news_candidate.normalized_key` 必须唯一,用于防止同日重复发现
|
||||||
|
- `status` 至少支持:`candidate` / `verifying` / `verified` / `rejected` / `stale`
|
||||||
|
- `verification_confidence` 至少支持:`candidate` / `secondary_confirmed` / `official_confirmed`
|
||||||
|
|
||||||
|
**Step 2: 明确与正式事实层的边界文档**
|
||||||
|
|
||||||
|
在 `docs/CONFIGURATION.md` 和 `DEPLOYMENT.md` 写明:
|
||||||
|
- 候选发现层不会直接写 `daily_report`
|
||||||
|
- 候选发现层不会覆盖 `latest_report`
|
||||||
|
- `daily_signal_snapshot` 只消费已验证事实(`verification_confidence=official_confirmed`),不消费 `candidate_only`(即 `verification_confidence` 仍为 `candidate` 的候选)
|
||||||
|
- `leak_or_rumor` 默认只保留在候选层,不进入正式日报事实
|
||||||
|
|
||||||
|
**Step 3: 运行 migration 验证**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `bash scripts/apply_migration.sh`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 新表创建成功
|
||||||
|
- 重复执行 migration 不报错
|
||||||
|
|
||||||
|
**Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add db/migrations/017_intraday_news_candidates.sql docs/CONFIGURATION.md DEPLOYMENT.md
|
||||||
|
git commit -m "feat(intraday): add candidate and verification persistence"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: 实现候选发现层最小闭环
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `scripts/discover_intraday_news_candidates.go`
|
||||||
|
- Create: `scripts/discover_intraday_news_candidates_test.go`
|
||||||
|
- Create: `scripts/testdata/intraday_discovery_search_sample.json`
|
||||||
|
- Create: `scripts/testdata/intraday_discovery_llm_sample.json`
|
||||||
|
- Modify: `docs/CONFIGURATION.md`
|
||||||
|
- Create: `scripts/intraday_discovery_provider.go`
|
||||||
|
|
||||||
|
**Step 1: 先写失败测试**
|
||||||
|
|
||||||
|
补 4 组测试:
|
||||||
|
- 搜索结果解析测试:验证能从样例结果提取 title / summary / url / provider 线索
|
||||||
|
- LLM 输出解析测试:验证能把 LLM JSON 输出转成候选事件
|
||||||
|
- 候选归一化测试:验证同一事件经过标题差异改写后仍生成同一 `normalized_key`
|
||||||
|
- URL 过滤测试:验证没有 URL 的候选被丢弃,避免 LLM 空口造线索
|
||||||
|
|
||||||
|
**Step 2: 运行失败测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/discover_intraday_news_candidates_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 新增测试失败
|
||||||
|
- 失败原因是缺少解析、归一化或去重逻辑
|
||||||
|
|
||||||
|
**Step 3: 实现最小候选发现器**
|
||||||
|
|
||||||
|
在 `discover_intraday_news_candidates.go` 中实现:
|
||||||
|
- 固定 provider 查询模板集(中英双语)
|
||||||
|
- 搜索结果抓取适配层
|
||||||
|
- LLM 候选摘要适配层
|
||||||
|
- 去重与归一化逻辑
|
||||||
|
- 写入 `intraday_news_candidate`
|
||||||
|
- provider adapter 抽象层(搜索 / LLM 均可通过命令或 HTTP provider 接入,默认实现不可依赖当前会话专属工具)
|
||||||
|
|
||||||
|
限制:
|
||||||
|
- LLM 只允许输出候选,不允许直接标成 `verified`
|
||||||
|
- 无 URL 候选直接丢弃
|
||||||
|
- 搜索 / LLM provider 未配置时必须以前置条件错误退出,不能伪装成业务无新闻
|
||||||
|
- 默认事件类型至少支持:
|
||||||
|
- `price_cut`
|
||||||
|
- `price_increase`
|
||||||
|
- `official_release`
|
||||||
|
- `promo_campaign`
|
||||||
|
- `leak_or_rumor`
|
||||||
|
- `unknown`
|
||||||
|
|
||||||
|
**Step 4: 重新运行测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/discover_intraday_news_candidates_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 候选解析与归一化测试通过
|
||||||
|
|
||||||
|
**Step 5: 运行一次 dry-run 验证**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go run -tags llm_script ./scripts/discover_intraday_news_candidates.go --date=2026-05-25 --dry-run`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 输出 `candidate_total` / `provider_hit_count` / `event_type_counts`
|
||||||
|
- dry-run 不写 `daily_report`
|
||||||
|
- dry-run 不改 `latest_report`
|
||||||
|
|
||||||
|
**Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add scripts/discover_intraday_news_candidates.go scripts/discover_intraday_news_candidates_test.go scripts/intraday_discovery_provider.go scripts/testdata/intraday_discovery_search_sample.json scripts/testdata/intraday_discovery_llm_sample.json docs/CONFIGURATION.md
|
||||||
|
git commit -m "feat(intraday): add news candidate discovery pipeline"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 3: 实现候选验证层并固化“只信官方事实”的规则
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `scripts/verify_intraday_news_candidates.go`
|
||||||
|
- Create: `scripts/verify_intraday_news_candidates_test.go`
|
||||||
|
- Create: `scripts/testdata/intraday_verification_official_release.html`
|
||||||
|
- Create: `scripts/testdata/intraday_verification_pricing_page.html`
|
||||||
|
- Create: `scripts/testdata/intraday_verification_secondary_media.html`
|
||||||
|
- Modify: `docs/CONFIGURATION.md`
|
||||||
|
|
||||||
|
**Step 1: 先写失败测试**
|
||||||
|
|
||||||
|
补 5 组测试:
|
||||||
|
- 官方发布页验证测试:命中模型名与发布时间时,产出 `official_confirmed`
|
||||||
|
- 官方价格页验证测试:只有拿到真实价格变化时,才允许产出 `price_cut` / `price_increase`
|
||||||
|
- 活动页验证测试:官方活动页可映射为 `promo_campaign`
|
||||||
|
- 二手媒体降级测试:二手媒体最多得到 `secondary_confirmed`,不能直接进入正式事实层
|
||||||
|
- 泄露类隔离测试:`leak_or_rumor` 即使有外部讨论,也不会升级为正式日报事实
|
||||||
|
|
||||||
|
**Step 2: 运行失败测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/verify_intraday_news_candidates_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 新增测试失败
|
||||||
|
- 失败原因是缺少来源分类与验证状态映射逻辑
|
||||||
|
|
||||||
|
**Step 3: 实现验证器**
|
||||||
|
|
||||||
|
在 `verify_intraday_news_candidates.go` 中实现:
|
||||||
|
- 读取 `candidate` / `verifying` 状态候选
|
||||||
|
- 拉取 `candidate_urls`
|
||||||
|
- 基于域名与页面内容判定:
|
||||||
|
- `official_page`
|
||||||
|
- `pricing_page`
|
||||||
|
- `official_docs`
|
||||||
|
- `official_blog`
|
||||||
|
- `secondary_media`
|
||||||
|
- 把验证轨迹写入 `intraday_news_verification`
|
||||||
|
- 更新 `intraday_news_candidate.status` 与 `verification_confidence`
|
||||||
|
- 验证成功后只更新候选层状态,不直接写 `daily_signal_snapshot`;正式事实仍统一由物化器汇总
|
||||||
|
|
||||||
|
规则:
|
||||||
|
- 只有官方页面 / 价格页 / docs / 公告页可以产出 `official_confirmed`
|
||||||
|
- 价格新闻若无法拿到真实价格事实,只能维持候选或二级确认,不能伪造价格变化事件
|
||||||
|
- `leak_or_rumor` 默认不升级为正式事实
|
||||||
|
|
||||||
|
**Step 4: 重新运行测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/verify_intraday_news_candidates_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 验证规则测试通过
|
||||||
|
|
||||||
|
**Step 5: 运行一次 dry-run 验证**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go run -tags llm_script ./scripts/verify_intraday_news_candidates.go --date=2026-05-25 --dry-run`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 输出 `verified_total` / `official_confirmed_total` / `secondary_confirmed_total`
|
||||||
|
- dry-run 只打印摘要,不写 `daily_report`
|
||||||
|
|
||||||
|
**Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add scripts/verify_intraday_news_candidates.go scripts/verify_intraday_news_candidates_test.go scripts/testdata/intraday_verification_official_release.html scripts/testdata/intraday_verification_pricing_page.html scripts/testdata/intraday_verification_secondary_media.html docs/CONFIGURATION.md
|
||||||
|
git commit -m "feat(intraday): add candidate verification pipeline"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 4: 把已验证事件接入现有 `materialize_daily_signals.go`
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `scripts/materialize_daily_signals.go`
|
||||||
|
- Create or Modify: `scripts/materialize_daily_signals_test.go`
|
||||||
|
- Modify: `docs/plans/2026-05-27-intraday-price-watch-plan.md`
|
||||||
|
- Modify: `README.md`
|
||||||
|
- Modify: `docs/PRODUCTION_CHECKLIST.md`
|
||||||
|
|
||||||
|
**Step 1: 先写失败测试**
|
||||||
|
|
||||||
|
补 4 组测试:
|
||||||
|
- 已验证官方发布事件会进入 `daily_signal_snapshot.top_events`
|
||||||
|
- 已验证活动事件会进入 `daily_signal_snapshot.top_events`
|
||||||
|
- `candidate_only` 与 `leak_or_rumor` 不进入正式快照
|
||||||
|
- 未拿到真实价格变化数据的“价格新闻”不会被错误映射为 `price_cut` / `price_increase`
|
||||||
|
|
||||||
|
**Step 2: 运行失败测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/materialize_daily_signals.go ./scripts/materialize_daily_signals_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 新增测试失败
|
||||||
|
- 失败原因是当前物化器还不会读取已验证候选事件
|
||||||
|
|
||||||
|
**Step 3: 最小实现 verified event loader**
|
||||||
|
|
||||||
|
在 `materialize_daily_signals.go` 中新增:
|
||||||
|
- `loadVerifiedIntradayNewsEvents(db, date string)`
|
||||||
|
- 将 `official_confirmed` 的:
|
||||||
|
- `official_release`
|
||||||
|
- `promo_campaign`
|
||||||
|
- 已确认真实价格变化的 `price_cut` / `price_increase`
|
||||||
|
映射为现有 `signalModelEvent`
|
||||||
|
- 与现有 `loadSignalModelEvents` 结果做去重合并;同日同模型同事件类型若已由 importer / 原生 loader 给出,则 discovery 事件仅补 `SourceURL` / 证据缺口,不抢占优先级
|
||||||
|
|
||||||
|
约束:
|
||||||
|
- 不新造第二套快照表
|
||||||
|
- 不改变 `daily_signal_snapshot` 的正式事实语义
|
||||||
|
- `secondary_confirmed` 默认不进入正式快照
|
||||||
|
|
||||||
|
**Step 4: 重新运行测试**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/materialize_daily_signals.go ./scripts/materialize_daily_signals_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- verified event 相关测试通过
|
||||||
|
|
||||||
|
**Step 5: 联合验证日内边界**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `REPORT_TRIGGER_SOURCE=intraday_discovery go run -tags llm_script ./scripts/materialize_daily_signals.go --date=2026-05-25 --dry-run`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 输出含 `page_mode` / `event_count`
|
||||||
|
- 不写 `daily_report`
|
||||||
|
- 不覆盖 `latest_report`
|
||||||
|
|
||||||
|
**Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add scripts/materialize_daily_signals.go scripts/materialize_daily_signals_test.go README.md docs/PRODUCTION_CHECKLIST.md docs/plans/2026-05-27-intraday-price-watch-plan.md
|
||||||
|
git commit -m "feat(intraday): materialize verified discovery events"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 5: 组装新的日内发现入口并补部署说明
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `scripts/run_intraday_discovery_watch.sh`
|
||||||
|
- Modify: `README.md`
|
||||||
|
- Modify: `docs/CONFIGURATION.md`
|
||||||
|
- Modify: `DEPLOYMENT.md`
|
||||||
|
- Modify: `docs/PRODUCTION_CHECKLIST.md`
|
||||||
|
|
||||||
|
**Step 1: 实现独立入口脚本**
|
||||||
|
|
||||||
|
脚本顺序固定为:
|
||||||
|
1. `discover_intraday_news_candidates.go`
|
||||||
|
2. `verify_intraday_news_candidates.go`
|
||||||
|
3. `materialize_daily_signals.go`(仅消费 verified 事件)
|
||||||
|
|
||||||
|
要求:
|
||||||
|
- 明确要求 `DATABASE_URL`
|
||||||
|
- 搜索 / LLM provider 配置(`INTRADAY_DISCOVERY_*`)缺失时,输出前置条件错误,既不伪装成代码失败,也不伪装成“今日无新闻”
|
||||||
|
- 不执行 `generate_daily_report.go`
|
||||||
|
- 不写 `daily_report`
|
||||||
|
- 不覆盖 `latest_report`
|
||||||
|
|
||||||
|
**Step 2: 更新调度文档**
|
||||||
|
|
||||||
|
文档里明确两条 cron:
|
||||||
|
- 结构化价格刷新:`run_intraday_price_watch.sh`
|
||||||
|
- 新闻发现与验证:`run_intraday_discovery_watch.sh`
|
||||||
|
|
||||||
|
推荐起步频率:
|
||||||
|
- `run_intraday_discovery_watch.sh`:每 2 小时一次
|
||||||
|
- `run_intraday_price_watch.sh`:每 4 小时一次
|
||||||
|
|
||||||
|
**Step 3: 运行脚本级 dry-run**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `bash scripts/run_intraday_discovery_watch.sh --dry-run`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 输出候选发现摘要 + 验证摘要 + 信号物化摘要
|
||||||
|
- 不生成正式日报产物
|
||||||
|
|
||||||
|
**Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add scripts/run_intraday_discovery_watch.sh README.md docs/CONFIGURATION.md DEPLOYMENT.md docs/PRODUCTION_CHECKLIST.md
|
||||||
|
git commit -m "feat(intraday): add discovery watch runner"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 6: 运行最终联合验收并准备本地提交
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `README.md`(仅在最终说明缺失时)
|
||||||
|
- Modify: `docs/CONFIGURATION.md`(仅在最终说明缺失时)
|
||||||
|
- Modify: `DEPLOYMENT.md`(仅在最终说明缺失时)
|
||||||
|
|
||||||
|
**Step 1: 运行 focused Go tests**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/discover_intraday_news_candidates_test.go`
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/verify_intraday_news_candidates_test.go`
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/materialize_daily_signals.go ./scripts/materialize_daily_signals_test.go`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 发现层、验证层、信号物化层 focused tests 全通过
|
||||||
|
|
||||||
|
**Step 2: 运行现有日报/前端回归边界**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `go test -count=1 -tags llm_script ./scripts/generate_daily_report.go ./scripts/generate_daily_report_test.go ./scripts/official_import_signature_audit_query_lib.go`
|
||||||
|
- `bash scripts/secret_gate_test.sh`
|
||||||
|
- `bash scripts/test_importers.sh`
|
||||||
|
- `cd frontend && npm test -- --run`
|
||||||
|
- `cd frontend && npm run build`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 原有日报与前端链路不回归
|
||||||
|
- discovery 新增能力不污染正式日报边界
|
||||||
|
|
||||||
|
**Step 3: 运行脚本级联合 dry-run**
|
||||||
|
|
||||||
|
Run:
|
||||||
|
- `bash scripts/run_intraday_discovery_watch.sh --dry-run`
|
||||||
|
- `REPORT_TRIGGER_SOURCE=intraday go run -tags llm_script ./scripts/materialize_daily_signals.go --date=2026-05-25 --dry-run`
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- 不写 `daily_report`
|
||||||
|
- 不覆盖 `latest_report`
|
||||||
|
- 能稳定输出候选数、验证数、事件数、page_mode、source_audit
|
||||||
|
|
||||||
|
**Step 4: 本地提交**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
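git add db/migrations/017_intraday_news_candidates.sql scripts/discover_intraday_news_candidates.go scripts/discover_intraday_news_candidates_test.go scripts/intraday_discovery_provider.go scripts/verify_intraday_news_candidates.go scripts/verify_intraday_news_candidates_test.go scripts/materialize_daily_signals.go scripts/materialize_daily_signals_test.go scripts/testdata/intraday_discovery_search_sample.json scripts/testdata/intraday_discovery_llm_sample.json scripts/testdata/intraday_verification_official_release.html scripts/testdata/intraday_verification_pricing_page.html scripts/testdata/intraday_verification_secondary_media.html scripts/run_intraday_discovery_watch.sh README.md docs/CONFIGURATION.md DEPLOYMENT.md docs/PRODUCTION_CHECKLIST.md docs/plans/2026-05-25-intraday-discovery-verification-implementation-plan.md docs/plans/2026-05-27-intraday-price-watch-plan.md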
|
||||||
|
git commit -m "feat(intraday): add discovery and verification watch pipeline"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 验收标准
|
||||||
|
|
||||||
|
实现完成后,必须同时满足:
|
||||||
|
- 搜索 + LLM 只能产生候选事件,不能直接写成正式日报事实
|
||||||
|
- 只有 `official_confirmed` 的事件才能进入正式 `daily_signal_snapshot` 语义链路
|
||||||
|
- `leak_or_rumor` 不进入正式日报事实层
|
||||||
|
- `run_intraday_discovery_watch.sh` 与 `run_intraday_price_watch.sh` 职责分离
|
||||||
|
- 正式日报仍只由 `run_daily.sh` 负责
|
||||||
|
- 新增链路不会写 `daily_report`、不会覆盖 `latest_report`
|
||||||
|
- discovery provider adapter 在无配置时会明确报前置条件错误;有 fixture / dry-run 模式可本地验证
|
||||||
|
- 新增 focused tests、现有日报测试、前端构建全部通过
|
||||||
|
|
||||||
|
## 非目标
|
||||||
|
|
||||||
|
本计划刻意不做:
|
||||||
|
- 不新增第二套正式日报系统
|
||||||
|
- 不让 LLM 直接替代价格 importer 或官方发布 importer
|
||||||
|
- 不把二手媒体新闻直接映射为 `price_cut` / `price_increase`
|
||||||
|
- 不在第一阶段引入新的前端“候选情报面板”复杂交互;若后续需要,单独立计划
|
||||||
@@ -55,6 +55,6 @@
|
|||||||
|
|
||||||
## 下一步建议
|
## 下一步建议
|
||||||
|
|
||||||
1. 把前端查询页增加“最近一次价格追踪时间”提示
|
1. 为 `run_intraday_discovery_watch.sh` 补充生产级 provider adapter 和调度说明
|
||||||
2. 给 `materialize_daily_signals.go` 增加 `trigger_source=intraday` 的文档说明
|
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间”提示
|
||||||
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 表
|
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板
|
||||||
|
|||||||
410
scripts/discover_intraday_news_candidates.go
Normal file
410
scripts/discover_intraday_news_candidates.go
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
_ "github.com/lib/pq"
|
||||||
|
)
|
||||||
|
|
||||||
|
type intradayNewsCandidate struct {
|
||||||
|
CandidateDate string
|
||||||
|
EventType string
|
||||||
|
ProviderName string
|
||||||
|
ModelName string
|
||||||
|
ProviderCountry string
|
||||||
|
Title string
|
||||||
|
Summary string
|
||||||
|
CandidateURLs []string
|
||||||
|
DiscoverySource string
|
||||||
|
DiscoveryQuery string
|
||||||
|
DiscoveryEvidence map[string]any
|
||||||
|
NormalizedKey string
|
||||||
|
Status string
|
||||||
|
VerificationConfidence string
|
||||||
|
VerificationNotes string
|
||||||
|
}
|
||||||
|
|
||||||
|
// intradayDiscoveryConfig holds everything one discovery run needs.
// Date, DryRun and ProviderLimit are bound to the -date, -dry-run and
// -provider-limit flags; Search and LLM describe the two provider adapters;
// DatabaseURL and Timeout are resolved from the environment by
// loadIntradayDiscoveryConfig.
type intradayDiscoveryConfig struct {
|
||||||
|
Date string
|
||||||
|
DryRun bool
|
||||||
|
Search intradayProviderConfig
|
||||||
|
LLM intradayProviderConfig
|
||||||
|
DatabaseURL string
|
||||||
|
Timeout time.Duration
|
||||||
|
ProviderLimit int
|
||||||
|
}
|
||||||
|
|
||||||
|
// intradayDiscoverySummary is the JSON document printed to stdout at the end
// of a run: the number of candidates, the number of distinct non-empty
// provider names, a per-event-type count, the sorted set of discovery
// sources, and whether the run was a dry run.
type intradayDiscoverySummary struct {
|
||||||
|
CandidateTotal int `json:"candidate_total"`
|
||||||
|
ProviderHitCount int `json:"provider_hit_count"`
|
||||||
|
EventTypeCounts map[string]int `json:"event_type_counts"`
|
||||||
|
DiscoverySourceSet []string `json:"discovery_source_set"`
|
||||||
|
DryRun bool `json:"dry_run"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var intradayDiscoveryLogger *slog.Logger
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
intradayDiscoveryLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadIntradayEnv()
|
||||||
|
cfg := loadIntradayDiscoveryConfig()
|
||||||
|
if err := runIntradayCandidateDiscovery(cfg); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "discover_intraday_news_candidates: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadIntradayDiscoveryConfig() intradayDiscoveryConfig {
|
||||||
|
var cfg intradayDiscoveryConfig
|
||||||
|
flag.StringVar(&cfg.Date, "date", intradayDateValue(), "候选发现日期,格式 YYYY-MM-DD")
|
||||||
|
flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅输出摘要,不写数据库")
|
||||||
|
flag.IntVar(&cfg.ProviderLimit, "provider-limit", 10, "最大 provider 数")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
cfg.DatabaseURL = intradayDefaultDSN()
|
||||||
|
cfg.Timeout = discoveryTimeoutFromEnv()
|
||||||
|
cfg.Search = intradayProviderConfig{
|
||||||
|
Mode: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_SEARCH_PROVIDER")),
|
||||||
|
Command: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_SEARCH_COMMAND")),
|
||||||
|
URL: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_SEARCH_URL")),
|
||||||
|
Fixture: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_SEARCH_FIXTURE")),
|
||||||
|
Timeout: cfg.Timeout,
|
||||||
|
}
|
||||||
|
cfg.LLM = intradayProviderConfig{
|
||||||
|
Mode: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_LLM_PROVIDER")),
|
||||||
|
Command: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_LLM_COMMAND")),
|
||||||
|
URL: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_LLM_URL")),
|
||||||
|
Fixture: strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_LLM_FIXTURE")),
|
||||||
|
Timeout: cfg.Timeout,
|
||||||
|
}
|
||||||
|
return cfg
|
||||||
|
}
|
||||||
|
|
||||||
|
func runIntradayCandidateDiscovery(cfg intradayDiscoveryConfig) error {
|
||||||
|
if strings.TrimSpace(cfg.Date) == "" {
|
||||||
|
return fmt.Errorf("date 未设置")
|
||||||
|
}
|
||||||
|
if err := validateIntradayProviderConfig("search", cfg.Search); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := validateIntradayProviderConfig("llm", cfg.LLM); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
queries := buildIntradayQueries(cfg.Date, cfg.ProviderLimit)
|
||||||
|
searchRecords, err := loadIntradaySearchRecords(cfg.Search, cfg.Date, queries)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
llmRecords, err := loadIntradayLLMRecords(cfg.LLM, cfg.Date, searchRecords)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
candidates := normalizeIntradayCandidates(cfg.Date, searchRecords, llmRecords)
|
||||||
|
summary := summarizeIntradayCandidates(candidates, cfg.DryRun)
|
||||||
|
if cfg.DryRun {
|
||||||
|
return printIntradayDiscoverySummary(summary)
|
||||||
|
}
|
||||||
|
|
||||||
|
db, err := sql.Open("postgres", cfg.DatabaseURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open db: %w", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
if err := upsertIntradayCandidates(context.Background(), db, candidates); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return printIntradayDiscoverySummary(summary)
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) error {
|
||||||
|
if strings.TrimSpace(cfg.Mode) == "" {
|
||||||
|
return fmt.Errorf("%s provider 未设置", name)
|
||||||
|
}
|
||||||
|
switch cfg.Mode {
|
||||||
|
case "fixture":
|
||||||
|
if strings.TrimSpace(cfg.Fixture) == "" {
|
||||||
|
return fmt.Errorf("%s provider fixture 未设置", name)
|
||||||
|
}
|
||||||
|
case "command_json":
|
||||||
|
if strings.TrimSpace(cfg.Command) == "" {
|
||||||
|
return fmt.Errorf("%s provider command 未设置", name)
|
||||||
|
}
|
||||||
|
case "http_json":
|
||||||
|
if strings.TrimSpace(cfg.URL) == "" {
|
||||||
|
return fmt.Errorf("%s provider url 未设置", name)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("%s provider mode 不支持: %s", name, cfg.Mode)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildIntradayQueries returns the search queries for one day: every watched
// provider combined with every keyword phrase, each prefixed by the date.
// A providerLimit between 1 and the watchlist size keeps only that many
// leading providers; any other value keeps the full list.
func buildIntradayQueries(date string, providerLimit int) []string {
	watchlist := []string{
		"OpenAI", "Anthropic", "Google Gemini", "xAI", "DeepSeek",
		"DashScope", "Qwen", "智谱", "百度文心", "腾讯混元", "火山方舟", "MiniMax",
	}
	if providerLimit > 0 && providerLimit < len(watchlist) {
		watchlist = watchlist[:providerLimit]
	}
	phrases := [...]string{"pricing release announcement", "模型 降价 发布 活动"}

	out := make([]string, 0, len(watchlist)*len(phrases))
	for i := 0; i < len(watchlist); i++ {
		for j := range phrases {
			joined := strings.Join([]string{date, watchlist[i], phrases[j]}, " ")
			out = append(out, strings.TrimSpace(joined))
		}
	}
	return out
}
|
||||||
|
|
||||||
|
func normalizeIntradayCandidates(date string, searchRecords []intradaySearchRecord, llmRecords []intradayLLMRecord) []intradayNewsCandidate {
|
||||||
|
searchIndex := indexSearchRecordsByURL(searchRecords)
|
||||||
|
candidatesByKey := map[string]intradayNewsCandidate{}
|
||||||
|
for _, record := range llmRecords {
|
||||||
|
candidate := candidateFromLLMRecord(date, record, searchIndex)
|
||||||
|
if len(candidate.CandidateURLs) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if candidate.ProviderName == "" {
|
||||||
|
candidate.ProviderName = inferProviderFromTitle(candidate.Title)
|
||||||
|
}
|
||||||
|
candidate.EventType = normalizeIntradayEventType(candidate.EventType)
|
||||||
|
candidate.NormalizedKey = buildIntradayNormalizedKey(candidate)
|
||||||
|
mergeIntradayCandidate(candidatesByKey, candidate)
|
||||||
|
}
|
||||||
|
result := make([]intradayNewsCandidate, 0, len(candidatesByKey))
|
||||||
|
for _, candidate := range candidatesByKey {
|
||||||
|
result = append(result, candidate)
|
||||||
|
}
|
||||||
|
sort.Slice(result, func(i, j int) bool {
|
||||||
|
if result[i].ProviderName != result[j].ProviderName {
|
||||||
|
return result[i].ProviderName < result[j].ProviderName
|
||||||
|
}
|
||||||
|
if result[i].EventType != result[j].EventType {
|
||||||
|
return result[i].EventType < result[j].EventType
|
||||||
|
}
|
||||||
|
return result[i].NormalizedKey < result[j].NormalizedKey
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex map[string]intradaySearchRecord) intradayNewsCandidate {
|
||||||
|
candidate := intradayNewsCandidate{
|
||||||
|
CandidateDate: date,
|
||||||
|
EventType: record.EventType,
|
||||||
|
ProviderName: strings.TrimSpace(record.ProviderName),
|
||||||
|
ModelName: strings.TrimSpace(record.ModelName),
|
||||||
|
ProviderCountry: strings.TrimSpace(record.ProviderCountry),
|
||||||
|
Title: strings.TrimSpace(record.Title),
|
||||||
|
Summary: strings.TrimSpace(record.Summary),
|
||||||
|
CandidateURLs: dedupeStrings(record.CandidateURLs),
|
||||||
|
DiscoverySource: "llm_answer",
|
||||||
|
DiscoveryEvidence: map[string]any{"llm_record": record},
|
||||||
|
Status: "candidate",
|
||||||
|
VerificationConfidence: "candidate",
|
||||||
|
}
|
||||||
|
for _, url := range candidate.CandidateURLs {
|
||||||
|
if searchRecord, ok := searchIndex[url]; ok {
|
||||||
|
candidate.DiscoverySource = "web_search+llm"
|
||||||
|
candidate.DiscoveryQuery = searchRecord.Title
|
||||||
|
candidate.DiscoveryEvidence["search_record"] = searchRecord
|
||||||
|
if candidate.ProviderName == "" {
|
||||||
|
candidate.ProviderName = strings.TrimSpace(searchRecord.Provider)
|
||||||
|
}
|
||||||
|
if candidate.Title == "" {
|
||||||
|
candidate.Title = strings.TrimSpace(searchRecord.Title)
|
||||||
|
}
|
||||||
|
if candidate.Summary == "" {
|
||||||
|
candidate.Summary = strings.TrimSpace(searchRecord.Summary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return candidate
|
||||||
|
}
|
||||||
|
|
||||||
|
func indexSearchRecordsByURL(records []intradaySearchRecord) map[string]intradaySearchRecord {
|
||||||
|
indexed := make(map[string]intradaySearchRecord, len(records))
|
||||||
|
for _, record := range records {
|
||||||
|
url := strings.TrimSpace(record.URL)
|
||||||
|
if url == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
indexed[url] = record
|
||||||
|
}
|
||||||
|
return indexed
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeIntradayCandidate(target map[string]intradayNewsCandidate, candidate intradayNewsCandidate) {
|
||||||
|
if candidate.NormalizedKey == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
existing, ok := target[candidate.NormalizedKey]
|
||||||
|
if !ok {
|
||||||
|
target[candidate.NormalizedKey] = candidate
|
||||||
|
return
|
||||||
|
}
|
||||||
|
merged := existing
|
||||||
|
merged.CandidateURLs = dedupeStrings(append(existing.CandidateURLs, candidate.CandidateURLs...))
|
||||||
|
if strings.TrimSpace(merged.Summary) == "" {
|
||||||
|
merged.Summary = candidate.Summary
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(merged.ProviderCountry) == "" {
|
||||||
|
merged.ProviderCountry = candidate.ProviderCountry
|
||||||
|
}
|
||||||
|
if merged.DiscoverySource != candidate.DiscoverySource && candidate.DiscoverySource != "" {
|
||||||
|
merged.DiscoverySource = "web_search+llm"
|
||||||
|
}
|
||||||
|
if merged.DiscoveryEvidence == nil {
|
||||||
|
merged.DiscoveryEvidence = map[string]any{}
|
||||||
|
}
|
||||||
|
if llmRecord, ok := candidate.DiscoveryEvidence["llm_record"]; ok {
|
||||||
|
merged.DiscoveryEvidence["llm_record"] = llmRecord
|
||||||
|
}
|
||||||
|
if searchRecord, ok := candidate.DiscoveryEvidence["search_record"]; ok {
|
||||||
|
merged.DiscoveryEvidence["search_record"] = searchRecord
|
||||||
|
}
|
||||||
|
target[candidate.NormalizedKey] = merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string {
|
||||||
|
provider := normalizeWord(candidate.ProviderName)
|
||||||
|
model := normalizeWord(candidate.ModelName)
|
||||||
|
if model == "" {
|
||||||
|
model = normalizeWord(candidate.Title)
|
||||||
|
}
|
||||||
|
return strings.Join([]string{
|
||||||
|
candidate.CandidateDate,
|
||||||
|
normalizeWord(candidate.EventType),
|
||||||
|
provider,
|
||||||
|
model,
|
||||||
|
}, "|")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
|
||||||
|
eventTypeCounts := make(map[string]int)
|
||||||
|
providerSet := map[string]struct{}{}
|
||||||
|
sourceSet := map[string]struct{}{}
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
eventTypeCounts[candidate.EventType]++
|
||||||
|
if candidate.ProviderName != "" {
|
||||||
|
providerSet[candidate.ProviderName] = struct{}{}
|
||||||
|
}
|
||||||
|
if candidate.DiscoverySource != "" {
|
||||||
|
sourceSet[candidate.DiscoverySource] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sources := make([]string, 0, len(sourceSet))
|
||||||
|
for source := range sourceSet {
|
||||||
|
sources = append(sources, source)
|
||||||
|
}
|
||||||
|
sort.Strings(sources)
|
||||||
|
return intradayDiscoverySummary{
|
||||||
|
CandidateTotal: len(candidates),
|
||||||
|
ProviderHitCount: len(providerSet),
|
||||||
|
EventTypeCounts: eventTypeCounts,
|
||||||
|
DiscoverySourceSet: sources,
|
||||||
|
DryRun: dryRun,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func printIntradayDiscoverySummary(summary intradayDiscoverySummary) error {
|
||||||
|
payload, err := json.Marshal(summary)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fmt.Println(string(payload))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func upsertIntradayCandidates(ctx context.Context, db *sql.DB, candidates []intradayNewsCandidate) error {
|
||||||
|
if db == nil {
|
||||||
|
return fmt.Errorf("db is nil")
|
||||||
|
}
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
urls, err := json.Marshal(candidate.CandidateURLs)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal candidate urls: %w", err)
|
||||||
|
}
|
||||||
|
evidence, err := json.Marshal(candidate.DiscoveryEvidence)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal discovery evidence: %w", err)
|
||||||
|
}
|
||||||
|
_, err = db.ExecContext(ctx, `
|
||||||
|
INSERT INTO intraday_news_candidate (
|
||||||
|
candidate_date, event_type, provider_name, model_name, provider_country,
|
||||||
|
title, summary, candidate_urls, discovery_source, discovery_query,
|
||||||
|
discovery_evidence, normalized_key, status, verification_confidence, verification_notes
|
||||||
|
) VALUES (
|
||||||
|
$1::date, $2, $3, NULLIF($4, ''), NULLIF($5, ''),
|
||||||
|
$6, NULLIF($7, ''), $8::jsonb, $9, NULLIF($10, ''),
|
||||||
|
$11::jsonb, $12, $13, $14, NULLIF($15, '')
|
||||||
|
)
|
||||||
|
ON CONFLICT (normalized_key) DO UPDATE SET
|
||||||
|
title = EXCLUDED.title,
|
||||||
|
summary = COALESCE(NULLIF(EXCLUDED.summary, ''), intraday_news_candidate.summary),
|
||||||
|
candidate_urls = EXCLUDED.candidate_urls,
|
||||||
|
discovery_source = EXCLUDED.discovery_source,
|
||||||
|
discovery_query = COALESCE(NULLIF(EXCLUDED.discovery_query, ''), intraday_news_candidate.discovery_query),
|
||||||
|
discovery_evidence = EXCLUDED.discovery_evidence,
|
||||||
|
provider_country = COALESCE(NULLIF(EXCLUDED.provider_country, ''), intraday_news_candidate.provider_country),
|
||||||
|
updated_at = CURRENT_TIMESTAMP`,
|
||||||
|
candidate.CandidateDate,
|
||||||
|
candidate.EventType,
|
||||||
|
candidate.ProviderName,
|
||||||
|
candidate.ModelName,
|
||||||
|
candidate.ProviderCountry,
|
||||||
|
candidate.Title,
|
||||||
|
candidate.Summary,
|
||||||
|
string(urls),
|
||||||
|
candidate.DiscoverySource,
|
||||||
|
candidate.DiscoveryQuery,
|
||||||
|
string(evidence),
|
||||||
|
candidate.NormalizedKey,
|
||||||
|
candidate.Status,
|
||||||
|
candidate.VerificationConfidence,
|
||||||
|
candidate.VerificationNotes,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("upsert intraday candidate %s: %w", candidate.NormalizedKey, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// inferProviderFromTitle guesses the provider from keywords in a title.
// Matching is a case-insensitive substring test in the listed order; the
// first hit wins and "" is returned when nothing matches.
func inferProviderFromTitle(title string) string {
	// Each entry is {lower-case needle, provider label}; order is significant.
	hints := [][2]string{
		{"openai", "OpenAI"},
		{"anthropic", "Anthropic"},
		{"gemini", "Google"},
		{"deepseek", "DeepSeek"},
		{"qwen", "Qwen"},
		{"dashscope", "DashScope"},
		{"xai", "xAI"},
		{"minimax", "MiniMax"},
		{"智谱", "智谱"},
		{"百度", "百度"},
		{"腾讯", "腾讯"},
	}
	haystack := strings.ToLower(title)
	for i := 0; i < len(hints); i++ {
		if strings.Contains(haystack, hints[i][0]) {
			return hints[i][1]
		}
	}
	return ""
}
|
||||||
|
|
||||||
127
scripts/discover_intraday_news_candidates_test.go
Normal file
127
scripts/discover_intraday_news_candidates_test.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLoadIntradaySearchRecordsFromFixture(t *testing.T) {
|
||||||
|
cfg := intradayProviderConfig{
|
||||||
|
Mode: "fixture",
|
||||||
|
Fixture: filepath.Join("testdata", "intraday_discovery_search_sample.json"),
|
||||||
|
}
|
||||||
|
records, err := loadIntradaySearchRecords(cfg, "2026-05-25", []string{"OpenAI pricing release"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("loadIntradaySearchRecords 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
if len(records) != 2 {
|
||||||
|
t.Fatalf("搜索样例条数错误: got=%d", len(records))
|
||||||
|
}
|
||||||
|
if records[0].URL == "" || records[0].Provider == "" {
|
||||||
|
t.Fatalf("搜索样例未保留 URL/provider: %+v", records[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadIntradayLLMRecordsFromFixture(t *testing.T) {
|
||||||
|
cfg := intradayProviderConfig{
|
||||||
|
Mode: "fixture",
|
||||||
|
Fixture: filepath.Join("testdata", "intraday_discovery_llm_sample.json"),
|
||||||
|
}
|
||||||
|
records, err := loadIntradayLLMRecords(cfg, "2026-05-25", nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("loadIntradayLLMRecords 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
if len(records) != 2 {
|
||||||
|
t.Fatalf("LLM 样例条数错误: got=%d", len(records))
|
||||||
|
}
|
||||||
|
if records[0].EventType != "official_release" {
|
||||||
|
t.Fatalf("LLM 事件类型错误: %+v", records[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
||||||
|
searchRecords := []intradaySearchRecord{{
|
||||||
|
Title: "OpenAI announces GPT-5.6 preview pricing update",
|
||||||
|
Summary: "Search summary",
|
||||||
|
URL: "https://openai.example.com/news/gpt-5-6-pricing",
|
||||||
|
Provider: "OpenAI",
|
||||||
|
}}
|
||||||
|
llmRecords := []intradayLLMRecord{
|
||||||
|
{
|
||||||
|
EventType: "official_release",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
ProviderCountry: "US",
|
||||||
|
Title: "GPT-5.6 preview pricing update",
|
||||||
|
Summary: "First summary",
|
||||||
|
CandidateURLs: []string{"https://openai.example.com/news/gpt-5-6-pricing"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EventType: "official_release",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT 5.6",
|
||||||
|
ProviderCountry: "US",
|
||||||
|
Title: "OpenAI GPT 5.6 preview pricing update",
|
||||||
|
Summary: "Second summary",
|
||||||
|
CandidateURLs: []string{"https://openai.example.com/news/gpt-5-6-pricing"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
candidates := normalizeIntradayCandidates("2026-05-25", searchRecords, llmRecords)
|
||||||
|
if len(candidates) != 1 {
|
||||||
|
t.Fatalf("期望去重后只剩 1 条候选, got=%d", len(candidates))
|
||||||
|
}
|
||||||
|
if candidates[0].DiscoverySource != "web_search+llm" {
|
||||||
|
t.Fatalf("期望 discovery source 合并, got=%q", candidates[0].DiscoverySource)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
||||||
|
llmRecords := []intradayLLMRecord{{
|
||||||
|
EventType: "promo_campaign",
|
||||||
|
ProviderName: "DeepSeek",
|
||||||
|
ModelName: "DeepSeek-V4-Flash",
|
||||||
|
Title: "No URL candidate",
|
||||||
|
Summary: "Should be dropped",
|
||||||
|
}}
|
||||||
|
candidates := normalizeIntradayCandidates("2026-05-25", nil, llmRecords)
|
||||||
|
if len(candidates) != 0 {
|
||||||
|
t.Fatalf("无 URL 候选应被丢弃, got=%d", len(candidates))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
|
||||||
|
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil {
|
||||||
|
t.Fatal("缺少 command 时应报错")
|
||||||
|
}
|
||||||
|
if err := validateIntradayProviderConfig("llm", intradayProviderConfig{Mode: "http_json"}); err == nil {
|
||||||
|
t.Fatal("缺少 url 时应报错")
|
||||||
|
}
|
||||||
|
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "fixture", Fixture: "fixture.json"}); err != nil {
|
||||||
|
t.Fatalf("fixture provider 不应报错: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildIntradayNormalizedKeyUsesProviderModelAndDate(t *testing.T) {
|
||||||
|
key := buildIntradayNormalizedKey(intradayNewsCandidate{
|
||||||
|
CandidateDate: "2026-05-25",
|
||||||
|
EventType: "official_release",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
})
|
||||||
|
if !strings.Contains(key, "2026-05-25") || !strings.Contains(key, "openai") || !strings.Contains(key, "gpt-5-6") {
|
||||||
|
t.Fatalf("normalized key 不符合预期: %q", key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpsertIntradayCandidatesRequiresDB(t *testing.T) {
|
||||||
|
var db *sql.DB
|
||||||
|
err := upsertIntradayCandidates(context.Background(), db, nil)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("nil db 时应报错")
|
||||||
|
}
|
||||||
|
}
|
||||||
111
scripts/intraday_discovery_common.go
Normal file
111
scripts/intraday_discovery_common.go
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func loadIntradayEnv() {
|
||||||
|
for _, path := range []string{".env.local", ".env"} {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, line := range strings.Split(string(data), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" || strings.HasPrefix(line, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key, value, ok := strings.Cut(line, "=")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key = strings.TrimSpace(key)
|
||||||
|
value = strings.Trim(strings.TrimSpace(value), `"'`)
|
||||||
|
if key == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := os.LookupEnv(key); exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = os.Setenv(key, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func intradayDefaultDSN() string {
|
||||||
|
if dsn := os.Getenv("DATABASE_URL"); dsn != "" {
|
||||||
|
return dsn
|
||||||
|
}
|
||||||
|
return "postgres://long@/llm_intelligence?host=/var/run/postgresql"
|
||||||
|
}
|
||||||
|
|
||||||
|
func intradayDateValue() string {
|
||||||
|
if value := strings.TrimSpace(os.Getenv("REPORT_DATE")); value != "" {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
return time.Now().Format("2006-01-02")
|
||||||
|
}
|
||||||
|
|
||||||
|
func discoveryTimeoutFromEnv() time.Duration {
|
||||||
|
raw := strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_TIMEOUT_SEC"))
|
||||||
|
if raw == "" {
|
||||||
|
return 20 * time.Second
|
||||||
|
}
|
||||||
|
var seconds int
|
||||||
|
if _, err := fmt.Sscanf(raw, "%d", &seconds); err != nil || seconds <= 0 {
|
||||||
|
return 20 * time.Second
|
||||||
|
}
|
||||||
|
return time.Duration(seconds) * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeIntradayEventType maps an event type, ignoring case and
// surrounding whitespace, onto one of the known canonical values. Anything
// else becomes "unknown".
func normalizeIntradayEventType(value string) string {
	cleaned := strings.TrimSpace(strings.ToLower(value))
	for _, known := range [...]string{
		"price_cut",
		"price_increase",
		"official_release",
		"promo_campaign",
		"leak_or_rumor",
	} {
		if cleaned == known {
			return known
		}
	}
	return "unknown"
}
|
||||||
|
|
||||||
|
// intradayWordSeparators matches every run of characters that is not a
// Unicode letter, a Unicode digit or "-". It is compiled once at package
// initialisation rather than on every normalizeWord call.
var intradayWordSeparators = regexp.MustCompile(`[^\p{L}\p{N}\-]+`)

// normalizeWord turns free text into a slug for dedupe keys: lower-cased,
// "_" and every run of other non-alphanumeric characters replaced by "-",
// leading and trailing "-" removed. An input with no letters or digits
// yields "unknown".
//
// Letters and digits of any script are preserved. Keeping only ASCII would
// reduce every CJK name (e.g. "智谱", "百度文心") to "unknown", making
// unrelated providers, models and titles share one normalized key.
func normalizeWord(value string) string {
	value = strings.ToLower(strings.TrimSpace(value))
	value = strings.ReplaceAll(value, "_", "-")
	value = intradayWordSeparators.ReplaceAllString(value, "-")
	value = strings.Trim(value, "-")
	if value == "" {
		return "unknown"
	}
	return value
}
|
||||||
|
|
||||||
|
func dedupeStrings(values []string) []string {
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
result := make([]string, 0, len(values))
|
||||||
|
for _, value := range values {
|
||||||
|
trimmed := strings.TrimSpace(value)
|
||||||
|
if trimmed == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := seen[trimmed]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[trimmed] = struct{}{}
|
||||||
|
result = append(result, trimmed)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
188
scripts/intraday_discovery_provider.go
Normal file
188
scripts/intraday_discovery_provider.go
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"strings"
	"time"
)
|
||||||
|
|
||||||
|
// intradayProviderConfig selects how one provider (search or LLM) is reached.
// Mode is "fixture", "command_json" or "http_json"; depending on the mode,
// Fixture is a file that is read, Command is run through `sh -c`, or URL is
// requested over HTTP. Timeout is used as the HTTP client timeout.
type intradayProviderConfig struct {
|
||||||
|
Mode string
|
||||||
|
Command string
|
||||||
|
URL string
|
||||||
|
Fixture string
|
||||||
|
Timeout time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
type intradaySearchRecord struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
Summary string `json:"summary"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Provider string `json:"provider"`
|
||||||
|
ProviderURL string `json:"provider_url"`
|
||||||
|
PublishedAt string `json:"published_at"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type intradayLLMRecord struct {
|
||||||
|
EventType string `json:"event_type"`
|
||||||
|
ProviderName string `json:"provider_name"`
|
||||||
|
ModelName string `json:"model_name"`
|
||||||
|
ProviderCountry string `json:"provider_country"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Summary string `json:"summary"`
|
||||||
|
CandidateURLs []string `json:"candidate_urls"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// intradayLLMRequest is the JSON body handed to the LLM provider: the run
// date plus the search results collected for that date.
type intradayLLMRequest struct {
|
||||||
|
Date string `json:"date"`
|
||||||
|
SearchResults []intradaySearchRecord `json:"search_results"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadIntradaySearchRecords(cfg intradayProviderConfig, date string, queries []string) ([]intradaySearchRecord, error) {
|
||||||
|
var all []intradaySearchRecord
|
||||||
|
for _, query := range queries {
|
||||||
|
payload, err := loadIntradayProviderPayload(cfg, intradayProviderPayloadInput{
|
||||||
|
Date: date,
|
||||||
|
Query: query,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(bytes.TrimSpace(payload)) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var records []intradaySearchRecord
|
||||||
|
if err := json.Unmarshal(payload, &records); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal search records for query %q: %w", query, err)
|
||||||
|
}
|
||||||
|
all = append(all, records...)
|
||||||
|
if cfg.Mode == "fixture" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return all, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadIntradayLLMRecords(cfg intradayProviderConfig, date string, searchResults []intradaySearchRecord) ([]intradayLLMRecord, error) {
|
||||||
|
request := intradayLLMRequest{Date: date, SearchResults: searchResults}
|
||||||
|
body, err := json.Marshal(request)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal llm request: %w", err)
|
||||||
|
}
|
||||||
|
payload, err := loadIntradayProviderPayload(cfg, intradayProviderPayloadInput{
|
||||||
|
Date: date,
|
||||||
|
RequestBody: body,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(bytes.TrimSpace(payload)) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
var records []intradayLLMRecord
|
||||||
|
if err := json.Unmarshal(payload, &records); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal llm records: %w", err)
|
||||||
|
}
|
||||||
|
return records, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// intradayProviderPayloadInput carries the per-call values for a provider
// invocation: Date and Query replace the {{date}} and {{query}} placeholders
// in the command/URL templates, and a non-empty RequestBody is piped to the
// command's stdin or sent as an HTTP POST body.
type intradayProviderPayloadInput struct {
|
||||||
|
Date string
|
||||||
|
Query string
|
||||||
|
RequestBody []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadIntradayProviderPayload(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
|
||||||
|
mode := strings.TrimSpace(cfg.Mode)
|
||||||
|
switch mode {
|
||||||
|
case "fixture":
|
||||||
|
if strings.TrimSpace(cfg.Fixture) == "" {
|
||||||
|
return nil, fmt.Errorf("provider fixture 未设置")
|
||||||
|
}
|
||||||
|
return os.ReadFile(cfg.Fixture)
|
||||||
|
case "command_json":
|
||||||
|
if strings.TrimSpace(cfg.Command) == "" {
|
||||||
|
return nil, fmt.Errorf("provider command 未设置")
|
||||||
|
}
|
||||||
|
return runIntradayCommand(cfg, input)
|
||||||
|
case "http_json":
|
||||||
|
if strings.TrimSpace(cfg.URL) == "" {
|
||||||
|
return nil, fmt.Errorf("provider url 未设置")
|
||||||
|
}
|
||||||
|
return fetchIntradayHTTP(cfg, input)
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unsupported provider mode %q", mode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runIntradayCommand(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
|
||||||
|
command := strings.TrimSpace(cfg.Command)
|
||||||
|
command = strings.ReplaceAll(command, "{{date}}", input.Date)
|
||||||
|
command = strings.ReplaceAll(command, "{{query}}", shellEscapeSingleArg(input.Query))
|
||||||
|
cmd := exec.Command("sh", "-c", command)
|
||||||
|
cmd.Env = append(os.Environ(),
|
||||||
|
"INTRADAY_DISCOVERY_DATE="+input.Date,
|
||||||
|
"INTRADAY_DISCOVERY_QUERY="+input.Query,
|
||||||
|
)
|
||||||
|
if len(input.RequestBody) > 0 {
|
||||||
|
cmd.Stdin = bytes.NewReader(input.RequestBody)
|
||||||
|
}
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||||
|
return nil, fmt.Errorf("run provider command: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("run provider command: %w", err)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchIntradayHTTP(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
|
||||||
|
client := &http.Client{Timeout: cfg.Timeout}
|
||||||
|
rawURL := strings.TrimSpace(cfg.URL)
|
||||||
|
rawURL = strings.ReplaceAll(rawURL, "{{date}}", input.Date)
|
||||||
|
rawURL = strings.ReplaceAll(rawURL, "{{query}}", input.Query)
|
||||||
|
|
||||||
|
method := http.MethodGet
|
||||||
|
var body io.Reader
|
||||||
|
if len(input.RequestBody) > 0 {
|
||||||
|
method = http.MethodPost
|
||||||
|
body = bytes.NewReader(input.RequestBody)
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest(method, rawURL, body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("build provider request: %w", err)
|
||||||
|
}
|
||||||
|
if len(input.RequestBody) > 0 {
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("call provider url: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
payload, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("call provider url: unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(payload)))
|
||||||
|
}
|
||||||
|
payload, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read provider response: %w", err)
|
||||||
|
}
|
||||||
|
return payload, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// shellEscapeSingleArg wraps value in single quotes so a POSIX shell reads it
// as exactly one literal argument. Each embedded single quote is emitted as
// '"'"' (close the quote, a double-quoted quote, reopen the quote). An empty
// value yields the empty quoted string ''.
func shellEscapeSingleArg(value string) string {
	const quote = "'"
	escaped := strings.ReplaceAll(value, quote, `'"'"'`)
	return quote + escaped + quote
}
|
||||||
@@ -46,6 +46,7 @@ type signalModelEvent struct {
|
|||||||
TrustLabel string `json:"trust_label"`
|
TrustLabel string `json:"trust_label"`
|
||||||
SourceKindLabel string `json:"source_kind_label"`
|
SourceKindLabel string `json:"source_kind_label"`
|
||||||
PrimarySource string `json:"primary_source"`
|
PrimarySource string `json:"primary_source"`
|
||||||
|
SourceURL string `json:"source_url"`
|
||||||
UpdatedAt string `json:"updated_at"`
|
UpdatedAt string `json:"updated_at"`
|
||||||
EvidenceDetail string `json:"evidence_detail"`
|
EvidenceDetail string `json:"evidence_detail"`
|
||||||
Baseline string `json:"baseline"`
|
Baseline string `json:"baseline"`
|
||||||
@@ -367,6 +368,12 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error)
|
|||||||
}
|
}
|
||||||
events = append(events, priceEvents...)
|
events = append(events, priceEvents...)
|
||||||
|
|
||||||
|
discoveryEvents, err := loadVerifiedDiscoverySignalEvents(db, date)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
|
||||||
|
|
||||||
sort.Slice(events, func(i, j int) bool {
|
sort.Slice(events, func(i, j int) bool {
|
||||||
if events[i].Priority != events[j].Priority {
|
if events[i].Priority != events[j].Priority {
|
||||||
return events[i].Priority > events[j].Priority
|
return events[i].Priority > events[j].Priority
|
||||||
@@ -409,6 +416,7 @@ func loadSignalPromoCampaignEvents(date string) ([]signalModelEvent, error) {
|
|||||||
TrustLabel: signalFirstNonEmpty(definition.TrustLabel, "官方来源 / 一级证据"),
|
TrustLabel: signalFirstNonEmpty(definition.TrustLabel, "官方来源 / 一级证据"),
|
||||||
SourceKindLabel: signalFirstNonEmpty(definition.SourceKindLabel, "官方活动页"),
|
SourceKindLabel: signalFirstNonEmpty(definition.SourceKindLabel, "官方活动页"),
|
||||||
PrimarySource: definition.PrimarySource,
|
PrimarySource: definition.PrimarySource,
|
||||||
|
SourceURL: definition.PrimarySource,
|
||||||
UpdatedAt: signalFormatEventUpdatedAt("", definition.Date),
|
UpdatedAt: signalFormatEventUpdatedAt("", definition.Date),
|
||||||
EvidenceDetail: definition.EvidenceDetail,
|
EvidenceDetail: definition.EvidenceDetail,
|
||||||
Baseline: signalFirstNonEmpty(definition.Baseline, "活动窗口开启"),
|
Baseline: signalFirstNonEmpty(definition.Baseline, "活动窗口开启"),
|
||||||
@@ -520,6 +528,7 @@ func loadSignalOfficialReleaseEvents(db *sql.DB, date string) ([]signalModelEven
|
|||||||
TrustLabel: buildSignalReleaseTrustLabel(model, dateConfidence),
|
TrustLabel: buildSignalReleaseTrustLabel(model, dateConfidence),
|
||||||
SourceKindLabel: buildSignalReleaseSourceKindLabel(dateSourceKind, dateConfidence),
|
SourceKindLabel: buildSignalReleaseSourceKindLabel(dateSourceKind, dateConfidence),
|
||||||
PrimarySource: sourceURL,
|
PrimarySource: sourceURL,
|
||||||
|
SourceURL: sourceURL,
|
||||||
UpdatedAt: releaseDate.Format("2006-01-02 15:04"),
|
UpdatedAt: releaseDate.Format("2006-01-02 15:04"),
|
||||||
EvidenceDetail: buildSignalReleaseEvidenceDetail(dateSourceKind, dateConfidence),
|
EvidenceDetail: buildSignalReleaseEvidenceDetail(dateSourceKind, dateConfidence),
|
||||||
Baseline: "官方首次发布",
|
Baseline: "官方首次发布",
|
||||||
@@ -610,6 +619,7 @@ func loadSignalNewModelEvents(db *sql.DB, date string) ([]signalModelEvent, erro
|
|||||||
TrustLabel: buildSignalTrustLabel(model),
|
TrustLabel: buildSignalTrustLabel(model),
|
||||||
SourceKindLabel: "模型快照",
|
SourceKindLabel: "模型快照",
|
||||||
PrimarySource: buildSignalPrimarySource("region_pricing", model.OperatorName),
|
PrimarySource: buildSignalPrimarySource("region_pricing", model.OperatorName),
|
||||||
|
SourceURL: buildSignalPrimarySource("region_pricing", model.OperatorName),
|
||||||
UpdatedAt: createdAt.Format("2006-01-02 15:04"),
|
UpdatedAt: createdAt.Format("2006-01-02 15:04"),
|
||||||
EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照",
|
EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照",
|
||||||
Baseline: "首次出现",
|
Baseline: "首次出现",
|
||||||
@@ -709,6 +719,7 @@ func loadSignalPriceChangeEvents(db *sql.DB, date string) ([]signalModelEvent, e
|
|||||||
TrustLabel: buildSignalTrustLabel(model),
|
TrustLabel: buildSignalTrustLabel(model),
|
||||||
SourceKindLabel: "价格快照",
|
SourceKindLabel: "价格快照",
|
||||||
PrimarySource: "pricing_history",
|
PrimarySource: "pricing_history",
|
||||||
|
SourceURL: buildSignalPrimarySource("region_pricing", model.OperatorName),
|
||||||
UpdatedAt: changedAt.Format("2006-01-02 15:04"),
|
UpdatedAt: changedAt.Format("2006-01-02 15:04"),
|
||||||
EvidenceDetail: buildSignalPriceEvidenceDetail(changePct, oldInputPrice, newInputPrice, model.Currency),
|
EvidenceDetail: buildSignalPriceEvidenceDetail(changePct, oldInputPrice, newInputPrice, model.Currency),
|
||||||
Baseline: fmt.Sprintf("较昨日 %+.0f%%", changePct),
|
Baseline: fmt.Sprintf("较昨日 %+.0f%%", changePct),
|
||||||
@@ -747,6 +758,241 @@ func dedupeSignalEvents(events []signalModelEvent) []signalModelEvent {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func loadVerifiedDiscoverySignalEvents(db *sql.DB, date string) ([]signalModelEvent, error) {
|
||||||
|
rows, err := db.Query(`
|
||||||
|
SELECT
|
||||||
|
event_type,
|
||||||
|
provider_name,
|
||||||
|
COALESCE(model_name, ''),
|
||||||
|
COALESCE(provider_country, ''),
|
||||||
|
title,
|
||||||
|
COALESCE(summary, ''),
|
||||||
|
COALESCE(candidate_urls::text, '[]'),
|
||||||
|
COALESCE(verification_notes, ''),
|
||||||
|
updated_at
|
||||||
|
FROM intraday_news_candidate
|
||||||
|
WHERE candidate_date = $1::date
|
||||||
|
AND status = 'verified'
|
||||||
|
AND verification_confidence = 'official_confirmed'
|
||||||
|
ORDER BY updated_at DESC, id DESC
|
||||||
|
`, date)
|
||||||
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), `relation "intraday_news_candidate" does not exist`) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var events []signalModelEvent
|
||||||
|
for rows.Next() {
|
||||||
|
var (
|
||||||
|
eventType string
|
||||||
|
providerName string
|
||||||
|
modelName string
|
||||||
|
providerCountry string
|
||||||
|
title string
|
||||||
|
summary string
|
||||||
|
rawURLs string
|
||||||
|
notes string
|
||||||
|
updatedAt time.Time
|
||||||
|
)
|
||||||
|
if err := rows.Scan(&eventType, &providerName, &modelName, &providerCountry, &title, &summary, &rawURLs, ¬es, &updatedAt); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var urls []string
|
||||||
|
if err := json.Unmarshal([]byte(rawURLs), &urls); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal discovery candidate urls: %w", err)
|
||||||
|
}
|
||||||
|
primaryURL := firstString(urls)
|
||||||
|
if strings.TrimSpace(primaryURL) == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
normalizedType := signalNormalizeIntradayEventType(eventType)
|
||||||
|
events = append(events, signalModelEvent{
|
||||||
|
EventType: normalizedType,
|
||||||
|
ModelName: signalFirstNonEmpty(modelName, title),
|
||||||
|
ProviderName: providerName,
|
||||||
|
OperatorName: providerName,
|
||||||
|
Audience: buildDiscoveryAudience(normalizedType),
|
||||||
|
TrustLabel: "官方来源 / discovery 验证",
|
||||||
|
SourceKindLabel: buildDiscoverySourceKind(normalizedType),
|
||||||
|
PrimarySource: primaryURL,
|
||||||
|
SourceURL: primaryURL,
|
||||||
|
UpdatedAt: updatedAt.Format("2006-01-02 15:04"),
|
||||||
|
EvidenceDetail: signalFirstNonEmpty(notes, summary),
|
||||||
|
Baseline: buildDiscoveryBaseline(normalizedType),
|
||||||
|
Summary: signalFirstNonEmpty(summary, title),
|
||||||
|
Priority: buildDiscoveryPriority(normalizedType),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if err := rows.Err(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return filterVerifiedDiscoverySignalEvents(events), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func filterVerifiedDiscoverySignalEvents(events []signalModelEvent) []signalModelEvent {
|
||||||
|
filtered := make([]signalModelEvent, 0, len(events))
|
||||||
|
for _, event := range events {
|
||||||
|
switch event.EventType {
|
||||||
|
case "official_release", "promo_campaign", "price_cut", "price_increase":
|
||||||
|
filtered = append(filtered, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeVerifiedDiscoveryEvents(nativeEvents, discoveryEvents []signalModelEvent) []signalModelEvent {
|
||||||
|
merged := append([]signalModelEvent{}, nativeEvents...)
|
||||||
|
index := make(map[string]int, len(merged))
|
||||||
|
for i, event := range merged {
|
||||||
|
index[signalEventMergeKey(event)] = i
|
||||||
|
}
|
||||||
|
for _, event := range filterVerifiedDiscoverySignalEvents(discoveryEvents) {
|
||||||
|
key := signalEventMergeKey(event)
|
||||||
|
if idx, exists := index[key]; exists {
|
||||||
|
merged[idx] = mergeSignalEventEvidence(merged[idx], event)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
index[key] = len(merged)
|
||||||
|
merged = append(merged, event)
|
||||||
|
}
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeSignalEventEvidence(native, discovery signalModelEvent) signalModelEvent {
|
||||||
|
merged := native
|
||||||
|
if strings.TrimSpace(merged.SourceKindLabel) == "" {
|
||||||
|
merged.SourceKindLabel = discovery.SourceKindLabel
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(merged.SourceURL) == "" {
|
||||||
|
merged.SourceURL = discovery.SourceURL
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(merged.PrimarySource) == "" {
|
||||||
|
merged.PrimarySource = discovery.PrimarySource
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(merged.EvidenceDetail) == "" {
|
||||||
|
merged.EvidenceDetail = discovery.EvidenceDetail
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(merged.TrustLabel) == "" {
|
||||||
|
merged.TrustLabel = discovery.TrustLabel
|
||||||
|
}
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func signalEventMergeKey(event signalModelEvent) string {
|
||||||
|
return strings.Join([]string{
|
||||||
|
signalNormalizeIntradayEventType(event.EventType),
|
||||||
|
signalNormalizeWord(event.ProviderName),
|
||||||
|
signalNormalizeWord(event.ModelName),
|
||||||
|
}, "|")
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDiscoveryAudience returns the audience blurb shown for a discovery
// event of the given (already normalized) type, falling back to a generic
// blurb for any other value.
func buildDiscoveryAudience(eventType string) string {
	audiences := map[string]string{
		"official_release": "适合需要尽快复查默认选型与路线图影响的团队",
		"promo_campaign":   "适合想利用活动窗口压低成本的团队",
		"price_cut":        "适合准备趁降价重排默认模型的团队",
		"price_increase":   "适合提前准备替代模型和预算回退方案的团队",
	}
	if audience, known := audiences[eventType]; known {
		return audience
	}
	return "适合关注日内情报变化的读者"
}
|
||||||
|
|
||||||
|
// buildDiscoverySourceKind returns the source-kind label for a discovery
// event of the given (already normalized) type. Both price event types share
// one label; any other value gets the bare discovery label.
func buildDiscoverySourceKind(eventType string) string {
	labels := map[string]string{
		"official_release": "discovery 验证 / 官方发布页",
		"promo_campaign":   "discovery 验证 / 官方活动页",
		"price_cut":        "discovery 验证 / 官方价格页",
		"price_increase":   "discovery 验证 / 官方价格页",
	}
	if label, known := labels[eventType]; known {
		return label
	}
	return "discovery 验证"
}
|
||||||
|
|
||||||
|
// buildDiscoveryBaseline returns the baseline text attached to a discovery
// event of the given (already normalized) type. Both price event types share
// one value; any other type gets a generic "verified" marker.
func buildDiscoveryBaseline(eventType string) string {
	baselines := map[string]string{
		"official_release": "discovery 验证通过",
		"promo_campaign":   "活动窗口已验证",
		"price_cut":        "official_confirmed",
		"price_increase":   "official_confirmed",
	}
	if baseline, known := baselines[eventType]; known {
		return baseline
	}
	return "discovery verified"
}
|
||||||
|
|
||||||
|
// buildDiscoveryPriority returns the sort priority assigned to a discovery
// event of the given (already normalized) type: releases rank highest, then
// promo campaigns, price cuts and price increases. Any other type gets 80.
func buildDiscoveryPriority(eventType string) int {
	priorities := map[string]int{
		"official_release": 118,
		"promo_campaign":   112,
		"price_cut":        96,
		"price_increase":   94,
	}
	if priority, known := priorities[eventType]; known {
		return priority
	}
	return 80
}
|
||||||
|
|
||||||
|
// firstString returns the first element of values that is not blank (empty
// or whitespace only). The element is returned as stored, without trimming.
// It returns "" when there is no such element.
func firstString(values []string) string {
	for idx := 0; idx < len(values); idx++ {
		if candidate := values[idx]; strings.TrimSpace(candidate) != "" {
			return candidate
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// signalNormalizeIntradayEventType lower-cases and trims value, then returns
// it when it is one of price_cut, price_increase, official_release or
// promo_campaign. Every other input maps to "unknown".
func signalNormalizeIntradayEventType(value string) string {
	normalized := strings.ToLower(value)
	normalized = strings.TrimSpace(normalized)
	switch normalized {
	case "price_cut", "price_increase", "official_release", "promo_campaign":
		return normalized
	}
	return "unknown"
}
|
||||||
|
|
||||||
|
// signalNormalizeWord turns value into a lower-case slug made of the ASCII
// characters a-z and 0-9. Every run of other characters (including "_", "-",
// spaces, punctuation and any non-ASCII rune) becomes a single "-", and no
// "-" is kept at either end. When nothing remains the result is "unknown".
func signalNormalizeWord(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	isSeparator := func(r rune) bool {
		return !(('a' <= r && r <= 'z') || ('0' <= r && r <= '9'))
	}
	// FieldsFunc drops empty fields, which collapses separator runs and
	// removes leading/trailing separators in one step.
	slug := strings.Join(strings.FieldsFunc(lowered, isSeparator), "-")
	if slug == "" {
		return "unknown"
	}
	return slug
}
|
||||||
|
|
||||||
func classifySignalFreeSource(model signalModelInfo) string {
|
func classifySignalFreeSource(model signalModelInfo) string {
|
||||||
switch model.OperatorType {
|
switch model.OperatorType {
|
||||||
case "official", "cloud":
|
case "official", "cloud":
|
||||||
|
|||||||
@@ -31,3 +31,64 @@ func TestBuildSignalPageMode(t *testing.T) {
|
|||||||
t.Fatalf("官方发布日 page_mode 错误: %q", got)
|
t.Fatalf("官方发布日 page_mode 错误: %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildSignalPageModeTreatsVerifiedDiscoveryPromoAsHot(t *testing.T) {
|
||||||
|
got := buildSignalPageMode(signalDailySignals{}, []signalModelEvent{{EventType: "promo_campaign", ModelName: "GPT-5.6"}})
|
||||||
|
if got != "hot" {
|
||||||
|
t.Fatalf("已验证活动事件应触发 hot, got=%q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFilterDiscoveryEventsDropsLeakAndCandidateOnly(t *testing.T) {
|
||||||
|
events := []signalModelEvent{
|
||||||
|
{EventType: "official_release", ModelName: "GPT-5.6", Priority: 120},
|
||||||
|
{EventType: "leak_or_rumor", ModelName: "GPT-5.6", Priority: 200},
|
||||||
|
{EventType: "unknown", ModelName: "Mystery", Priority: 50},
|
||||||
|
}
|
||||||
|
filtered := filterVerifiedDiscoverySignalEvents(events)
|
||||||
|
if len(filtered) != 1 {
|
||||||
|
t.Fatalf("期望仅保留 1 条正式事实事件, got=%d", len(filtered))
|
||||||
|
}
|
||||||
|
if filtered[0].EventType != "official_release" {
|
||||||
|
t.Fatalf("错误保留了非正式事件: %+v", filtered)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeVerifiedDiscoveryEventsPrefersNativeFact(t *testing.T) {
|
||||||
|
native := []signalModelEvent{{
|
||||||
|
EventType: "official_release",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
PrimarySource: "native_release",
|
||||||
|
EvidenceDetail: "native evidence",
|
||||||
|
Priority: 120,
|
||||||
|
}}
|
||||||
|
discovery := []signalModelEvent{{
|
||||||
|
EventType: "official_release",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
PrimarySource: "discovery_release",
|
||||||
|
EvidenceDetail: "discovery evidence",
|
||||||
|
SourceKindLabel: "官方博客",
|
||||||
|
Priority: 110,
|
||||||
|
}}
|
||||||
|
merged := mergeVerifiedDiscoveryEvents(native, discovery)
|
||||||
|
if len(merged) != 1 {
|
||||||
|
t.Fatalf("期望去重后只剩 1 条事件, got=%d", len(merged))
|
||||||
|
}
|
||||||
|
if merged[0].PrimarySource != "native_release" {
|
||||||
|
t.Fatalf("原生事实不应被 discovery 覆盖: %+v", merged[0])
|
||||||
|
}
|
||||||
|
if merged[0].SourceKindLabel != "官方博客" {
|
||||||
|
t.Fatalf("原生事实应补入 discovery 证据缺口: %+v", merged[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T) {
|
||||||
|
native := []signalModelEvent{{EventType: "new_model", ModelName: "DeepSeek-V4-Flash", ProviderName: "DeepSeek", Priority: 80}}
|
||||||
|
discovery := []signalModelEvent{{EventType: "leak_or_rumor", ModelName: "DeepSeek-V4-Flash", ProviderName: "DeepSeek", Priority: 130}}
|
||||||
|
merged := mergeVerifiedDiscoveryEvents(native, discovery)
|
||||||
|
if len(merged) != 1 || merged[0].EventType != "new_model" {
|
||||||
|
t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
46
scripts/run_intraday_discovery_watch.sh
Normal file
46
scripts/run_intraday_discovery_watch.sh
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Intraday discovery watch: discover news candidates, verify them, then
# materialize the daily signals.
#
# Usage:   run_intraday_discovery_watch.sh [--dry-run]
# Env:     DATABASE_URL, INTRADAY_DISCOVERY_SEARCH_PROVIDER and
#          INTRADAY_DISCOVERY_LLM_PROVIDER must be non-empty.
#          REPORT_DATE is optional and defaults to today (date +%F).
# Files:   .env.local and then .env are sourced from the repo root if present.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Source order matters: a value assigned in .env replaces one from .env.local.
for env_file in ".env.local" ".env"; do
  if [[ -f "$env_file" ]]; then
    # shellcheck disable=SC1090
    source "$env_file"
  fi
done

# Fail fast, in this order, on the first missing required setting.
for required_var in \
  DATABASE_URL \
  INTRADAY_DISCOVERY_SEARCH_PROVIDER \
  INTRADAY_DISCOVERY_LLM_PROVIDER; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "${required_var} 未设置" >&2
    exit 1
  fi
done

REPORT_DATE="${REPORT_DATE:-$(date +%F)}"

# Only a literal --dry-run as the first argument enables dry-run mode.
DRY_RUN="false"
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN="true"
fi

# All three stages receive the same flags.
stage_args=(--date "$REPORT_DATE")
if [[ "$DRY_RUN" == "true" ]]; then
  stage_args+=(--dry-run)
fi

go run -tags llm_script \
  ./scripts/discover_intraday_news_candidates.go \
  ./scripts/intraday_discovery_provider.go \
  ./scripts/intraday_discovery_common.go \
  "${stage_args[@]}"

go run -tags llm_script \
  ./scripts/verify_intraday_news_candidates.go \
  ./scripts/intraday_discovery_common.go \
  "${stage_args[@]}"

REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script \
  ./scripts/materialize_daily_signals.go \
  "${stage_args[@]}"
|
||||||
24
scripts/testdata/intraday_discovery_llm_sample.json
vendored
Normal file
24
scripts/testdata/intraday_discovery_llm_sample.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"event_type": "official_release",
|
||||||
|
"provider_name": "OpenAI",
|
||||||
|
"model_name": "GPT-5.6",
|
||||||
|
"provider_country": "US",
|
||||||
|
"title": "GPT-5.6 preview pricing update",
|
||||||
|
"summary": "OpenAI preview material indicates GPT-5.6 entered a preview pricing window.",
|
||||||
|
"candidate_urls": [
|
||||||
|
"https://openai.example.com/news/gpt-5-6-pricing"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"event_type": "promo_campaign",
|
||||||
|
"provider_name": "DeepSeek",
|
||||||
|
"model_name": "DeepSeek-V4-Flash",
|
||||||
|
"provider_country": "CN",
|
||||||
|
"title": "DeepSeek V4 Flash campaign",
|
||||||
|
"summary": "Official campaign page shows a temporary promotional window for DeepSeek-V4-Flash.",
|
||||||
|
"candidate_urls": [
|
||||||
|
"https://deepseek.example.com/campaign/v4-flash"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
18
scripts/testdata/intraday_discovery_search_sample.json
vendored
Normal file
18
scripts/testdata/intraday_discovery_search_sample.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "OpenAI announces GPT-5.6 preview pricing update",
|
||||||
|
"summary": "OpenAI preview announcement mentions GPT-5.6 and updated API pricing references.",
|
||||||
|
"url": "https://openai.example.com/news/gpt-5-6-pricing",
|
||||||
|
"provider": "OpenAI",
|
||||||
|
"provider_url": "https://openai.example.com",
|
||||||
|
"published_at": "2026-05-25T09:00:00Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "DeepSeek launches V4 Flash campaign",
|
||||||
|
"summary": "Campaign page suggests temporary promotional pricing for DeepSeek-V4-Flash.",
|
||||||
|
"url": "https://deepseek.example.com/campaign/v4-flash",
|
||||||
|
"provider": "DeepSeek",
|
||||||
|
"provider_url": "https://deepseek.example.com",
|
||||||
|
"published_at": "2026-05-25T10:00:00Z"
|
||||||
|
}
|
||||||
|
]
|
||||||
7
scripts/testdata/intraday_verification_official_release.html
vendored
Normal file
7
scripts/testdata/intraday_verification_official_release.html
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<html><body>
|
||||||
|
<article>
|
||||||
|
<h1>OpenAI announces GPT-5.6 preview pricing update</h1>
|
||||||
|
<p>GPT-5.6 preview is now available in official preview channels.</p>
|
||||||
|
<p>Published 2026-05-25.</p>
|
||||||
|
</article>
|
||||||
|
</body></html>
|
||||||
8
scripts/testdata/intraday_verification_pricing_page.html
vendored
Normal file
8
scripts/testdata/intraday_verification_pricing_page.html
vendored
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<html><body>
|
||||||
|
<section>
|
||||||
|
<h1>DeepSeek-V4-Flash pricing</h1>
|
||||||
|
<p>Old price: $10</p>
|
||||||
|
<p>New price: $6</p>
|
||||||
|
<p>Campaign window active now.</p>
|
||||||
|
</section>
|
||||||
|
</body></html>
|
||||||
6
scripts/testdata/intraday_verification_secondary_media.html
vendored
Normal file
6
scripts/testdata/intraday_verification_secondary_media.html
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<html><body>
|
||||||
|
<article>
|
||||||
|
<h1>Industry blog discusses GPT-5.6 leak</h1>
|
||||||
|
<p>Writers speculate GPT-5.6 may appear soon based on references.</p>
|
||||||
|
</article>
|
||||||
|
</body></html>
|
||||||
501
scripts/verify_intraday_news_candidates.go
Normal file
501
scripts/verify_intraday_news_candidates.go
Normal file
@@ -0,0 +1,501 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
_ "github.com/lib/pq"
|
||||||
|
)
|
||||||
|
|
||||||
|
// verificationCandidateRow is one intraday_news_candidate row as loaded for
// verification.
type verificationCandidateRow struct {
	// ID is the row's id column.
	ID int64
	// CandidateDate is candidate_date rendered as text.
	CandidateDate string
	// EventType is the stored event_type value, not yet normalized.
	EventType string

	ProviderName, ModelName, ProviderCountry string

	Title, Summary string

	// CandidateURLs is decoded from the candidate_urls JSON array.
	CandidateURLs []string
	// Status and VerificationConfidence hold the row's state before this
	// verification run.
	Status                 string
	VerificationConfidence string
}
|
||||||
|
|
||||||
|
// intradayVerificationConfig carries the settings of one verification run.
type intradayVerificationConfig struct {
	// Date selects the candidate_date to verify (YYYY-MM-DD).
	Date string
	// DryRun prints the summary without writing results to the database.
	DryRun bool
	// DatabaseURL is the DSN handed to sql.Open.
	DatabaseURL string
	// Timeout is applied to the HTTP client that fetches candidate URLs.
	Timeout time.Duration
}
|
||||||
|
|
||||||
|
// intradayVerificationResult is the outcome of verifying one candidate.
type intradayVerificationResult struct {
	// CandidateID is the ID of the candidate row this result belongs to.
	CandidateID int64
	// CandidateStatus and VerificationConfidence are the values the candidate
	// should carry after verification (for example "verified" together with
	// "official_confirmed").
	CandidateStatus        string
	VerificationConfidence string

	// VerifierSource is the source classification of VerifierURL, the
	// document the result was derived from.
	VerifierSource, VerifierURL string
	// VerifierStatus is the check outcome, such as "matched", "insufficient"
	// or "error".
	VerifierStatus string
	// ExtractedFacts holds the facts pulled from the fetched document.
	ExtractedFacts map[string]any
	// Notes is a human-readable explanation, or the error text on failure.
	Notes string
}
|
||||||
|
|
||||||
|
// intradayVerificationSummary is the JSON-serializable set of counters
// describing one verification run.
type intradayVerificationSummary struct {
	// Candidate counters.
	CandidateTotal          int `json:"candidate_total"`
	VerifiedTotal           int `json:"verified_total"`
	OfficialConfirmedTotal  int `json:"official_confirmed_total"`
	SecondaryConfirmedTotal int `json:"secondary_confirmed_total"`
	RejectedTotal           int `json:"rejected_total"`

	// DryRun records whether the run skipped database writes.
	DryRun bool `json:"dry_run"`
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadIntradayEnv()
|
||||||
|
cfg := intradayVerificationConfig{}
|
||||||
|
flag.StringVar(&cfg.Date, "date", intradayDateValue(), "验证日期,格式 YYYY-MM-DD")
|
||||||
|
flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅输出摘要,不写数据库")
|
||||||
|
flag.Parse()
|
||||||
|
cfg.DatabaseURL = intradayDefaultDSN()
|
||||||
|
cfg.Timeout = discoveryTimeoutFromEnv()
|
||||||
|
if err := runIntradayCandidateVerification(cfg); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "verify_intraday_news_candidates: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runIntradayCandidateVerification(cfg intradayVerificationConfig) error {
|
||||||
|
if strings.TrimSpace(cfg.Date) == "" {
|
||||||
|
return fmt.Errorf("date 未设置")
|
||||||
|
}
|
||||||
|
db, err := sql.Open("postgres", cfg.DatabaseURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open db: %w", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
candidates, err := loadIntradayVerificationCandidates(context.Background(), db, cfg.Date)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
results := make([]intradayVerificationResult, 0, len(candidates))
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
result, err := verifyIntradayCandidate(candidate, cfg.Timeout)
|
||||||
|
if err != nil {
|
||||||
|
result = intradayVerificationResult{
|
||||||
|
CandidateID: candidate.ID,
|
||||||
|
CandidateStatus: "candidate",
|
||||||
|
VerificationConfidence: candidate.VerificationConfidence,
|
||||||
|
VerifierStatus: "error",
|
||||||
|
Notes: err.Error(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
results = append(results, result)
|
||||||
|
}
|
||||||
|
if !cfg.DryRun {
|
||||||
|
if err := persistIntradayVerificationResults(context.Background(), db, results); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return printIntradayVerificationSummary(summarizeIntradayVerification(results, cfg.DryRun))
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadIntradayVerificationCandidates(ctx context.Context, db *sql.DB, date string) ([]verificationCandidateRow, error) {
|
||||||
|
rows, err := db.QueryContext(ctx, `
|
||||||
|
SELECT id, candidate_date::text, event_type, provider_name, COALESCE(model_name, ''), COALESCE(provider_country, ''),
|
||||||
|
title, COALESCE(summary, ''), COALESCE(candidate_urls::text, '[]'), status, verification_confidence
|
||||||
|
FROM intraday_news_candidate
|
||||||
|
WHERE candidate_date = $1::date
|
||||||
|
AND status IN ('candidate', 'verifying')
|
||||||
|
ORDER BY discovered_at DESC, id DESC`, date)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query intraday candidates: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var candidates []verificationCandidateRow
|
||||||
|
for rows.Next() {
|
||||||
|
var row verificationCandidateRow
|
||||||
|
var rawURLs string
|
||||||
|
if err := rows.Scan(&row.ID, &row.CandidateDate, &row.EventType, &row.ProviderName, &row.ModelName, &row.ProviderCountry, &row.Title, &row.Summary, &rawURLs, &row.Status, &row.VerificationConfidence); err != nil {
|
||||||
|
return nil, fmt.Errorf("scan intraday candidate: %w", err)
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(rawURLs), &row.CandidateURLs); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal candidate urls: %w", err)
|
||||||
|
}
|
||||||
|
candidates = append(candidates, row)
|
||||||
|
}
|
||||||
|
return candidates, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
func verifyIntradayCandidate(candidate verificationCandidateRow, timeout time.Duration) (intradayVerificationResult, error) {
|
||||||
|
client := &http.Client{Timeout: timeout}
|
||||||
|
best := intradayVerificationResult{
|
||||||
|
CandidateID: candidate.ID,
|
||||||
|
CandidateStatus: "candidate",
|
||||||
|
VerificationConfidence: candidate.VerificationConfidence,
|
||||||
|
VerifierStatus: "insufficient",
|
||||||
|
Notes: "未找到足够证据",
|
||||||
|
ExtractedFacts: map[string]any{},
|
||||||
|
}
|
||||||
|
for _, candidateURL := range candidate.CandidateURLs {
|
||||||
|
body, err := fetchVerificationDocument(candidateURL, client)
|
||||||
|
if err != nil {
|
||||||
|
best = preferVerificationResult(best, intradayVerificationResult{
|
||||||
|
CandidateID: candidate.ID,
|
||||||
|
CandidateStatus: "candidate",
|
||||||
|
VerificationConfidence: candidate.VerificationConfidence,
|
||||||
|
VerifierURL: candidateURL,
|
||||||
|
VerifierStatus: "error",
|
||||||
|
Notes: err.Error(),
|
||||||
|
ExtractedFacts: map[string]any{},
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, candidateURL, body)
|
||||||
|
if result.CandidateID == 0 {
|
||||||
|
result.CandidateID = candidate.ID
|
||||||
|
}
|
||||||
|
best = preferVerificationResult(best, result)
|
||||||
|
if best.CandidateStatus == "verified" && best.VerificationConfidence == "official_confirmed" {
|
||||||
|
return best, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return best, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchVerificationDocument downloads rawURL with client and returns the
// response body as a string.
//
// Candidate URLs come from the discovery pipeline and are not trusted, so the
// amount of data read is capped: at most 8 MiB of a successful body is
// returned (anything beyond that is silently dropped), and at most 1 KiB of a
// non-2xx body is quoted in the error, which keeps oversized pages out of
// memory and out of the error text callers store as notes.
//
// Errors are returned when the request cannot be built or sent, when the
// status is outside 2xx, or when reading the body fails.
func fetchVerificationDocument(rawURL string, client *http.Client) (string, error) {
	const (
		maxDocumentBytes     = 8 << 20
		maxErrorSnippetBytes = 1 << 10
	)

	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		return "", fmt.Errorf("build verification request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; llm-intelligence intraday verifier)")

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("fetch verification document: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the snippet only enriches the error message.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorSnippetBytes))
		return "", fmt.Errorf("fetch verification document: unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(snippet)))
	}

	payload, err := io.ReadAll(io.LimitReader(resp.Body, maxDocumentBytes))
	if err != nil {
		return "", fmt.Errorf("read verification document: %w", err)
	}
	return string(payload), nil
}
|
||||||
|
|
||||||
|
func verifyCandidateDocument(candidate verificationCandidateRow, candidateURL, body string) intradayVerificationResult {
|
||||||
|
source := classifyVerificationSource(candidate.ProviderName, candidateURL)
|
||||||
|
facts := extractVerificationFacts(body)
|
||||||
|
modelMatched := bodyMentionsModel(body, candidate.ModelName, candidate.Title)
|
||||||
|
result := intradayVerificationResult{
|
||||||
|
CandidateID: candidate.ID,
|
||||||
|
CandidateStatus: "candidate",
|
||||||
|
VerificationConfidence: "candidate",
|
||||||
|
VerifierSource: source,
|
||||||
|
VerifierURL: candidateURL,
|
||||||
|
VerifierStatus: "insufficient",
|
||||||
|
ExtractedFacts: facts,
|
||||||
|
Notes: "证据不足",
|
||||||
|
}
|
||||||
|
if isOfficialVerificationSource(source) {
|
||||||
|
switch normalizeIntradayEventType(candidate.EventType) {
|
||||||
|
case "official_release":
|
||||||
|
if modelMatched {
|
||||||
|
result.CandidateStatus = "verified"
|
||||||
|
result.VerificationConfidence = "official_confirmed"
|
||||||
|
result.VerifierStatus = "matched"
|
||||||
|
result.Notes = "官方页面命中模型发布线索"
|
||||||
|
}
|
||||||
|
case "promo_campaign":
|
||||||
|
if modelMatched && bodyMentionsPromo(body) {
|
||||||
|
result.CandidateStatus = "verified"
|
||||||
|
result.VerificationConfidence = "official_confirmed"
|
||||||
|
result.VerifierStatus = "matched"
|
||||||
|
result.Notes = "官方页面命中活动窗口或促销语义"
|
||||||
|
}
|
||||||
|
case "price_cut", "price_increase":
|
||||||
|
if priceResult, ok := deriveVerifiedPriceEvent(candidate.EventType, facts); ok {
|
||||||
|
result.CandidateStatus = "verified"
|
||||||
|
result.VerificationConfidence = "official_confirmed"
|
||||||
|
result.VerifierStatus = "matched"
|
||||||
|
result.ExtractedFacts = priceResult
|
||||||
|
result.Notes = "官方价格页命中真实价格变化"
|
||||||
|
} else if modelMatched {
|
||||||
|
result.VerifierStatus = "insufficient"
|
||||||
|
result.Notes = "命中模型但缺少可计算的价格变化事实"
|
||||||
|
}
|
||||||
|
case "leak_or_rumor":
|
||||||
|
if modelMatched {
|
||||||
|
result.CandidateStatus = "verified"
|
||||||
|
result.VerificationConfidence = "secondary_confirmed"
|
||||||
|
result.VerifierStatus = "matched"
|
||||||
|
result.Notes = "保留为待确认情报,不进入正式事实层"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if modelMatched {
|
||||||
|
result.CandidateStatus = "verified"
|
||||||
|
result.VerificationConfidence = "secondary_confirmed"
|
||||||
|
result.VerifierStatus = "matched"
|
||||||
|
result.Notes = "仅二手来源命中,不能进入正式事实层"
|
||||||
|
}
|
||||||
|
if result.VerifierStatus == "insufficient" && modelMatched && !isOfficialVerificationSource(source) {
|
||||||
|
result.VerificationConfidence = "secondary_confirmed"
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// isOfficialVerificationSource reports whether source is one of the
// first-party source labels: "official_page", "official_docs",
// "official_blog" or "pricing_page". The comparison is exact and
// case-sensitive.
func isOfficialVerificationSource(source string) bool {
	return source == "official_page" ||
		source == "official_docs" ||
		source == "official_blog" ||
		source == "pricing_page"
}
|
||||||
|
|
||||||
|
func classifyVerificationSource(providerName, rawURL string) string {
|
||||||
|
parsed, err := url.Parse(rawURL)
|
||||||
|
if err != nil {
|
||||||
|
return "secondary_media"
|
||||||
|
}
|
||||||
|
host := strings.ToLower(parsed.Host)
|
||||||
|
path := strings.ToLower(parsed.Path)
|
||||||
|
if isOfficialProviderHost(providerName, host) {
|
||||||
|
switch {
|
||||||
|
case strings.Contains(host, "docs.") || strings.Contains(path, "/docs"):
|
||||||
|
return "official_docs"
|
||||||
|
case strings.Contains(host, "pricing") || strings.Contains(path, "pricing") || strings.Contains(path, "price"):
|
||||||
|
return "pricing_page"
|
||||||
|
case strings.Contains(path, "blog") || strings.Contains(path, "news") || strings.Contains(path, "announcement"):
|
||||||
|
return "official_blog"
|
||||||
|
default:
|
||||||
|
return "official_page"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "secondary_media"
|
||||||
|
}
|
||||||
|
|
||||||
|
func isOfficialProviderHost(providerName, host string) bool {
|
||||||
|
tokens := providerHostTokens(providerName)
|
||||||
|
for _, token := range tokens {
|
||||||
|
if token != "" && strings.Contains(host, token) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// providerHostTokens returns the host tokens used to recognise a provider's
// own web properties.
//
// The provider name is trimmed and lower-cased before lookup. Known providers
// map to a fixed list of domains. Any other non-empty name is returned as a
// single token equal to the normalised name, and an empty name yields nil.
func providerHostTokens(providerName string) []string {
	key := strings.ToLower(strings.TrimSpace(providerName))

	// Built per call so callers always receive a slice they own.
	known := map[string][]string{
		"openai":        {"openai.com"},
		"anthropic":     {"anthropic.com"},
		"google":        {"google.com", "google.dev", "ai.google.dev"},
		"google gemini": {"google.com", "google.dev", "ai.google.dev"},
		"gemini":        {"google.com", "google.dev", "ai.google.dev"},
		"deepseek":      {"deepseek.com", "deepseek.ai"},
		"qwen":          {"aliyun.com", "dashscope.com"},
		"dashscope":     {"aliyun.com", "dashscope.com"},
		"xai":           {"x.ai"},
		"智谱":            {"zhipuai.cn"},
		"百度":            {"baidu.com", "cloud.baidu.com"},
		"百度文心":          {"baidu.com", "cloud.baidu.com"},
		"腾讯":            {"tencent.com", "cloud.tencent.com"},
		"腾讯混元":          {"tencent.com", "cloud.tencent.com"},
		"minimax":       {"minimax.io", "minimax.chat"},
	}

	if hosts, found := known[key]; found {
		return hosts
	}
	if key == "" {
		return nil
	}
	return []string{key}
}
|
||||||
|
|
||||||
|
func bodyMentionsModel(body, modelName, title string) bool {
|
||||||
|
normBody := normalizeEvidenceText(body)
|
||||||
|
for _, candidate := range []string{modelName, title} {
|
||||||
|
normCandidate := normalizeEvidenceText(candidate)
|
||||||
|
if normCandidate != "" && strings.Contains(normBody, normCandidate) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// bodyMentionsPromo reports whether the lower-cased body contains any of the
// promotional markers (English or Chinese) listed below.
func bodyMentionsPromo(body string) bool {
	promoMarkers := []string{"campaign", "promo", "promotion", "discount", "活动", "优惠", "限时", "窗口"}
	haystack := strings.ToLower(body)
	hit := false
	for i := 0; i < len(promoMarkers) && !hit; i++ {
		hit = strings.Contains(haystack, promoMarkers[i])
	}
	return hit
}
|
||||||
|
|
||||||
|
func extractVerificationFacts(body string) map[string]any {
|
||||||
|
facts := map[string]any{}
|
||||||
|
oldPrice, newPrice, ok := extractPricePair(body)
|
||||||
|
if ok {
|
||||||
|
facts["old_input_price"] = oldPrice
|
||||||
|
facts["new_input_price"] = newPrice
|
||||||
|
if oldPrice != 0 {
|
||||||
|
facts["price_change_pct"] = ((newPrice - oldPrice) / oldPrice) * 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return facts
|
||||||
|
}
|
||||||
|
|
||||||
|
func deriveVerifiedPriceEvent(eventType string, facts map[string]any) (map[string]any, bool) {
|
||||||
|
oldValue, oldOK := facts["old_input_price"].(float64)
|
||||||
|
newValue, newOK := facts["new_input_price"].(float64)
|
||||||
|
changePct, pctOK := facts["price_change_pct"].(float64)
|
||||||
|
if !oldOK || !newOK || !pctOK || oldValue <= 0 || newValue <= 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
normalized := normalizeIntradayEventType(eventType)
|
||||||
|
if normalized == "price_cut" && changePct >= 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
if normalized == "price_increase" && changePct <= 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
return facts, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Patterns used by extractPricePair, compiled once at package init instead of
// on every call. The leading \b keeps the keywords from matching inside
// longer words ("threshold price" is not "old price", "photo price" is not
// "to price").
var (
	verificationOldPriceRe = regexp.MustCompile(`(?i)\b(old|from)\s*price[^0-9$¥]*[$¥]?([0-9]+(?:\.[0-9]+)?)`)
	verificationNewPriceRe = regexp.MustCompile(`(?i)\b(new|to)\s*price[^0-9$¥]*[$¥]?([0-9]+(?:\.[0-9]+)?)`)
)

// extractPricePair looks for an "old/from price <number>" and a
// "new/to price <number>" phrase in body and returns the two numbers.
//
// Matching is case-insensitive; an optional "$" or "¥" may precede each
// number. The first occurrence of each phrase wins. The boolean is false when
// either phrase is missing or a number cannot be parsed.
func extractPricePair(body string) (float64, float64, bool) {
	oldMatch := verificationOldPriceRe.FindStringSubmatch(body)
	newMatch := verificationNewPriceRe.FindStringSubmatch(body)
	if len(oldMatch) < 3 || len(newMatch) < 3 {
		return 0, 0, false
	}

	var oldValue, newValue float64
	if _, err := fmt.Sscanf(oldMatch[2], "%f", &oldValue); err != nil {
		return 0, 0, false
	}
	if _, err := fmt.Sscanf(newMatch[2], "%f", &newValue); err != nil {
		return 0, 0, false
	}
	return oldValue, newValue, true
}
|
||||||
|
|
||||||
|
// verificationEvidenceNoiseRe matches every run of characters that is not a
// lower-case ASCII letter, an ASCII digit or a Han character. It is compiled
// once because normalizeEvidenceText runs on whole document bodies.
var verificationEvidenceNoiseRe = regexp.MustCompile(`[^a-z0-9\p{Han}]+`)

// normalizeEvidenceText lower-cases value and removes everything except
// a-z, 0-9 and Han characters, so that "GPT-5.6 Preview" and "gpt 5.6
// preview" compare equal. No whitespace survives the strip, so no further
// trimming is needed.
func normalizeEvidenceText(value string) string {
	return verificationEvidenceNoiseRe.ReplaceAllString(strings.ToLower(value), "")
}
|
||||||
|
|
||||||
|
func preferVerificationResult(current, next intradayVerificationResult) intradayVerificationResult {
|
||||||
|
if verificationScore(next) > verificationScore(current) {
|
||||||
|
return next
|
||||||
|
}
|
||||||
|
return current
|
||||||
|
}
|
||||||
|
|
||||||
|
func verificationScore(result intradayVerificationResult) int {
|
||||||
|
score := 0
|
||||||
|
switch result.CandidateStatus {
|
||||||
|
case "verified":
|
||||||
|
score += 20
|
||||||
|
case "rejected":
|
||||||
|
score += 5
|
||||||
|
}
|
||||||
|
switch result.VerificationConfidence {
|
||||||
|
case "official_confirmed":
|
||||||
|
score += 10
|
||||||
|
case "secondary_confirmed":
|
||||||
|
score += 5
|
||||||
|
}
|
||||||
|
switch result.VerifierStatus {
|
||||||
|
case "matched":
|
||||||
|
score += 3
|
||||||
|
case "contradicted":
|
||||||
|
score += 1
|
||||||
|
}
|
||||||
|
return score
|
||||||
|
}
|
||||||
|
|
||||||
|
func persistIntradayVerificationResults(ctx context.Context, db *sql.DB, results []intradayVerificationResult) error {
|
||||||
|
for _, result := range results {
|
||||||
|
facts, err := json.Marshal(result.ExtractedFacts)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal extracted facts: %w", err)
|
||||||
|
}
|
||||||
|
_, err = db.ExecContext(ctx, `
|
||||||
|
INSERT INTO intraday_news_verification (
|
||||||
|
candidate_id, verifier_source, verifier_url, verifier_status, extracted_facts, notes
|
||||||
|
) VALUES ($1, NULLIF($2, ''), NULLIF($3, ''), $4, $5::jsonb, NULLIF($6, ''))`,
|
||||||
|
result.CandidateID,
|
||||||
|
result.VerifierSource,
|
||||||
|
result.VerifierURL,
|
||||||
|
result.VerifierStatus,
|
||||||
|
string(facts),
|
||||||
|
result.Notes,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("insert intraday verification: %w", err)
|
||||||
|
}
|
||||||
|
_, err = db.ExecContext(ctx, `
|
||||||
|
UPDATE intraday_news_candidate
|
||||||
|
SET status = $2,
|
||||||
|
verification_confidence = $3,
|
||||||
|
verification_notes = NULLIF($4, ''),
|
||||||
|
updated_at = CURRENT_TIMESTAMP
|
||||||
|
WHERE id = $1`,
|
||||||
|
result.CandidateID,
|
||||||
|
result.CandidateStatus,
|
||||||
|
result.VerificationConfidence,
|
||||||
|
result.Notes,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("update intraday candidate: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func summarizeIntradayVerification(results []intradayVerificationResult, dryRun bool) intradayVerificationSummary {
|
||||||
|
summary := intradayVerificationSummary{CandidateTotal: len(results), DryRun: dryRun}
|
||||||
|
for _, result := range results {
|
||||||
|
if result.CandidateStatus == "verified" {
|
||||||
|
summary.VerifiedTotal++
|
||||||
|
}
|
||||||
|
switch result.VerificationConfidence {
|
||||||
|
case "official_confirmed":
|
||||||
|
summary.OfficialConfirmedTotal++
|
||||||
|
case "secondary_confirmed":
|
||||||
|
summary.SecondaryConfirmedTotal++
|
||||||
|
}
|
||||||
|
if result.CandidateStatus == "rejected" {
|
||||||
|
summary.RejectedTotal++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return summary
|
||||||
|
}
|
||||||
|
|
||||||
|
func printIntradayVerificationSummary(summary intradayVerificationSummary) error {
|
||||||
|
payload, err := json.Marshal(summary)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fmt.Println(string(payload))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
99
scripts/verify_intraday_news_candidates_test.go
Normal file
99
scripts/verify_intraday_news_candidates_test.go
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVerifyCandidateDocumentOfficialRelease(t *testing.T) {
|
||||||
|
body, err := os.ReadFile(filepath.Join("testdata", "intraday_verification_official_release.html"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 official release fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
candidate := verificationCandidateRow{
|
||||||
|
ID: 1,
|
||||||
|
EventType: "official_release",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
Title: "GPT-5.6 preview pricing update",
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, "https://openai.com/news/gpt-5-6-preview", string(body))
|
||||||
|
if result.CandidateStatus != "verified" || result.VerificationConfidence != "official_confirmed" {
|
||||||
|
t.Fatalf("官方发布应被确认: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyCandidateDocumentPriceCutNeedsRealPriceFacts(t *testing.T) {
|
||||||
|
body, err := os.ReadFile(filepath.Join("testdata", "intraday_verification_pricing_page.html"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 pricing fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
candidate := verificationCandidateRow{
|
||||||
|
ID: 2,
|
||||||
|
EventType: "price_cut",
|
||||||
|
ProviderName: "DeepSeek",
|
||||||
|
ModelName: "DeepSeek-V4-Flash",
|
||||||
|
Title: "DeepSeek-V4-Flash price cut",
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, "https://deepseek.com/pricing/v4-flash", string(body))
|
||||||
|
if result.CandidateStatus != "verified" || result.VerificationConfidence != "official_confirmed" {
|
||||||
|
t.Fatalf("价格页命中真实价格变化后应确认: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyCandidateDocumentPromoCampaignOfficial(t *testing.T) {
|
||||||
|
body, err := os.ReadFile(filepath.Join("testdata", "intraday_verification_pricing_page.html"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 promo fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
candidate := verificationCandidateRow{
|
||||||
|
ID: 3,
|
||||||
|
EventType: "promo_campaign",
|
||||||
|
ProviderName: "DeepSeek",
|
||||||
|
ModelName: "DeepSeek-V4-Flash",
|
||||||
|
Title: "DeepSeek V4 Flash campaign",
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, "https://deepseek.com/campaign/v4-flash", string(body))
|
||||||
|
if result.CandidateStatus != "verified" || result.VerificationConfidence != "official_confirmed" {
|
||||||
|
t.Fatalf("官方活动页应被确认: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyCandidateDocumentSecondaryMediaDowngrades(t *testing.T) {
|
||||||
|
body, err := os.ReadFile(filepath.Join("testdata", "intraday_verification_secondary_media.html"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 secondary fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
candidate := verificationCandidateRow{
|
||||||
|
ID: 4,
|
||||||
|
EventType: "official_release",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
Title: "GPT-5.6 leak discussion",
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, "https://someblog.example.com/gpt-5-6-leak", string(body))
|
||||||
|
if result.VerificationConfidence != "secondary_confirmed" {
|
||||||
|
t.Fatalf("二手媒体应降级为 secondary_confirmed: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifyCandidateDocumentLeakStaysOutOfOfficialFacts(t *testing.T) {
|
||||||
|
body, err := os.ReadFile(filepath.Join("testdata", "intraday_verification_secondary_media.html"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 leak fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
candidate := verificationCandidateRow{
|
||||||
|
ID: 5,
|
||||||
|
EventType: "leak_or_rumor",
|
||||||
|
ProviderName: "OpenAI",
|
||||||
|
ModelName: "GPT-5.6",
|
||||||
|
Title: "GPT-5.6 leak discussion",
|
||||||
|
}
|
||||||
|
result := verifyCandidateDocument(candidate, "https://someblog.example.com/gpt-5-6-leak", string(body))
|
||||||
|
if result.VerificationConfidence == "official_confirmed" {
|
||||||
|
t.Fatalf("泄露类不应升级为正式事实: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user