diff --git a/db/migrations/008_plan_catalog_inventory.sql b/db/migrations/008_plan_catalog_inventory.sql new file mode 100644 index 0000000..40636c4 --- /dev/null +++ b/db/migrations/008_plan_catalog_inventory.sql @@ -0,0 +1,61 @@ +-- Phase 2: Token Plan / Coding Plan 基础目录清单 + +CREATE TABLE IF NOT EXISTS plan_catalog_inventory ( + id BIGSERIAL PRIMARY KEY, + provider_id BIGINT REFERENCES model_provider(id) ON DELETE SET NULL, + operator_id BIGINT REFERENCES operator(id) ON DELETE SET NULL, + catalog_code TEXT NOT NULL UNIQUE, + platform_name TEXT NOT NULL, + platform_name_cn TEXT, + platform_type TEXT NOT NULL, + plan_family TEXT NOT NULL, + plan_status TEXT NOT NULL DEFAULT 'confirmed', + source_url TEXT NOT NULL, + source_title TEXT, + source_kind TEXT NOT NULL DEFAULT 'official_doc', + region TEXT NOT NULL DEFAULT 'global', + currency TEXT, + billing_cycle TEXT, + last_checked_at TIMESTAMP NOT NULL, + importer_key TEXT, + notes TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + created_by TEXT DEFAULT 'system', + updated_by TEXT DEFAULT 'system', + CONSTRAINT chk_plan_catalog_platform_type + CHECK (platform_type IN ('official_vendor', 'cloud_operator', 'relay_platform')), + CONSTRAINT chk_plan_catalog_family + CHECK (plan_family IN ('token_plan', 'coding_plan', 'package_plan', 'pay_as_you_go', 'unknown')), + CONSTRAINT chk_plan_catalog_status + CHECK (plan_status IN ('confirmed', 'pending_verification', 'retired')), + CONSTRAINT chk_plan_catalog_source_kind + CHECK (source_kind IN ('official_doc', 'official_pricing', 'official_product_page', 'official_community', 'inferred')), + CONSTRAINT chk_plan_catalog_currency + CHECK (currency IS NULL OR currency IN ('CNY', 'USD', 'EUR')) +); + +CREATE INDEX IF NOT EXISTS idx_plan_catalog_provider_id ON plan_catalog_inventory(provider_id); +CREATE INDEX IF NOT EXISTS idx_plan_catalog_operator_id ON plan_catalog_inventory(operator_id); +CREATE INDEX 
IF NOT EXISTS idx_plan_catalog_family ON plan_catalog_inventory(plan_family); +CREATE INDEX IF NOT EXISTS idx_plan_catalog_platform_type ON plan_catalog_inventory(platform_type); +CREATE INDEX IF NOT EXISTS idx_plan_catalog_status ON plan_catalog_inventory(plan_status); +CREATE INDEX IF NOT EXISTS idx_plan_catalog_last_checked_at ON plan_catalog_inventory(last_checked_at); + +COMMENT ON TABLE plan_catalog_inventory IS 'Token Plan / Coding Plan / 套餐包 / 按量计费基础目录清单,用于后续 importer 排期与证据管理'; + +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_proc WHERE proname = 'update_updated_at_column') + AND NOT EXISTS ( + SELECT 1 + FROM pg_trigger + WHERE tgname = 'plan_catalog_inventory_updated_at' + ) THEN + CREATE TRIGGER plan_catalog_inventory_updated_at + BEFORE UPDATE ON plan_catalog_inventory + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + END IF; +END +$$; diff --git a/db/migrations/009_plan_catalog_inventory_segments.sql b/db/migrations/009_plan_catalog_inventory_segments.sql new file mode 100644 index 0000000..8339883 --- /dev/null +++ b/db/migrations/009_plan_catalog_inventory_segments.sql @@ -0,0 +1,35 @@ +-- Phase 2: 基础目录增加榜单分组与排名信息 + +ALTER TABLE plan_catalog_inventory + ADD COLUMN IF NOT EXISTS catalog_segment TEXT NOT NULL DEFAULT 'general', + ADD COLUMN IF NOT EXISTS market_rank INTEGER; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'chk_plan_catalog_segment' + ) THEN + ALTER TABLE plan_catalog_inventory + ADD CONSTRAINT chk_plan_catalog_segment + CHECK (catalog_segment IN ('general', 'vendor_top20', 'relay_top20plus', 'global_reference')); + END IF; + + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'chk_plan_catalog_market_rank' + ) THEN + ALTER TABLE plan_catalog_inventory + ADD CONSTRAINT chk_plan_catalog_market_rank + CHECK (market_rank IS NULL OR market_rank > 0); + END IF; +END +$$; + +CREATE INDEX IF NOT EXISTS idx_plan_catalog_segment ON plan_catalog_inventory(catalog_segment); +CREATE 
INDEX IF NOT EXISTS idx_plan_catalog_market_rank ON plan_catalog_inventory(market_rank); + +COMMENT ON COLUMN plan_catalog_inventory.catalog_segment IS '目录分组:general / vendor_top20 / relay_top20plus / global_reference'; +COMMENT ON COLUMN plan_catalog_inventory.market_rank IS '榜单排序,数字越小优先级越高'; diff --git a/db/migrations/010_operator_type_column.sql b/db/migrations/010_operator_type_column.sql new file mode 100644 index 0000000..f929417 --- /dev/null +++ b/db/migrations/010_operator_type_column.sql @@ -0,0 +1,22 @@ +-- 补齐 operator.type 字段,避免订阅与目录 importer 在新库中失败 + +ALTER TABLE operator + ADD COLUMN IF NOT EXISTS type TEXT NOT NULL DEFAULT 'reseller'; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'chk_operator_type' + ) THEN + ALTER TABLE operator + ADD CONSTRAINT chk_operator_type + CHECK (type IN ('official', 'cloud', 'relay', 'reseller')); + END IF; +END +$$; + +CREATE INDEX IF NOT EXISTS idx_operator_type ON operator(type); + +COMMENT ON COLUMN operator.type IS '运营方类型:official / cloud / relay / reseller'; diff --git a/db/migrations/011_subscription_plan_package_family.sql b/db/migrations/011_subscription_plan_package_family.sql new file mode 100644 index 0000000..52f0297 --- /dev/null +++ b/db/migrations/011_subscription_plan_package_family.sql @@ -0,0 +1,8 @@ +-- Phase 2: 订阅套餐表支持 package_plan + +ALTER TABLE subscription_plan + DROP CONSTRAINT IF EXISTS subscription_plan_plan_family_check; + +ALTER TABLE subscription_plan + ADD CONSTRAINT subscription_plan_plan_family_check + CHECK (plan_family IN ('token_plan', 'coding_plan', 'package_plan')); diff --git a/db/migrations/012_daily_signal_snapshot.sql b/db/migrations/012_daily_signal_snapshot.sql new file mode 100644 index 0000000..4b5ce69 --- /dev/null +++ b/db/migrations/012_daily_signal_snapshot.sql @@ -0,0 +1,41 @@ +-- 第一模块:每日关键信号快照 + +CREATE TABLE IF NOT EXISTS daily_signal_snapshot ( + id BIGSERIAL PRIMARY KEY, + signal_date DATE NOT NULL UNIQUE, + status TEXT NOT NULL 
DEFAULT 'generated', + new_models INTEGER NOT NULL DEFAULT 0, + price_changes INTEGER NOT NULL DEFAULT 0, + official_free INTEGER NOT NULL DEFAULT 0, + aggregator_free INTEGER NOT NULL DEFAULT 0, + unknown_free INTEGER NOT NULL DEFAULT 0, + event_count INTEGER NOT NULL DEFAULT 0, + page_mode TEXT NOT NULL DEFAULT 'standard', + event_type_counts JSONB NOT NULL DEFAULT '{}'::jsonb, + top_events JSONB NOT NULL DEFAULT '[]'::jsonb, + source_audit TEXT, + generated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_daily_signal_snapshot_date ON daily_signal_snapshot(signal_date); +CREATE INDEX IF NOT EXISTS idx_daily_signal_snapshot_status ON daily_signal_snapshot(status); + +COMMENT ON TABLE daily_signal_snapshot IS '第一模块产出的每日关键信号快照,用于日报与其他下游形态消费'; +COMMENT ON COLUMN daily_signal_snapshot.top_events IS '已筛选的关键事件数组,JSONB 序列化 ModelEvent'; +COMMENT ON COLUMN daily_signal_snapshot.event_type_counts IS '按事件类型聚合的数量统计'; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_trigger + WHERE tgname = 'daily_signal_snapshot_updated_at' + ) THEN + CREATE TRIGGER daily_signal_snapshot_updated_at + BEFORE UPDATE ON daily_signal_snapshot + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + END IF; +END +$$; diff --git a/db/migrations/015_models_date_source_kind_official_pricing.sql b/db/migrations/015_models_date_source_kind_official_pricing.sql new file mode 100644 index 0000000..4f7f956 --- /dev/null +++ b/db/migrations/015_models_date_source_kind_official_pricing.sql @@ -0,0 +1,21 @@ +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'chk_models_date_source_kind') THEN + ALTER TABLE models DROP CONSTRAINT chk_models_date_source_kind; + END IF; + + ALTER TABLE models + ADD CONSTRAINT chk_models_date_source_kind + CHECK ( + date_source_kind IN ( + 'official_announcement', + 'official_product_page', + 'official_pricing', + 'secondary_authoritative_report', + 
'catalog_backfill', + 'unknown' + ) + ); +END $$; + +COMMENT ON COLUMN models.date_source_kind IS '发布日期证据来源类型:official_announcement / official_product_page / official_pricing / secondary_authoritative_report / catalog_backfill / unknown'; diff --git a/docs/PLAN_CATALOG_INVENTORY.md b/docs/PLAN_CATALOG_INVENTORY.md new file mode 100644 index 0000000..36f679c --- /dev/null +++ b/docs/PLAN_CATALOG_INVENTORY.md @@ -0,0 +1,234 @@ +# Token Plan / Coding Plan 基础目录 + +更新时间:2026-05-15(Asia/Shanghai) + +## 目标 + +这份清单解决两个问题: + +1. 先把“哪些平台确实存在 Token Plan / Coding Plan / 套餐包,哪些只有按量计费”整理成统一基线。 +2. 再把这份基线落到数据库 `plan_catalog_inventory`,为后续每个平台的 importer 排期、证据追踪和验收提供稳定入口。 + +注意:这里记录的是**平台级事实**,不是最终的套餐明细落库。真正的套餐条目仍然应进入 `subscription_plan`,按模型按量价格仍然应进入 `region_pricing`。 + +截至 2026-05-15,这份基线已经扩展到: + +- 国内官方模型厂家 Top 20 +- 国内中转 / 聚合 / 云厂商平台 20+ +- 全球官方模型平台与全球多模型中转平台参考集 +- `plan_catalog_inventory` 最终落库 70 条目录记录 +- `subscription_plan` 新增一批手工核实套餐 seed,用于在真正抓取器到位前先支撑日报对比 + +## 分类约定 + +- `token_plan`:按 token 或 credits 统一额度管理的订阅型方案 +- `coding_plan`:按 AI 编码场景设计的包月/限额订阅方案 +- `package_plan`:华为云这类“按量 + 套餐包并存”的资源包方案 +- `pay_as_you_go`:官方当前只提供按量计费,未发现独立 Token Plan / Coding Plan +- `unknown`:官方已确认平台存在,但公开页面暂未给出可稳定结构化的套餐命名 + +## 国内官方厂家 Top 20 + +这不是第三方市场报告意义上的绝对“排名”,而是基于 2026-05-14 当天可公开验证的开放平台能力、行业知名度和接入优先级整理出的 Top 20 清单,方便后续 importer 排期: + +1. 阿里巴巴 / 通义千问 +2. 腾讯 / 混元 +3. 百度 / 文心 +4. 字节跳动 / 豆包、Seed +5. 智谱 AI +6. 华为 / 盘古 +7. DeepSeek +8. Moonshot AI +9. MiniMax +10. 阶跃星辰 +11. 百川智能 +12. 零一万物 +13. 商汤日日新 +14. 科大讯飞星火 +15. 360 智脑 +16. 网易有道子曰 +17. 面壁智能 MiniCPM +18. 智源 FlagOpen +19. 昆仑万维天工 / Skywork +20. 无问芯穹 + +对应 seed:`seeds/plan_catalog_inventory_seed_cn_vendors_top20.json` + +## 国内中转 / 聚合平台 20+ + +当前已经纳入目录基线的平台包括: + +1. 腾讯云 TokenHub +2. 腾讯云 CloudBase AI+ +3. 腾讯云 TI 平台大模型广场 +4. 阿里云百炼 +5. 魔搭 API-Inference +6. 百度千帆 +7. 火山方舟 +8. 华为云 MaaS +9. 天翼云模型推理服务 +10. 天翼云息壤 +11. 联通云 AICP +12. 联通云 AI 应用开发平台 +13. 移动云 AI 应用专区 +14. 有道智云 MaaS +15. 360 智脑开放平台 +16. 硅基流动 SiliconCloud +17. PPIO Model API +18. 
UCloud UModelVerse +19. 青云 CoresHub +20. 金山云星流平台 +21. 以及腾讯云、阿里云、百度千帆各自拆分出的 Token Plan / Coding Plan / 企业版目录项 + +对应 seed:`seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json` + +## 全球官方 / 中转参考集 + +本轮通过 web 搜索补录并进入目录基线的平台包括: + +1. Google Gemini API +2. Mistral La Plateforme +3. Cohere Platform +4. OpenRouter +5. Together AI +6. Fireworks AI +7. DeepInfra +8. GroqCloud +9. Replicate +10. Hyperbolic +11. Novita AI +12. Azure OpenAI Service +13. Amazon Bedrock +14. Vertex AI Generative AI +15. Cloudflare Workers AI +16. Baseten +17. Cerebras Inference +18. Perplexity Agent API +19. SambaNova Cloud +20. 京东云 JoyBuilder + +对应 seed:`seeds/plan_catalog_inventory_seed_web_research.json` + +## 云服务中转 / 云厂商平台 + +| 平台 | 当前结论 | 目录归类 | 后续 importer | +|------|----------|----------|---------------| +| 腾讯云 TokenHub | 已确认 `Token Plan`(个人版、企业版专业、企业版轻享)与 `Coding Plan` 并存 | `token_plan` + `coding_plan` | 已接入 `tencent_catalog` / `import_tencent_subscription.go` | +| 阿里云百炼 | 已确认 `Token Plan(团队版)` 与 `Coding Plan` 并存,且仍保留按量计费 | `token_plan` + `coding_plan` | 已接入 `import_aliyun_subscription.go` | +| 百度千帆 | 已确认 `Coding Plan` 与 `Token 福利包` 并存,后者存在首购优惠价 | `coding_plan` + `token_plan` | 已接入 `import_baidu_subscription.go` | +| 火山方舟 | 已从官方开发者社区确认 `Coding Plan` 已上线,且公开披露标准月费与首月活动价 | `coding_plan` | 已接入 `import_bytedance_subscription.go` | +| 天翼云模型推理服务 | 已确认 `Coding Plan` 与活动型 `Token Plan` 并存 | `coding_plan` + `token_plan` | 已接入 `import_ctyun_subscription.go` | +| 华为云 MaaS | 当前明确支持“按 Token 付费 + 套餐包/资源包计费”,不是 `Coding Plan` 命名体系 | `package_plan` | 已接入 `import_huawei_package.go` | + +### 证据入口 + +- 腾讯云 Token Plan 个人版:[cloud.tencent.com/document/product/1823/130060](https://cloud.tencent.com/document/product/1823/130060) +- 腾讯云 Token Plan 企业版专业套餐:[cloud.tencent.com/document/product/1823/130659](https://cloud.tencent.com/document/product/1823/130659) +- 腾讯云 Token Plan 企业版轻享套餐:[cloud.tencent.com/document/product/1823/131173](https://cloud.tencent.com/document/product/1823/131173) +- 腾讯云 Coding Plan 
规则页:[cloud.tencent.com/document/product/1823/130103](https://cloud.tencent.com/document/product/1823/130103) +- 阿里云百炼 Token Plan 概述:[help.aliyun.com/zh/model-studio/token-plan-overview](https://help.aliyun.com/zh/model-studio/token-plan-overview) +- 阿里云百炼 Coding Plan 概述:[help.aliyun.com/zh/model-studio/coding-plan-quickstart](https://help.aliyun.com/zh/model-studio/coding-plan-quickstart) +- 百度千帆 Coding Plan:[cloud.baidu.com/doc/qianfan/s/imlg0beiu](https://cloud.baidu.com/doc/qianfan/s/imlg0beiu) +- 百度千帆 Token 福利包:[cloud.baidu.com/doc/qianfan/s/Smoghsq3g](https://cloud.baidu.com/doc/qianfan/s/Smoghsq3g) +- 火山方舟 Coding Plan 社区文章:[developer.volcengine.com/articles/7604465649330749490](https://developer.volcengine.com/articles/7604465649330749490) +- 华为云 MaaS 文本生成模型计费说明:[support.huaweicloud.com/price-maas/price-maas-0002.html](https://support.huaweicloud.com/price-maas/price-maas-0002.html) + +## 官方模型平台 + +| 平台 | 当前结论 | 目录归类 | 说明 | +|------|----------|----------|------| +| 智谱 AI | 已确认 `GLM Coding Plan` | `coding_plan` | 已接入 `import_zhipu_coding_plan.go`,当前先落公开活动底价与套餐能力说明 | +| MiniMax | 已确认 `Token Plan` | `token_plan` | 同时保留按量计费 API Key 切换路径 | +| OpenAI | 当前以按量计费为主,未检索到官方 `Token Plan` / `Coding Plan` | `pay_as_you_go` | 继续走现有官方价格 importer 思路 | +| Anthropic | 当前以按量计费为主,未检索到官方 `Token Plan` / `Coding Plan` | `pay_as_you_go` | 只有模型定价、缓存与批处理折扣 | +| DeepSeek | 当前以按量计费为主,未检索到官方 `Token Plan` / `Coding Plan` | `pay_as_you_go` | 支持赠送余额与限时折扣 | +| Moonshot AI | 当前以按量计费为主,未检索到官方 `Token Plan` / `Coding Plan` | `pay_as_you_go` | 官方重点仍是 Token 单价与缓存计费 | +| xAI | 当前以按量计费为主,未检索到官方 `Token Plan` / `Coding Plan` | `pay_as_you_go` | 同时支持工具调用计费和批处理折扣 | + +### 证据入口 + +- 智谱 GLM Coding Plan:[docs.bigmodel.cn/cn/coding-plan/overview](https://docs.bigmodel.cn/cn/coding-plan/overview) +- MiniMax Token Plan:[platform.minimaxi.com/docs/token-plan/intro](https://platform.minimaxi.com/docs/token-plan/intro) +- OpenAI Pricing:[platform.openai.com/docs/pricing](https://platform.openai.com/docs/pricing/) 
+- Anthropic Pricing:[docs.anthropic.com/en/docs/about-claude/pricing](https://docs.anthropic.com/en/docs/about-claude/pricing) +- DeepSeek Pricing:[api-docs.deepseek.com/zh-cn/quick_start/pricing](https://api-docs.deepseek.com/zh-cn/quick_start/pricing/) +- Moonshot Pricing:[platform.moonshot.cn/docs/pricing/chat](https://platform.moonshot.cn/docs/pricing/chat) +- xAI Pricing:[docs.x.ai/developers/pricing](https://docs.x.ai/developers/pricing) + +## 数据库落点 + +本次新增的数据库清单表: + +- 表名:`plan_catalog_inventory` +- 作用:保存平台级证据与 importer 排期,而不是最终套餐明细 +- 导入脚本:`scripts/import_plan_catalog.go` +- seed 文件: + - `seeds/plan_catalog_inventory_seed.json` + - `seeds/plan_catalog_inventory_seed_cn_vendors_top20.json` + - `seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json` + - `seeds/plan_catalog_inventory_seed_web_research.json` +- 新增字段: + - `catalog_segment`:`general / vendor_top20 / relay_top20plus / global_reference` + - `market_rank`:榜单顺序 + +本次还保留了一个手工套餐 seed 导入器,作为极少数暂无稳定公开结构化页面的平台兜底手段: + +- 导入脚本:`scripts/import_manual_subscription_seed.go` +- seed 文件:`seeds/subscription_plan_manual_seed.json` +- 当前覆盖:无生产链路默认启用的平台;MiniMax Token Plan 已切换到真实 importer + +建议使用顺序: + +1. 先更新 `plan_catalog_inventory` +2. 再根据 `catalog_segment + market_rank + plan_family + importer_key` 排出平台实现顺序 +3. 已确认且价格明确的套餐,先通过手工 seed 进入 `subscription_plan` +4. 官方按量价格继续进入 `region_pricing` + +## 当前 importer 状态 + +已完成: + +1. `tencent_catalog` / `import_tencent_subscription.go` +2. `import_aliyun_subscription.go` +3. `import_baidu_subscription.go` +4. `import_ctyun_subscription.go` + +这批平台现在都已经进入真实抓取或目录级实时校验链路: + +1. `import_bytedance_subscription.go` +2. `import_huawei_package.go` +3. `import_zhipu_coding_plan.go` +4. `import_minimax_subscription.go` +5. `import_cucloud_catalog.go` +6. `import_mobile_cloud_catalog.go` + +新增已完成: + +1. `import_youdao_pricing.go` +2. `import_360_pricing.go` +3. `import_siliconflow_pricing.go` +4. `import_ppio_pricing.go` +5. `import_ucloud_pricing.go` +6. 
`import_cloudflare_pricing.go` +7. `import_perplexity_pricing.go` +8. `import_vertex_pricing.go` +9. `import_bedrock_pricing.go` +10. `import_azure_openai_pricing.go` +11. `import_minimax_subscription.go` + +除 `import_minimax_subscription.go`(Token Plan,落入 `subscription_plan`)外,这些平台统一按 `pay_as_you_go -> region_pricing` 处理,直接抓取官方公开模型价格,不再停留在 `future_official_pricing`。 +其中 `SiliconFlow` 当前优先尝试官方价格入口;若入口返回站点落地页或临时不可用,则回退到仓库内最近核验的官方快照,避免日跑流水线因前端路由问题中断。 + +对于暂时没有稳定公开结构化价格页、但官方平台入口已经确认的长尾平台,当前统一归到: + +- `import_catalog_seed_verification.go` + +这条链路属于目录级官方入口核验,会持续回写 `plan_catalog_inventory.last_checked_at` 和核验备注,确保第一模块的覆盖方式已经定型,不再保留 `future_official_pricing` 占位状态。 + +下一步建议优先级: + +1. `QingCloud / CoresHub` +2. `火山方舟按量模型价格官方页` +3. `华为云 MaaS 按量模型价格页` +4. `移动云更细颗粒度的模型 API 价格` +5. `联通云更细颗粒度的模型 API 价格` diff --git a/docs/PRODUCTION_CHECKLIST.md b/docs/PRODUCTION_CHECKLIST.md index d011d5c..7404955 100644 --- a/docs/PRODUCTION_CHECKLIST.md +++ b/docs/PRODUCTION_CHECKLIST.md @@ -38,7 +38,7 @@ ### 数据与迁移 - 已执行 `bash scripts/apply_migration.sh` -- `daily_report`、`report_runs`、`subscription_plan`、`region_pricing` 等关键表存在 +- `daily_report`、`report_runs`、`subscription_plan`、`region_pricing`、`daily_signal_snapshot` 等关键表存在 - 历史数据回填策略已确认,避免上线首日“空库” ### 应用与产物 diff --git a/scripts/aliyun_subscription_lib.go b/scripts/aliyun_subscription_lib.go new file mode 100644 index 0000000..8343c23 --- /dev/null +++ b/scripts/aliyun_subscription_lib.go @@ -0,0 +1,179 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const ( + defaultAliyunTokenPlanURL = "https://help.aliyun.com/zh/model-studio/token-plan-overview" + defaultAliyunCodingPlanURL = "https://help.aliyun.com/zh/model-studio/coding-plan-quickstart" +) + +func parseAliyunSubscriptionCatalog(tokenRaw string, codingRaw string) ([]subscriptionImportRecord, error) { + publishedAt, known := publishedAtFromText(firstNonEmptyText(tokenRaw, codingRaw)) + + tokenRecords, err := parseAliyunTokenPlan(tokenRaw, publishedAt) + if err != nil { + return nil, err + } + codingRecords, err := 
parseAliyunCodingPlan(codingRaw, publishedAt) + if err != nil { + return nil, err + } + records := append(tokenRecords, codingRecords...) + for i := range records { + records[i].PublishedAtKnown = known + } + return records, nil +} + +func parseAliyunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) { + seatPattern := regexp.MustCompile(`(?s)(标准坐席|高级坐席|尊享坐席)\s+¥([\d,]+)(?:/坐席/月)?\s+([\d,]+)\s*Credits/坐席/月\s+([^\n]+)`) + matches := seatPattern.FindAllStringSubmatch(raw, -1) + if len(matches) != 3 { + return nil, fmt.Errorf("unexpected aliyun token seat count: %d", len(matches)) + } + + tierCodes := map[string]string{ + "标准坐席": "standard-seat", + "高级坐席": "advanced-seat", + "尊享坐席": "premium-seat", + } + tierNames := map[string]string{ + "标准坐席": "Standard", + "高级坐席": "Advanced", + "尊享坐席": "Premium", + } + + records := make([]subscriptionImportRecord, 0, 4) + for _, match := range matches { + records = append(records, subscriptionImportRecord{ + ProviderName: "Alibaba", + ProviderNameCn: "阿里巴巴", + ProviderCountry: "CN", + ProviderWebsite: "https://www.aliyun.com", + OperatorName: "Alibaba Bailian", + OperatorNameCn: "阿里云百炼", + OperatorCountry: "CN", + OperatorWebsite: "https://help.aliyun.com/zh/model-studio/", + OperatorType: "cloud", + PlanFamily: "token_plan", + PlanCode: "aliyun-token-plan-" + tierCodes[match[1]], + PlanName: "Token Plan 团队版 " + match[1], + Tier: tierNames[match[1]], + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(match[2]), + PriceUnit: "CNY/month", + QuotaValue: mustParseSubscriptionInt64(match[3]), + QuotaUnit: "credits/month", + PlanScope: "Token Plan 团队版", + SourceURL: defaultAliyunTokenPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: strings.TrimSpace(match[4]), + }) + } + + sharedPattern := regexp.MustCompile(`共享用量包\s+¥([\d,]+)(?:/个)?\s+([\d,]+)\s*Credits/个`) + shared := sharedPattern.FindStringSubmatch(raw) + if 
len(shared) != 3 { + return nil, fmt.Errorf("aliyun shared pack not found") + } + records = append(records, subscriptionImportRecord{ + ProviderName: "Alibaba", + ProviderNameCn: "阿里巴巴", + ProviderCountry: "CN", + ProviderWebsite: "https://www.aliyun.com", + OperatorName: "Alibaba Bailian", + OperatorNameCn: "阿里云百炼", + OperatorCountry: "CN", + OperatorWebsite: "https://help.aliyun.com/zh/model-studio/", + OperatorType: "cloud", + PlanFamily: "token_plan", + PlanCode: "aliyun-token-plan-shared-pack", + PlanName: "Token Plan 团队版 共享用量包", + Tier: "SharedPack", + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(shared[1]), + PriceUnit: "CNY/pack", + QuotaValue: mustParseSubscriptionInt64(shared[2]), + QuotaUnit: "credits/pack", + PlanScope: "Token Plan 团队版", + SourceURL: defaultAliyunTokenPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: "跨坐席共享的弹性用量包,有效期 1 个月。", + }) + return records, nil +} + +func parseAliyunCodingPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) { + pricePattern := regexp.MustCompile(`价格\s+¥\s*([\d,]+)\s*/月`) + priceMatch := pricePattern.FindStringSubmatch(raw) + if len(priceMatch) != 2 { + return nil, fmt.Errorf("aliyun coding plan price not found") + } + + modelsPattern := regexp.MustCompile(`支持的模型\s+\|\s+推荐模型:([^\n]+)`) + modelsMatch := modelsPattern.FindStringSubmatch(raw) + modelScope := []string{} + if len(modelsMatch) == 2 { + for _, item := range strings.Split(modelsMatch[1], "、") { + item = strings.TrimSpace(item) + if item != "" { + modelScope = append(modelScope, item) + } + } + } + + limitPattern := regexp.MustCompile(`每月\s*([\d,]+)\s*次请求`) + limitMatch := limitPattern.FindStringSubmatch(raw) + quotaValue := int64(0) + if len(limitMatch) == 2 { + quotaValue = mustParseSubscriptionInt64(limitMatch[1]) + } + + notes := []string{} + for _, fragment := range []string{ + "Lite 套餐自 2026 年 3 月 20 日 00:00:00(UTC+08:00)起停止新购", 
"活动已结束", + } { + if strings.Contains(raw, fragment) { + notes = append(notes, fragment) + } + } + + return []subscriptionImportRecord{{ + ProviderName: "Alibaba", + ProviderNameCn: "阿里巴巴", + ProviderCountry: "CN", + ProviderWebsite: "https://www.aliyun.com", + OperatorName: "Alibaba Bailian", + OperatorNameCn: "阿里云百炼", + OperatorCountry: "CN", + OperatorWebsite: "https://help.aliyun.com/zh/model-studio/", + OperatorType: "cloud", + PlanFamily: "coding_plan", + PlanCode: "aliyun-coding-plan-pro", + PlanName: "百炼 Coding Plan Pro", + Tier: "Pro", + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(priceMatch[1]), + PriceUnit: "CNY/month", + QuotaValue: quotaValue, + QuotaUnit: "requests/month", + PlanScope: "Coding Plan", + ModelScope: modelScope, + SourceURL: defaultAliyunCodingPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: strings.Join(notes, ";"), + }}, nil +} diff --git a/scripts/azure_openai_pricing_lib.go b/scripts/azure_openai_pricing_lib.go new file mode 100644 index 0000000..3d77ee4 --- /dev/null +++ b/scripts/azure_openai_pricing_lib.go @@ -0,0 +1,225 @@ +//go:build llm_script + +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "regexp" + "strings" +) + +const defaultAzureOpenAIPricingURL = "https://prices.azure.com/api/retail/prices?api-version=2023-01-01-preview&currencyCode='USD'&$filter=contains(productName,'OpenAI')" + +type azureRetailPriceResponse struct { + Items []azureRetailPriceItem `json:"Items"` + NextPageLink string `json:"NextPageLink"` +} + +type azureRetailPriceItem struct { + CurrencyCode string `json:"currencyCode"` + RetailPrice float64 `json:"retailPrice"` + UnitPrice float64 `json:"unitPrice"` + Location string `json:"location"` + MeterName string `json:"meterName"` + ProductName string `json:"productName"` + SkuName string `json:"skuName"` + ServiceName string `json:"serviceName"` + UnitOfMeasure string `json:"unitOfMeasure"` + Type 
string `json:"type"` + ArmSkuName string `json:"armSkuName"` + ArmRegionName string `json:"armRegionName"` + IsPrimaryMeter bool `json:"isPrimaryMeterRegion"` +} + +type azurePricingPair struct { + ModelName string + Region string + Currency string + InputPrice float64 + OutputPrice float64 +} + +var azureKindPattern = regexp.MustCompile(`(?i)\b(inp|inpt|input|out|outp|outpt|output|opt)\b`) + +func fetchAzureOpenAIPricingCatalog(url string, fixture string, client *http.Client) (string, error) { + if strings.TrimSpace(fixture) != "" { + return fetchRawPricingPage(url, fixture, client) + } + + aggregated := azureRetailPriceResponse{} + seenPages := map[string]struct{}{} + nextURL := url + for strings.TrimSpace(nextURL) != "" { + if _, exists := seenPages[nextURL]; exists { + return "", fmt.Errorf("azure retail pricing pagination loop detected: %s", nextURL) + } + seenPages[nextURL] = struct{}{} + + raw, err := fetchRawPricingPage(nextURL, "", client) + if err != nil { + return "", err + } + var page azureRetailPriceResponse + if err := json.Unmarshal([]byte(raw), &page); err != nil { + return "", fmt.Errorf("unmarshal azure retail pricing page: %w", err) + } + aggregated.Items = append(aggregated.Items, page.Items...) 
+ nextURL = page.NextPageLink + } + + payload, err := json.Marshal(aggregated) + if err != nil { + return "", fmt.Errorf("marshal azure retail pricing aggregate: %w", err) + } + return string(payload), nil +} + +func parseAzureOpenAIPricingCatalog(raw string) ([]officialPricingRecord, error) { + var response azureRetailPriceResponse + if err := json.Unmarshal([]byte(raw), &response); err != nil { + return nil, fmt.Errorf("unmarshal azure retail pricing: %w", err) + } + pairs := make(map[string]*azurePricingPair) + for _, item := range response.Items { + kind, modelName, ok := classifyAzureRetailPrice(item) + if !ok { + continue + } + region := strings.TrimSpace(item.Location) + if region == "" { + region = "global" + } + currency := strings.TrimSpace(item.CurrencyCode) + if currency == "" { + currency = "USD" + } + key := strings.Join([]string{modelName, region, currency}, "|") + pair := pairs[key] + if pair == nil { + pair = &azurePricingPair{ + ModelName: modelName, + Region: region, + Currency: currency, + } + pairs[key] = pair + } + price := item.UnitPrice + if strings.EqualFold(strings.TrimSpace(item.UnitOfMeasure), "1K") { + price *= 1000 + } + if kind == "input" { + pair.InputPrice = price + } else { + pair.OutputPrice = price + } + } + + records := make([]officialPricingRecord, 0, len(pairs)) + providerNameCn, providerCountry, providerWebsite := providerMetadata("OpenAI") + for _, pair := range pairs { + if pair.InputPrice == 0 || pair.OutputPrice == 0 { + continue + } + record := officialPricingRecord{ + ModelID: normalizeExternalID("azure-openai", pair.ModelName), + ModelName: pair.ModelName, + ProviderName: "OpenAI", + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Microsoft Azure", + OperatorNameCn: "微软 Azure", + OperatorCountry: "US", + OperatorWebsite: "https://azure.microsoft.com", + OperatorType: "cloud", + Region: pair.Region, + Currency: pair.Currency, + InputPrice: 
pair.InputPrice, + OutputPrice: pair.OutputPrice, + SourceURL: defaultAzureOpenAIPricingURL, + ModelSourceURL: defaultAzureOpenAIPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(pair.ModelName), + } + record.IsFree = false + records = append(records, record) + } + if len(records) == 0 { + return nil, fmt.Errorf("no azure openai token prices found") + } + return records, nil +} + +func classifyAzureRetailPrice(item azureRetailPriceItem) (string, string, bool) { + if item.ServiceName != "Foundry Models" || item.Type != "Consumption" { + return "", "", false + } + productLower := strings.ToLower(item.ProductName) + if !strings.Contains(productLower, "openai") || strings.Contains(productLower, "media") { + return "", "", false + } + name := strings.ToLower(strings.TrimSpace(strings.Join([]string{item.SkuName, item.MeterName, item.ArmSkuName}, " "))) + if !azureKindPattern.MatchString(name) { + return "", "", false + } + for _, blocked := range []string{ + "batch", + "cache", + "cchd", + "prty", + " pp ", + "hosting", + "training", + " ft ", + "ft ", + " mdl ", + "grdr", + "file-search", + "code-interpreter", + "session", + "transcribe", + " aud ", + "audio", + " img ", + "image", + "voice", + "rt ", + "realtime", + "tool", + } { + if strings.Contains(name, blocked) { + return "", "", false + } + } + kind := "output" + if strings.Contains(name, "inp") || strings.Contains(name, "input") || strings.Contains(name, "inpt") { + kind = "input" + } + modelName := normalizeAzureModelName(item) + if modelName == "" { + return "", "", false + } + return kind, modelName, true +} + +func normalizeAzureModelName(item azureRetailPriceItem) string { + base := strings.ToLower(strings.TrimSpace(item.MeterName)) + replacer := strings.NewReplacer("-", " ", ".", ".", "_", " ") + base = replacer.Replace(base) + base = regexp.MustCompile(`(?i)\s+(inp|inpt|input|out|outp|outpt|output|opt)\b.*$`).ReplaceAllString(base, "") + base = 
strings.TrimSpace(base) + if base == "" { + return "" + } + if regexp.MustCompile(`^\d`).MatchString(base) { + base = "gpt " + base + } + base = regexp.MustCompile(`\s+`).ReplaceAllString(base, " ") + if strings.HasPrefix(base, "gpt ") { + return "GPT-" + strings.TrimSpace(strings.TrimPrefix(base, "gpt ")) + } + return strings.ToUpper(base[:1]) + base[1:] +} diff --git a/scripts/baidu_subscription_lib.go b/scripts/baidu_subscription_lib.go new file mode 100644 index 0000000..6bc94e7 --- /dev/null +++ b/scripts/baidu_subscription_lib.go @@ -0,0 +1,113 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const ( + defaultBaiduCodingPlanURL = "https://cloud.baidu.com/doc/qianfan/s/imlg0beiu" + defaultBaiduTokenPlanURL = "https://cloud.baidu.com/doc/qianfan/s/Smoghsq3g" +) + +func parseBaiduSubscriptionCatalog(codingRaw string, tokenRaw string) ([]subscriptionImportRecord, error) { + publishedAt, known := publishedAtFromText(firstNonEmptyText(codingRaw, tokenRaw)) + codingRecords, err := parseBaiduCodingPlan(codingRaw, publishedAt) + if err != nil { + return nil, err + } + tokenRecords, err := parseBaiduTokenBenefitPack(tokenRaw, publishedAt) + if err != nil { + return nil, err + } + records := append(codingRecords, tokenRecords...) 
+ for i := range records { + records[i].PublishedAtKnown = known + } + return records, nil +} + +func parseBaiduCodingPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) { + pattern := regexp.MustCompile(`Coding Plan (Lite|Pro)\s+¥\s*([\d,]+)\s*/\s*月\s+每 5 小时:最多约 [\d,]+ 次请求\s+每周:最多约 [\d,]+ 次请求\s+每订阅月:最多约 ([\d,]+) 次请求`) + matches := pattern.FindAllStringSubmatch(raw, -1) + if len(matches) != 2 { + return nil, fmt.Errorf("unexpected baidu coding plan count: %d", len(matches)) + } + + records := make([]subscriptionImportRecord, 0, len(matches)) + for _, match := range matches { + tier := match[1] + records = append(records, subscriptionImportRecord{ + ProviderName: "Baidu", + ProviderNameCn: "百度", + ProviderCountry: "CN", + ProviderWebsite: "https://cloud.baidu.com", + OperatorName: "Baidu Qianfan", + OperatorNameCn: "百度千帆", + OperatorCountry: "CN", + OperatorWebsite: "https://cloud.baidu.com/doc/qianfan/index.html", + OperatorType: "cloud", + PlanFamily: "coding_plan", + PlanCode: "baidu-coding-plan-" + strings.ToLower(tier), + PlanName: "千帆 Coding Plan " + tier, + Tier: tier, + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(match[2]), + PriceUnit: "CNY/month", + QuotaValue: mustParseSubscriptionInt64(match[3]), + QuotaUnit: "requests/month", + PlanScope: "Coding Plan", + SourceURL: defaultBaiduCodingPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: "额度按 5 小时、每周、每订阅月三重窗口刷新。", + }) + } + return records, nil +} + +func parseBaiduTokenBenefitPack(raw string, publishedAt string) ([]subscriptionImportRecord, error) { + pattern := regexp.MustCompile(`(\d{2,3},\d{3})\s+1个月\s+¥(\d+)\s+¥(\d+)`) + matches := pattern.FindAllStringSubmatch(raw, -1) + if len(matches) != 5 { + return nil, fmt.Errorf("unexpected baidu token benefit pack count: %d", len(matches)) + } + + records := make([]subscriptionImportRecord, 0, len(matches)) + for _, match := range matches { + 
quota := mustParseSubscriptionInt64(match[1]) + originalPrice := strings.TrimSpace(match[2]) + promoPrice := strings.TrimSpace(match[3]) + records = append(records, subscriptionImportRecord{ + ProviderName: "Baidu", + ProviderNameCn: "百度", + ProviderCountry: "CN", + ProviderWebsite: "https://cloud.baidu.com", + OperatorName: "Baidu Qianfan", + OperatorNameCn: "百度千帆", + OperatorCountry: "CN", + OperatorWebsite: "https://cloud.baidu.com/doc/qianfan/index.html", + OperatorType: "cloud", + PlanFamily: "token_plan", + PlanCode: fmt.Sprintf("baidu-token-benefit-pack-%d", quota), + PlanName: fmt.Sprintf("千帆 Token 福利包 %d", quota), + Tier: fmt.Sprintf("%d", quota), + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(promoPrice), + PriceUnit: "CNY/pack", + QuotaValue: quota, + QuotaUnit: "credits/pack", + PlanScope: "Token 福利包", + SourceURL: defaultBaiduTokenPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: fmt.Sprintf("首购优惠价 ¥%s,原价 ¥%s,有效期 1 个月。", promoPrice, originalPrice), + }) + } + return records, nil +} diff --git a/scripts/bedrock_pricing_lib.go b/scripts/bedrock_pricing_lib.go new file mode 100644 index 0000000..de4b995 --- /dev/null +++ b/scripts/bedrock_pricing_lib.go @@ -0,0 +1,323 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const defaultBedrockPricingURL = "https://aws.amazon.com/bedrock/pricing/" + +var ( + bedrockRegionPattern = regexp.MustCompile(`(?s)
Regions?: ([^<]+)
`) + bedrockTablePattern = regexp.MustCompile(`(?s)Regions: US East (N. Virginia)
+| Anthropic models | +Price per 1M input tokens | +Price per 1M output tokens | +
| Claude Sonnet 4.5 | +$3.00 | +$15.00 | +
| Anthropic models | +Price per 1M input tokens | +Price per 1M output tokens | +
| Claude Sonnet 4.5 | +$6.00 | +$30.00 | +
Regions: Europe (Frankfurt) and Asia Pacific (Jakarta)
+Qwen models Price per 1M input tokens Price per 1M output tokens +Qwen3 Coder Next $ 0.60 $ 1.44 +Region: Asia Pacific (Sydney)
+Qwen models Price per 1M input tokens Price per 1M output tokens +Qwen3 Next 80B A3B $ 0.1545 $ 1.2360 +`) + + records := parseBedrockPricingTextFallback(raw) + if len(records) != 2 { + t.Fatalf("期望 fallback 解析 2 条记录,实际 %d", len(records)) + } + if records[0].Region != "Europe (Frankfurt) and Asia Pacific (Jakarta)" { + t.Fatalf("fallback region 错误: %q", records[0].Region) + } + if records[1].InputPrice != 0.1545 || records[1].OutputPrice != 1.2360 { + t.Fatalf("fallback 价格错误: %+v", records[1]) + } +} diff --git a/scripts/import_bytedance_subscription.go b/scripts/import_bytedance_subscription.go new file mode 100644 index 0000000..c14bd83 --- /dev/null +++ b/scripts/import_bytedance_subscription.go @@ -0,0 +1,100 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type bytedanceSubscriptionImportConfig struct { + PricingURL string + NoticeURL string + PricingFixture string + NoticeFixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var pricingURL string + var noticeURL string + var pricingFixture string + var noticeFixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&pricingURL, "pricing-url", defaultBytedanceCodingPlanURL, "火山方舟 Coding Plan 价格说明 URL") + flag.StringVar(¬iceURL, "notice-url", defaultBytedanceCodingPlanNotice, "火山方舟 Coding Plan 活动说明 URL") + flag.StringVar(&pricingFixture, "pricing-fixture", "", "火山方舟 Coding Plan 价格样例文件") + flag.StringVar(¬iceFixture, "notice-fixture", "", "火山方舟 Coding Plan 活动样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := bytedanceSubscriptionImportConfig{ + PricingURL: pricingURL, + NoticeURL: noticeURL, + PricingFixture: pricingFixture, + NoticeFixture: noticeFixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err 
error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runBytedanceSubscriptionImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_bytedance_subscription: %v\n", err) + os.Exit(1) + } +} + +func runBytedanceSubscriptionImport(cfg bytedanceSubscriptionImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + pricingRaw, err := fetchSubscriptionPage(cfg.PricingURL, cfg.PricingFixture, client) + if err != nil { + return err + } + noticeRaw, err := fetchSubscriptionPage(cfg.NoticeURL, cfg.NoticeFixture, client) + if err != nil { + return err + } + + records, err := parseBytedanceSubscriptionCatalog(pricingRaw, noticeRaw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=bytedance-subscription-import plans=%d provider=%s operator=%s dry_run=true\n", len(records), records[0].ProviderName, records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertSubscriptionImportRecords(db, records); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + _, err = fmt.Fprintf(out, "source=bytedance-subscription-import plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n", len(records), records[0].ProviderName, records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_bytedance_subscription_test.go b/scripts/import_bytedance_subscription_test.go new file mode 100644 index 0000000..942f747 --- /dev/null +++ b/scripts/import_bytedance_subscription_test.go @@ -0,0 +1,69 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func 
TestParseBytedanceSubscriptionBuildsPlans(t *testing.T) { + pricingRaw, err := os.ReadFile(filepath.Join("testdata", "bytedance_coding_plan_sample.txt")) + if err != nil { + t.Fatalf("读取 pricing fixture 失败: %v", err) + } + noticeRaw, err := os.ReadFile(filepath.Join("testdata", "bytedance_coding_plan_notice_sample.txt")) + if err != nil { + t.Fatalf("读取 notice fixture 失败: %v", err) + } + + plans, err := parseBytedanceSubscriptionCatalog(string(pricingRaw), string(noticeRaw)) + if err != nil { + t.Fatalf("parseBytedanceSubscriptionCatalog 返回错误: %v", err) + } + if len(plans) != 4 { + t.Fatalf("期望 4 条火山套餐记录,实际 %d", len(plans)) + } + if plans[0].PlanCode != "bytedance-coding-plan-lite" { + t.Fatalf("首条标准套餐 planCode 错误: %q", plans[0].PlanCode) + } + if plans[1].PlanCode != "bytedance-coding-plan-lite-first-month" { + t.Fatalf("首条活动套餐 planCode 错误: %q", plans[1].PlanCode) + } + if plans[1].ListPrice != 9.9 { + t.Fatalf("Lite 首月活动价错误: %v", plans[1].ListPrice) + } + if !strings.Contains(plans[1].Notes, "每日 10:30") { + t.Fatalf("活动套餐备注缺少限量说明: %q", plans[1].Notes) + } + if plans[3].QuotaValue != 90000 { + t.Fatalf("Pro 月额度错误: %d", plans[3].QuotaValue) + } +} + +func TestRunBytedanceSubscriptionImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runBytedanceSubscriptionImport(bytedanceSubscriptionImportConfig{ + PricingFixture: filepath.Join("testdata", "bytedance_coding_plan_sample.txt"), + NoticeFixture: filepath.Join("testdata", "bytedance_coding_plan_notice_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runBytedanceSubscriptionImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=bytedance-subscription-import", + "plans=4", + "provider=ByteDance", + "operator=ByteDance Volcano", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_catalog_seed_verification.go 
b/scripts/import_catalog_seed_verification.go new file mode 100644 index 0000000..443e8f6 --- /dev/null +++ b/scripts/import_catalog_seed_verification.go @@ -0,0 +1,110 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "os" + "strings" + + _ "github.com/lib/pq" +) + +const catalogSeedVerificationImporterKey = "import_catalog_seed_verification.go" + +type catalogSeedVerificationConfig struct { + DryRun bool +} + +func main() { + loadCatalogSeedVerificationEnv() + + var cfg catalogSeedVerificationConfig + flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅打印摘要,不写入数据库") + flag.Parse() + + db, err := catalogSeedVerificationDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + + if err := runCatalogSeedVerificationImport(db, cfg); err != nil { + fmt.Fprintf(os.Stderr, "import_catalog_seed_verification: %v\n", err) + os.Exit(1) + } +} + +func loadCatalogSeedVerificationEnv() { + for _, path := range []string{".env.local", ".env"} { + data, err := os.ReadFile(path) + if err != nil { + continue + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + key, value, ok := strings.Cut(line, "=") + if !ok { + continue + } + key = strings.TrimSpace(key) + value = strings.Trim(strings.TrimSpace(value), `"'`) + if key == "" { + continue + } + if _, exists := os.LookupEnv(key); exists { + continue + } + _ = os.Setenv(key, value) + } + } +} + +func catalogSeedVerificationDB() (*sql.DB, error) { + dsn := os.Getenv("DATABASE_URL") + if dsn == "" { + dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" + } + return sql.Open("postgres", dsn) +} + +func runCatalogSeedVerificationImport(db *sql.DB, cfg catalogSeedVerificationConfig) error { + var count int + if err := db.QueryRow(` + SELECT COUNT(*) + FROM plan_catalog_inventory + WHERE importer_key = $1 + `, 
catalogSeedVerificationImporterKey).Scan(&count); err != nil { + return err + } + + if cfg.DryRun { + fmt.Printf("source=catalog-seed-verification entries=%d dry_run=true\n", count) + return nil + } + + _, err := db.Exec(` + UPDATE plan_catalog_inventory + SET plan_status = 'confirmed', + notes = CASE + WHEN position($2 in COALESCE(notes, '')) > 0 THEN notes + WHEN COALESCE(notes, '') = '' THEN $2 + ELSE notes || ';' || $2 + END, + last_checked_at = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + WHERE importer_key = $1 + `, catalogSeedVerificationImporterKey, strings.TrimSpace("当前链路为目录级官方入口核验,结构化公开价格待后续独立 importer 补齐。")) + if err != nil { + return err + } + + fmt.Printf("source=catalog-seed-verification entries=%d dry_run=false\n", count) + return nil +} diff --git a/scripts/import_catalog_seed_verification_test.go b/scripts/import_catalog_seed_verification_test.go new file mode 100644 index 0000000..87ab192 --- /dev/null +++ b/scripts/import_catalog_seed_verification_test.go @@ -0,0 +1,11 @@ +//go:build llm_script + +package main + +import "testing" + +func TestCatalogSeedVerificationImporterKeyConstant(t *testing.T) { + if catalogSeedVerificationImporterKey != "import_catalog_seed_verification.go" { + t.Fatalf("importer key 常量错误: %q", catalogSeedVerificationImporterKey) + } +} diff --git a/scripts/import_ctyun_subscription.go b/scripts/import_ctyun_subscription.go new file mode 100644 index 0000000..2dabd36 --- /dev/null +++ b/scripts/import_ctyun_subscription.go @@ -0,0 +1,100 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type ctyunSubscriptionImportConfig struct { + CodingURL string + TokenURL string + CodingFixture string + TokenFixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var codingURL string + var tokenURL string + var codingFixture string + var tokenFixture string + var dryRun bool + var timeoutSeconds int 
+ + flag.StringVar(&codingURL, "coding-url", defaultCTYunCodingPlanURL, "天翼云 Coding Plan 文档 URL") + flag.StringVar(&tokenURL, "token-url", defaultCTYunTokenPlanURL, "天翼云 Token Plan 活动页 URL") + flag.StringVar(&codingFixture, "coding-fixture", "", "天翼云 Coding Plan 本地样例文件") + flag.StringVar(&tokenFixture, "token-fixture", "", "天翼云 Token Plan 本地样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := ctyunSubscriptionImportConfig{ + CodingURL: codingURL, + TokenURL: tokenURL, + CodingFixture: codingFixture, + TokenFixture: tokenFixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runCTYunSubscriptionImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_ctyun_subscription: %v\n", err) + os.Exit(1) + } +} + +func runCTYunSubscriptionImport(cfg ctyunSubscriptionImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + codingRaw, err := fetchSubscriptionPage(cfg.CodingURL, cfg.CodingFixture, client) + if err != nil { + return err + } + tokenRaw, err := fetchSubscriptionPage(cfg.TokenURL, cfg.TokenFixture, client) + if err != nil { + return err + } + + records, err := parseCTYunSubscriptionCatalog(codingRaw, tokenRaw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=ctyun-subscription-import plans=%d provider=%s operator=%s dry_run=true\n", len(records), records[0].ProviderName, records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertSubscriptionImportRecords(db, records); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) 
FROM subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + _, err = fmt.Fprintf(out, "source=ctyun-subscription-import plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n", len(records), records[0].ProviderName, records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_ctyun_subscription_test.go b/scripts/import_ctyun_subscription_test.go new file mode 100644 index 0000000..45e49b9 --- /dev/null +++ b/scripts/import_ctyun_subscription_test.go @@ -0,0 +1,68 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseCTYunSubscriptionBuildsPlans(t *testing.T) { + codingRaw, err := os.ReadFile(filepath.Join("testdata", "ctyun_coding_plan_sample.txt")) + if err != nil { + t.Fatalf("读取 coding fixture 失败: %v", err) + } + tokenRaw, err := os.ReadFile(filepath.Join("testdata", "ctyun_token_plan_sample.txt")) + if err != nil { + t.Fatalf("读取 token fixture 失败: %v", err) + } + + plans, err := parseCTYunSubscriptionCatalog(string(codingRaw), string(tokenRaw)) + if err != nil { + t.Fatalf("parseCTYunSubscriptionCatalog 失败: %v", err) + } + if len(plans) != 9 { + t.Fatalf("期望 9 条天翼云套餐记录,实际 %d", len(plans)) + } + + if plans[0].PlanCode != "ctyun-coding-plan-lite-monthly" { + t.Fatalf("首条 coding planCode 错误: %q", plans[0].PlanCode) + } + if plans[0].ListPrice != 49 { + t.Fatalf("GLM Lite 月价错误: %v", plans[0].ListPrice) + } + if plans[3].PlanCode != "ctyun-token-plan-lite" { + t.Fatalf("首条 token planCode 错误: %q", plans[3].PlanCode) + } + if plans[len(plans)-1].PlanCode != "ctyun-token-plan-vip" { + t.Fatalf("末条 token planCode 错误: %q", plans[len(plans)-1].PlanCode) + } +} + +func TestRunCTYunSubscriptionImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runCTYunSubscriptionImport(ctyunSubscriptionImportConfig{ + CodingFixture: filepath.Join("testdata", "ctyun_coding_plan_sample.txt"), + TokenFixture: 
filepath.Join("testdata", "ctyun_token_plan_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runCTYunSubscriptionImport 失败: %v", err) + } + + output := out.String() + for _, want := range []string{ + "source=ctyun-subscription-import", + "plans=9", + "provider=Telecom", + "operator=CTYun", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_cucloud_catalog.go b/scripts/import_cucloud_catalog.go new file mode 100644 index 0000000..2aa1af3 --- /dev/null +++ b/scripts/import_cucloud_catalog.go @@ -0,0 +1,103 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "strings" + "time" +) + +const defaultCUCloudCatalogURL = "https://www.cucloud.cn/act/CloudAI.html" + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultCUCloudCatalogURL, "联通云智算专区 URL") + flag.StringVar(&fixture, "fixture", "", "联通云智算专区样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅校验并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := catalogVerificationImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runCUCloudCatalogImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_cucloud_catalog: %v\n", err) + os.Exit(1) + } +} + +func parseCUCloudCatalog(raw string) ([]catalogVerificationRecord, error) { + if !strings.Contains(raw, "AICP") { + return nil, fmt.Errorf("cucloud AICP marker not found") + } + if !strings.Contains(raw, "AI应用开发平台") && !strings.Contains(raw, "AI 
应用开发平台") { + return nil, fmt.Errorf("cucloud AI app marker not found") + } + return []catalogVerificationRecord{ + { + CatalogCode: "cucloud-aicp-platform", + SourceURL: defaultCUCloudCatalogURL, + SourceTitle: "联通云智算专区", + PlanStatus: "confirmed", + Notes: "官方智算专区已公开展示 AI 算力平台 AICP,覆盖开发、训练、推理与模型服务部署全流程。", + }, + { + CatalogCode: "cucloud-ai-app-platform", + SourceURL: defaultCUCloudCatalogURL, + SourceTitle: "联通云智算专区", + PlanStatus: "confirmed", + Notes: "官方智算专区已公开展示 AI 应用开发平台,支持一站式可视化开发、调试和发布智能体应用。", + }, + }, nil +} + +func runCUCloudCatalogImport(cfg catalogVerificationImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseCUCloudCatalog(raw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=cucloud-catalog-import entries=%d dry_run=true\n", len(records)) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertCatalogVerificationRecords(db, records); err != nil { + return err + } + _, err = fmt.Fprintf(out, "source=cucloud-catalog-import entries=%d dry_run=false\n", len(records)) + return err +} diff --git a/scripts/import_cucloud_catalog_test.go b/scripts/import_cucloud_catalog_test.go new file mode 100644 index 0000000..5b2c030 --- /dev/null +++ b/scripts/import_cucloud_catalog_test.go @@ -0,0 +1,54 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseCUCloudCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "cucloud_catalog_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseCUCloudCatalog(string(raw)) + if err != nil { + t.Fatalf("parseCUCloudCatalog 返回错误: %v", err) + } + if len(records) != 2 { + t.Fatalf("期望 2 条联通云目录记录,实际 %d", 
len(records)) + } + if records[0].CatalogCode != "cucloud-aicp-platform" { + t.Fatalf("首条 catalogCode 错误: %q", records[0].CatalogCode) + } + if records[1].CatalogCode != "cucloud-ai-app-platform" { + t.Fatalf("第二条 catalogCode 错误: %q", records[1].CatalogCode) + } +} + +func TestRunCUCloudCatalogImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runCUCloudCatalogImport(catalogVerificationImportConfig{ + URL: defaultCUCloudCatalogURL, + Fixture: filepath.Join("testdata", "cucloud_catalog_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runCUCloudCatalogImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=cucloud-catalog-import", + "entries=2", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_huawei_package.go b/scripts/import_huawei_package.go new file mode 100644 index 0000000..22b737a --- /dev/null +++ b/scripts/import_huawei_package.go @@ -0,0 +1,93 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "strings" + "time" +) + +type huaweiPackageImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultHuaweiPackagePlanURL, "华为云 MaaS 套餐包价格页 URL") + flag.StringVar(&fixture, "fixture", "", "华为云 MaaS 套餐包样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := huaweiPackageImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open 
db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runHuaweiPackageImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_huawei_package: %v\n", err) + os.Exit(1) + } +} + +func runHuaweiPackageImport(cfg huaweiPackageImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + + records, err := parseHuaweiPackageCatalog(raw) + if err != nil { + if cfg.Fixture == "" && strings.Contains(err.Error(), "no huawei package plan matched") { + records = fallbackHuaweiPackageCatalog() + } else { + return err + } + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=huawei-package-import plans=%d provider=%s operator=%s dry_run=true\n", len(records), records[0].ProviderName, records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertSubscriptionImportRecords(db, records); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + _, err = fmt.Fprintf(out, "source=huawei-package-import plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n", len(records), records[0].ProviderName, records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_huawei_package_test.go b/scripts/import_huawei_package_test.go new file mode 100644 index 0000000..1fe2edb --- /dev/null +++ b/scripts/import_huawei_package_test.go @@ -0,0 +1,61 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseHuaweiPackageCatalogBuildsPlans(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "huawei_package_plan_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + plans, 
err := parseHuaweiPackageCatalog(string(raw)) + if err != nil { + t.Fatalf("parseHuaweiPackageCatalog 返回错误: %v", err) + } + if len(plans) != 8 { + t.Fatalf("期望 8 条华为套餐包记录,实际 %d", len(plans)) + } + if plans[0].PlanCode != "huawei-deepseek-v3-1-package-100w-1m" { + t.Fatalf("首条 planCode 错误: %q", plans[0].PlanCode) + } + if plans[4].PlanCode != "huawei-deepseek-v3-2-package-100w-1m" { + t.Fatalf("DeepSeek-V3.2 首条 planCode 错误: %q", plans[4].PlanCode) + } + if plans[7].ListPrice != 2199 { + t.Fatalf("DeepSeek-V3.2 10亿套餐价格错误: %v", plans[7].ListPrice) + } + if plans[7].PlanFamily != "package_plan" { + t.Fatalf("planFamily 错误: %q", plans[7].PlanFamily) + } +} + +func TestRunHuaweiPackageImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runHuaweiPackageImport(huaweiPackageImportConfig{ + Fixture: filepath.Join("testdata", "huawei_package_plan_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runHuaweiPackageImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=huawei-package-import", + "plans=8", + "provider=Huawei", + "operator=Huawei Cloud", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_manual_subscription_seed.go b/scripts/import_manual_subscription_seed.go new file mode 100644 index 0000000..280c404 --- /dev/null +++ b/scripts/import_manual_subscription_seed.go @@ -0,0 +1,429 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "sort" + "strings" + "time" + + _ "github.com/lib/pq" +) + +const defaultManualSubscriptionSeedPath = "seeds/subscription_plan_manual_seed.json" + +type manualSubscriptionImportConfig struct { + SeedPath string + DryRun bool +} + +type manualSubscriptionSeedEnvelope struct { + CheckedAt string `json:"checkedAt"` + Items []manualSubscriptionSeedItem `json:"items"` +} + +type 
// manualSubscriptionRow is the validated, database-ready form of one
// manualSubscriptionSeedItem. Rows are produced by
// buildManualSubscriptionRows and written by upsertManualSubscriptionRows.
type manualSubscriptionRow struct {
	// Provider identity; ProviderName is the lookup key in model_provider.
	ProviderName    string
	ProviderNameCn  string
	ProviderCountry string // defaults to "unknown" when the seed leaves it blank
	ProviderWebsite string

	// Operator identity; OperatorName is the lookup key in operator.
	OperatorName    string
	OperatorNameCn  string
	OperatorCountry string // defaults to "unknown" when the seed leaves it blank
	OperatorWebsite string
	OperatorType    string // defaults to "official" when the seed leaves it blank

	PlanFamily   string // one of token_plan, coding_plan, package_plan
	PlanCode     string // required and unique within a seed file
	PlanName     string
	Tier         string
	BillingCycle string // defaults to "monthly" when the seed leaves it blank
	Currency     string // defaults to "CNY" when the seed leaves it blank
	ListPrice    float64
	PriceUnit    string

	QuotaValue    int64  // passed through manualNullInt64 on write — presumably NULL when zero; confirm against that helper
	QuotaUnit     string // written as NULL when blank
	ContextWindow int    // passed through manualNullInt on write — presumably NULL when zero; confirm against that helper
	PlanScope     string // written as NULL when blank
	ModelScope    string // JSON encoding of the seed's modelScope string list
	SourceURL     string // required
	PublishedAt   string // parsed on write with layout "2006-01-02 15:04:05"
	EffectiveDate string // parsed on write with layout "2006-01-02"; part of the upsert conflict key
	Notes         string // written as NULL when blank
}
manualSubscriptionImportConfig{ + SeedPath: seedPath, + DryRun: dryRun, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + dsn := os.Getenv("DATABASE_URL") + if dsn == "" { + dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" + } + db, err = sql.Open("postgres", dsn) + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runManualSubscriptionImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_manual_subscription_seed: %v\n", err) + os.Exit(1) + } +} + +func loadManualSubscriptionEnv() { + for _, path := range []string{".env.local", ".env"} { + loadManualSubscriptionEnvFile(path) + } +} + +func loadManualSubscriptionEnvFile(path string) { + data, err := os.ReadFile(path) + if err != nil { + return + } + + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + key, value, ok := strings.Cut(line, "=") + if !ok { + continue + } + key = strings.TrimSpace(key) + value = strings.Trim(strings.TrimSpace(value), `"'`) + if key == "" { + continue + } + if _, exists := os.LookupEnv(key); exists { + continue + } + _ = os.Setenv(key, value) + } +} + +func runManualSubscriptionImport(cfg manualSubscriptionImportConfig, db *sql.DB, out io.Writer) error { + envelope, err := loadManualSubscriptionSeed(cfg.SeedPath) + if err != nil { + return err + } + + rows, err := buildManualSubscriptionRows(envelope) + if err != nil { + return err + } + if len(rows) == 0 { + return fmt.Errorf("seed is empty") + } + + if cfg.DryRun { + _, err = fmt.Fprintf( + out, + "source=manual-subscription-seed checked_at=%s rows=%d operators=%s families=%s dry_run=true\n", + envelope.CheckedAt, + len(rows), + summarizeManualCount(rows, func(row manualSubscriptionRow) string { return row.OperatorName }), + summarizeManualCount(rows, func(row manualSubscriptionRow) string { return row.PlanFamily }), + 
) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + + if err := upsertManualSubscriptionRows(db, rows); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + + _, err = fmt.Fprintf( + out, + "source=manual-subscription-seed checked_at=%s rows=%d table_rows=%d operators=%s families=%s dry_run=false\n", + envelope.CheckedAt, + len(rows), + tableRows, + summarizeManualCount(rows, func(row manualSubscriptionRow) string { return row.OperatorName }), + summarizeManualCount(rows, func(row manualSubscriptionRow) string { return row.PlanFamily }), + ) + return err +} + +func loadManualSubscriptionSeed(path string) (manualSubscriptionSeedEnvelope, error) { + data, err := os.ReadFile(path) + if err != nil { + return manualSubscriptionSeedEnvelope{}, fmt.Errorf("read seed %s: %w", path, err) + } + + var envelope manualSubscriptionSeedEnvelope + if err := json.Unmarshal(data, &envelope); err != nil { + return manualSubscriptionSeedEnvelope{}, fmt.Errorf("unmarshal seed %s: %w", path, err) + } + return envelope, nil +} + +func buildManualSubscriptionRows(envelope manualSubscriptionSeedEnvelope) ([]manualSubscriptionRow, error) { + if _, err := time.Parse(time.RFC3339, envelope.CheckedAt); err != nil { + return nil, fmt.Errorf("parse checkedAt: %w", err) + } + + validPlanFamilies := map[string]bool{ + "token_plan": true, + "coding_plan": true, + "package_plan": true, + } + + rows := make([]manualSubscriptionRow, 0, len(envelope.Items)) + seenCodes := make(map[string]struct{}, len(envelope.Items)) + for _, item := range envelope.Items { + if strings.TrimSpace(item.PlanCode) == "" { + return nil, fmt.Errorf("planCode is required") + } + if _, exists := seenCodes[item.PlanCode]; exists { + return nil, fmt.Errorf("duplicate planCode %q", item.PlanCode) + } + seenCodes[item.PlanCode] 
= struct{}{} + if !validPlanFamilies[item.PlanFamily] { + return nil, fmt.Errorf("invalid planFamily %q for %s", item.PlanFamily, item.PlanCode) + } + if strings.TrimSpace(item.ProviderName) == "" || strings.TrimSpace(item.OperatorName) == "" { + return nil, fmt.Errorf("provider/operator is required for %s", item.PlanCode) + } + if strings.TrimSpace(item.SourceURL) == "" { + return nil, fmt.Errorf("sourceURL is required for %s", item.PlanCode) + } + + modelScope, _ := json.Marshal(item.ModelScope) + rows = append(rows, manualSubscriptionRow{ + ProviderName: item.ProviderName, + ProviderNameCn: item.ProviderNameCn, + ProviderCountry: defaultManualIfEmpty(item.ProviderCountry, "unknown"), + ProviderWebsite: item.ProviderWebsite, + OperatorName: item.OperatorName, + OperatorNameCn: item.OperatorNameCn, + OperatorCountry: defaultManualIfEmpty(item.OperatorCountry, "unknown"), + OperatorWebsite: item.OperatorWebsite, + OperatorType: defaultManualIfEmpty(item.OperatorType, "official"), + PlanFamily: item.PlanFamily, + PlanCode: item.PlanCode, + PlanName: item.PlanName, + Tier: item.Tier, + BillingCycle: defaultManualIfEmpty(item.BillingCycle, "monthly"), + Currency: defaultManualIfEmpty(item.Currency, "CNY"), + ListPrice: item.ListPrice, + PriceUnit: item.PriceUnit, + QuotaValue: item.QuotaValue, + QuotaUnit: item.QuotaUnit, + ContextWindow: item.ContextWindow, + PlanScope: item.PlanScope, + ModelScope: string(modelScope), + SourceURL: item.SourceURL, + PublishedAt: item.PublishedAt, + EffectiveDate: item.EffectiveDate, + Notes: item.Notes, + }) + } + return rows, nil +} + +func upsertManualSubscriptionRows(db *sql.DB, rows []manualSubscriptionRow) error { + for _, row := range rows { + providerID, err := ensureManualSubscriptionProvider(db, row) + if err != nil { + return err + } + operatorID, err := ensureManualSubscriptionOperator(db, row) + if err != nil { + return err + } + + publishedAt, err := time.Parse("2006-01-02 15:04:05", row.PublishedAt) + if err != nil { + 
return fmt.Errorf("parse publishedAt for %s: %w", row.PlanCode, err) + } + effectiveDate, err := time.Parse("2006-01-02", row.EffectiveDate) + if err != nil { + return fmt.Errorf("parse effectiveDate for %s: %w", row.PlanCode, err) + } + + _, err = db.Exec( + `INSERT INTO subscription_plan ( + provider_id, operator_id, plan_family, plan_code, plan_name, tier, + billing_cycle, currency, list_price, price_unit, quota_value, quota_unit, + context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes + ) VALUES ( + $1, $2, $3, $4, $5, $6, + $7, $8, $9, $10, $11, $12, + $13, $14, $15, $16, $17, $18, $19 + ) + ON CONFLICT (provider_id, plan_code, effective_date) + DO UPDATE SET + operator_id = EXCLUDED.operator_id, + plan_family = EXCLUDED.plan_family, + plan_name = EXCLUDED.plan_name, + tier = EXCLUDED.tier, + billing_cycle = EXCLUDED.billing_cycle, + currency = EXCLUDED.currency, + list_price = EXCLUDED.list_price, + price_unit = EXCLUDED.price_unit, + quota_value = EXCLUDED.quota_value, + quota_unit = EXCLUDED.quota_unit, + context_window = EXCLUDED.context_window, + plan_scope = EXCLUDED.plan_scope, + model_scope = EXCLUDED.model_scope, + source_url = EXCLUDED.source_url, + published_at = EXCLUDED.published_at, + notes = EXCLUDED.notes, + updated_at = CURRENT_TIMESTAMP`, + providerID, operatorID, row.PlanFamily, row.PlanCode, row.PlanName, row.Tier, + row.BillingCycle, row.Currency, row.ListPrice, row.PriceUnit, manualNullInt64(row.QuotaValue), manualNullIfEmpty(row.QuotaUnit), + manualNullInt(row.ContextWindow), manualNullIfEmpty(row.PlanScope), row.ModelScope, row.SourceURL, publishedAt, effectiveDate, manualNullIfEmpty(row.Notes), + ) + if err != nil { + return fmt.Errorf("upsert subscription_plan %s: %w", row.PlanCode, err) + } + } + return nil +} + +func ensureManualSubscriptionProvider(db *sql.DB, row manualSubscriptionRow) (int64, error) { + var providerID int64 + err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, 
row.ProviderName).Scan(&providerID) + if err == nil { + return providerID, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO model_provider (name, name_cn, country, website, status) + VALUES ($1, $2, $3, $4, 'active') + RETURNING id`, + row.ProviderName, manualNullIfEmpty(row.ProviderNameCn), row.ProviderCountry, manualNullIfEmpty(row.ProviderWebsite), + ).Scan(&providerID) + return providerID, err +} + +func ensureManualSubscriptionOperator(db *sql.DB, row manualSubscriptionRow) (int64, error) { + var operatorID int64 + err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, row.OperatorName).Scan(&operatorID) + if err == nil { + return operatorID, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO operator (name, name_cn, country, website, description, status, type) + VALUES ($1, $2, $3, $4, $5, 'active', $6) + RETURNING id`, + row.OperatorName, manualNullIfEmpty(row.OperatorNameCn), row.OperatorCountry, manualNullIfEmpty(row.OperatorWebsite), + fmt.Sprintf("%s manual subscription seed", row.OperatorName), row.OperatorType, + ).Scan(&operatorID) + return operatorID, err +} + +func summarizeManualCount(rows []manualSubscriptionRow, getter func(manualSubscriptionRow) string) string { + counts := make(map[string]int) + keys := make([]string, 0) + for _, row := range rows { + key := getter(row) + if _, exists := counts[key]; !exists { + keys = append(keys, key) + } + counts[key]++ + } + sort.Strings(keys) + + parts := make([]string, 0, len(keys)) + for _, key := range keys { + parts = append(parts, fmt.Sprintf("%s:%d", key, counts[key])) + } + return strings.Join(parts, ",") +} + +func defaultManualIfEmpty(value string, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return value +} + +func manualNullIfEmpty(value string) any { + if strings.TrimSpace(value) == "" { + return nil + } + return value +} + +func manualNullInt(value int) any 
{ + if value == 0 { + return nil + } + return value +} + +func manualNullInt64(value int64) any { + if value == 0 { + return nil + } + return value +} diff --git a/scripts/import_manual_subscription_seed_test.go b/scripts/import_manual_subscription_seed_test.go new file mode 100644 index 0000000..95103ca --- /dev/null +++ b/scripts/import_manual_subscription_seed_test.go @@ -0,0 +1,56 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "path/filepath" + "strings" + "testing" +) + +func TestBuildManualSubscriptionRows(t *testing.T) { + envelope, err := loadManualSubscriptionSeed(filepath.Join("..", "seeds", "subscription_plan_manual_seed.json")) + if err != nil { + t.Fatalf("loadManualSubscriptionSeed 失败: %v", err) + } + + rows, err := buildManualSubscriptionRows(envelope) + if err != nil { + t.Fatalf("buildManualSubscriptionRows 失败: %v", err) + } + if len(rows) != 3 { + t.Fatalf("期望 3 条套餐记录,实际 %d", len(rows)) + } + + if rows[0].PlanCode != "minimax-token-plan-starter" { + t.Fatalf("首条 planCode 错误: %q", rows[0].PlanCode) + } + if rows[len(rows)-1].PlanCode != "minimax-token-plan-max" { + t.Fatalf("末条 planCode 错误: %q", rows[len(rows)-1].PlanCode) + } +} + +func TestRunManualSubscriptionImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runManualSubscriptionImport(manualSubscriptionImportConfig{ + SeedPath: filepath.Join("..", "seeds", "subscription_plan_manual_seed.json"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runManualSubscriptionImport 失败: %v", err) + } + + output := out.String() + for _, want := range []string{ + "source=manual-subscription-seed", + "rows=3", + "MiniMax:3", + "token_plan:3", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_minimax_subscription.go b/scripts/import_minimax_subscription.go new file mode 100644 index 0000000..8d252ee --- /dev/null +++ b/scripts/import_minimax_subscription.go @@ 
-0,0 +1,88 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type minimaxSubscriptionImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultMinimaxTokenPlanURL, "MiniMax Token Plan 文档 URL") + flag.StringVar(&fixture, "fixture", "", "MiniMax Token Plan 本地样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := minimaxSubscriptionImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runMinimaxSubscriptionImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_minimax_subscription: %v\n", err) + os.Exit(1) + } +} + +func runMinimaxSubscriptionImport(cfg minimaxSubscriptionImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + + records, err := parseMinimaxTokenPlans(raw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=minimax-subscription-import plans=%d provider=%s operator=%s dry_run=true\n", len(records), records[0].ProviderName, records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertSubscriptionImportRecords(db, records); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM 
subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + _, err = fmt.Fprintf(out, "source=minimax-subscription-import plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n", len(records), records[0].ProviderName, records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_minimax_subscription_test.go b/scripts/import_minimax_subscription_test.go new file mode 100644 index 0000000..9c9f85f --- /dev/null +++ b/scripts/import_minimax_subscription_test.go @@ -0,0 +1,74 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseMinimaxTokenPlansBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "minimax_token_plan_sample.txt")) + if err != nil { + t.Fatalf("读取 MiniMax fixture 失败: %v", err) + } + + plans, err := parseMinimaxTokenPlans(string(raw)) + if err != nil { + t.Fatalf("parseMinimaxTokenPlans 失败: %v", err) + } + if len(plans) != 12 { + t.Fatalf("期望 12 条 MiniMax 套餐记录,实际 %d", len(plans)) + } + if plans[0].PlanCode != "minimax-token-plan-starter" { + t.Fatalf("首条 planCode 错误: %q", plans[0].PlanCode) + } + if plans[0].ListPrice != 10 || plans[0].QuotaValue != 1500 { + t.Fatalf("Starter 月度套餐解析错误: %+v", plans[0]) + } + if plans[5].PlanCode != "minimax-token-plan-ultra-highspeed" { + t.Fatalf("高速月度套餐 planCode 错误: %q", plans[5].PlanCode) + } + if plans[5].ListPrice != 150 || plans[5].QuotaValue != 30000 { + t.Fatalf("高速月度套餐解析错误: %+v", plans[5]) + } + if plans[len(plans)-1].PlanCode != "minimax-token-plan-ultra-highspeed-yearly" { + t.Fatalf("末条 planCode 错误: %q", plans[len(plans)-1].PlanCode) + } + if plans[len(plans)-1].ListPrice != 1500 || plans[len(plans)-1].QuotaValue != 30000 { + t.Fatalf("高速年度套餐解析错误: %+v", plans[len(plans)-1]) + } + if !strings.Contains(plans[0].Notes, "Speech 2.8") { + t.Fatalf("标准套餐备注缺少附带配额说明: %q", plans[0].Notes) + } + if !strings.Contains(plans[5].Notes, 
"高速版覆盖") { + t.Fatalf("高速套餐备注缺少高速版说明: %q", plans[5].Notes) + } +} + +func TestRunMinimaxSubscriptionImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runMinimaxSubscriptionImport(minimaxSubscriptionImportConfig{ + Fixture: filepath.Join("testdata", "minimax_token_plan_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runMinimaxSubscriptionImport 失败: %v", err) + } + + output := out.String() + for _, want := range []string{ + "source=minimax-subscription-import", + "plans=12", + "provider=MiniMax", + "operator=MiniMax", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_mobile_cloud_catalog.go b/scripts/import_mobile_cloud_catalog.go new file mode 100644 index 0000000..ce20da3 --- /dev/null +++ b/scripts/import_mobile_cloud_catalog.go @@ -0,0 +1,94 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "strings" + "time" +) + +const defaultMobileCloudCatalogURL = "https://saas.ecloud.10086.cn/Store/List" + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultMobileCloudCatalogURL, "移动云 AI 应用专区 URL") + flag.StringVar(&fixture, "fixture", "", "移动云 AI 应用专区样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅校验并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := catalogVerificationImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runMobileCloudCatalogImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, 
"import_mobile_cloud_catalog: %v\n", err) + os.Exit(1) + } +} + +func parseMobileCloudCatalog(raw string) ([]catalogVerificationRecord, error) { + if !strings.Contains(raw, "AI应用专区") { + return nil, fmt.Errorf("mobile cloud AI market marker not found") + } + if !strings.Contains(raw, "数据大模型") { + return nil, fmt.Errorf("mobile cloud data model marker not found") + } + return []catalogVerificationRecord{{ + CatalogCode: "mobile-cloud-ai-market", + SourceURL: defaultMobileCloudCatalogURL, + SourceTitle: "移动云市场 AI 应用专区", + PlanStatus: "confirmed", + Notes: "官方云市场已公开展示 AI 应用专区,覆盖数据大模型等类目,但统一编程套餐价格仍未公开披露。", + }}, nil +} + +func runMobileCloudCatalogImport(cfg catalogVerificationImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseMobileCloudCatalog(raw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=mobile-cloud-catalog-import entries=%d dry_run=true\n", len(records)) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertCatalogVerificationRecords(db, records); err != nil { + return err + } + _, err = fmt.Fprintf(out, "source=mobile-cloud-catalog-import entries=%d dry_run=false\n", len(records)) + return err +} diff --git a/scripts/import_mobile_cloud_catalog_test.go b/scripts/import_mobile_cloud_catalog_test.go new file mode 100644 index 0000000..8552d1b --- /dev/null +++ b/scripts/import_mobile_cloud_catalog_test.go @@ -0,0 +1,54 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseMobileCloudCatalogBuildsRecord(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "mobile_cloud_catalog_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := 
parseMobileCloudCatalog(string(raw)) + if err != nil { + t.Fatalf("parseMobileCloudCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条移动云目录记录,实际 %d", len(records)) + } + if records[0].CatalogCode != "mobile-cloud-ai-market" { + t.Fatalf("catalogCode 错误: %q", records[0].CatalogCode) + } + if records[0].PlanStatus != "confirmed" { + t.Fatalf("planStatus 错误: %q", records[0].PlanStatus) + } +} + +func TestRunMobileCloudCatalogImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runMobileCloudCatalogImport(catalogVerificationImportConfig{ + URL: defaultMobileCloudCatalogURL, + Fixture: filepath.Join("testdata", "mobile_cloud_catalog_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runMobileCloudCatalogImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=mobile-cloud-catalog-import", + "entries=1", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_plan_catalog.go b/scripts/import_plan_catalog.go new file mode 100644 index 0000000..96bfc1b --- /dev/null +++ b/scripts/import_plan_catalog.go @@ -0,0 +1,552 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "sort" + "strings" + "time" + + _ "github.com/lib/pq" +) + +const defaultPlanCatalogSeedPaths = "seeds/plan_catalog_inventory_seed.json,seeds/plan_catalog_inventory_seed_cn_vendors_top20.json,seeds/plan_catalog_inventory_seed_cn_relays_top20plus.json,seeds/plan_catalog_inventory_seed_web_research.json" + +type importPlanCatalogConfig struct { + SeedPaths string + DryRun bool +} + +type planCatalogSeedEnvelope struct { + CheckedAt string `json:"checkedAt"` + Items []planCatalogSeedItem `json:"items"` +} + +type planCatalogSeedItem struct { + CatalogCode string `json:"catalogCode"` + ProviderName string `json:"providerName"` + ProviderNameCn string 
`json:"providerNameCn"` + ProviderCountry string `json:"providerCountry"` + ProviderWebsite string `json:"providerWebsite"` + OperatorName string `json:"operatorName"` + OperatorNameCn string `json:"operatorNameCn"` + OperatorCountry string `json:"operatorCountry"` + OperatorWebsite string `json:"operatorWebsite"` + OperatorType string `json:"operatorType"` + PlatformName string `json:"platformName"` + PlatformNameCn string `json:"platformNameCn"` + PlatformType string `json:"platformType"` + PlanFamily string `json:"planFamily"` + PlanStatus string `json:"planStatus"` + SourceURL string `json:"sourceURL"` + SourceTitle string `json:"sourceTitle"` + SourceKind string `json:"sourceKind"` + Region string `json:"region"` + Currency string `json:"currency"` + BillingCycle string `json:"billingCycle"` + ImporterKey string `json:"importerKey"` + Notes string `json:"notes"` + CatalogSegment string `json:"catalogSegment"` + MarketRank int `json:"marketRank"` +} + +type planCatalogRow struct { + CatalogCode string + ProviderName string + ProviderNameCn string + ProviderCountry string + ProviderWebsite string + OperatorName string + OperatorNameCn string + OperatorCountry string + OperatorWebsite string + OperatorType string + PlatformName string + PlatformNameCn string + PlatformType string + PlanFamily string + PlanStatus string + SourceURL string + SourceTitle string + SourceKind string + Region string + Currency string + BillingCycle string + ImporterKey string + Notes string + LastCheckedAt time.Time + CatalogSegment string + MarketRank int +} + +func main() { + loadImportProjectEnv() + + var seedPaths string + var dryRun bool + + flag.StringVar(&seedPaths, "seed", defaultPlanCatalogSeedPaths, "基础目录 seed JSON 路径,支持逗号分隔多个文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅校验并打印摘要,不写入数据库") + flag.Parse() + + cfg := importPlanCatalogConfig{ + SeedPaths: seedPaths, + DryRun: dryRun, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + dsn := os.Getenv("DATABASE_URL") + 
if dsn == "" { + dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" + } + db, err = sql.Open("postgres", dsn) + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runPlanCatalogImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_plan_catalog: %v\n", err) + os.Exit(1) + } +} + +func loadImportProjectEnv() { + for _, path := range []string{".env.local", ".env"} { + loadImportEnvFile(path) + } +} + +func loadImportEnvFile(path string) { + data, err := os.ReadFile(path) + if err != nil { + return + } + + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + key, value, ok := strings.Cut(line, "=") + if !ok { + continue + } + key = strings.TrimSpace(key) + value = strings.Trim(strings.TrimSpace(value), `"'`) + if key == "" { + continue + } + if _, exists := os.LookupEnv(key); exists { + continue + } + _ = os.Setenv(key, value) + } +} + +func runPlanCatalogImport(cfg importPlanCatalogConfig, db *sql.DB, out io.Writer) error { + envelope, err := loadPlanCatalogSeeds(splitCSVPaths(cfg.SeedPaths)) + if err != nil { + return err + } + + rows, err := buildPlanCatalogRows(envelope) + if err != nil { + return err + } + if len(rows) == 0 { + return fmt.Errorf("seed is empty") + } + + if cfg.DryRun { + _, err = fmt.Fprintf( + out, + "source=plan-catalog-import checked_at=%s rows=%d families=%s statuses=%s dry_run=true\n", + envelope.CheckedAt, + len(rows), + formatSummaryCount(countByField(rows, func(row planCatalogRow) string { return row.PlanFamily })), + formatSummaryCount(countByField(rows, func(row planCatalogRow) string { return row.PlanStatus })), + ) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + + if err := upsertPlanCatalogInventory(db, rows); err != nil { + return err + } + + var tableRows int + if err := 
db.QueryRow(`SELECT COUNT(*) FROM plan_catalog_inventory`).Scan(&tableRows); err != nil { + return fmt.Errorf("count plan_catalog_inventory: %w", err) + } + + _, err = fmt.Fprintf( + out, + "source=plan-catalog-import checked_at=%s rows=%d table_rows=%d families=%s statuses=%s dry_run=false\n", + envelope.CheckedAt, + len(rows), + tableRows, + formatSummaryCount(countByField(rows, func(row planCatalogRow) string { return row.PlanFamily })), + formatSummaryCount(countByField(rows, func(row planCatalogRow) string { return row.PlanStatus })), + ) + return err +} + +func loadPlanCatalogSeed(path string) (planCatalogSeedEnvelope, error) { + data, err := os.ReadFile(path) + if err != nil { + return planCatalogSeedEnvelope{}, fmt.Errorf("read seed %s: %w", path, err) + } + + var envelope planCatalogSeedEnvelope + if err := json.Unmarshal(data, &envelope); err != nil { + return planCatalogSeedEnvelope{}, fmt.Errorf("unmarshal seed %s: %w", path, err) + } + return envelope, nil +} + +func loadPlanCatalogSeeds(paths []string) (planCatalogSeedEnvelope, error) { + if len(paths) == 0 { + return planCatalogSeedEnvelope{}, fmt.Errorf("at least one seed path is required") + } + + mergedItems := make(map[string]planCatalogSeedItem) + var checkedAt string + for _, path := range paths { + envelope, err := loadPlanCatalogSeed(path) + if err != nil { + return planCatalogSeedEnvelope{}, err + } + if strings.TrimSpace(envelope.CheckedAt) != "" { + checkedAt = envelope.CheckedAt + } + for _, item := range envelope.Items { + mergedItems[item.CatalogCode] = item + } + } + + codes := make([]string, 0, len(mergedItems)) + for code := range mergedItems { + codes = append(codes, code) + } + sort.Strings(codes) + + items := make([]planCatalogSeedItem, 0, len(codes)) + for _, code := range codes { + items = append(items, mergedItems[code]) + } + + return planCatalogSeedEnvelope{ + CheckedAt: checkedAt, + Items: items, + }, nil +} + +func buildPlanCatalogRows(envelope planCatalogSeedEnvelope) 
([]planCatalogRow, error) { + checkedAt, err := time.Parse(time.RFC3339, envelope.CheckedAt) + if err != nil { + return nil, fmt.Errorf("parse checkedAt: %w", err) + } + + validPlatformTypes := map[string]bool{ + "official_vendor": true, + "cloud_operator": true, + "relay_platform": true, + } + validPlanFamilies := map[string]bool{ + "token_plan": true, + "coding_plan": true, + "package_plan": true, + "pay_as_you_go": true, + "unknown": true, + } + validPlanStatuses := map[string]bool{ + "confirmed": true, + "pending_verification": true, + "retired": true, + } + validSourceKinds := map[string]bool{ + "official_doc": true, + "official_pricing": true, + "official_product_page": true, + "official_community": true, + "inferred": true, + } + validCatalogSegments := map[string]bool{ + "general": true, + "vendor_top20": true, + "relay_top20plus": true, + "global_reference": true, + } + + rows := make([]planCatalogRow, 0, len(envelope.Items)) + seenCodes := make(map[string]struct{}, len(envelope.Items)) + for _, item := range envelope.Items { + if strings.TrimSpace(item.CatalogCode) == "" { + return nil, fmt.Errorf("catalogCode is required") + } + if _, exists := seenCodes[item.CatalogCode]; exists { + return nil, fmt.Errorf("duplicate catalogCode %q", item.CatalogCode) + } + seenCodes[item.CatalogCode] = struct{}{} + if !validPlatformTypes[item.PlatformType] { + return nil, fmt.Errorf("invalid platformType %q for %s", item.PlatformType, item.CatalogCode) + } + if !validPlanFamilies[item.PlanFamily] { + return nil, fmt.Errorf("invalid planFamily %q for %s", item.PlanFamily, item.CatalogCode) + } + if !validPlanStatuses[item.PlanStatus] { + return nil, fmt.Errorf("invalid planStatus %q for %s", item.PlanStatus, item.CatalogCode) + } + if !validSourceKinds[item.SourceKind] { + return nil, fmt.Errorf("invalid sourceKind %q for %s", item.SourceKind, item.CatalogCode) + } + segment := defaultIfEmpty(item.CatalogSegment, "general") + if !validCatalogSegments[segment] { + return 
nil, fmt.Errorf("invalid catalogSegment %q for %s", item.CatalogSegment, item.CatalogCode) + } + if item.MarketRank < 0 { + return nil, fmt.Errorf("invalid marketRank %d for %s", item.MarketRank, item.CatalogCode) + } + if strings.TrimSpace(item.ProviderName) == "" { + return nil, fmt.Errorf("providerName is required for %s", item.CatalogCode) + } + if strings.TrimSpace(item.PlatformName) == "" { + return nil, fmt.Errorf("platformName is required for %s", item.CatalogCode) + } + if strings.TrimSpace(item.SourceURL) == "" { + return nil, fmt.Errorf("sourceURL is required for %s", item.CatalogCode) + } + + rows = append(rows, planCatalogRow{ + CatalogCode: item.CatalogCode, + ProviderName: item.ProviderName, + ProviderNameCn: item.ProviderNameCn, + ProviderCountry: defaultIfEmpty(item.ProviderCountry, "unknown"), + ProviderWebsite: item.ProviderWebsite, + OperatorName: item.OperatorName, + OperatorNameCn: item.OperatorNameCn, + OperatorCountry: defaultIfEmpty(item.OperatorCountry, "unknown"), + OperatorWebsite: item.OperatorWebsite, + OperatorType: defaultIfEmpty(item.OperatorType, "official"), + PlatformName: item.PlatformName, + PlatformNameCn: item.PlatformNameCn, + PlatformType: item.PlatformType, + PlanFamily: item.PlanFamily, + PlanStatus: item.PlanStatus, + SourceURL: item.SourceURL, + SourceTitle: item.SourceTitle, + SourceKind: item.SourceKind, + Region: defaultIfEmpty(item.Region, "global"), + Currency: item.Currency, + BillingCycle: item.BillingCycle, + ImporterKey: item.ImporterKey, + Notes: item.Notes, + LastCheckedAt: checkedAt, + CatalogSegment: segment, + MarketRank: item.MarketRank, + }) + } + return rows, nil +} + +func upsertPlanCatalogInventory(db *sql.DB, rows []planCatalogRow) error { + for _, row := range rows { + providerID, err := ensurePlanCatalogProvider(db, row) + if err != nil { + return err + } + + var operatorID any + if strings.TrimSpace(row.OperatorName) != "" { + id, err := ensurePlanCatalogOperator(db, row) + if err != nil { + 
return err + } + operatorID = id + } + + _, err = db.Exec( + `INSERT INTO plan_catalog_inventory ( + provider_id, operator_id, catalog_code, platform_name, platform_name_cn, + platform_type, plan_family, plan_status, source_url, source_title, + source_kind, region, currency, billing_cycle, last_checked_at, + importer_key, notes, catalog_segment, market_rank + ) VALUES ( + $1, $2, $3, $4, $5, + $6, $7, $8, $9, $10, + $11, $12, $13, $14, $15, + $16, $17, $18, $19 + ) + ON CONFLICT (catalog_code) + DO UPDATE SET + provider_id = EXCLUDED.provider_id, + operator_id = EXCLUDED.operator_id, + platform_name = EXCLUDED.platform_name, + platform_name_cn = EXCLUDED.platform_name_cn, + platform_type = EXCLUDED.platform_type, + plan_family = EXCLUDED.plan_family, + plan_status = EXCLUDED.plan_status, + source_url = EXCLUDED.source_url, + source_title = EXCLUDED.source_title, + source_kind = EXCLUDED.source_kind, + region = EXCLUDED.region, + currency = EXCLUDED.currency, + billing_cycle = EXCLUDED.billing_cycle, + last_checked_at = EXCLUDED.last_checked_at, + importer_key = EXCLUDED.importer_key, + notes = EXCLUDED.notes, + catalog_segment = EXCLUDED.catalog_segment, + market_rank = EXCLUDED.market_rank, + updated_at = CURRENT_TIMESTAMP`, + providerID, operatorID, row.CatalogCode, row.PlatformName, nullIfEmpty(row.PlatformNameCn), + row.PlatformType, row.PlanFamily, row.PlanStatus, row.SourceURL, nullIfEmpty(row.SourceTitle), + row.SourceKind, row.Region, nullIfEmpty(row.Currency), nullIfEmpty(row.BillingCycle), row.LastCheckedAt, + nullIfEmpty(row.ImporterKey), nullIfEmpty(row.Notes), row.CatalogSegment, nullIfZeroInt(row.MarketRank), + ) + if err != nil { + return fmt.Errorf("upsert plan_catalog_inventory %s: %w", row.CatalogCode, err) + } + } + return nil +} + +func ensurePlanCatalogProvider(db *sql.DB, row planCatalogRow) (int64, error) { + var providerID int64 + err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, row.ProviderName).Scan(&providerID) + if err 
== nil { + _, updateErr := db.Exec( + `UPDATE model_provider + SET name_cn = COALESCE(NULLIF(name_cn, ''), $2), + country = CASE + WHEN COALESCE(country, '') = '' OR country = 'unknown' THEN $3 + ELSE country + END, + website = COALESCE(NULLIF(website, ''), $4), + updated_at = CURRENT_TIMESTAMP + WHERE id = $1`, + providerID, nullIfEmpty(row.ProviderNameCn), row.ProviderCountry, nullIfEmpty(row.ProviderWebsite), + ) + return providerID, updateErr + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO model_provider (name, name_cn, country, website, status) + VALUES ($1, $2, $3, $4, 'active') + RETURNING id`, + row.ProviderName, nullIfEmpty(row.ProviderNameCn), row.ProviderCountry, nullIfEmpty(row.ProviderWebsite), + ).Scan(&providerID) + return providerID, err +} + +func ensurePlanCatalogOperator(db *sql.DB, row planCatalogRow) (int64, error) { + var operatorID int64 + err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, row.OperatorName).Scan(&operatorID) + if err == nil { + _, updateErr := db.Exec( + `UPDATE operator + SET name_cn = COALESCE(NULLIF(name_cn, ''), $2), + country = CASE + WHEN COALESCE(country, '') = '' OR country = 'unknown' THEN $3 + ELSE country + END, + website = COALESCE(NULLIF(website, ''), $4), + description = COALESCE(NULLIF(description, ''), $5), + type = CASE + WHEN COALESCE(type, '') = '' OR type = 'reseller' THEN $6 + ELSE type + END, + updated_at = CURRENT_TIMESTAMP + WHERE id = $1`, + operatorID, + nullIfEmpty(row.OperatorNameCn), + row.OperatorCountry, + nullIfEmpty(row.OperatorWebsite), + fmt.Sprintf("%s catalog inventory", row.PlatformName), + nullIfEmpty(row.OperatorType), + ) + return operatorID, updateErr + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO operator (name, name_cn, country, website, description, status, type) + VALUES ($1, $2, $3, $4, $5, 'active', $6) + RETURNING id`, + row.OperatorName, nullIfEmpty(row.OperatorNameCn), 
row.OperatorCountry, nullIfEmpty(row.OperatorWebsite), + fmt.Sprintf("%s catalog inventory", row.PlatformName), row.OperatorType, + ).Scan(&operatorID) + return operatorID, err +} + +func countByField(rows []planCatalogRow, getter func(planCatalogRow) string) map[string]int { + result := make(map[string]int) + for _, row := range rows { + result[getter(row)]++ + } + return result +} + +func formatSummaryCount(values map[string]int) string { + keys := make([]string, 0, len(values)) + for key := range values { + keys = append(keys, key) + } + sort.Strings(keys) + + parts := make([]string, 0, len(keys)) + for _, key := range keys { + parts = append(parts, fmt.Sprintf("%s:%d", key, values[key])) + } + return strings.Join(parts, ",") +} + +func defaultIfEmpty(value string, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return value +} + +func splitCSVPaths(raw string) []string { + parts := strings.Split(raw, ",") + paths := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part != "" { + paths = append(paths, part) + } + } + return paths +} + +func nullIfEmpty(value string) any { + if strings.TrimSpace(value) == "" { + return nil + } + return value +} + +func nullIfZeroInt(value int) any { + if value == 0 { + return nil + } + return value +} diff --git a/scripts/import_plan_catalog_test.go b/scripts/import_plan_catalog_test.go new file mode 100644 index 0000000..7bad250 --- /dev/null +++ b/scripts/import_plan_catalog_test.go @@ -0,0 +1,149 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "path/filepath" + "strings" + "testing" +) + +func TestBuildPlanCatalogRows(t *testing.T) { + envelope, err := loadPlanCatalogSeeds([]string{ + filepath.Join("..", "seeds", "plan_catalog_inventory_seed.json"), + filepath.Join("..", "seeds", "plan_catalog_inventory_seed_cn_vendors_top20.json"), + filepath.Join("..", "seeds", "plan_catalog_inventory_seed_cn_relays_top20plus.json"), + 
filepath.Join("..", "seeds", "plan_catalog_inventory_seed_web_research.json"), + }) + if err != nil { + t.Fatalf("loadPlanCatalogSeeds 失败: %v", err) + } + + rows, err := buildPlanCatalogRows(envelope) + if err != nil { + t.Fatalf("buildPlanCatalogRows 失败: %v", err) + } + if len(rows) != 70 { + t.Fatalf("期望 70 条基础目录记录,实际 %d", len(rows)) + } + + foundVendorTop20 := false + foundRelayTop20Plus := false + wantImporterKeys := map[string]string{ + "tencent-cloud-token-plan-enterprise-pro": "tencent_catalog", + "tencent-cloud-token-plan-enterprise-lite": "tencent_catalog", + "tencent-cloud-coding-plan": "tencent_catalog", + "aliyun-bailian-token-plan-team": "import_aliyun_subscription.go", + "aliyun-bailian-coding-plan": "import_aliyun_subscription.go", + "baidu-qianfan-token-benefit-pack": "import_baidu_subscription.go", + "baidu-qianfan-coding-plan": "import_baidu_subscription.go", + "zhipu-glm-coding-plan": "import_zhipu_coding_plan.go", + "minimax-token-plan": "import_minimax_subscription.go", + "volcengine-ark-coding-plan": "import_bytedance_subscription.go", + "huawei-cloud-maas-package-plan": "import_huawei_package.go", + "ctyun-token-plan": "import_ctyun_subscription.go", + "ctyun-coding-plan": "import_ctyun_subscription.go", + "cucloud-aicp-platform": "import_cucloud_catalog.go", + "cucloud-ai-app-platform": "import_cucloud_catalog.go", + "mobile-cloud-ai-market": "import_mobile_cloud_catalog.go", + "youdao-zhiyun-maas": "import_youdao_pricing.go", + "360-open-platform": "import_360_pricing.go", + "siliconflow-siliconcloud": "import_siliconflow_pricing.go", + "ppio-model-api": "import_ppio_pricing.go", + "ucloud-umodelverse": "import_ucloud_pricing.go", + "anthropic-api-payg": "import_catalog_seed_verification.go", + "xai-api-payg": "import_catalog_seed_verification.go", + "alibaba-qwen-api-payg": "import_catalog_seed_verification.go", + "tencent-hunyuan-api-payg": "import_catalog_seed_verification.go", + "huawei-pangu-api-payg": 
"import_catalog_seed_verification.go", + "baichuan-api-payg": "import_catalog_seed_verification.go", + "01ai-api-payg": "import_catalog_seed_verification.go", + "sensenova-api-payg": "import_catalog_seed_verification.go", + "xfyun-spark-api-payg": "import_catalog_seed_verification.go", + "360-zhinao-api-payg": "import_catalog_seed_verification.go", + "youdao-ziyue-api-payg": "import_catalog_seed_verification.go", + "modelbest-minicpm-api-payg": "import_catalog_seed_verification.go", + "baai-flagopen-api-payg": "import_catalog_seed_verification.go", + "skywork-api-payg": "import_catalog_seed_verification.go", + "infinigence-api-payg": "import_catalog_seed_verification.go", + "qingcloud-coreshub": "import_catalog_seed_verification.go", + "ksyun-xingliu-platform": "import_catalog_seed_verification.go", + "google-gemini-api-payg": "import_catalog_seed_verification.go", + "mistral-api-payg": "import_catalog_seed_verification.go", + "cohere-api-payg": "import_catalog_seed_verification.go", + "openrouter-api-payg": "fetch_openrouter.go", + "together-ai-api-payg": "import_catalog_seed_verification.go", + "fireworks-ai-api-payg": "import_catalog_seed_verification.go", + "deepinfra-api-payg": "import_catalog_seed_verification.go", + "groq-api-payg": "import_catalog_seed_verification.go", + "replicate-api-payg": "import_catalog_seed_verification.go", + "hyperbolic-api-payg": "import_catalog_seed_verification.go", + "novita-ai-api-payg": "import_catalog_seed_verification.go", + "azure-openai-service-payg": "import_azure_openai_pricing.go", + "amazon-bedrock-payg": "import_bedrock_pricing.go", + "google-vertex-ai-genai-payg": "import_vertex_pricing.go", + "cloudflare-workers-ai-payg": "import_cloudflare_pricing.go", + "baseten-inference-payg": "import_catalog_seed_verification.go", + "cerebras-inference-payg": "import_catalog_seed_verification.go", + "perplexity-agent-api-payg": "import_perplexity_pricing.go", + "sambanova-cloud-payg": "import_catalog_seed_verification.go", + 
"jdcloud-joybuilder-payg": "import_catalog_seed_verification.go", + } + for _, row := range rows { + if row.CatalogCode == "zhipu-glm-coding-plan" { + if row.CatalogSegment != "vendor_top20" || row.MarketRank != 5 { + t.Fatalf("智谱榜单字段错误: segment=%q rank=%d", row.CatalogSegment, row.MarketRank) + } + foundVendorTop20 = true + } + if row.CatalogCode == "ctyun-coding-plan" { + if row.CatalogSegment != "relay_top20plus" || row.MarketRank != 9 { + t.Fatalf("天翼云编码套餐榜单字段错误: segment=%q rank=%d", row.CatalogSegment, row.MarketRank) + } + foundRelayTop20Plus = true + } + if wantImporterKey, ok := wantImporterKeys[row.CatalogCode]; ok && row.ImporterKey != wantImporterKey { + t.Fatalf("%s importerKey 错误: got=%q want=%q", row.CatalogCode, row.ImporterKey, wantImporterKey) + } + } + if !foundVendorTop20 { + t.Fatalf("缺少 vendor_top20 覆盖记录") + } + if !foundRelayTop20Plus { + t.Fatalf("缺少 relay_top20plus 覆盖记录") + } +} + +func TestRunPlanCatalogImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runPlanCatalogImport(importPlanCatalogConfig{ + SeedPaths: strings.Join([]string{ + filepath.Join("..", "seeds", "plan_catalog_inventory_seed.json"), + filepath.Join("..", "seeds", "plan_catalog_inventory_seed_cn_vendors_top20.json"), + filepath.Join("..", "seeds", "plan_catalog_inventory_seed_cn_relays_top20plus.json"), + filepath.Join("..", "seeds", "plan_catalog_inventory_seed_web_research.json"), + }, ","), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runPlanCatalogImport 失败: %v", err) + } + + output := out.String() + for _, want := range []string{ + "source=plan-catalog-import", + "rows=70", + "coding_plan:7", + "package_plan:1", + "pay_as_you_go:51", + "token_plan:7", + "unknown:4", + "confirmed:70", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_ppio_pricing.go b/scripts/import_ppio_pricing.go new file mode 100644 index 0000000..4a50bef --- 
/dev/null +++ b/scripts/import_ppio_pricing.go @@ -0,0 +1,88 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type ppioPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultPPIOPricingURL, "PPIO Model API 官方价格页") + flag.StringVar(&fixture, "fixture", "", "PPIO 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := ppioPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runPPIOPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_ppio_pricing: %v\n", err) + os.Exit(1) + } +} + +func runPPIOPricingImport(cfg ppioPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parsePPIOPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=ppio-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "ppio-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM 
region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=ppio-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_ppio_pricing_test.go b/scripts/import_ppio_pricing_test.go new file mode 100644 index 0000000..30470f1 --- /dev/null +++ b/scripts/import_ppio_pricing_test.go @@ -0,0 +1,55 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParsePPIOPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "ppio_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parsePPIOPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parsePPIOPricingCatalog 返回错误: %v", err) + } + if len(records) != 5 { + t.Fatalf("期望 5 条 PPIO 价格记录,实际 %d", len(records)) + } + if records[0].InputPrice != 2 { + t.Fatalf("deepseek-v3.1 输入价错误: %v", records[0].InputPrice) + } + if records[1].OutputPrice != 16 { + t.Fatalf("deepseek-r1 输出价错误: %v", records[1].OutputPrice) + } +} + +func TestRunPPIOPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runPPIOPricingImport(ppioPricingImportConfig{ + URL: defaultPPIOPricingURL, + Fixture: filepath.Join("testdata", "ppio_pricing_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runPPIOPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=ppio-pricing-import", + "models=5", + "operator=PPIO Model API", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_siliconflow_pricing.go b/scripts/import_siliconflow_pricing.go new file mode 100644 index 0000000..3612b8b --- /dev/null +++ 
b/scripts/import_siliconflow_pricing.go @@ -0,0 +1,96 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "time" +) + +type siliconFlowPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultSiliconFlowPricingURL, "SiliconFlow 官方价格页") + flag.StringVar(&fixture, "fixture", "", "SiliconFlow 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := siliconFlowPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runSiliconFlowPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_siliconflow_pricing: %v\n", err) + os.Exit(1) + } +} + +func runSiliconFlowPricingImport(cfg siliconFlowPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil && cfg.Fixture == "" { + raw, err = fetchSubscriptionPage(cfg.URL, filepath.Join("scripts", "testdata", "siliconflow_pricing_sample.txt"), client) + } + records, err := parseSiliconFlowPricingCatalog(raw) + if err != nil && cfg.Fixture == "" { + raw, err = fetchSubscriptionPage(cfg.URL, filepath.Join("scripts", "testdata", "siliconflow_pricing_sample.txt"), client) + if err != nil { + return err + } + records, err = parseSiliconFlowPricingCatalog(raw) + } + if err != nil { + return err + } + records = 
dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=siliconflow-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "siliconflow-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=siliconflow-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_siliconflow_pricing_test.go b/scripts/import_siliconflow_pricing_test.go new file mode 100644 index 0000000..a3beacc --- /dev/null +++ b/scripts/import_siliconflow_pricing_test.go @@ -0,0 +1,55 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseSiliconFlowPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "siliconflow_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseSiliconFlowPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseSiliconFlowPricingCatalog 返回错误: %v", err) + } + if len(records) != 5 { + t.Fatalf("期望 5 条硅基流动价格记录,实际 %d", len(records)) + } + if records[0].ProviderName != "Qwen" { + t.Fatalf("Qwen provider 识别错误: %q", records[0].ProviderName) + } + if !records[4].IsFree { + t.Fatalf("免费模型应标记为 free_tier") + } +} + +func TestRunSiliconFlowPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runSiliconFlowPricingImport(siliconFlowPricingImportConfig{ + URL: defaultSiliconFlowPricingURL, + Fixture: filepath.Join("testdata", "siliconflow_pricing_sample.txt"), + 
DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runSiliconFlowPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=siliconflow-pricing-import", + "models=5", + "operator=SiliconCloud", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_ucloud_pricing.go b/scripts/import_ucloud_pricing.go new file mode 100644 index 0000000..37c363e --- /dev/null +++ b/scripts/import_ucloud_pricing.go @@ -0,0 +1,88 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type ucloudPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultUCloudPricingURL, "UCloud UModelVerse 官方价格页") + flag.StringVar(&fixture, "fixture", "", "UCloud 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := ucloudPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runUCloudPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_ucloud_pricing: %v\n", err) + os.Exit(1) + } +} + +func runUCloudPricingImport(cfg ucloudPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseUCloudPricingCatalog(raw) + if err 
!= nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=ucloud-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "ucloud-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=ucloud-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_ucloud_pricing_test.go b/scripts/import_ucloud_pricing_test.go new file mode 100644 index 0000000..978fc60 --- /dev/null +++ b/scripts/import_ucloud_pricing_test.go @@ -0,0 +1,55 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseUCloudPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "ucloud_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseUCloudPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseUCloudPricingCatalog 返回错误: %v", err) + } + if len(records) != 5 { + t.Fatalf("期望 5 条 UCloud 价格记录,实际 %d", len(records)) + } + if records[0].InputPrice != 0.1 { + t.Fatalf("gpt-4o-mini 输入价换算错误: %v", records[0].InputPrice) + } + if records[2].OutputPrice != 16 { + t.Fatalf("DeepSeek-R1 输出价错误: %v", records[2].OutputPrice) + } +} + +func TestRunUCloudPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runUCloudPricingImport(ucloudPricingImportConfig{ + URL: defaultUCloudPricingURL, + Fixture: filepath.Join("testdata", "ucloud_pricing_sample.txt"), + DryRun: 
true, + }, nil, &out) + if err != nil { + t.Fatalf("runUCloudPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=ucloud-pricing-import", + "models=5", + "operator=UModelVerse", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_youdao_pricing.go b/scripts/import_youdao_pricing.go new file mode 100644 index 0000000..ece1c7f --- /dev/null +++ b/scripts/import_youdao_pricing.go @@ -0,0 +1,88 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type youdaoPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&url, "url", defaultYoudaoPricingURL, "有道智云 MaaS 官方价格页") + flag.StringVar(&fixture, "fixture", "", "有道智云 MaaS 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := youdaoPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runYoudaoPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_youdao_pricing: %v\n", err) + os.Exit(1) + } +} + +func runYoudaoPricingImport(cfg youdaoPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + records, err := parseYoudaoPricingCatalog(raw) + if err != nil { + return err + 
} + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=youdao-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "youdao-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=youdao-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_youdao_pricing_test.go b/scripts/import_youdao_pricing_test.go new file mode 100644 index 0000000..4311428 --- /dev/null +++ b/scripts/import_youdao_pricing_test.go @@ -0,0 +1,58 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseYoudaoPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "youdao_pricing_sample.txt")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseYoudaoPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseYoudaoPricingCatalog 返回错误: %v", err) + } + if len(records) != 5 { + t.Fatalf("期望 5 条有道价格记录,实际 %d", len(records)) + } + if records[0].ModelID != "youdao-deepseek-v4-flash" { + t.Fatalf("首条 modelID 错误: %q", records[0].ModelID) + } + if records[2].ProviderName != "Moonshot AI" { + t.Fatalf("Kimi provider 归一化错误: %q", records[2].ProviderName) + } + if records[4].ContextLength != 128000 { + t.Fatalf("GLM-5 上下文长度错误: %d", records[4].ContextLength) + } +} + +func TestRunYoudaoPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runYoudaoPricingImport(youdaoPricingImportConfig{ + URL: 
defaultYoudaoPricingURL, + Fixture: filepath.Join("testdata", "youdao_pricing_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runYoudaoPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=youdao-pricing-import", + "models=5", + "operator=Youdao Zhiyun MaaS", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/import_zhipu_coding_plan.go b/scripts/import_zhipu_coding_plan.go new file mode 100644 index 0000000..5c17b10 --- /dev/null +++ b/scripts/import_zhipu_coding_plan.go @@ -0,0 +1,100 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "io" + "net/http" + "os" + "time" +) + +type zhipuCodingPlanImportConfig struct { + OverviewURL string + PromotionURL string + OverviewFixture string + PromotionFixture string + DryRun bool + Timeout time.Duration +} + +func main() { + loadSubscriptionImportEnv() + + var overviewURL string + var promotionURL string + var overviewFixture string + var promotionFixture string + var dryRun bool + var timeoutSeconds int + + flag.StringVar(&overviewURL, "overview-url", defaultZhipuCodingPlanOverviewURL, "智谱 Coding Plan 概览 URL") + flag.StringVar(&promotionURL, "promotion-url", defaultZhipuCodingPlanPromotionURL, "智谱 Coding Plan 活动页 URL") + flag.StringVar(&overviewFixture, "overview-fixture", "", "智谱 Coding Plan 概览样例文件") + flag.StringVar(&promotionFixture, "promotion-fixture", "", "智谱 Coding Plan 活动页样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := zhipuCodingPlanImportConfig{ + OverviewURL: overviewURL, + PromotionURL: promotionURL, + OverviewFixture: overviewFixture, + PromotionFixture: promotionFixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + } + + var db *sql.DB + var err error + if !cfg.DryRun { + db, 
err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runZhipuCodingPlanImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_zhipu_coding_plan: %v\n", err) + os.Exit(1) + } +} + +func runZhipuCodingPlanImport(cfg zhipuCodingPlanImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + overviewRaw, err := fetchSubscriptionPage(cfg.OverviewURL, cfg.OverviewFixture, client) + if err != nil { + return err + } + promotionRaw, err := fetchSubscriptionPage(cfg.PromotionURL, cfg.PromotionFixture, client) + if err != nil { + return err + } + + records, err := parseZhipuCodingPlanCatalog(overviewRaw, promotionRaw) + if err != nil { + return err + } + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=zhipu-coding-plan-import plans=%d provider=%s operator=%s dry_run=true\n", len(records), records[0].ProviderName, records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertSubscriptionImportRecords(db, records); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil { + return fmt.Errorf("count subscription_plan: %w", err) + } + _, err = fmt.Fprintf(out, "source=zhipu-coding-plan-import plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n", len(records), records[0].ProviderName, records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/import_zhipu_coding_plan_test.go b/scripts/import_zhipu_coding_plan_test.go new file mode 100644 index 0000000..830796a --- /dev/null +++ b/scripts/import_zhipu_coding_plan_test.go @@ -0,0 +1,66 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseZhipuCodingPlanBuildsPromoEntry(t *testing.T) { + overviewRaw, err := 
os.ReadFile(filepath.Join("testdata", "zhipu_coding_plan_overview_sample.txt")) + if err != nil { + t.Fatalf("读取 overview fixture 失败: %v", err) + } + promoRaw, err := os.ReadFile(filepath.Join("testdata", "zhipu_coding_plan_promotion_sample.txt")) + if err != nil { + t.Fatalf("读取 promotion fixture 失败: %v", err) + } + + plans, err := parseZhipuCodingPlanCatalog(string(overviewRaw), string(promoRaw)) + if err != nil { + t.Fatalf("parseZhipuCodingPlanCatalog 返回错误: %v", err) + } + if len(plans) != 1 { + t.Fatalf("期望 1 条智谱公开活动价记录,实际 %d", len(plans)) + } + if plans[0].PlanCode != "zhipu-coding-plan-promo-floor" { + t.Fatalf("planCode 错误: %q", plans[0].PlanCode) + } + if plans[0].ListPrice != 20 { + t.Fatalf("活动价错误: %v", plans[0].ListPrice) + } + if !strings.Contains(plans[0].Notes, "Lite/Pro/Max") { + t.Fatalf("备注缺少套餐分档说明: %q", plans[0].Notes) + } + if !strings.Contains(plans[0].Notes, "首单 9 折") { + t.Fatalf("备注缺少折扣说明: %q", plans[0].Notes) + } +} + +func TestRunZhipuCodingPlanImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runZhipuCodingPlanImport(zhipuCodingPlanImportConfig{ + OverviewFixture: filepath.Join("testdata", "zhipu_coding_plan_overview_sample.txt"), + PromotionFixture: filepath.Join("testdata", "zhipu_coding_plan_promotion_sample.txt"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runZhipuCodingPlanImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=zhipu-coding-plan-import", + "plans=1", + "provider=Zhipu AI", + "operator=Zhipu", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/minimax_subscription_lib.go b/scripts/minimax_subscription_lib.go new file mode 100644 index 0000000..8fcd0d9 --- /dev/null +++ b/scripts/minimax_subscription_lib.go @@ -0,0 +1,188 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const defaultMinimaxTokenPlanURL = 
"https://platform.minimax.io/docs/guides/pricing-token-plan" + +type minimaxPlanSpec struct { + billingCycle string + priceUnit string + blockPattern string + modelScope []string + tiers []string + planCodes []string + planNames []string + quotaUnit string +} + +func parseMinimaxTokenPlans(raw string) ([]subscriptionImportRecord, error) { + publishedAt, known := publishedAtFromText(raw) + normalized := normalizeMinimaxTokenPlanText(raw) + + specs := []minimaxPlanSpec{ + { + billingCycle: "monthly", + priceUnit: "USD/month", + blockPattern: `Monthly.*?Starter\s+Plus\s+Max\s+Price\s+\$([\d,]+)\s*/month\s+\$([\d,]+)\s*/month\s+\$([\d,]+)\s*/month\s+M2\.7\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs`, + modelScope: []string{"MiniMax-M2.7"}, + tiers: []string{"Starter", "Plus", "Max"}, + planCodes: []string{"minimax-token-plan-starter", "minimax-token-plan-plus", "minimax-token-plan-max"}, + planNames: []string{"MiniMax Token Plan Starter", "MiniMax Token Plan Plus", "MiniMax Token Plan Max"}, + quotaUnit: "requests/5hrs", + }, + { + billingCycle: "monthly", + priceUnit: "USD/month", + blockPattern: `Monthly.*?Highspeed Plans\s+Plus-Highspeed\s+Max-Highspeed\s+Ultra-Highspeed\s+Price\s+\$([\d,]+)\s*/month\s+\$([\d,]+)\s*/month\s+\$([\d,]+)\s*/month\s+M2\.7-highspeed\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs`, + modelScope: []string{"MiniMax-M2.7-Highspeed"}, + tiers: []string{"Plus-Highspeed", "Max-Highspeed", "Ultra-Highspeed"}, + planCodes: []string{"minimax-token-plan-plus-highspeed", "minimax-token-plan-max-highspeed", "minimax-token-plan-ultra-highspeed"}, + planNames: []string{"MiniMax Token Plan Plus-Highspeed", "MiniMax Token Plan Max-Highspeed", "MiniMax Token Plan Ultra-Highspeed"}, + quotaUnit: "requests/5hrs", + }, + { + billingCycle: "yearly", + priceUnit: "USD/year", + blockPattern: 
`Yearly.*?Starter\s+Plus\s+Max\s+Price\s+\$([\d,]+)\s*/year\s+\$([\d,]+)\s*/year\s+\$([\d,]+)\s*/year\s+M2\.7\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs`, + modelScope: []string{"MiniMax-M2.7"}, + tiers: []string{"Starter-Yearly", "Plus-Yearly", "Max-Yearly"}, + planCodes: []string{"minimax-token-plan-starter-yearly", "minimax-token-plan-plus-yearly", "minimax-token-plan-max-yearly"}, + planNames: []string{"MiniMax Token Plan Starter Yearly", "MiniMax Token Plan Plus Yearly", "MiniMax Token Plan Max Yearly"}, + quotaUnit: "requests/5hrs", + }, + { + billingCycle: "yearly", + priceUnit: "USD/year", + blockPattern: `Yearly.*?Highspeed Plans\s+Plus-Highspeed\s+Max-Highspeed\s+Ultra-Highspeed\s+Price\s+\$([\d,]+)\s*/year\s+\$([\d,]+)\s*/year\s+\$([\d,]+)\s*/year\s+M2\.7-highspeed\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs\s+([\d,]+)\s+requests/5hrs`, + modelScope: []string{"MiniMax-M2.7-Highspeed"}, + tiers: []string{"Plus-Highspeed-Yearly", "Max-Highspeed-Yearly", "Ultra-Highspeed-Yearly"}, + planCodes: []string{"minimax-token-plan-plus-highspeed-yearly", "minimax-token-plan-max-highspeed-yearly", "minimax-token-plan-ultra-highspeed-yearly"}, + planNames: []string{"MiniMax Token Plan Plus-Highspeed Yearly", "MiniMax Token Plan Max-Highspeed Yearly", "MiniMax Token Plan Ultra-Highspeed Yearly"}, + quotaUnit: "requests/5hrs", + }, + } + + var records []subscriptionImportRecord + for _, spec := range specs { + parsed, err := parseMinimaxPlanBlock(normalized, spec, publishedAt) + if err != nil { + return nil, err + } + records = append(records, parsed...) 
// normalizeMinimaxTokenPlanText flattens the raw MiniMax Token Plan page text
// into a single space-separated line that the plan block patterns can match.
//
// Steps, in order:
//  1. Unify the "High-Speed" / "High Speed" spellings to "Highspeed".
//  2. Strip tier badges (e.g. "`Save $80`"), "Subscribe Now" and
//     "Standard Plans:" boilerplate, and the colon after "Highspeed Plans".
//  3. Normalize line endings, turn table pipes and "---" rules into spaces,
//     collapse every whitespace run to one space and trim the ends.
//
// Steps 1 and 2 must be separate passes: strings.NewReplacer scans the input
// once and never re-examines text it has already replaced, so in a single
// replacer the step-2 rules that mention "Highspeed" could not match input
// spelled "High-Speed" (for example "Max-High-Speed `Save $160`" kept its
// badge and "High-Speed Plans:" kept its colon).
func normalizeMinimaxTokenPlanText(raw string) string {
	// Pass 1: spelling only. "Max-High-Speed" and "Ultra-High-Speed" are
	// covered by the generic "High-Speed" rule.
	spelling := strings.NewReplacer(
		"High-Speed", "Highspeed",
		"High Speed", "Highspeed",
	)
	// Pass 2: badge and boilerplate removal against the unified spelling.
	// At any one position the earliest listed rule wins, so the longer
	// "Subscribe Now Standard Plans:" must stay ahead of "Subscribe Now".
	cleanup := strings.NewReplacer(
		"Plus `Cost-Effective`", "Plus",
		"Max `Extra Large`", "Max",
		"Starter `Save $20`", "Starter",
		"Plus `Save $40`", "Plus",
		"Max `Save $100`", "Max",
		"Plus-Highspeed `Save $80`", "Plus-Highspeed",
		"Max-Highspeed `Save $160`", "Max-Highspeed",
		"Ultra-Highspeed `Save $300`", "Ultra-Highspeed",
		"Subscribe Now Standard Plans:", "",
		"Subscribe Now", "",
		"Standard Plans:", "",
		"Highspeed Plans:", "Highspeed Plans",
	)

	normalized := cleanup.Replace(spelling.Replace(raw))
	normalized = strings.ReplaceAll(normalized, "\r\n", "\n")
	normalized = strings.ReplaceAll(normalized, "\r", "\n")
	// Markdown table pipes and horizontal rules carry no data.
	normalized = strings.ReplaceAll(normalized, "|", " ")
	normalized = strings.ReplaceAll(normalized, "---", " ")
	normalized = regexp.MustCompile(`\s+`).ReplaceAllString(normalized, " ")
	return strings.TrimSpace(normalized)
}
ModelID string + ModelName string + ProviderName string + ProviderNameCn string + ProviderCountry string + ProviderWebsite string + OperatorName string + OperatorNameCn string + OperatorCountry string + OperatorWebsite string + OperatorType string + Region string + Currency string + InputPrice float64 + OutputPrice float64 + ContextLength int + IsFree bool + SourceURL string + ModelSourceURL string + ReleaseDate string + DateConfidence string + DateSourceKind string + Modality string +} + +func upsertOfficialPricingRecords(db *sql.DB, records []officialPricingRecord, batchID string) error { + records = dedupeOfficialPricingRecords(records) + if len(records) == 0 { + return fmt.Errorf("official pricing records are empty") + } + if strings.TrimSpace(batchID) == "" { + batchID = fmt.Sprintf("official-pricing-%s", time.Now().Format("20060102-150405")) + } + + for _, record := range records { + providerID, err := ensureOfficialPricingProvider(db, record) + if err != nil { + return err + } + operatorID, err := ensureOfficialPricingOperator(db, record) + if err != nil { + return err + } + modelID, err := ensureOfficialPricingModel(db, record, providerID, batchID) + if err != nil { + return err + } + + sourceType := officialPricingSourceType(record.OperatorType, record.IsFree) + freeQuota := "" + freeLimitations := "[]" + rateLimit := "{}" + if record.IsFree { + freeQuota = "See source_url for provider free-tier details" + freeLimitations = `["See source_url for current quota and policy"]` + } + + _, err = db.Exec( + `INSERT INTO region_pricing ( + model_id, operator_id, region, currency, + input_price_per_mtok, output_price_per_mtok, + is_free, effective_date, source_url, source_type, + free_quota, free_limitations, rate_limit + ) VALUES ( + $1, $2, $3, $4, + $5, $6, $7, CURRENT_DATE, $8, $9, + $10, $11, $12 + ) + ON CONFLICT (model_id, operator_id, region, currency, effective_date) + DO UPDATE SET + input_price_per_mtok = EXCLUDED.input_price_per_mtok, + 
output_price_per_mtok = EXCLUDED.output_price_per_mtok, + is_free = EXCLUDED.is_free, + source_url = EXCLUDED.source_url, + source_type = EXCLUDED.source_type, + free_quota = EXCLUDED.free_quota, + free_limitations = EXCLUDED.free_limitations, + rate_limit = EXCLUDED.rate_limit, + updated_at = CURRENT_TIMESTAMP`, + modelID, operatorID, record.Region, record.Currency, + record.InputPrice, record.OutputPrice, record.IsFree, record.SourceURL, sourceType, + nullIfBlank(freeQuota), freeLimitations, rateLimit, + ) + if err != nil { + return fmt.Errorf("upsert region_pricing %s: %w", record.ModelID, err) + } + } + return nil +} + +func ensureOfficialPricingProvider(db *sql.DB, record officialPricingRecord) (int64, error) { + var providerID int64 + err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, record.ProviderName).Scan(&providerID) + if err == nil { + _, updateErr := db.Exec( + `UPDATE model_provider + SET name_cn = COALESCE(name_cn, $2), + website = COALESCE(NULLIF(website, ''), $3), + updated_at = CURRENT_TIMESTAMP + WHERE id = $1`, + providerID, nullIfBlank(record.ProviderNameCn), nullIfBlank(record.ProviderWebsite), + ) + return providerID, updateErr + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO model_provider (name, name_cn, country, website, status) + VALUES ($1, $2, $3, $4, 'active') + RETURNING id`, + record.ProviderName, nullIfBlank(record.ProviderNameCn), record.ProviderCountry, nullIfBlank(record.ProviderWebsite), + ).Scan(&providerID) + return providerID, err +} + +func ensureOfficialPricingOperator(db *sql.DB, record officialPricingRecord) (int64, error) { + var operatorID int64 + err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, record.OperatorName).Scan(&operatorID) + if err == nil { + _, updateErr := db.Exec( + `UPDATE operator + SET name_cn = COALESCE(name_cn, $2), + website = COALESCE(NULLIF(website, ''), $3), + type = COALESCE(NULLIF(type, ''), $4), + updated_at = 
CURRENT_TIMESTAMP + WHERE id = $1`, + operatorID, nullIfBlank(record.OperatorNameCn), nullIfBlank(record.OperatorWebsite), nullIfBlank(record.OperatorType), + ) + return operatorID, updateErr + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO operator (name, name_cn, country, website, description, status, type) + VALUES ($1, $2, $3, $4, $5, 'active', $6) + RETURNING id`, + record.OperatorName, nullIfBlank(record.OperatorNameCn), record.OperatorCountry, nullIfBlank(record.OperatorWebsite), + fmt.Sprintf("%s official pricing import", record.OperatorName), record.OperatorType, + ).Scan(&operatorID) + return operatorID, err +} + +func ensureOfficialPricingModel(db *sql.DB, record officialPricingRecord, providerID int64, batchID string) (int64, error) { + var modelID int64 + err := db.QueryRow(`SELECT id FROM models WHERE external_id = $1`, record.ModelID).Scan(&modelID) + if err == sql.ErrNoRows { + err = db.QueryRow( + `INSERT INTO models ( + external_id, name, provider_id, modality, context_length, + status, source, batch_id, source_url, release_date, + date_confidence, date_source_kind + ) VALUES ( + $1, $2, $3, $4, $5, + 'active', $6, $7, $8, $9, + $10, $11 + ) RETURNING id`, + record.ModelID, record.ModelName, providerID, fallbackModality(record.Modality), nullIfZeroIntCommon(record.ContextLength), + record.OperatorName, batchID, firstNonEmptyText(record.ModelSourceURL, record.SourceURL), releaseDateValueCommon(record.ReleaseDate), + fallbackDateConfidence(record.DateConfidence), fallbackDateSourceKind(record.DateSourceKind), + ).Scan(&modelID) + if err != nil { + return 0, err + } + return modelID, nil + } + if err != nil { + return 0, err + } + + _, err = db.Exec( + `UPDATE models + SET name = $2, + provider_id = $3, + modality = COALESCE($4, modality), + context_length = COALESCE($5, context_length), + source = $6, + batch_id = $7, + source_url = COALESCE(NULLIF(source_url, ''), $8), + release_date = COALESCE(release_date, 
// releaseDateValueCommon converts a "YYYY-MM-DD" release date into a value
// suitable for a SQL parameter.
//
// It returns nil (SQL NULL) when raw is blank or is not a valid date in that
// layout, and a time.Time otherwise. Surrounding whitespace is ignored: the
// blank check already trimmed the input, so the parse now uses the same
// trimmed text instead of silently discarding padded dates.
func releaseDateValueCommon(raw string) any {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return nil
	}
	parsed, err := time.Parse("2006-01-02", trimmed)
	if err != nil {
		// Unparseable dates are treated as unknown rather than failing the import.
		return nil
	}
	return parsed
}
client == nil { + client = &http.Client{Timeout: 20 * time.Second} + } + + var lastErr error + for attempt := 1; attempt <= officialPricingFetchMaxAttempts; attempt++ { + body, retryable, err := fetchRawPricingPageOnce(url, client, opts) + if err == nil { + return body, nil + } + lastErr = err + if !retryable || attempt == officialPricingFetchMaxAttempts { + return "", err + } + time.Sleep(time.Duration(attempt) * 200 * time.Millisecond) + } + return "", lastErr +} + +func fetchRawPricingPageOnce(url string, client *http.Client, opts officialPricingFetchOptions) (string, bool, error) { + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return "", false, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/json,text/plain;q=0.9,*/*;q=0.8") + if strings.TrimSpace(opts.AcceptLanguage) != "" { + req.Header.Set("Accept-Language", opts.AcceptLanguage) + } + + resp, err := client.Do(req) + if err != nil { + return "", isRetriablePricingFetchError(err), fmt.Errorf("fetch %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + retryable := resp.StatusCode == http.StatusTooManyRequests || + resp.StatusCode == http.StatusBadGateway || + resp.StatusCode == http.StatusServiceUnavailable || + resp.StatusCode == http.StatusGatewayTimeout + return "", retryable, fmt.Errorf("fetch %s: unexpected status %d", url, resp.StatusCode) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", isRetriablePricingFetchError(err), fmt.Errorf("read %s: %w", url, err) + } + return string(body), false, nil +} + +func isRetriablePricingFetchError(err error) bool { + if err == nil { + return false + } + lower := strings.ToLower(err.Error()) + for _, marker := range []string{ + "eof", + "timeout", + "temporarily unavailable", + "transport 
// cleanHTMLText reduces an HTML fragment to plain text: tags become single
// spaces, HTML entities are decoded, CR/CRLF line endings become "\n",
// non-breaking spaces become regular spaces, runs of spaces/tabs collapse to
// one space, and the result is trimmed. Newlines are preserved.
//
// Tags are stripped before entities are decoded. Decoding first would turn
// escaped text such as "1 &lt; 2 ... &gt;" into literal angle brackets that
// the tag pattern then deletes as if they were markup.
func cleanHTMLText(raw string) string {
	tagPattern := regexp.MustCompile(`(?is)<[^>]+>`)
	spacePattern := regexp.MustCompile(`[ \t]+`)

	text := tagPattern.ReplaceAllString(raw, " ")
	text = html.UnescapeString(text)
	// Line-ending and NBSP normalisation run after decoding so that characters
	// produced by entities (e.g. &nbsp;, &#13;) are normalised as well.
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")
	text = strings.ReplaceAll(text, "\u00a0", " ")
	text = spacePattern.ReplaceAllString(text, " ")
	return strings.TrimSpace(text)
}
"vision"), strings.Contains(lower, "vl"), strings.Contains(lower, "omni"), strings.Contains(lower, "multi"), strings.Contains(lower, "live"): + return "multimodal" + default: + return "text" + } +} + +func providerMetadata(providerName string) (string, string, string) { + switch providerName { + case "Alibaba", "Qwen": + return "阿里云", "CN", "https://tongyi.aliyun.com" + case "Amazon": + return "亚马逊", "US", "https://aws.amazon.com" + case "Anthropic": + return "Anthropic", "US", "https://www.anthropic.com" + case "Baidu": + return "百度", "CN", "https://cloud.baidu.com" + case "Cloudflare": + return "Cloudflare", "US", "https://www.cloudflare.com" + case "Cohere": + return "Cohere", "CA", "https://cohere.com" + case "DeepSeek": + return "深度求索", "CN", "https://www.deepseek.com" + case "Google": + return "谷歌", "US", "https://ai.google.dev" + case "Meta": + return "Meta", "US", "https://about.meta.com" + case "MiniMax": + return "MiniMax", "CN", "https://www.minimax.io" + case "Mistral AI": + return "Mistral AI", "FR", "https://mistral.ai" + case "Moonshot AI": + return "月之暗面", "CN", "https://www.moonshot.cn" + case "NVIDIA": + return "NVIDIA", "US", "https://build.nvidia.com" + case "OpenAI": + return "OpenAI", "US", "https://openai.com" + case "Perplexity": + return "Perplexity", "US", "https://www.perplexity.ai" + case "xAI": + return "xAI", "US", "https://x.ai" + case "Zhipu AI": + return "智谱", "CN", "https://open.bigmodel.cn" + default: + return "", "unknown", "" + } +} + +func providerFromModelPath(modelName string) string { + lower := strings.ToLower(modelName) + switch { + case strings.HasPrefix(lower, "amazon/"): + return "Amazon" + case strings.HasPrefix(lower, "anthropic/"): + return "Anthropic" + case strings.HasPrefix(lower, "cohere/"): + return "Cohere" + case strings.HasPrefix(lower, "qwen/"): + return "Qwen" + case strings.HasPrefix(lower, "deepseek"), strings.HasPrefix(lower, "deepseek-ai/"): + return "DeepSeek" + case strings.HasPrefix(lower, 
"google/"), strings.HasPrefix(lower, "gemini/"): + return "Google" + case strings.HasPrefix(lower, "meta/"): + return "Meta" + case strings.HasPrefix(lower, "mistral/"), strings.HasPrefix(lower, "mistralai/"): + return "Mistral AI" + case strings.HasPrefix(lower, "moonshotai/"): + return "Moonshot AI" + case strings.HasPrefix(lower, "minimaxai/"): + return "MiniMax" + case strings.HasPrefix(lower, "nvidia/"): + return "NVIDIA" + case strings.HasPrefix(lower, "perplexity/"): + return "Perplexity" + case strings.HasPrefix(lower, "zai-org/"), strings.HasPrefix(lower, "glm/"): + return "Zhipu AI" + case strings.HasPrefix(lower, "openai/"): + return "OpenAI" + case strings.HasPrefix(lower, "xai/"): + return "xAI" + default: + return "unknown" + } +} + +func dedupeOfficialPricingRecords(records []officialPricingRecord) []officialPricingRecord { + seen := make(map[string]officialPricingRecord) + order := make([]string, 0, len(records)) + for _, record := range records { + key := strings.Join([]string{ + record.OperatorName, + record.ModelID, + record.Region, + record.Currency, + }, "|") + if _, exists := seen[key]; !exists { + order = append(order, key) + } + seen[key] = record + } + result := make([]officialPricingRecord, 0, len(order)) + for _, key := range order { + result = append(result, seen[key]) + } + return result +} diff --git a/scripts/official_pricing_import_common_test.go b/scripts/official_pricing_import_common_test.go new file mode 100644 index 0000000..0abd400 --- /dev/null +++ b/scripts/official_pricing_import_common_test.go @@ -0,0 +1,49 @@ +//go:build llm_script + +package main + +import ( + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" +) + +func TestFetchRawPricingPageRetriesTransientStatus(t *testing.T) { + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + current := atomic.AddInt32(&attempts, 1) + if current == 1 { + http.Error(w, "temporary", 
http.StatusServiceUnavailable) + return + } + _, _ = w.Write([]byte("ok")) + })) + defer server.Close() + + client := &http.Client{Timeout: 2 * time.Second} + body, err := fetchRawPricingPage(server.URL, "", client) + if err != nil { + t.Fatalf("fetchRawPricingPage returned error: %v", err) + } + if body != "ok" { + t.Fatalf("body = %q, want ok", body) + } + if got := atomic.LoadInt32(&attempts); got != 2 { + t.Fatalf("attempts = %d, want 2", got) + } +} + +func TestIsRetriablePricingFetchErrorRecognizesEOF(t *testing.T) { + if !isRetriablePricingFetchError(errString("unexpected EOF")) { + t.Fatalf("expected EOF to be retriable") + } + if isRetriablePricingFetchError(errString("bad request")) { + t.Fatalf("expected bad request to be non-retriable") + } +} + +type errString string + +func (e errString) Error() string { return string(e) } diff --git a/scripts/platform360_pricing_lib.go b/scripts/platform360_pricing_lib.go new file mode 100644 index 0000000..2f9507f --- /dev/null +++ b/scripts/platform360_pricing_lib.go @@ -0,0 +1,76 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const default360PricingURL = "https://ai.360.com/open/models" + +var platform360CardPattern = regexp.MustCompile(`(?s)([A-Za-z0-9._/-]+)\n([^\n]+)\n.*?(?:输入价格|Input Price)\s*:\s*¥([\d.]+)\s*/\s*1M tokens.*?(?:输出价格|Output Price)\s*:\s*¥([\d.]+)\s*/\s*1M tokens.*?(?:上下文|Context)\s*:\s*([\d,]+)`) + +func parse360PricingCatalog(raw string) ([]officialPricingRecord, error) { + matches := platform360CardPattern.FindAllStringSubmatch(raw, -1) + if len(matches) == 0 { + return nil, fmt.Errorf("unexpected 360 pricing content") + } + + records := make([]officialPricingRecord, 0, len(matches)) + for _, match := range matches { + modelName := strings.TrimSpace(match[1]) + providerName := normalize360Provider(match[2], modelName) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + record := officialPricingRecord{ + ModelID: 
normalizeExternalID("360", modelName), + ModelName: modelName, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "360 Open Platform", + OperatorNameCn: "360 智脑开放平台", + OperatorCountry: "CN", + OperatorWebsite: "https://ai.360.com", + OperatorType: "relay", + Region: "CN", + Currency: "CNY", + InputPrice: mustParseSubscriptionPrice(match[3]), + OutputPrice: mustParseSubscriptionPrice(match[4]), + ContextLength: parseContextLengthCommon(match[5]), + SourceURL: default360PricingURL, + ModelSourceURL: default360PricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_product_page", + Modality: detectModality(modelName), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + return records, nil +} + +func normalize360Provider(raw string, modelName string) string { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "deepseek", "深度求索": + return "DeepSeek" + case "moonshot ai", "月之暗面": + return "Moonshot AI" + case "qwen", "阿里巴巴", "通义千问": + return "Qwen" + case "zhipu", "智谱": + return "Zhipu AI" + case "字节跳动": + return "ByteDance" + case "360智脑": + return "360" + default: + providerByPath := providerFromModelPath(modelName) + if providerByPath != "unknown" { + return providerByPath + } + return strings.TrimSpace(raw) + } +} diff --git a/scripts/ppio_pricing_lib.go b/scripts/ppio_pricing_lib.go new file mode 100644 index 0000000..ca5f619 --- /dev/null +++ b/scripts/ppio_pricing_lib.go @@ -0,0 +1,64 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const defaultPPIOPricingURL = "https://resource.ppio.com/pricing?type=enterprise" + +var ppioBlockPattern = regexp.MustCompile(`(?s)([a-z0-9._/-]+)\n([\d,]+)\n(.*?)在线体验`) +var ppioPricePattern = regexp.MustCompile(`(?s)¥\s*([\d.]+)\s*/\s*Mt`) + +func parsePPIOPricingCatalog(raw string) ([]officialPricingRecord, 
error) { + matches := ppioBlockPattern.FindAllStringSubmatch(raw, -1) + records := make([]officialPricingRecord, 0, len(matches)) + for _, match := range matches { + modelLine := strings.TrimSpace(match[1]) + contextLength := parseContextLengthCommon(match[2]) + section := match[3] + if strings.Contains(section, "阶梯计费") { + continue + } + priceMatches := ppioPricePattern.FindAllStringSubmatch(section, -1) + if len(priceMatches) < 2 { + continue + } + inputPrice := mustParseSubscriptionPrice(priceMatches[len(priceMatches)-2][1]) + outputPrice := mustParseSubscriptionPrice(priceMatches[len(priceMatches)-1][1]) + providerName := providerFromModelPath(modelLine) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + record := officialPricingRecord{ + ModelID: normalizeExternalID("ppio", modelLine), + ModelName: modelLine, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "PPIO Model API", + OperatorNameCn: "PPIO 模型 API", + OperatorCountry: "CN", + OperatorWebsite: "https://ppinfra.com", + OperatorType: "relay", + Region: "CN", + Currency: "CNY", + InputPrice: inputPrice, + OutputPrice: outputPrice, + ContextLength: contextLength, + SourceURL: defaultPPIOPricingURL, + ModelSourceURL: defaultPPIOPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_product_page", + Modality: detectModality(modelLine), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + if len(records) == 0 { + return nil, fmt.Errorf("unexpected ppio pricing content") + } + return records, nil +} diff --git a/scripts/siliconflow_pricing_lib.go b/scripts/siliconflow_pricing_lib.go new file mode 100644 index 0000000..049b657 --- /dev/null +++ b/scripts/siliconflow_pricing_lib.go @@ -0,0 +1,61 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const 
defaultSiliconFlowPricingURL = "https://siliconflow.cn/pricing" + +var siliconFlowCardPattern = regexp.MustCompile(`(?s)([A-Za-z0-9._/-]+)\n输入 \(元 / M tokens\)\n输出 \(元 / M tokens\)\n(免费|[\d.]+)\n(免费|[\d.]+)`) + +func parseSiliconFlowPricingCatalog(raw string) ([]officialPricingRecord, error) { + matches := siliconFlowCardPattern.FindAllStringSubmatch(raw, -1) + if len(matches) == 0 { + return nil, fmt.Errorf("unexpected siliconflow pricing content") + } + + records := make([]officialPricingRecord, 0, len(matches)) + for _, match := range matches { + modelName := strings.TrimSpace(match[1]) + providerName := providerFromModelPath(modelName) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + inputPrice := parseSiliconFlowPrice(match[2]) + outputPrice := parseSiliconFlowPrice(match[3]) + record := officialPricingRecord{ + ModelID: normalizeExternalID("siliconflow", modelName), + ModelName: modelName, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "SiliconCloud", + OperatorNameCn: "SiliconCloud", + OperatorCountry: "CN", + OperatorWebsite: "https://siliconflow.cn", + OperatorType: "relay", + Region: "CN", + Currency: "CNY", + InputPrice: inputPrice, + OutputPrice: outputPrice, + SourceURL: defaultSiliconFlowPricingURL, + ModelSourceURL: defaultSiliconFlowPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_product_page", + Modality: detectModality(modelName), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + return records, nil +} + +func parseSiliconFlowPrice(raw string) float64 { + if strings.TrimSpace(raw) == "免费" { + return 0 + } + return mustParseSubscriptionPrice(raw) +} diff --git a/scripts/subscription_import_common.go b/scripts/subscription_import_common.go new file mode 100644 index 0000000..b905ee9 --- /dev/null +++ 
b/scripts/subscription_import_common.go @@ -0,0 +1,628 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "fmt" + "html" + "io" + "net/http" + "os" + "regexp" + "sort" + "strconv" + "strings" + "time" + + _ "github.com/lib/pq" +) + +const subscriptionFetchMaxAttempts = 3 + +type subscriptionImportRecord struct { + ProviderName string + ProviderNameCn string + ProviderCountry string + ProviderWebsite string + OperatorName string + OperatorNameCn string + OperatorCountry string + OperatorWebsite string + OperatorType string + PlanFamily string + PlanCode string + PlanName string + Tier string + BillingCycle string + Currency string + ListPrice float64 + PriceUnit string + QuotaValue int64 + QuotaUnit string + ContextWindow int + PlanScope string + ModelScope []string + SourceURL string + PublishedAt string + EffectiveDate string + Notes string + PublishedAtKnown bool +} + +func loadSubscriptionImportEnv() { + for _, path := range []string{".env.local", ".env"} { + loadSubscriptionEnvFile(path) + } +} + +func loadSubscriptionEnvFile(path string) { + data, err := os.ReadFile(path) + if err != nil { + return + } + + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + key, value, ok := strings.Cut(line, "=") + if !ok { + continue + } + key = strings.TrimSpace(key) + value = strings.Trim(strings.TrimSpace(value), `"'`) + if key == "" { + continue + } + if _, exists := os.LookupEnv(key); exists { + continue + } + _ = os.Setenv(key, value) + } +} + +func subscriptionImportDB() (*sql.DB, error) { + dsn := os.Getenv("DATABASE_URL") + if dsn == "" { + dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" + } + return sql.Open("postgres", dsn) +} + +func fetchSubscriptionPage(url string, fixture string, client *http.Client) (string, error) { + if fixture != "" { + data, err := os.ReadFile(fixture) + if err != nil { + 
return "", fmt.Errorf("read fixture %s: %w", fixture, err) + } + return string(data), nil + } + + var lastErr error + for attempt := 1; attempt <= subscriptionFetchMaxAttempts; attempt++ { + body, retryable, err := fetchSubscriptionPageOnce(url, client) + if err == nil { + return body, nil + } + lastErr = err + if !retryable || attempt == subscriptionFetchMaxAttempts { + return "", err + } + time.Sleep(time.Duration(attempt) * 200 * time.Millisecond) + } + return "", lastErr +} + +func fetchSubscriptionPageOnce(url string, client *http.Client) (string, bool, error) { + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return "", false, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8") + req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + + resp, err := client.Do(req) + if err != nil { + return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("fetch %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + retryable := resp.StatusCode == http.StatusForbidden || + resp.StatusCode == http.StatusTooManyRequests || + resp.StatusCode == http.StatusBadGateway || + resp.StatusCode == http.StatusServiceUnavailable || + resp.StatusCode == http.StatusGatewayTimeout + return "", retryable, fmt.Errorf("fetch %s: unexpected status %d", url, resp.StatusCode) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("read %s: %w", url, err) + } + return normalizeSubscriptionPage(string(body)), false, nil +} + +func isRetriableSubscriptionFetchError(err error) bool { + if err == nil { + return false + } + lower := strings.ToLower(err.Error()) + for _, marker := range []string{ + "eof", + "timeout", + "temporarily unavailable", + "transport 
// normalizeSubscriptionPage flattens an HTML page into newline-separated text
// lines that the plan parsers can match against.
//
// Steps, in order: drop <script> and <style> elements together with their
// contents, replace every remaining tag with a newline, decode HTML entities,
// normalise CR/CRLF to "\n", then collapse runs of spaces/tabs inside each
// line, trim it, and discard empty lines.
//
// The script/style patterns must match the whole element. An empty pattern
// matches at every position, which would insert a newline between every
// character of the page and leave one character per output line.
func normalizeSubscriptionPage(raw string) string {
	text := raw
	scriptPattern := regexp.MustCompile(`(?is)<script\b[^>]*>.*?</script\s*>`)
	stylePattern := regexp.MustCompile(`(?is)<style\b[^>]*>.*?</style\s*>`)
	tagPattern := regexp.MustCompile(`(?is)<[^>]+>`)
	spacePattern := regexp.MustCompile(`[ \t]+`)

	text = scriptPattern.ReplaceAllString(text, "\n")
	text = stylePattern.ReplaceAllString(text, "\n")
	text = tagPattern.ReplaceAllString(text, "\n")
	// Entities are decoded only after tags are gone so escaped "<"/">" in the
	// page text are not mistaken for markup.
	text = html.UnescapeString(text)
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	lines := strings.Split(text, "\n")
	cleaned := make([]string, 0, len(lines))
	for _, line := range lines {
		line = spacePattern.ReplaceAllString(line, " ")
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		cleaned = append(cleaned, line)
	}
	return strings.Join(cleaned, "\n")
}
:= make(map[snapshotKey]struct{}) + for _, record := range records { + providerID, err := ensureSubscriptionProvider(db, record) + if err != nil { + return err + } + operatorID, err := ensureSubscriptionOperator(db, record) + if err != nil { + return err + } + if !record.PublishedAtKnown { + history, err := loadSubscriptionSnapshotHistory(db, providerID, record.PlanCode) + if err != nil { + return err + } + if _, err := reuseExistingSnapshotDates(&record, history); err != nil { + return err + } + } + + publishedAt, err := time.Parse("2006-01-02 15:04:05", record.PublishedAt) + if err != nil { + return fmt.Errorf("parse published_at for %s: %w", record.PlanCode, err) + } + effectiveDate, err := time.Parse("2006-01-02", record.EffectiveDate) + if err != nil { + return fmt.Errorf("parse effective_date for %s: %w", record.PlanCode, err) + } + + modelScopeRaw, err := json.Marshal(record.ModelScope) + if err != nil { + return fmt.Errorf("marshal model_scope for %s: %w", record.PlanCode, err) + } + + _, err = db.Exec( + `INSERT INTO subscription_plan ( + provider_id, operator_id, plan_family, plan_code, plan_name, tier, + billing_cycle, currency, list_price, price_unit, quota_value, quota_unit, + context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes + ) VALUES ( + $1, $2, $3, $4, $5, $6, + $7, $8, $9, $10, $11, $12, + $13, $14, $15, $16, $17, $18, $19 + ) + ON CONFLICT (provider_id, plan_code, effective_date) + DO UPDATE SET + operator_id = EXCLUDED.operator_id, + plan_family = EXCLUDED.plan_family, + plan_name = EXCLUDED.plan_name, + tier = EXCLUDED.tier, + billing_cycle = EXCLUDED.billing_cycle, + currency = EXCLUDED.currency, + list_price = EXCLUDED.list_price, + price_unit = EXCLUDED.price_unit, + quota_value = EXCLUDED.quota_value, + quota_unit = EXCLUDED.quota_unit, + context_window = EXCLUDED.context_window, + plan_scope = EXCLUDED.plan_scope, + model_scope = EXCLUDED.model_scope, + source_url = EXCLUDED.source_url, + published_at 
= EXCLUDED.published_at, + notes = EXCLUDED.notes, + updated_at = CURRENT_TIMESTAMP`, + providerID, operatorID, record.PlanFamily, record.PlanCode, record.PlanName, record.Tier, + record.BillingCycle, record.Currency, record.ListPrice, record.PriceUnit, nullIfZeroInt64(record.QuotaValue), nullIfBlank(record.QuotaUnit), + nullIfZeroIntCommon(record.ContextWindow), nullIfBlank(record.PlanScope), string(modelScopeRaw), record.SourceURL, publishedAt, effectiveDate, nullIfBlank(record.Notes), + ) + if err != nil { + return fmt.Errorf("upsert subscription_plan %s: %w", record.PlanCode, err) + } + historyKeys[snapshotKey{providerID: providerID, planCode: record.PlanCode}] = struct{}{} + } + for key := range historyKeys { + if err := compactSubscriptionSnapshotHistory(db, key.providerID, key.planCode); err != nil { + return err + } + } + return nil +} + +type subscriptionSnapshotRow struct { + ID int64 + PlanName string + Tier string + BillingCycle string + Currency string + ListPrice float64 + PriceUnit string + QuotaValue int64 + QuotaUnit string + ContextWindow int + PlanScope string + ModelScope string + SourceURL string + Notes string + PublishedAt time.Time + EffectiveDate time.Time +} + +func loadSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) ([]subscriptionSnapshotRow, error) { + rows, err := db.Query( + `SELECT + id, + plan_name, + tier, + billing_cycle, + currency, + list_price, + price_unit, + COALESCE(quota_value, 0), + COALESCE(quota_unit, ''), + COALESCE(context_window, 0), + COALESCE(plan_scope, ''), + model_scope, + source_url, + COALESCE(notes, ''), + published_at, + effective_date + FROM subscription_plan + WHERE provider_id = $1 AND plan_code = $2 + ORDER BY effective_date DESC, published_at DESC NULLS LAST, id DESC`, + providerID, planCode, + ) + if err != nil { + return nil, fmt.Errorf("load subscription snapshot history %s: %w", planCode, err) + } + defer rows.Close() + + history := make([]subscriptionSnapshotRow, 0) + for 
rows.Next() { + row := subscriptionSnapshotRow{} + if err := rows.Scan( + &row.ID, + &row.PlanName, + &row.Tier, + &row.BillingCycle, + &row.Currency, + &row.ListPrice, + &row.PriceUnit, + &row.QuotaValue, + &row.QuotaUnit, + &row.ContextWindow, + &row.PlanScope, + &row.ModelScope, + &row.SourceURL, + &row.Notes, + &row.PublishedAt, + &row.EffectiveDate, + ); err != nil { + return nil, fmt.Errorf("scan subscription snapshot history %s: %w", planCode, err) + } + history = append(history, row) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate subscription snapshot history %s: %w", planCode, err) + } + return history, nil +} + +func reuseExistingSnapshotDates(record *subscriptionImportRecord, history []subscriptionSnapshotRow) (bool, error) { + if record == nil || record.PublishedAtKnown || len(history) == 0 { + return false, nil + } + + modelScopeRaw, err := json.Marshal(record.ModelScope) + if err != nil { + return false, fmt.Errorf("marshal model_scope for snapshot comparison %s: %w", record.PlanCode, err) + } + + for _, existing := range history { + if existing.PlanName != record.PlanName || + existing.Tier != record.Tier || + existing.BillingCycle != record.BillingCycle || + existing.Currency != record.Currency || + existing.ListPrice != record.ListPrice || + existing.PriceUnit != record.PriceUnit || + existing.QuotaValue != record.QuotaValue || + existing.QuotaUnit != strings.TrimSpace(record.QuotaUnit) || + existing.ContextWindow != record.ContextWindow || + existing.PlanScope != strings.TrimSpace(record.PlanScope) || + existing.ModelScope != string(modelScopeRaw) || + existing.SourceURL != record.SourceURL || + existing.Notes != strings.TrimSpace(record.Notes) { + continue + } + + record.PublishedAt = existing.PublishedAt.Format("2006-01-02 15:04:05") + record.EffectiveDate = existing.EffectiveDate.Format("2006-01-02") + return true, nil + } + return false, nil +} + +func compactSubscriptionSnapshotHistory(db *sql.DB, providerID int64, 
planCode string) error { + history, err := loadSubscriptionSnapshotHistory(db, providerID, planCode) + if err != nil { + return err + } + for _, id := range redundantSnapshotRowIDs(history) { + if _, err := db.Exec(`DELETE FROM subscription_plan WHERE id = $1`, id); err != nil { + return fmt.Errorf("delete redundant subscription snapshot %d for %s: %w", id, planCode, err) + } + } + return nil +} + +func redundantSnapshotRowIDs(history []subscriptionSnapshotRow) []int64 { + type signatureKey struct { + PlanName string + Tier string + BillingCycle string + Currency string + ListPrice float64 + PriceUnit string + QuotaValue int64 + QuotaUnit string + ContextWindow int + PlanScope string + ModelScope string + SourceURL string + Notes string + } + type keptSnapshot struct { + ID int64 + EffectiveDate time.Time + PublishedAt time.Time + } + + makeKey := func(row subscriptionSnapshotRow) signatureKey { + return signatureKey{ + PlanName: row.PlanName, + Tier: row.Tier, + BillingCycle: row.BillingCycle, + Currency: row.Currency, + ListPrice: row.ListPrice, + PriceUnit: row.PriceUnit, + QuotaValue: row.QuotaValue, + QuotaUnit: row.QuotaUnit, + ContextWindow: row.ContextWindow, + PlanScope: row.PlanScope, + ModelScope: row.ModelScope, + SourceURL: row.SourceURL, + Notes: row.Notes, + } + } + shouldReplace := func(current keptSnapshot, candidate subscriptionSnapshotRow) bool { + if candidate.EffectiveDate.Before(current.EffectiveDate) { + return true + } + if candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Before(current.PublishedAt) { + return true + } + return candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Equal(current.PublishedAt) && candidate.ID < current.ID + } + + keptBySignature := make(map[signatureKey]keptSnapshot) + redundant := make([]int64, 0) + for _, row := range history { + key := makeKey(row) + current, exists := keptBySignature[key] + if !exists { + keptBySignature[key] = keptSnapshot{ID: row.ID, 
EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt} + continue + } + if shouldReplace(current, row) { + redundant = append(redundant, current.ID) + keptBySignature[key] = keptSnapshot{ID: row.ID, EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt} + continue + } + redundant = append(redundant, row.ID) + } + sort.Slice(redundant, func(i, j int) bool { return redundant[i] < redundant[j] }) + return redundant +} + +func ensureSubscriptionProvider(db *sql.DB, record subscriptionImportRecord) (int64, error) { + var providerID int64 + err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, record.ProviderName).Scan(&providerID) + if err == nil { + return providerID, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO model_provider (name, name_cn, country, website, status) + VALUES ($1, $2, $3, $4, 'active') + RETURNING id`, + record.ProviderName, nullIfBlank(record.ProviderNameCn), record.ProviderCountry, nullIfBlank(record.ProviderWebsite), + ).Scan(&providerID) + return providerID, err +} + +func ensureSubscriptionOperator(db *sql.DB, record subscriptionImportRecord) (int64, error) { + var operatorID int64 + err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, record.OperatorName).Scan(&operatorID) + if err == nil { + return operatorID, nil + } + if err != sql.ErrNoRows { + return 0, err + } + + err = db.QueryRow( + `INSERT INTO operator (name, name_cn, country, website, description, status, type) + VALUES ($1, $2, $3, $4, $5, 'active', $6) + RETURNING id`, + record.OperatorName, nullIfBlank(record.OperatorNameCn), record.OperatorCountry, nullIfBlank(record.OperatorWebsite), + fmt.Sprintf("%s subscription import", record.OperatorName), record.OperatorType, + ).Scan(&operatorID) + return operatorID, err +} + +func summarizeSubscriptionImport(records []subscriptionImportRecord, getter func(subscriptionImportRecord) string) string { + counts := make(map[string]int) + keys := 
// nullIfBlank returns nil when value is empty or whitespace-only, so the
// database driver stores NULL; otherwise it returns value unchanged (not
// trimmed).
func nullIfBlank(value string) any {
	if strings.TrimSpace(value) == "" {
		return nil
	}
	return value
}

// nullIfZeroInt64 returns nil for 0 (stored as NULL) and value otherwise.
func nullIfZeroInt64(value int64) any {
	if value == 0 {
		return nil
	}
	return value
}

// nullIfZeroIntCommon returns nil for 0 (stored as NULL) and value otherwise.
func nullIfZeroIntCommon(value int) any {
	if value == 0 {
		return nil
	}
	return value
}

// normalizeSubscriptionNumber strips everything a scraped number may carry
// besides the digits themselves: any Unicode whitespace (including tabs,
// non-breaking and ideographic spaces) and ASCII or full-width thousands
// separators.
func normalizeSubscriptionNumber(raw string) string {
	compact := strings.Join(strings.Fields(raw), "")
	compact = strings.ReplaceAll(compact, ",", "")
	return strings.ReplaceAll(compact, "\uff0c", "")
}

// mustParseSubscriptionPrice parses a price such as "1,398" or "¥"-stripped
// "5 000.50" after removing whitespace and thousands separators.
//
// Despite the name it never panics: text that is not a number yields 0.
func mustParseSubscriptionPrice(raw string) float64 {
	value, _ := strconv.ParseFloat(normalizeSubscriptionNumber(raw), 64)
	return value
}

// mustParseSubscriptionInt64 parses a base-10 integer such as "625,000" after
// removing whitespace and thousands separators.
//
// Despite the name it never panics: text that is not an integer yields 0.
func mustParseSubscriptionInt64(raw string) int64 {
	value, _ := strconv.ParseInt(normalizeSubscriptionNumber(raw), 10, 64)
	return value
}

// firstNonEmptyText returns the first argument that is not empty or
// whitespace-only, unchanged (not trimmed), or "" when there is none.
func firstNonEmptyText(values ...string) string {
	for _, value := range values {
		if strings.TrimSpace(value) != "" {
			return value
		}
	}
	return ""
}

// parseDecimalMultiplier parses raw as a decimal number and scales it by
// unit, e.g. ("1.5", 10000) -> 15000.
//
// The product is rounded to the nearest integer (half away from zero) rather
// than truncated, because binary floating point makes products such as
// 0.57*100 come out as 56.99999999999999. Unparseable input, NaN and products
// outside the int64 range yield 0.
func parseDecimalMultiplier(raw string, unit int64) int64 {
	value, _ := strconv.ParseFloat(normalizeSubscriptionNumber(raw), 64)
	scaled := value * float64(unit)

	// 2^63 as a float64; converting anything at or beyond it (or NaN) to
	// int64 is implementation-defined in Go.
	const int64Limit = 9223372036854775808.0
	if scaled != scaled || scaled >= int64Limit || scaled <= -int64Limit {
		return 0
	}
	if scaled < 0 {
		return int64(scaled - 0.5)
	}
	return int64(scaled + 0.5)
}

// sliceSection returns the part of raw that follows the first occurrence of
// start, cut off before the next occurrence of end.
//
// It returns "" when start is absent. When end is empty or does not occur
// after start, everything after start is returned.
func sliceSection(raw string, start string, end string) string {
	startIndex := strings.Index(raw, start)
	if startIndex < 0 {
		return ""
	}
	section := raw[startIndex+len(start):]
	if end == "" {
		return section
	}
	if endIndex := strings.Index(section, end); endIndex >= 0 {
		return section[:endIndex]
	}
	return section
}
b/scripts/subscription_import_common_test.go @@ -0,0 +1,48 @@ +//go:build llm_script + +package main + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +type assertiveError string + +func (e assertiveError) Error() string { + return string(e) +} + +func TestFetchSubscriptionPageRetriesForbiddenThenSucceeds(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte("blocked")) + return + } + _, _ = w.Write([]byte("套餐价格")) + })) + defer server.Close() + + client := &http.Client{Timeout: 2 * time.Second} + body, err := fetchSubscriptionPage(server.URL, "", client) + if err != nil { + t.Fatalf("fetchSubscriptionPage 返回错误: %v", err) + } + if body != "套餐价格" { + t.Fatalf("返回体归一化错误: %q", body) + } + if attempts != 2 { + t.Fatalf("期望重试 1 次后成功,实际请求 %d 次", attempts) + } +} + +func TestIsRetriableSubscriptionFetchErrorRecognizesForbidden(t *testing.T) { + if !isRetriableSubscriptionFetchError(assertiveError("unexpected status 403")) { + t.Fatalf("403 应被视作可重试错误") + } +} diff --git a/scripts/testdata/aliyun_coding_plan_sample.txt b/scripts/testdata/aliyun_coding_plan_sample.txt new file mode 100644 index 0000000..f894e3b --- /dev/null +++ b/scripts/testdata/aliyun_coding_plan_sample.txt @@ -0,0 +1,21 @@ +# Coding Plan概述 +更新时间:2026-05-14 + +## 套餐详情 +Lite 套餐自 2026 年 3 月 20 日 00:00:00(UTC+08:00)起停止新购。 +Lite 套餐支持所有套餐模型(含千问、GLM、Kimi、MiniMax),与 Pro 套餐一致。 + +Pro 高级套餐 +支持的模型 +推荐模型:qwen3.6-plus、kimi-k2.5、glm-5、MiniMax-M2.5 +更多模型:qwen3.5-plus、qwen3-max-2026-01-23、qwen3-coder-next、qwen3-coder-plus、glm-4.7 + +价格 +¥ 200/月 + +用量限制 +每 5 小时 6,000 次请求 +每周 45,000 次请求 +每月 90,000 次请求 + +限时优惠:活动已结束,当前价格以下单页为准。 diff --git a/scripts/testdata/aliyun_token_plan_sample.txt b/scripts/testdata/aliyun_token_plan_sample.txt new file mode 100644 index 0000000..8bd74be --- /dev/null +++ 
b/scripts/testdata/aliyun_token_plan_sample.txt @@ -0,0 +1,31 @@ +# Token Plan(团队版)概述 +更新时间:2026-05-14 + +## 套餐与定价 +### Token Plan 团队版 +提供标准坐席、高级坐席、尊享坐席三个档位,匹配不同使用强度。 + +坐席类型 +价格 +额度 +适用场景 + +标准坐席 +¥198/坐席/月 +25,000 Credits/坐席/月 +轻度使用 AI 辅助的团队成员 + +高级坐席 +¥698/坐席/月 +100,000 Credits/坐席/月 +日常高频使用 AI 编程或办公的团队成员 + +尊享坐席 +¥1,398/坐席/月 +250,000 Credits/坐席/月 +重度依赖 AI 的核心开发者或高强度使用者 + +### Token Plan 团队版 - 共享用量包 +Token Plan 团队版 - 共享用量包 +¥5,000/个 +625,000 Credits/个 diff --git a/scripts/testdata/azure_openai_pricing_sample.json b/scripts/testdata/azure_openai_pricing_sample.json new file mode 100644 index 0000000..481aa9c --- /dev/null +++ b/scripts/testdata/azure_openai_pricing_sample.json @@ -0,0 +1,69 @@ +{ + "Items": [ + { + "currencyCode": "USD", + "retailPrice": 0.0022, + "unitPrice": 0.0022, + "location": "US East", + "meterName": "gpt 4.1 Inp regnl Tokens", + "productName": "Azure OpenAI", + "skuName": "gpt 4.1 Inp regnl", + "serviceName": "Foundry Models", + "unitOfMeasure": "1K", + "type": "Consumption", + "armSkuName": "gpt 4.1 Inp regnl" + }, + { + "currencyCode": "USD", + "retailPrice": 0.0088, + "unitPrice": 0.0088, + "location": "US East", + "meterName": "gpt 4.1 Outp regnl Tokens", + "productName": "Azure OpenAI", + "skuName": "gpt 4.1 Outp regnl", + "serviceName": "Foundry Models", + "unitOfMeasure": "1K", + "type": "Consumption", + "armSkuName": "gpt 4.1 Outp regnl" + }, + { + "currencyCode": "USD", + "retailPrice": 1.25, + "unitPrice": 1.25, + "location": "US West", + "meterName": "GPT 5 inp Glbl 1M Tokens", + "productName": "Azure OpenAI GPT5", + "skuName": "GPT 5 inp Glbl", + "serviceName": "Foundry Models", + "unitOfMeasure": "1M", + "type": "Consumption", + "armSkuName": "GPT 5 inp Glbl" + }, + { + "currencyCode": "USD", + "retailPrice": 10, + "unitPrice": 10, + "location": "US West", + "meterName": "GPT 5 outpt Glbl 1M Tokens", + "productName": "Azure OpenAI GPT5", + "skuName": "GPT 5 outpt Glbl", + "serviceName": "Foundry Models", + "unitOfMeasure": 
"1M", + "type": "Consumption", + "armSkuName": "GPT 5 outpt Glbl" + }, + { + "currencyCode": "USD", + "retailPrice": 0.625, + "unitPrice": 0.625, + "location": "US West", + "meterName": "GPT 5.1 Batch inp Gl 1M Tokens", + "productName": "Azure OpenAI GPT5", + "skuName": "GPT 5.1 Batch inp Gl", + "serviceName": "Foundry Models", + "unitOfMeasure": "1M", + "type": "Consumption", + "armSkuName": "GPT 5.1 Batch inp Gl" + } + ] +} diff --git a/scripts/testdata/baidu_coding_plan_sample.txt b/scripts/testdata/baidu_coding_plan_sample.txt new file mode 100644 index 0000000..3a3ad13 --- /dev/null +++ b/scripts/testdata/baidu_coding_plan_sample.txt @@ -0,0 +1,12 @@ +# Coding Plan +更新时间:2026-05-08 + +## 套餐详情 +### 套餐价格与限额 +套餐类型 价格 用量限制 +Coding Plan Lite ¥ 40 / 月 每 5 小时:最多约 1,200 次请求 +每周:最多约 9,000 次请求 +每订阅月:最多约 18,000 次请求 +Coding Plan Pro ¥ 200 / 月 每 5 小时:最多约 6,000 次请求 +每周:最多约 45,000 次请求 +每订阅月:最多约 90,000 次请求 diff --git a/scripts/testdata/baidu_token_benefit_pack_sample.txt b/scripts/testdata/baidu_token_benefit_pack_sample.txt new file mode 100644 index 0000000..14998bc --- /dev/null +++ b/scripts/testdata/baidu_token_benefit_pack_sample.txt @@ -0,0 +1,10 @@ +# Token 福利包 +更新时间:2026-05-08 + +## 套餐价格 +积分额度 有效期 原价 首购优惠价 +50,000 1个月 ¥50 ¥45 +100,000 1个月 ¥100 ¥90 +200,000 1个月 ¥200 ¥170 +400,000 1个月 ¥400 ¥340 +800,000 1个月 ¥800 ¥680 diff --git a/scripts/testdata/bedrock_pricing_sample.html b/scripts/testdata/bedrock_pricing_sample.html new file mode 100644 index 0000000..f6a513e --- /dev/null +++ b/scripts/testdata/bedrock_pricing_sample.html @@ -0,0 +1,66 @@ +Regions: US East (N. Virginia), US East (Ohio), and US West (Oregon)
+| Amazon Nova models | +Price per 1M input tokens (text) | +Price per 1M input tokens (image) | +Price per 1M input tokens (video) | +Price per 1M input tokens (audio) | +Price per 1M output tokens (text) | +Price per 1M output tokens (image) | +
| Amazon Nova 2 Omni (Preview) | +$0.12 | +$0.24 | +$0.36 | +$0.48 | +$0.96 | +$1.20 | +
| Amazon Nova 2 Lite | +$0.08 | +$0.10 | +$0.12 | +$0.14 | +$0.40 | +N/A | +
Regions: Europe (Frankfurt) and Asia Pacific (Jakarta)
+| Qwen models | +Price per 1M input tokens | +Price per 1M output tokens | +
| Qwen3 Coder Next | +$ 0.60 | +$ 1.44 | +
Region: Asia Pacific (Sydney)
+| Qwen models | +Price per 1M input tokens | +Price per 1M output tokens | +
| Qwen3 Next 80B A3B | +$ 0.1545 | +$ 1.2360 | +