diff --git a/scripts/import_baichuan_pricing.go b/scripts/import_baichuan_pricing.go
new file mode 100644
index 0000000..ec17452
--- /dev/null
+++ b/scripts/import_baichuan_pricing.go
@@ -0,0 +1,225 @@
+//go:build llm_script
+
+package main
+
+import (
+	"database/sql"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"regexp"
+	"sort"
+	"strings"
+	"time"
+)
+
+// defaultBaichuanPricingURL is the pricing page fetched when -url is not set.
+const defaultBaichuanPricingURL = "https://platform.baichuan-ai.com/prices"
+
+// baichuanPricingImportConfig holds the command-line options for one import run.
+type baichuanPricingImportConfig struct {
+	URL     string        // pricing page URL
+	Fixture string        // fixture path passed to fetchRawPricingPage together with URL
+	DryRun  bool          // print the summary only; skip all database work
+	Timeout time.Duration // HTTP client timeout
+}
+
+// baichuanPricingRow is one parsed model row; prices are per million tokens.
+type baichuanPricingRow struct {
+	Index         int // position of the source chunk on the page
+	ModelName     string
+	ContextLength int
+	InputPrice    float64
+	OutputPrice   float64
+}
+
+var (
+	// baichuanModelContextPattern captures the model name and its context label (digits followed by "k").
+	baichuanModelContextPattern = regexp.MustCompile(`模型调用\s+(Baichuan[-A-Za-z0-9]+)\s+([0-9]+k)`)
+	// baichuanPairPricePattern captures separate input and output prices per thousand tokens.
+	baichuanPairPricePattern = regexp.MustCompile(`输入:([0-9.]+)元/千tokens\s+输出:([0-9.]+)元/千tokens`)
+	// baichuanFlatPricePattern captures a single per-thousand-token price listed for a time window.
+	baichuanFlatPricePattern = regexp.MustCompile(`(?:00:00\s*~\s*24:00|00:00\s*~\s*8:00)\s+([0-9.]+)元/千tokens`)
+	// baichuanWhitespacePattern matches whitespace runs; compiled once instead of per parse call.
+	baichuanWhitespacePattern = regexp.MustCompile(`\s+`)
+)
+
+// main parses the flags, opens the database unless -dry-run is set, and runs the import.
+func main() {
+	loadSubscriptionImportEnv()
+
+	var (
+		pageURL        string
+		fixture        string
+		dryRun         bool
+		timeoutSeconds int
+	)
+	flag.StringVar(&pageURL, "url", defaultBaichuanPricingURL, "百川官方价格页")
+	flag.StringVar(&fixture, "fixture", "", "百川价格样例文件")
+	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
+	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
+	flag.Parse()
+
+	cfg := baichuanPricingImportConfig{
+		URL:     pageURL,
+		Fixture: fixture,
+		DryRun:  dryRun,
+		Timeout: time.Duration(timeoutSeconds) * time.Second,
+	}
+
+	var db *sql.DB
+	if !cfg.DryRun {
+		var err error
+		db, err = subscriptionImportDB()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
+			os.Exit(1)
+		}
+	}
+
+	err := runBaichuanPricingImport(cfg, db, os.Stdout)
+	// Close explicitly rather than with defer: os.Exit below would skip deferred calls.
+	if db != nil {
+		if closeErr := db.Close(); closeErr != nil {
+			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
+		}
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "import_baichuan_pricing: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// runBaichuanPricingImport fetches and parses the pricing page, then either prints a
+// dry-run summary to out or upserts the records through db and prints a summary that
+// includes the region_pricing row count.
+func runBaichuanPricingImport(cfg baichuanPricingImportConfig, db *sql.DB, out io.Writer) error {
+	client := &http.Client{Timeout: cfg.Timeout}
+	raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
+	if err != nil {
+		return err
+	}
+	records, err := parseBaichuanPricingCatalog(raw)
+	if err != nil {
+		return err
+	}
+	records = dedupeOfficialPricingRecords(records)
+	// Guard the records[0] reads in the summary lines below.
+	if len(records) == 0 {
+		return fmt.Errorf("unexpected baichuan pricing content: no records")
+	}
+	if cfg.DryRun {
+		_, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
+		return err
+	}
+	if db == nil {
+		return fmt.Errorf("db is required when dry-run=false")
+	}
+	if err := upsertOfficialPricingRecords(db, records, "baichuan-pricing-import"); err != nil {
+		return err
+	}
+	var tableRows int
+	if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
+		return fmt.Errorf("count region_pricing: %w", err)
+	}
+	_, err = fmt.Fprintf(out, "source=baichuan-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
+	return err
+}
+
+// parseBaichuanPricingCatalog converts the model price rows found between the
+// "通用大模型" and "搜索增强服务" headings of the page text into pricing records.
+// Chunks that mention Baichuan-Text-Embedding, or that lack a recognizable model,
+// context length, or price, are skipped.
+func parseBaichuanPricingCatalog(raw string) ([]officialPricingRecord, error) {
+	text := cleanHTMLText(raw)
+	// \s also matches newlines, so this single pass flattens the text onto one line.
+	text = baichuanWhitespacePattern.ReplaceAllString(text, " ")
+	text = strings.TrimSpace(text)
+
+	sectionStart := strings.Index(text, "通用大模型")
+	if sectionStart == -1 {
+		return nil, fmt.Errorf("unexpected baichuan pricing content: missing 通用大模型")
+	}
+	text = text[sectionStart:]
+	sectionEnd := strings.Index(text, "搜索增强服务")
+	if sectionEnd == -1 {
+		return nil, fmt.Errorf("unexpected baichuan pricing content: missing 搜索增强服务")
+	}
+	section := text[:sectionEnd]
+
+	chunks := strings.Split(section, "模型调用 ")
+	rows := make([]baichuanPricingRow, 0, len(chunks))
+	for idx, chunk := range chunks {
+		chunk = strings.TrimSpace(chunk)
+		if chunk == "" {
+			continue
+		}
+		// Put back the separator consumed by Split so the model pattern can match on it.
+		chunk = "模型调用 " + chunk
+		if strings.Contains(chunk, "Baichuan-Text-Embedding") {
+			continue
+		}
+		meta := baichuanModelContextPattern.FindStringSubmatch(chunk)
+		if len(meta) != 3 {
+			continue
+		}
+		modelName := strings.TrimSpace(meta[1])
+		contextLength := parseContextLengthCommon(meta[2])
+		if contextLength == 0 {
+			continue
+		}
+		row := baichuanPricingRow{Index: idx, ModelName: modelName, ContextLength: contextLength}
+		if pair := baichuanPairPricePattern.FindStringSubmatch(chunk); len(pair) == 3 {
+			row.InputPrice = baichuanPerKTokenToPerMToken(pair[1])
+			row.OutputPrice = baichuanPerKTokenToPerMToken(pair[2])
+		} else if flat := baichuanFlatPricePattern.FindStringSubmatch(chunk); len(flat) == 2 {
+			// Only one price is listed here; use it for input and output alike.
+			price := baichuanPerKTokenToPerMToken(flat[1])
+			row.InputPrice = price
+			row.OutputPrice = price
+		} else {
+			continue
+		}
+		rows = append(rows, row)
+	}
+	if len(rows) == 0 {
+		return nil, fmt.Errorf("unexpected baichuan pricing content: no model rows parsed")
+	}
+	sort.Slice(rows, func(i, j int) bool { return rows[i].Index < rows[j].Index })
+
+	providerNameCn, providerCountry, providerWebsite := providerMetadata("Baichuan")
+	records := make([]officialPricingRecord, 0, len(rows))
+	for _, row := range rows {
+		records = append(records, officialPricingRecord{
+			ModelID:         normalizeExternalID("baichuan", row.ModelName),
+			ModelName:       row.ModelName,
+			ProviderName:    "Baichuan",
+			ProviderNameCn:  providerNameCn,
+			ProviderCountry: providerCountry,
+			ProviderWebsite: providerWebsite,
+			OperatorName:    "Baichuan API",
+			OperatorNameCn:  "百川开放平台",
+			OperatorCountry: "CN",
+			OperatorWebsite: "https://platform.baichuan-ai.com/docs",
+			OperatorType:    "official",
+			Region:          "CN",
+			Currency:        "CNY",
+			InputPrice:      row.InputPrice,
+			OutputPrice:     row.OutputPrice,
+			ContextLength:   row.ContextLength,
+			SourceURL:       defaultBaichuanPricingURL,
+			ModelSourceURL:  defaultBaichuanPricingURL,
+			DateConfidence:  "unknown",
+			DateSourceKind:  "official_pricing",
+			Modality:        detectModality(row.ModelName),
+		})
+	}
+	return records, nil
+}
+
+// baichuanPerKTokenToPerMToken parses a per-thousand-token price and scales it to per-million.
+func baichuanPerKTokenToPerMToken(raw string) float64 {
+	return mustParseSubscriptionPrice(raw) * 1000
+}
diff --git
a/scripts/import_baichuan_pricing_test.go b/scripts/import_baichuan_pricing_test.go
new file mode 100644
index 0000000..6e1cc83
--- /dev/null
+++ b/scripts/import_baichuan_pricing_test.go
@@ -0,0 +1,68 @@
+//go:build llm_script
+
+package main
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestParseBaichuanPricingCatalogBuildsRecords parses the checked-in fixture and
+// spot-checks the record count, first model ID, prices, and one context length.
+func TestParseBaichuanPricingCatalogBuildsRecords(t *testing.T) {
+	raw, err := os.ReadFile(filepath.Join("testdata", "baichuan_pricing_sample.txt"))
+	if err != nil {
+		t.Fatalf("读取 fixture 失败: %v", err)
+	}
+
+	records, err := parseBaichuanPricingCatalog(string(raw))
+	if err != nil {
+		t.Fatalf("parseBaichuanPricingCatalog 返回错误: %v", err)
+	}
+	if len(records) != 11 {
+		t.Fatalf("期望 11 条百川价格记录,实际 %d", len(records))
+	}
+	if records[0].ModelID != "baichuan-baichuan-m3-plus" {
+		t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
+	}
+	if records[0].InputPrice != 5 || records[0].OutputPrice != 9 {
+		t.Fatalf("Baichuan-M3-Plus 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
+	}
+	if records[4].InputPrice != 15 || records[4].OutputPrice != 15 {
+		t.Fatalf("Baichuan4-Turbo blended 定价错误: %v / %v", records[4].InputPrice, records[4].OutputPrice)
+	}
+	if records[8].ContextLength != 128000 {
+		t.Fatalf("Baichuan3-Turbo-128k context 错误: %d", records[8].ContextLength)
+	}
+	if records[10].InputPrice != 10 || records[10].OutputPrice != 10 {
+		t.Fatalf("Baichuan2-53B 基线定价错误: %v / %v", records[10].InputPrice, records[10].OutputPrice)
+	}
+}
+
+// TestRunBaichuanPricingImportDryRunPrintsSummary runs the importer in dry-run mode
+// against the fixture (nil db) and checks the fields of the printed summary line.
+func TestRunBaichuanPricingImportDryRunPrintsSummary(t *testing.T) {
+	var out bytes.Buffer
+	err := runBaichuanPricingImport(baichuanPricingImportConfig{
+		URL:     defaultBaichuanPricingURL,
+		Fixture: filepath.Join("testdata", "baichuan_pricing_sample.txt"),
+		DryRun:  true,
+	}, nil, &out)
+	if err != nil {
+		t.Fatalf("runBaichuanPricingImport 返回错误: %v", err)
+	}
+	output := out.String()
+	for _, want := range []string{
+		"source=baichuan-pricing-import",
+		"models=11",
+		"operator=Baichuan API",
+		"dry_run=true",
+	} {
+		if !strings.Contains(output, want) {
+			t.Fatalf("输出缺少 %q,实际: %q", want, output)
+		}
+	}
+}
diff --git a/scripts/import_lingyiwanwu_pricing.go b/scripts/import_lingyiwanwu_pricing.go
new file mode 100644
index 0000000..23425b9
--- /dev/null
+++ b/scripts/import_lingyiwanwu_pricing.go
@@ -0,0 +1,190 @@
+//go:build llm_script
+
+package main
+
+import (
+	"database/sql"
+	"flag"
+	"fmt"
+	"html"
+	"io"
+	"net/http"
+	"os"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// defaultLingyiwanwuPricingURL is the docs page fetched when -url is not set.
+const defaultLingyiwanwuPricingURL = "https://platform.lingyiwanwu.com/docs"
+
+// lingyiwanwuPricingImportConfig holds the command-line options for one import run.
+type lingyiwanwuPricingImportConfig struct {
+	URL     string        // pricing page URL
+	Fixture string        // fixture path passed to fetchRawPricingPage together with URL
+	DryRun  bool          // print the summary only; skip all database work
+	Timeout time.Duration // HTTP client timeout
+}
+
+var (
+	// lingyiwanwuPricingRowPattern captures a row's model name, context label, and first ¥ price.
+	lingyiwanwuPricingRowPattern = regexp.MustCompile(`(?s)"children":"(yi-[a-z0-9-]+)"\}\],\["\$","td",null,\{"children":"([0-9]+K)"\}.*?"children":"¥([0-9]+(?:\.[0-9]+)?)"`)
+	// lingyiwanwuTagPattern matches markup tags; compiled once instead of per payload call.
+	lingyiwanwuTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)
+	// lingyiwanwuBlankRunPattern matches runs of spaces and tabs; compiled once as well.
+	lingyiwanwuBlankRunPattern = regexp.MustCompile(`[ \t]+`)
+)
+
+// main parses the flags, opens the database unless -dry-run is set, and runs the import.
+func main() {
+	loadSubscriptionImportEnv()
+
+	var (
+		pageURL        string
+		fixture        string
+		dryRun         bool
+		timeoutSeconds int
+	)
+	flag.StringVar(&pageURL, "url", defaultLingyiwanwuPricingURL, "零一万物官方价格页")
+	flag.StringVar(&fixture, "fixture", "", "零一万物价格样例文件")
+	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
+	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
+	flag.Parse()
+
+	cfg := lingyiwanwuPricingImportConfig{
+		URL:     pageURL,
+		Fixture: fixture,
+		DryRun:  dryRun,
+		Timeout: time.Duration(timeoutSeconds) * time.Second,
+	}
+
+	var db *sql.DB
+	if !cfg.DryRun {
+		var err error
+		db, err = subscriptionImportDB()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
+			os.Exit(1)
+		}
+	}
+
+	err := runLingyiwanwuPricingImport(cfg, db, os.Stdout)
+	// Close explicitly rather than with defer: os.Exit below would skip deferred calls.
+	if db != nil {
+		if closeErr := db.Close(); closeErr != nil {
+			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
+		}
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "import_lingyiwanwu_pricing: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// runLingyiwanwuPricingImport fetches and parses the pricing page, then either prints a
+// dry-run summary to out or upserts the records through db and prints a summary that
+// includes the region_pricing row count.
+func runLingyiwanwuPricingImport(cfg lingyiwanwuPricingImportConfig, db *sql.DB, out io.Writer) error {
+	client := &http.Client{Timeout: cfg.Timeout}
+	raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
+	if err != nil {
+		return err
+	}
+	records, err := parseLingyiwanwuPricingCatalog(raw)
+	if err != nil {
+		return err
+	}
+	records = dedupeOfficialPricingRecords(records)
+	if len(records) == 0 {
+		return fmt.Errorf("unexpected lingyiwanwu pricing content: no records")
+	}
+	if cfg.DryRun {
+		_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
+		return err
+	}
+	if db == nil {
+		return fmt.Errorf("db is required when dry-run=false")
+	}
+	if err := upsertOfficialPricingRecords(db, records, "lingyiwanwu-pricing-import"); err != nil {
+		return err
+	}
+	var tableRows int
+	if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
+		return fmt.Errorf("count region_pricing: %w", err)
+	}
+	_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
+	return err
+}
+
+// parseLingyiwanwuPricingCatalog converts the table rows found between the "模型与计费"
+// and "关于计费" markers of the page payload into pricing records. The single captured
+// price of a row is stored as both the input and the output price.
+func parseLingyiwanwuPricingCatalog(raw string) ([]officialPricingRecord, error) {
+	payload := lingyiwanwuPricingPayload(raw)
+	sectionStart := strings.Index(payload, "模型与计费")
+	if sectionStart == -1 {
+		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 模型与计费")
+	}
+	payload = payload[sectionStart:]
+	sectionEnd := strings.Index(payload, "关于计费")
+	if sectionEnd == -1 {
+		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 关于计费")
+	}
+	section := payload[:sectionEnd]
+	matches := lingyiwanwuPricingRowPattern.FindAllStringSubmatch(section, -1)
+	if len(matches) == 0 {
+		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: no model rows parsed")
+	}
+
+	providerNameCn, providerCountry, providerWebsite := providerMetadata("Yi")
+	records := make([]officialPricingRecord, 0, len(matches))
+	for _, match := range matches {
+		if len(match) != 4 {
+			continue
+		}
+		modelName := strings.TrimSpace(match[1])
+		contextLength := parseContextLengthCommon(match[2])
+		price := mustParseSubscriptionPrice(match[3])
+		records = append(records, officialPricingRecord{
+			ModelID:         normalizeExternalID("yi", modelName),
+			ModelName:       modelName,
+			ProviderName:    "Yi",
+			ProviderNameCn:  providerNameCn,
+			ProviderCountry: providerCountry,
+			ProviderWebsite: providerWebsite,
+			OperatorName:    "01.AI API",
+			OperatorNameCn:  "零一万物开放平台",
+			OperatorCountry: "CN",
+			OperatorWebsite: defaultLingyiwanwuPricingURL,
+			OperatorType:    "official",
+			Region:          "CN",
+			Currency:        "CNY",
+			InputPrice:      price,
+			OutputPrice:     price,
+			ContextLength:   contextLength,
+			SourceURL:       defaultLingyiwanwuPricingURL,
+			ModelSourceURL:  defaultLingyiwanwuPricingURL,
+			DateConfidence:  "unknown",
+			DateSourceKind:  "official_pricing",
+			Modality:        detectModality(modelName),
+		})
+	}
+	if len(records) == 0 {
+		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: empty records after parse")
+	}
+	return records, nil
+}
+
+// lingyiwanwuPricingPayload unescapes HTML entities and the escape sequences replaced
+// below, blanks out markup tags, and collapses runs of spaces and tabs.
+func lingyiwanwuPricingPayload(raw string) string {
+	text := html.UnescapeString(raw)
+	text = strings.ReplaceAll(text, `\u003c`, "<")
+	text = strings.ReplaceAll(text, `\u003e`, ">")
+	text = strings.ReplaceAll(text, `\n`, "\n")
+	text = strings.ReplaceAll(text, `\t`, " ")
+	text = strings.ReplaceAll(text, `\"`, `"`)
+	text = lingyiwanwuTagPattern.ReplaceAllString(text, " ")
+	text = lingyiwanwuBlankRunPattern.ReplaceAllString(text, " ")
+	return strings.TrimSpace(text)
+}
diff --git a/scripts/import_lingyiwanwu_pricing_test.go b/scripts/import_lingyiwanwu_pricing_test.go
new file mode 100644
index 0000000..7168c53
--- /dev/null
+++ b/scripts/import_lingyiwanwu_pricing_test.go
@@ -0,0 +1,68 @@
+//go:build llm_script
+
+package main
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestParseLingyiwanwuPricingCatalogBuildsRecords parses the checked-in fixture and
+// checks the record count, first model ID, prices, context length, and modality.
+func TestParseLingyiwanwuPricingCatalogBuildsRecords(t *testing.T) {
+	raw, err := os.ReadFile(filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt"))
+	if err != nil {
+		t.Fatalf("读取 fixture 失败: %v", err)
+	}
+
+	records, err := parseLingyiwanwuPricingCatalog(string(raw))
+	if err != nil {
+		t.Fatalf("parseLingyiwanwuPricingCatalog 返回错误: %v", err)
+	}
+	if len(records) != 2 {
+		t.Fatalf("期望 2 条零一万物价格记录,实际 %d", len(records))
+	}
+	if records[0].ModelID != "yi-yi-lightning" {
+		t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
+	}
+	if records[0].InputPrice != 0.99 || records[0].OutputPrice != 0.99 {
+		t.Fatalf("yi-lightning 定价错误: %v / %v", records[0].InputPrice, records[0].OutputPrice)
+	}
+	if records[1].ContextLength != 16000 {
+		t.Fatalf("yi-vision-v2 context 错误: %d", records[1].ContextLength)
+	}
+	if records[1].Modality != "multimodal" {
+		t.Fatalf("yi-vision-v2 modality 错误: %q", records[1].Modality)
+	}
+	if records[1].InputPrice != 6 || records[1].OutputPrice != 6 {
+		t.Fatalf("yi-vision-v2 定价错误: %v / %v", records[1].InputPrice, records[1].OutputPrice)
+	}
+}
+
+// TestRunLingyiwanwuPricingImportDryRunPrintsSummary runs the importer in dry-run mode
+// against the fixture (nil db) and checks the fields of the printed summary line.
+func TestRunLingyiwanwuPricingImportDryRunPrintsSummary(t *testing.T) {
+	var out bytes.Buffer
+	err := runLingyiwanwuPricingImport(lingyiwanwuPricingImportConfig{
+		URL:     defaultLingyiwanwuPricingURL,
+		Fixture: filepath.Join("testdata", "lingyiwanwu_pricing_sample.txt"),
+		DryRun:  true,
+	}, nil, &out)
+	if err != nil {
+		t.Fatalf("runLingyiwanwuPricingImport 返回错误: %v", err)
+	}
+	output := out.String()
+	for _, want := range []string{
+		"source=lingyiwanwu-pricing-import",
+		"models=2",
+		"operator=01.AI API",
+		"dry_run=true",
+	} {
+		if !strings.Contains(output, want) {
+			t.Fatalf("输出缺少 %q,实际: %q", want, output)
+		}
+	}
+}
diff --git a/scripts/import_sensenova_pricing.go b/scripts/import_sensenova_pricing.go
new file mode 100644
index 0000000..b14268a
--- /dev/null
+++ b/scripts/import_sensenova_pricing.go
@@ -0,0 +1,378 @@
+//go:build llm_script
+
+package main
+
+import (
+	"context"
+	"database/sql"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// Default pages used when -docs-url / -models-url are not set.
+const (
+	defaultSensenovaDocsURL   = "https://platform.sensenova.cn/docs"
+	defaultSensenovaModelsURL = "https://www.sensenova.cn/models"
+)
+
+// sensenovaPricingImportConfig holds the command-line options for one import run.
+type sensenovaPricingImportConfig struct {
+	DocsURL   string
+	ModelsURL string
+	Fixture   string
+	DryRun    bool
+	Timeout   time.Duration
+}
+
+// sensenovaPricingFixture pairs the docs page HTML with the models bundle text.
+type sensenovaPricingFixture struct {
+	DocsHTML   string
+	ModelsText string
+}
+
+// sensenovaPricingDocModel carries a model's name, ID, and QuotaPer5Hour value.
+type sensenovaPricingDocModel struct {
+	ModelName     string
+	ModelID       string
+	QuotaPer5Hour int
+}
+
+var (
+	// sensenovaFixtureSplitMarker separates the docs segment from the models segment in a fixture file.
+	sensenovaFixtureSplitMarker = "\n===SENSENOVA_MODELS_BUNDLE===\n"
+	// NOTE(review): in this copy of the patch the literal below is broken across lines and
+	// shows only two capture groups, while parseSensenovaPricingCatalog requires
+	// len(match) == 4 and reads groups 1 and 3. It looks truncated; restore the pattern
+	// from the original source before applying.
+	sensenovaOverviewCardPattern = regexp.MustCompile(`(?s)
]*>每5小时([0-9]+)次
.*?MODEL ID]*>([^<]+)`)
+	// sensenovaModelsScriptPattern captures the src of /_next/static/chunks/*.js script references.
+	sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`)
+	// sensenovaPricingZeroPattern matches a "pricing" object whose prompt, completion, image, and request values are all "0".
+	sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`)
+)
+
+// main parses the flags, opens the database unless -dry-run is set, and runs the import.
+func main() {
+	loadSubscriptionImportEnv()
+
+	var (
+		docsURL        string
+		modelsURL      string
+		fixture        string
+		dryRun         bool
+		timeoutSeconds int
+	)
+	flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页")
+	flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页")
+	flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件")
+	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
+	flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)")
+	flag.Parse()
+
+	cfg := sensenovaPricingImportConfig{
+		DocsURL:   docsURL,
+		ModelsURL: modelsURL,
+		Fixture:   fixture,
+		DryRun:    dryRun,
+		Timeout:   time.Duration(timeoutSeconds) * time.Second,
+	}
+
+	var db *sql.DB
+	if !cfg.DryRun {
+		var err error
+		db, err = subscriptionImportDB()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
+			os.Exit(1)
+		}
+	}
+
+	err := runSensenovaPricingImport(cfg, db, os.Stdout)
+	// Close explicitly rather than with defer: os.Exit below would skip deferred calls.
+	if db != nil {
+		if closeErr := db.Close(); closeErr != nil {
+			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
+		}
+	}
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// runSensenovaPricingImport loads the docs/models inputs, parses and de-duplicates the
+// records, and then either writes a dry-run summary to out or upserts the records
+// through db and writes a summary that includes the region_pricing row count.
+func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error {
+	pages, fetchErr := fetchSensenovaPricingFixture(cfg)
+	if fetchErr != nil {
+		return fetchErr
+	}
+
+	parsed, parseErr := parseSensenovaPricingCatalog(pages)
+	if parseErr != nil {
+		return parseErr
+	}
+
+	unique := dedupeOfficialPricingRecords(parsed)
+	if len(unique) == 0 {
+		return fmt.Errorf("unexpected sensenova pricing content: no records")
+	}
+
+	// Dry run: report what would be imported and stop before any database access.
+	if cfg.DryRun {
+		_, writeErr := fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", len(unique), unique[0].OperatorName)
+		return writeErr
+	}
+
+	if db == nil {
+		return fmt.Errorf("db is required when dry-run=false")
+	}
+	if upsertErr := upsertOfficialPricingRecords(db, unique, "sensenova-pricing-import"); upsertErr != nil {
+		return upsertErr
+	}
+
+	var rowCount int
+	countRow := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`)
+	if scanErr := countRow.Scan(&rowCount); scanErr != nil {
+		return fmt.Errorf("count region_pricing: %w", scanErr)
+	}
+
+	_, writeErr := fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(unique), unique[0].OperatorName, rowCount)
+	return writeErr
+}
+
+// fetchSensenovaPricingFixture returns the docs HTML and models bundle text for a run.
+// With a non-blank cfg.Fixture both segments are read from that file; otherwise the docs
+// page is rendered through Chromium and the models bundle is downloaded.
+func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) {
+	var none sensenovaPricingFixture
+
+	if path := cfg.Fixture; strings.TrimSpace(path) != "" {
+		content, readErr := os.ReadFile(path)
+		if readErr != nil {
+			return none, fmt.Errorf("read fixture %s: %w", path, readErr)
+		}
+		return splitSensenovaFixture(string(content))
+	}
+
+	renderedDocs, docsErr := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout)
+	if docsErr != nil {
+		return none, fmt.Errorf("fetch docs render: %w", docsErr)
+	}
+
+	bundleText, bundleErr := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout)
+	if bundleErr != nil {
+		return none, bundleErr
+	}
+
+	return sensenovaPricingFixture{DocsHTML: renderedDocs, ModelsText: bundleText}, nil
+}
+
+// splitSensenovaFixture cuts a fixture file at the first sensenovaFixtureSplitMarker into
+// its docs and models segments. It fails when the marker is absent or when either
+// segment is empty after trimming whitespace.
+func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) {
+	docsPart, modelsPart, found := strings.Cut(raw, sensenovaFixtureSplitMarker)
+	if !found {
+		return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker")
+	}
+
+	fixture := sensenovaPricingFixture{
+		DocsHTML:   strings.TrimSpace(docsPart),
+		ModelsText: strings.TrimSpace(modelsPart),
+	}
+	if fixture.DocsHTML == "" || fixture.ModelsText == "" {
+		return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment")
+	}
+	return fixture, nil
+}
+
+// fetchSensenovaModelsBundle downloads the models page, follows its
+// /_next/static/chunks/*.js script references, and returns the first chunk for which
+// sensenovaBundleConfirmsFreeBeta reports true.
+//
+// A chunk that cannot be resolved or fetched is skipped so one bad asset does not abort
+// the search, but the most recent such failure is kept and attached to the final
+// "not found" error instead of being dropped.
+func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) {
+	client := &http.Client{Timeout: timeout}
+	pageHTML, err := fetchRawPricingPage(modelsURL, "", client)
+	if err != nil {
+		return "", fmt.Errorf("fetch models page shell: %w", err)
+	}
+	scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(pageHTML, -1)
+	if len(scripts) == 0 {
+		return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found")
+	}
+
+	seen := make(map[string]struct{}, len(scripts))
+	var lastErr error
+	for _, match := range scripts {
+		if len(match) != 2 {
+			continue
+		}
+		scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1])
+		if err != nil {
+			lastErr = fmt.Errorf("resolve chunk %q: %w", match[1], err)
+			continue
+		}
+		// The same chunk may be referenced more than once; fetch each URL a single time.
+		if _, ok := seen[scriptURL]; ok {
+			continue
+		}
+		seen[scriptURL] = struct{}{}
+		bundle, err := fetchRawPricingPage(scriptURL, "", client)
+		if err != nil {
+			lastErr = fmt.Errorf("fetch chunk %s: %w", scriptURL, err)
+			continue
+		}
+		if sensenovaBundleConfirmsFreeBeta(bundle) {
+			return bundle, nil
+		}
+	}
+	if lastErr != nil {
+		return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found (last error: %w)", lastErr)
+	}
+	return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found")
+}
+
+// resolveSensenovaAssetURL resolves assetPath (absolute or relative) against baseURL and
+// returns the resulting URL string. A parse failure of either argument is returned as is.
+func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) {
+	base, baseErr := url.Parse(baseURL)
+	if baseErr != nil {
+		return "", baseErr
+	}
+	// URL.Parse parses the reference and resolves it against the receiver in one step.
+	resolved, refErr := base.Parse(assetPath)
+	if refErr != nil {
+		return "", refErr
+	}
+	return resolved.String(), nil
+}
+
+// sensenovaBundleConfirmsFreeBeta reports whether raw contains both a free-during-beta
+// phrase and an all-models-included phrase, each in its Chinese or English wording.
+func sensenovaBundleConfirmsFreeBeta(raw string) bool {
+	containsAny := func(phrases ...string) bool {
+		for _, phrase := range phrases {
+			if strings.Contains(raw, phrase) {
+				return true
+			}
+		}
+		return false
+	}
+
+	if !containsAny("公测期完全免费开放", "free during public beta") {
+		return false
+	}
+	return containsAny("所有模型完全开放", "all models included")
+}
+
+// fetchRenderedPricingPageWithChromium runs a headless Chromium-compatible browser with
+// --dump-dom on pageURL and returns its standard output. The process is bound to a
+// context that expires after timeout; the browser's stderr is discarded.
+func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) {
+	binary, lookupErr := lookupChromiumBinaryForSensenova()
+	if lookupErr != nil {
+		return "", lookupErr
+	}
+
+	deadlineCtx, release := context.WithTimeout(context.Background(), timeout)
+	defer release()
+
+	args := []string{
+		"--headless",
+		"--no-sandbox",
+		"--disable-gpu",
+		"--virtual-time-budget=8000",
+		"--dump-dom",
+		pageURL,
+	}
+	render := exec.CommandContext(deadlineCtx, binary, args...)
+	render.Stderr = io.Discard
+	dom, runErr := render.Output()
+
+	// The deadline is checked first so a timeout is reported as such, not as a run error.
+	switch {
+	case deadlineCtx.Err() == context.DeadlineExceeded:
+		return "", fmt.Errorf("chromium render timeout after %s", timeout)
+	case runErr != nil:
+		return "", fmt.Errorf("chromium dump-dom: %w", runErr)
+	case len(dom) == 0:
+		return "", fmt.Errorf("chromium dump-dom returned empty output")
+	}
+	return string(dom), nil
+}
+
+// lookupChromiumBinaryForSensenova returns the path of the first browser binary found on
+// PATH, trying the candidate names in the order listed, or an error when none exists.
+func lookupChromiumBinaryForSensenova() (string, error) {
+	candidates := [...]string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"}
+	for i := 0; i < len(candidates); i++ {
+		found, err := exec.LookPath(candidates[i])
+		if err != nil {
+			continue
+		}
+		return found, nil
+	}
+	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
+}
+
+// parseSensenovaPricingCatalog builds one free (zero-priced) pricing record per distinct
+// model ID found in the docs overview cards.
+//
+// It first requires three signals: the models bundle confirms the free public beta, the
+// docs mention the list-models endpoint, and the docs contain an all-zero pricing object.
+// Cards with an empty name or ID, or a model ID already handled, are skipped. A failure
+// to extract a model's docs section aborts the whole parse.
+func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) {
+	docs := fixture.DocsHTML
+	switch {
+	case !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText):
+		return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal")
+	case !strings.Contains(docs, "GET https://token.sensenova.cn/v1/models"):
+		return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint")
+	case !sensenovaPricingZeroPattern.MatchString(docs):
+		return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example")
+	}
+
+	cards := sensenovaOverviewCardPattern.FindAllStringSubmatch(docs, -1)
+	if len(cards) == 0 {
+		return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed")
+	}
+
+	// Metadata for the default provider is looked up once and reused for its models.
+	defaultCn, defaultCountry, defaultSite := providerMetadata("SenseTime")
+	records := make([]officialPricingRecord, 0, len(cards))
+	handled := make(map[string]struct{}, len(cards))
+	for _, card := range cards {
+		if len(card) != 4 {
+			continue
+		}
+		name, id := strings.TrimSpace(card[1]), strings.TrimSpace(card[3])
+		if name == "" || id == "" {
+			continue
+		}
+		if _, dup := handled[id]; dup {
+			continue
+		}
+		handled[id] = struct{}{}
+
+		anchor := sensenovaSectionIDForModel(id)
+		section, err := extractHTMLSectionByID(docs, anchor)
+		if err != nil {
+			return nil, err
+		}
+
+		provider := sensenovaProviderName(id)
+		cn, country, site := defaultCn, defaultCountry, defaultSite
+		if provider != "SenseTime" {
+			cn, country, site = providerMetadata(provider)
+		}
+
+		records = append(records, officialPricingRecord{
+			ModelID:         normalizeExternalID("sensenova", id),
+			ModelName:       name,
+			ProviderName:    provider,
+			ProviderNameCn:  cn,
+			ProviderCountry: country,
+			ProviderWebsite: site,
+			OperatorName:    "SenseNova API",
+			OperatorNameCn:  "日日新开放平台",
+			OperatorCountry: "CN",
+			OperatorWebsite: defaultSensenovaDocsURL,
+			OperatorType:    "official",
+			Region:          "CN",
+			Currency:        "CNY",
+			InputPrice:      0,
+			OutputPrice:     0,
+			IsFree:          true,
+			ContextLength:   sensenovaContextLength(id, section),
+			SourceURL:       defaultSensenovaDocsURL,
+			ModelSourceURL:  firstNonEmptyText(defaultSensenovaDocsURL+"#"+anchor, defaultSensenovaDocsURL),
+			DateConfidence:  "unknown",
+			DateSourceKind:  "official_pricing",
+			Modality:        sensenovaModality(id, section),
+		})
+	}
+	if len(records) == 0 {
+		return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse")
+	}
+	return records, nil
+}
+
+func extractHTMLSectionByID(raw string, sectionID string) (string, error) {
+ marker := fmt.Sprintf(`面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解
调用次数限制
每5小时1500次
MODEL ID
sensenova-6.7-flash-lite基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成
调用次数限制
每5小时1500次
MODEL ID
sensenova-u1-fastDeepSeek 高性能对话模型,支持思考/非思考模式、256K 上下文、工具调用
调用次数限制
每5小时150次
MODEL ID
deepseek-v4-flash面向真实工作流的轻量多模态智能体模型,支持文本对话与图像输入理解。
MODEL ID: sensenova-6.7-flash-lite
SenseNova U1 Fast 基于 SenseNova U1 的加速版本,专供信息图(Infographics)生成场景。
MODEL ID: sensenova-u1-fast
注意: U1 Fast 使用独立的图像生成接口 POST /v1/images/generations,不是 Chat Completions;不支持图像输入。
DeepSeek 高性能对话模型,支持思考模式与非思考模式,上下文长度 256K tokens,最大输出 64K tokens,内置 JSON Output、Tool Calls等功能。
MODEL ID: deepseek-v4-flash
GET https://token.sensenova.cn/v1/models{
+ "data": [
+ {
+ "id": "sensenova-6.7-flash-lite",
+ "name": "sensenova-6.7-flash-lite",
+ "created": 1777392000,
+ "input_modalities": ["text", "image"],
+ "output_modalities": ["text"],
+ "quantization": "fp8",
+ "context_length": 262144,
+ "max_output_length": 65536,
+ "pricing": {
+ "prompt": "0",
+ "completion": "0",
+ "image": "0",
+ "request": "0",
+ "input_cache_read": "0"
+ }
+ }
+ ]
+}
+===SENSENOVA_MODELS_BUNDLE===
+{"subtitle":"兼容 OpenAI 接口,按量透明计费,公测期内免费开放","freeDesc":"公测期完全免费开放","promoLine2":",所有模型完全开放"}
diff --git a/scripts/testdata/xfyun_pricing_sample.html b/scripts/testdata/xfyun_pricing_sample.html
new file mode 100644
index 0000000..4eeb8b7
--- /dev/null
+++ b/scripts/testdata/xfyun_pricing_sample.html
@@ -0,0 +1 @@
+