//go:build llm_script package main import ( "context" "database/sql" "flag" "fmt" "io" "net/http" "net/url" "os" "os/exec" "regexp" "strings" "time" ) const ( defaultSensenovaDocsURL = "https://platform.sensenova.cn/docs" defaultSensenovaModelsURL = "https://www.sensenova.cn/models" ) type sensenovaPricingImportConfig struct { DocsURL string ModelsURL string Fixture string DryRun bool Timeout time.Duration } type sensenovaPricingFixture struct { DocsHTML string ModelsText string } type sensenovaPricingDocModel struct { ModelName string ModelID string QuotaPer5Hour int } var ( sensenovaFixtureSplitMarker = "\n===SENSENOVA_MODELS_BUNDLE===\n" sensenovaOverviewCardPattern = regexp.MustCompile(`(?s)]*>([^<]+).*?调用次数限制

]*>每5小时([0-9]+)次

.*?MODEL ID

]*>([^<]+)`) sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`) sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`) ) func main() { loadSubscriptionImportEnv() var docsURL string var modelsURL string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页") flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页") flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)") flag.Parse() cfg := sensenovaPricingImportConfig{ DocsURL: docsURL, ModelsURL: modelsURL, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second, } var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runSensenovaPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", err) os.Exit(1) } } func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error { fixture, err := fetchSensenovaPricingFixture(cfg) if err != nil { return err } records, err := parseSensenovaPricingCatalog(fixture) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if len(records) == 0 { return fmt.Errorf("unexpected sensenova pricing content: no records") } if cfg.DryRun { _, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "sensenova-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) return err } func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) { if strings.TrimSpace(cfg.Fixture) != "" { data, err := os.ReadFile(cfg.Fixture) if err != nil { return sensenovaPricingFixture{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err) } return splitSensenovaFixture(string(data)) } docsHTML, err := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout) if err != nil { return sensenovaPricingFixture{}, fmt.Errorf("fetch docs render: %w", err) } modelsText, err := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout) if err != nil { return sensenovaPricingFixture{}, err } return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil } func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) { parts := strings.SplitN(raw, sensenovaFixtureSplitMarker, 2) if len(parts) != 2 { return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker") } docsHTML := strings.TrimSpace(parts[0]) modelsText := strings.TrimSpace(parts[1]) if docsHTML == "" || modelsText == "" { return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment") } return sensenovaPricingFixture{DocsHTML: docsHTML, ModelsText: modelsText}, nil } func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) { client := &http.Client{Timeout: timeout} html, err := fetchRawPricingPage(modelsURL, "", client) if err != nil { return "", fmt.Errorf("fetch models page shell: %w", err) } scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(html, -1) if len(scripts) == 0 { return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found") } seen := make(map[string]struct{}, len(scripts)) for _, match := range scripts { if len(match) != 2 { continue } scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1]) if err != nil { continue } if _, ok := seen[scriptURL]; ok { continue } seen[scriptURL] = struct{}{} bundle, err := fetchRawPricingPage(scriptURL, "", client) if err != nil { continue } if sensenovaBundleConfirmsFreeBeta(bundle) { return bundle, nil } } return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found") } func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) { parsedBase, err := url.Parse(baseURL) if err != nil { return "", err } asset, err := url.Parse(assetPath) if err != nil { return "", err } return parsedBase.ResolveReference(asset).String(), nil } func sensenovaBundleConfirmsFreeBeta(raw string) bool { hasFree := strings.Contains(raw, "公测期完全免费开放") || strings.Contains(raw, "free during public beta") hasAllModels := strings.Contains(raw, "所有模型完全开放") || strings.Contains(raw, "all models included") return hasFree && hasAllModels } func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) { browserPath, err := lookupChromiumBinaryForSensenova() if err != nil { return "", err } ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() cmd := exec.CommandContext(ctx, browserPath, "--headless", "--no-sandbox", "--disable-gpu", "--virtual-time-budget=8000", "--dump-dom", pageURL, ) cmd.Stderr = io.Discard out, err := cmd.Output() if ctx.Err() == context.DeadlineExceeded { return "", fmt.Errorf("chromium render timeout after %s", timeout) } if err != nil { return "", fmt.Errorf("chromium dump-dom: %w", err) } if len(out) == 0 { return "", fmt.Errorf("chromium dump-dom returned empty output") } return string(out), nil } func lookupChromiumBinaryForSensenova() (string, error) { for _, name := range []string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"} { if path, err := exec.LookPath(name); err == nil { return path, nil } } return "", fmt.Errorf("no chromium-compatible browser found in PATH") } func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) { if !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText) { return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal") } if !strings.Contains(fixture.DocsHTML, "GET https://token.sensenova.cn/v1/models") { return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint") } if !sensenovaPricingZeroPattern.MatchString(fixture.DocsHTML) { return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example") } matches := sensenovaOverviewCardPattern.FindAllStringSubmatch(fixture.DocsHTML, -1) if len(matches) == 0 { return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed") } providerNameCn, providerCountry, providerWebsite := providerMetadata("SenseTime") records := make([]officialPricingRecord, 0, len(matches)) seenModelIDs := make(map[string]struct{}, len(matches)) for _, match := range matches { if len(match) != 4 { continue } modelName := strings.TrimSpace(match[1]) modelID := strings.TrimSpace(match[3]) if modelName == "" || modelID == "" { continue } if _, ok := seenModelIDs[modelID]; ok { continue } seenModelIDs[modelID] = struct{}{} sectionID := sensenovaSectionIDForModel(modelID) section, err := extractHTMLSectionByID(fixture.DocsHTML, sectionID) if err != nil { return nil, err } providerName := sensenovaProviderName(modelID) providerCn, providerCountryCode, providerSite := providerNameCn, providerCountry, providerWebsite if providerName != "SenseTime" { providerCn, providerCountryCode, providerSite = providerMetadata(providerName) } records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("sensenova", modelID), ModelName: modelName, ProviderName: providerName, ProviderNameCn: providerCn, ProviderCountry: providerCountryCode, ProviderWebsite: providerSite, OperatorName: "SenseNova API", OperatorNameCn: "日日新开放平台", OperatorCountry: "CN", OperatorWebsite: defaultSensenovaDocsURL, OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: 0, OutputPrice: 0, IsFree: true, ContextLength: sensenovaContextLength(modelID, section), SourceURL: defaultSensenovaDocsURL, ModelSourceURL: firstNonEmptyText(defaultSensenovaDocsURL+"#"+sectionID, defaultSensenovaDocsURL), DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: sensenovaModality(modelID, section), }) } if len(records) == 0 { return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse") } return records, nil } func extractHTMLSectionByID(raw string, sectionID string) (string, error) { marker := fmt.Sprintf(`