//go:build llm_script package main import ( "database/sql" "flag" "fmt" "html" "io" "net/http" "os" "regexp" "strings" "time" ) const defaultLingyiwanwuPricingURL = "https://platform.lingyiwanwu.com/docs" type lingyiwanwuPricingImportConfig struct { URL string Fixture string DryRun bool Timeout time.Duration } var lingyiwanwuPricingRowPattern = regexp.MustCompile(`(?s)"children":"(yi-[a-z0-9-]+)"\}\],\["\$","td",null,\{"children":"([0-9]+K)"\}.*?"children":"¥([0-9]+(?:\.[0-9]+)?)"`) func main() { loadSubscriptionImportEnv() var url string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&url, "url", defaultLingyiwanwuPricingURL, "零一万物官方价格页") flag.StringVar(&fixture, "fixture", "", "零一万物价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") flag.Parse() cfg := lingyiwanwuPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runLingyiwanwuPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_lingyiwanwu_pricing: %v\n", err) os.Exit(1) } } func runLingyiwanwuPricingImport(cfg lingyiwanwuPricingImportConfig, db *sql.DB, out io.Writer) error { client := &http.Client{Timeout: cfg.Timeout} raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) if err != nil { return err } records, err := parseLingyiwanwuPricingCatalog(raw) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if len(records) == 0 { return fmt.Errorf("unexpected lingyiwanwu pricing content: no records") } if cfg.DryRun { _, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "lingyiwanwu-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) return err } func parseLingyiwanwuPricingCatalog(raw string) ([]officialPricingRecord, error) { payload := lingyiwanwuPricingPayload(raw) sectionStart := strings.Index(payload, "模型与计费") if sectionStart == -1 { return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 模型与计费") } payload = payload[sectionStart:] sectionEnd := strings.Index(payload, "关于计费") if sectionEnd == -1 { return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 关于计费") } section := payload[:sectionEnd] matches := lingyiwanwuPricingRowPattern.FindAllStringSubmatch(section, -1) if len(matches) == 0 { return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: no model rows parsed") } providerNameCn, providerCountry, providerWebsite := providerMetadata("Yi") records := make([]officialPricingRecord, 0, len(matches)) for _, match := range matches { if len(match) != 4 { continue } modelName := strings.TrimSpace(match[1]) contextLength := parseContextLengthCommon(match[2]) price := mustParseSubscriptionPrice(match[3]) records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("yi", modelName), ModelName: modelName, ProviderName: "Yi", ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "01.AI API", OperatorNameCn: "零一万物开放平台", OperatorCountry: "CN", OperatorWebsite: defaultLingyiwanwuPricingURL, OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: price, OutputPrice: price, ContextLength: contextLength, SourceURL: defaultLingyiwanwuPricingURL, ModelSourceURL: defaultLingyiwanwuPricingURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), }) } if len(records) == 0 { return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: empty records after parse") } return records, nil } func lingyiwanwuPricingPayload(raw string) string { text := html.UnescapeString(raw) text = strings.ReplaceAll(text, `\u003c`, "<") text = strings.ReplaceAll(text, `\u003e`, ">") text = strings.ReplaceAll(text, `\n`, "\n") text = strings.ReplaceAll(text, `\t`, " ") text = strings.ReplaceAll(text, `\"`, `"`) text = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(text, " ") text = regexp.MustCompile(`[ \t]+`).ReplaceAllString(text, " ") return strings.TrimSpace(text) }