//go:build llm_script package main import ( "database/sql" "flag" "fmt" "html" "io" "net/http" "os" "regexp" "strings" "time" ) const defaultHunyuanPricingURL = "https://cloud.tencent.com/document/product/1729/97731" var hunyuanModelLinePattern = regexp.MustCompile(`^[A-Za-z0-9 ._-]+$`) type hunyuanPricingImportConfig struct { URL string Fixture string DryRun bool Timeout time.Duration } func main() { loadSubscriptionImportEnv() var url string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&url, "url", defaultHunyuanPricingURL, "腾讯混元官方价格页") flag.StringVar(&fixture, "fixture", "", "腾讯混元价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") flag.Parse() cfg := hunyuanPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runHunyuanPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_hunyuan_pricing: %v\n", err) os.Exit(1) } } func runHunyuanPricingImport(cfg hunyuanPricingImportConfig, db *sql.DB, out io.Writer) error { client := &http.Client{Timeout: cfg.Timeout} raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) if err != nil { return err } records, err := parseHunyuanPricingCatalog(raw) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if cfg.DryRun { _, err = fmt.Fprintf(out, "source=hunyuan-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "hunyuan-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=hunyuan-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) return err } func parseHunyuanPricingCatalog(raw string) ([]officialPricingRecord, error) { lines := hunyuanPricingLines(raw) records := make([]officialPricingRecord, 0) currentModel := "" currentInput := 0.0 for _, line := range lines { trimmed := strings.TrimSpace(line) switch { case trimmed == "" || strings.Contains(trimmed, "混元生文价格说明") || strings.Contains(trimmed, "token 后付费") || strings.Contains(trimmed, "产品名") || strings.Contains(trimmed, "输入长度") || strings.Contains(trimmed, "免费额度"): continue case strings.HasPrefix(trimmed, "输入:"): currentInput = mustParseSubscriptionPrice(strings.TrimSuffix(strings.TrimPrefix(trimmed, "输入:"), "元")) case strings.HasPrefix(trimmed, "输出:"): if currentModel == "" || currentInput == 0 { continue } outputPrice := mustParseSubscriptionPrice(strings.TrimSuffix(strings.TrimPrefix(trimmed, "输出:"), "元")) providerNameCn, providerCountry, providerWebsite := providerMetadata("Tencent") records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("hunyuan", currentModel), ModelName: currentModel, ProviderName: "Tencent", ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Tencent Hunyuan", OperatorNameCn: "腾讯混元", OperatorCountry: "CN", OperatorWebsite: "https://cloud.tencent.com/product/hunyuan", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: currentInput, OutputPrice: outputPrice, SourceURL: defaultHunyuanPricingURL, ModelSourceURL: defaultHunyuanPricingURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(currentModel), }) currentModel = "" currentInput = 0 case hunyuanModelLinePattern.MatchString(trimmed) && !strings.Contains(trimmed, "元") && !strings.Contains(trimmed, "tokens") && trimmed != "-": currentModel = trimmed currentInput = 0 } } if len(records) == 0 { return nil, fmt.Errorf("unexpected hunyuan pricing content") } return records, nil } func hunyuanPricingLines(raw string) []string { raw = strings.ReplaceAll(raw, `\u003c`, "<") raw = strings.ReplaceAll(raw, `\u003e`, ">") raw = strings.ReplaceAll(raw, `\n`, "\n") raw = strings.ReplaceAll(raw, `\t`, " ") raw = html.UnescapeString(raw) replacer := strings.NewReplacer( "
", "\n", "
", "\n", "
", "\n", "

", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", ) withBreaks := replacer.Replace(raw) withBreaks = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(withBreaks, " ") parts := strings.Split(withBreaks, "\n") lines := make([]string, 0, len(parts)) for _, part := range parts { line := strings.TrimSpace(regexp.MustCompile(`\s+`).ReplaceAllString(part, " ")) if line != "" { lines = append(lines, line) } } return lines }