//go:build llm_script && !scripts_pkg package main import ( "database/sql" "flag" "fmt" "html" "io" "net/http" "os" "regexp" "strings" "time" ) const defaultQwenPricingURL = "https://help.aliyun.com/zh/model-studio/model-pricing" var qwenModelLinePattern = regexp.MustCompile(`^(qwen[0-9a-z.-]+|qwq[0-9a-z.-]+|qvq[0-9a-z.-]+)$`) type qwenPricingImportConfig struct { URL string Fixture string DryRun bool Timeout time.Duration } func main() { loadSubscriptionImportEnv() var url string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&url, "url", defaultQwenPricingURL, "通义千问官方模型价格页") flag.StringVar(&fixture, "fixture", "", "通义千问价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") flag.Parse() cfg := qwenPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runQwenPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_qwen_pricing: %v\n", err) os.Exit(1) } } func runQwenPricingImport(cfg qwenPricingImportConfig, db *sql.DB, out io.Writer) error { client := &http.Client{Timeout: cfg.Timeout} raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) if err != nil { return err } records, err := parseQwenPricingCatalog(raw) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if cfg.DryRun { _, err = fmt.Fprintf(out, "source=qwen-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "qwen-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=qwen-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) return err } func parseQwenPricingCatalog(raw string) ([]officialPricingRecord, error) { lines := qwenPricingLines(raw) records := make([]officialPricingRecord, 0) for i := 0; i < len(lines); i++ { modelName := strings.ToLower(strings.TrimSpace(lines[i])) if !qwenModelLinePattern.MatchString(modelName) { continue } block := make([]string, 0, 12) for j := i + 1; j < len(lines) && j < i+14; j++ { next := strings.ToLower(strings.TrimSpace(lines[j])) if qwenModelLinePattern.MatchString(next) { break } block = append(block, lines[j]) } prices := qwenBlockPrices(block) if len(prices) < 2 { continue } providerNameCn, providerCountry, providerWebsite := providerMetadata("Qwen") record := officialPricingRecord{ ModelID: normalizeExternalID("qwen", modelName), ModelName: modelName, ProviderName: "Qwen", ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "DashScope", OperatorNameCn: "通义千问 API", OperatorCountry: "CN", OperatorWebsite: "https://help.aliyun.com/zh/model-studio/model-pricing", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: prices[0], OutputPrice: prices[1], SourceURL: defaultQwenPricingURL, ModelSourceURL: defaultQwenPricingURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), } records = append(records, record) } if len(records) == 0 { return nil, fmt.Errorf("unexpected qwen pricing content") } return records, nil } func qwenPricingLines(raw string) []string { raw = strings.ReplaceAll(raw, `\u003c`, "<") raw = strings.ReplaceAll(raw, `\u003e`, ">") raw = strings.ReplaceAll(raw, `\n`, "\n") raw = strings.ReplaceAll(raw, `\t`, " ") raw = html.UnescapeString(raw) replacer := strings.NewReplacer( "
", "\n", "
", "\n", "
", "\n", "

", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", ) withBreaks := replacer.Replace(raw) tagPattern := regexp.MustCompile(`(?is)<[^>]+>`) withBreaks = tagPattern.ReplaceAllString(withBreaks, " ") parts := strings.Split(withBreaks, "\n") lines := make([]string, 0, len(parts)) for _, part := range parts { line := strings.TrimSpace(regexp.MustCompile(`\s+`).ReplaceAllString(part, " ")) if line != "" { lines = append(lines, line) } } return lines } func qwenBlockPrices(lines []string) []float64 { pricePattern := regexp.MustCompile(`^([0-9]+(?:\.[0-9]+)?) 元$`) prices := make([]float64, 0, 4) for _, line := range lines { match := pricePattern.FindStringSubmatch(strings.TrimSpace(line)) if len(match) == 2 { prices = append(prices, mustParseSubscriptionPrice(match[1])) } } return prices }