Files
llm-intelligence/scripts/import_lingyiwanwu_pricing.go
2026-05-29 18:48:48 +08:00

161 lines
5.1 KiB
Go

//go:build llm_script && !scripts_pkg
package main
import (
"database/sql"
"flag"
"fmt"
"html"
"io"
"net/http"
"os"
"regexp"
"strings"
"time"
)
// defaultLingyiwanwuPricingURL is the default value of the -url flag. It is
// also recorded on every parsed record as the operator website, the source URL
// and the model source URL.
const defaultLingyiwanwuPricingURL = "https://platform.lingyiwanwu.com/docs"
// lingyiwanwuPricingImportConfig carries the settings for a single pricing
// import run; main fills it from the command-line flags.
type lingyiwanwuPricingImportConfig struct {
// URL is the pricing page address handed to the page fetcher.
URL string
// Fixture is the sample-file value handed to the page fetcher alongside URL.
Fixture string
// DryRun, when true, prints a summary line and skips every database step.
DryRun bool
// Timeout is used as the HTTP client timeout for the page fetch.
Timeout time.Duration
}
// lingyiwanwuPricingRowPattern matches one model row in the cleaned page
// payload. Capture group 1 is the model name (yi-...), group 2 is the value of
// the following "td" cell in the form <digits>K, and group 3 is the first
// ¥-prefixed decimal number that appears after that cell (without the ¥ sign).
var lingyiwanwuPricingRowPattern = regexp.MustCompile(`(?s)"children":"(yi-[a-z0-9-]+)"\}\],\["\$","td",null,\{"children":"([0-9]+K)"\}.*?"children":"¥([0-9]+(?:\.[0-9]+)?)"`)
// main parses the command-line flags, opens the database unless -dry-run is
// set, and runs the pricing import, writing the summary line to stdout.
// It exits with status 1 when the database cannot be opened or the import
// fails.
func main() {
	loadSubscriptionImportEnv()

	var (
		cfg            lingyiwanwuPricingImportConfig
		timeoutSeconds int
	)
	flag.StringVar(&cfg.URL, "url", defaultLingyiwanwuPricingURL, "零一万物官方价格页")
	flag.StringVar(&cfg.Fixture, "fixture", "", "零一万物价格样例文件")
	flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
	flag.Parse()
	cfg.Timeout = time.Duration(timeoutSeconds) * time.Second

	// db stays nil in dry-run mode; the import never touches it in that case.
	var db *sql.DB
	if !cfg.DryRun {
		var err error
		db, err = subscriptionImportDB()
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			os.Exit(1)
		}
	}

	err := runLingyiwanwuPricingImport(cfg, db, os.Stdout)

	// Close the handle explicitly instead of deferring it: os.Exit terminates
	// the process without running deferred calls, so a deferred Close would be
	// skipped on the failure path below.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
		}
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "import_lingyiwanwu_pricing: %v\n", err)
		os.Exit(1)
	}
}
// runLingyiwanwuPricingImport fetches the pricing page (or fixture), parses it
// into pricing records, de-duplicates them and then either prints a dry-run
// summary to out or upserts the records into the database and prints a summary
// that includes the current region_pricing row count.
//
// It returns an error when fetching or parsing fails, when no records remain
// after de-duplication, when db is nil outside dry-run mode, when the upsert
// or the row count query fails, or when writing the summary to out fails.
func runLingyiwanwuPricingImport(cfg lingyiwanwuPricingImportConfig, db *sql.DB, out io.Writer) error {
	page, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, &http.Client{Timeout: cfg.Timeout})
	if err != nil {
		return err
	}

	records, err := parseLingyiwanwuPricingCatalog(page)
	if err != nil {
		return err
	}

	records = dedupeOfficialPricingRecords(records)
	if len(records) == 0 {
		return fmt.Errorf("unexpected lingyiwanwu pricing content: no records")
	}

	switch {
	case cfg.DryRun:
		// Dry run: report what would be imported and stop before any DB access.
		_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
		return err
	case db == nil:
		return fmt.Errorf("db is required when dry-run=false")
	}

	err = upsertOfficialPricingRecords(db, records, "lingyiwanwu-pricing-import")
	if err != nil {
		return err
	}

	var rowCount int
	err = db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&rowCount)
	if err != nil {
		return fmt.Errorf("count region_pricing: %w", err)
	}

	_, err = fmt.Fprintf(out, "source=lingyiwanwu-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, rowCount)
	return err
}
// parseLingyiwanwuPricingCatalog extracts pricing records from the raw page
// content. After cleaning the payload it keeps only the text from the first
// "模型与计费" marker up to the next "关于计费" marker and applies
// lingyiwanwuPricingRowPattern to that section. Each matched row yields one
// record whose input and output price are both set to the single price
// captured for the row.
//
// It returns an error when either marker is missing or when no rows match.
func parseLingyiwanwuPricingCatalog(raw string) ([]officialPricingRecord, error) {
	cleaned := lingyiwanwuPricingPayload(raw)

	begin := strings.Index(cleaned, "模型与计费")
	if begin < 0 {
		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 模型与计费")
	}
	tail := cleaned[begin:]

	end := strings.Index(tail, "关于计费")
	if end < 0 {
		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: missing 关于计费")
	}

	rows := lingyiwanwuPricingRowPattern.FindAllStringSubmatch(tail[:end], -1)
	if len(rows) == 0 {
		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: no model rows parsed")
	}

	// Fields that are the same for every row are filled in once on a template.
	nameCn, country, website := providerMetadata("Yi")
	template := officialPricingRecord{
		ProviderName:    "Yi",
		ProviderNameCn:  nameCn,
		ProviderCountry: country,
		ProviderWebsite: website,
		OperatorName:    "01.AI API",
		OperatorNameCn:  "零一万物开放平台",
		OperatorCountry: "CN",
		OperatorWebsite: defaultLingyiwanwuPricingURL,
		OperatorType:    "official",
		Region:          "CN",
		Currency:        "CNY",
		SourceURL:       defaultLingyiwanwuPricingURL,
		ModelSourceURL:  defaultLingyiwanwuPricingURL,
		DateConfidence:  "unknown",
		DateSourceKind:  "official_pricing",
	}

	records := make([]officialPricingRecord, 0, len(rows))
	for _, row := range rows {
		// A full match plus three capture groups is expected; skip anything else.
		if len(row) != 4 {
			continue
		}
		name := strings.TrimSpace(row[1])
		ctxLen := parseContextLengthCommon(row[2])
		unitPrice := mustParseSubscriptionPrice(row[3])

		rec := template
		rec.ModelID = normalizeExternalID("yi", name)
		rec.ModelName = name
		rec.InputPrice = unitPrice
		rec.OutputPrice = unitPrice
		rec.ContextLength = ctxLen
		rec.Modality = detectModality(name)
		records = append(records, rec)
	}

	if len(records) == 0 {
		return nil, fmt.Errorf("unexpected lingyiwanwu pricing content: empty records after parse")
	}
	return records, nil
}
// lingyiwanwuPayloadUnescaper rewrites the backslash escape sequences found in
// the page payload in a single pass. Every pattern starts with the only
// backslash it contains and no replacement emits a backslash, so one pass
// produces the same text as applying the replacements one after another.
var lingyiwanwuPayloadUnescaper = strings.NewReplacer(
	`\u003c`, "<",
	`\u003e`, ">",
	`\n`, "\n",
	`\t`, " ",
	`\"`, `"`,
)

// lingyiwanwuHTMLTagPattern matches anything that looks like an HTML tag.
var lingyiwanwuHTMLTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)

// lingyiwanwuSpaceRunPattern matches runs of spaces and tabs (not newlines).
var lingyiwanwuSpaceRunPattern = regexp.MustCompile(`[ \t]+`)

// lingyiwanwuPricingPayload normalizes the raw page content for parsing. It
// decodes HTML entities, rewrites the escape sequences \u003c, \u003e, \n, \t
// and \" to their plain characters (\t becomes a space), replaces every HTML
// tag with a space, collapses runs of spaces and tabs into one space and trims
// surrounding whitespace. Newlines inside the text are kept.
//
// The regular expressions and the replacer are built once at package scope
// rather than on every call.
func lingyiwanwuPricingPayload(raw string) string {
	text := html.UnescapeString(raw)
	text = lingyiwanwuPayloadUnescaper.Replace(text)
	text = lingyiwanwuHTMLTagPattern.ReplaceAllString(text, " ")
	text = lingyiwanwuSpaceRunPattern.ReplaceAllString(text, " ")
	return strings.TrimSpace(text)
}