179 lines
5.3 KiB
Go
179 lines
5.3 KiB
Go
|
|
//go:build llm_script
|
||
|
|
|
||
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"database/sql"
|
||
|
|
"flag"
|
||
|
|
"fmt"
|
||
|
|
"html"
|
||
|
|
"io"
|
||
|
|
"net/http"
|
||
|
|
"os"
|
||
|
|
"regexp"
|
||
|
|
"strings"
|
||
|
|
"time"
|
||
|
|
)
|
||
|
|
|
||
|
|
// defaultQwenPricingURL is the default value of the -url flag and is also
// recorded as the source URL on every parsed pricing record.
const defaultQwenPricingURL = "https://help.aliyun.com/zh/model-studio/model-pricing"

// qwenModelLinePattern matches a line that consists solely of a model
// identifier: one of the prefixes "qwen", "qwq" or "qvq" followed by at least
// one lowercase letter, digit, dot or hyphen. Callers lowercase and trim the
// line before matching.
var qwenModelLinePattern = regexp.MustCompile(
	`^(qwen[0-9a-z.-]+|qwq[0-9a-z.-]+|qvq[0-9a-z.-]+)$`,
)
|
||
|
|
|
||
|
|
// qwenPricingImportConfig bundles the command-line settings for one run of
// the Qwen pricing import.
type qwenPricingImportConfig struct {
	// URL is the value of the -url flag and Fixture the value of the -fixture
	// flag; both are handed to fetchRawPricingPage to obtain the page content.
	URL, Fixture string
	// DryRun makes the run print a summary only, without touching the database.
	DryRun bool
	// Timeout is used as the HTTP client timeout for the fetch.
	Timeout time.Duration
}
|
||
|
|
|
||
|
|
func main() {
|
||
|
|
loadSubscriptionImportEnv()
|
||
|
|
|
||
|
|
var url string
|
||
|
|
var fixture string
|
||
|
|
var dryRun bool
|
||
|
|
var timeoutSeconds int
|
||
|
|
|
||
|
|
flag.StringVar(&url, "url", defaultQwenPricingURL, "通义千问官方模型价格页")
|
||
|
|
flag.StringVar(&fixture, "fixture", "", "通义千问价格样例文件")
|
||
|
|
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||
|
|
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||
|
|
flag.Parse()
|
||
|
|
|
||
|
|
cfg := qwenPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
||
|
|
|
||
|
|
var db *sql.DB
|
||
|
|
var err error
|
||
|
|
if !cfg.DryRun {
|
||
|
|
db, err = subscriptionImportDB()
|
||
|
|
if err != nil {
|
||
|
|
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||
|
|
os.Exit(1)
|
||
|
|
}
|
||
|
|
defer db.Close()
|
||
|
|
}
|
||
|
|
|
||
|
|
if err := runQwenPricingImport(cfg, db, os.Stdout); err != nil {
|
||
|
|
fmt.Fprintf(os.Stderr, "import_qwen_pricing: %v\n", err)
|
||
|
|
os.Exit(1)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func runQwenPricingImport(cfg qwenPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||
|
|
client := &http.Client{Timeout: cfg.Timeout}
|
||
|
|
raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client)
|
||
|
|
if err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
records, err := parseQwenPricingCatalog(raw)
|
||
|
|
if err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
records = dedupeOfficialPricingRecords(records)
|
||
|
|
if cfg.DryRun {
|
||
|
|
_, err = fmt.Fprintf(out, "source=qwen-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
if db == nil {
|
||
|
|
return fmt.Errorf("db is required when dry-run=false")
|
||
|
|
}
|
||
|
|
if err := upsertOfficialPricingRecords(db, records, "qwen-pricing-import"); err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
var tableRows int
|
||
|
|
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||
|
|
return fmt.Errorf("count region_pricing: %w", err)
|
||
|
|
}
|
||
|
|
_, err = fmt.Fprintf(out, "source=qwen-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseQwenPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||
|
|
lines := qwenPricingLines(raw)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
for i := 0; i < len(lines); i++ {
|
||
|
|
modelName := strings.ToLower(strings.TrimSpace(lines[i]))
|
||
|
|
if !qwenModelLinePattern.MatchString(modelName) {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
block := make([]string, 0, 12)
|
||
|
|
for j := i + 1; j < len(lines) && j < i+14; j++ {
|
||
|
|
next := strings.ToLower(strings.TrimSpace(lines[j]))
|
||
|
|
if qwenModelLinePattern.MatchString(next) {
|
||
|
|
break
|
||
|
|
}
|
||
|
|
block = append(block, lines[j])
|
||
|
|
}
|
||
|
|
prices := qwenBlockPrices(block)
|
||
|
|
if len(prices) < 2 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata("Qwen")
|
||
|
|
record := officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("qwen", modelName),
|
||
|
|
ModelName: modelName,
|
||
|
|
ProviderName: "Qwen",
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "DashScope",
|
||
|
|
OperatorNameCn: "通义千问 API",
|
||
|
|
OperatorCountry: "CN",
|
||
|
|
OperatorWebsite: "https://help.aliyun.com/zh/model-studio/model-pricing",
|
||
|
|
OperatorType: "official",
|
||
|
|
Region: "CN",
|
||
|
|
Currency: "CNY",
|
||
|
|
InputPrice: prices[0],
|
||
|
|
OutputPrice: prices[1],
|
||
|
|
SourceURL: defaultQwenPricingURL,
|
||
|
|
ModelSourceURL: defaultQwenPricingURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(modelName),
|
||
|
|
}
|
||
|
|
records = append(records, record)
|
||
|
|
}
|
||
|
|
if len(records) == 0 {
|
||
|
|
return nil, fmt.Errorf("unexpected qwen pricing content")
|
||
|
|
}
|
||
|
|
return records, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// Replacers and patterns used by qwenPricingLines. They are built once at
// package initialization rather than on every call (or every line).
var (
	// qwenEscapeReplacer decodes the literal escape sequences \u003c, \u003e,
	// \n and \t (backslash followed by characters, as found in embedded
	// script/JSON text) into "<", ">", a newline and a space respectively.
	qwenEscapeReplacer = strings.NewReplacer(
		`\u003c`, "<",
		`\u003e`, ">",
		`\n`, "\n",
		`\t`, " ",
	)

	// qwenLineBreakReplacer turns <br> variants and the closing tags of
	// block-level elements into newlines so each cell/paragraph gets its own line.
	qwenLineBreakReplacer = strings.NewReplacer(
		"<br>", "\n", "<br/>", "\n", "<br />", "\n",
		"</p>", "\n", "</div>", "\n", "</section>", "\n", "</tr>", "\n",
		"</td>", "\n", "</th>", "\n", "</li>", "\n", "</h1>", "\n",
		"</h2>", "\n", "</h3>", "\n", "</h4>", "\n", "</h5>", "\n", "</h6>", "\n",
	)

	// qwenHTMLTagPattern matches any remaining markup tag.
	qwenHTMLTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)

	// qwenWhitespaceRunPattern matches a run of whitespace to be collapsed.
	qwenWhitespaceRunPattern = regexp.MustCompile(`\s+`)
)

// qwenPricingLines flattens raw page content into trimmed, non-empty text
// lines.
//
// It decodes literal escape sequences and HTML entities, converts <br> and
// closing block-level tags into newlines, replaces every remaining tag with a
// space, splits on newlines, collapses whitespace runs to a single space and
// drops lines that end up empty. The returned slice is never nil.
func qwenPricingLines(raw string) []string {
	// Entities are unescaped before tag handling, so entity-encoded markup is
	// treated like real markup.
	decoded := html.UnescapeString(qwenEscapeReplacer.Replace(raw))

	withBreaks := qwenLineBreakReplacer.Replace(decoded)
	withBreaks = qwenHTMLTagPattern.ReplaceAllString(withBreaks, " ")

	parts := strings.Split(withBreaks, "\n")
	lines := make([]string, 0, len(parts))
	for _, part := range parts {
		line := strings.TrimSpace(qwenWhitespaceRunPattern.ReplaceAllString(part, " "))
		if line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}
|
||
|
|
|
||
|
|
func qwenBlockPrices(lines []string) []float64 {
|
||
|
|
pricePattern := regexp.MustCompile(`^([0-9]+(?:\.[0-9]+)?) 元$`)
|
||
|
|
prices := make([]float64, 0, 4)
|
||
|
|
for _, line := range lines {
|
||
|
|
match := pricePattern.FindStringSubmatch(strings.TrimSpace(line))
|
||
|
|
if len(match) == 2 {
|
||
|
|
prices = append(prices, mustParseSubscriptionPrice(match[1]))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return prices
|
||
|
|
}
|