Files
llm-intelligence/scripts/import_tencent_subscription.go

421 lines
11 KiB
Go
Raw Permalink Normal View History

//go:build llm_script && !scripts_pkg
package main
import (
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
_ "github.com/lib/pq"
)
// importTencentSubscriptionConfig carries the command-line options from main
// into runTencentSubscriptionImport.
type importTencentSubscriptionConfig struct {
	// URL is the catalog address (-url); it is also recorded as the source URL
	// of every imported plan. Fixture is the local sample file path (-fixture).
	URL, Fixture string
	// DryRun makes the importer print a summary without touching the database.
	DryRun bool
	// Timeout is passed to the catalog fetcher and used as the HTTP client timeout.
	Timeout time.Duration
}
// subscriptionPlanRow is one plan flattened into the values written to the
// subscription_plan table, together with the provider and operator attributes
// used to look up or create the owning model_provider and operator rows.
type subscriptionPlanRow struct {
	// Model provider identity: name, Chinese name, country.
	ProviderName, ProviderCN, ProviderCountry string
	// Operator identity: name, Chinese name, country, operator type.
	OperatorName, OperatorCN, OperatorCountry, OperatorType string
	// Plan identity and billing descriptors.
	PlanFamily, PlanCode, PlanName, Tier, BillingCycle, Currency string

	// Price and quota, each stored with the unit it is expressed in.
	ListPrice  float64
	PriceUnit  string
	QuotaValue int64
	QuotaUnit  string

	// ContextWindow is written as NULL when it is zero.
	ContextWindow int

	// PlanScope holds the plan series; ModelScope is a JSON array of model IDs.
	PlanScope, ModelScope, SourceURL string
	// PublishedAt must parse as "2006-01-02 15:04:05" and EffectiveDate as
	// "2006-01-02" when the row is upserted.
	PublishedAt, EffectiveDate string
	Notes                      string
}
// main loads the project env files, parses the command-line flags, opens the
// database unless -dry-run is set, and runs the Tencent subscription import.
// It exits with status 1 when the database cannot be opened or the import fails.
func main() {
	loadImportProjectEnv()

	var (
		rawURL         string
		fixturePath    string
		dryRun         bool
		timeoutSeconds int
	)
	flag.StringVar(&rawURL, "url", defaultTencentCatalogURL, "腾讯云公开目录 URL")
	flag.StringVar(&fixturePath, "fixture", "", "本地 HTML/Text 样例文件,优先用于离线导入")
	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", int(defaultTencentCatalogTimeout/time.Second), "请求超时(秒)")
	flag.Parse()

	cfg := importTencentSubscriptionConfig{
		URL:     rawURL,
		Fixture: fixturePath,
		DryRun:  dryRun,
		Timeout: time.Duration(timeoutSeconds) * time.Second,
	}

	// A dry run never touches the database, so db stays nil in that case.
	var db *sql.DB
	if !cfg.DryRun {
		dsn := os.Getenv("DATABASE_URL")
		if dsn == "" {
			dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
		}
		var err error
		db, err = sql.Open("postgres", dsn)
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			os.Exit(1)
		}
	}

	err := runTencentSubscriptionImport(cfg, db, os.Stdout)

	// os.Exit does not run deferred functions, so the pool is closed explicitly
	// before the exit status is decided instead of relying on defer.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
		}
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "import_tencent_subscription: %v\n", err)
		os.Exit(1)
	}
}
// loadImportProjectEnv loads ".env.local" and then ".env" from the current
// working directory. Because loadImportEnvFile never overwrites a variable
// that is already set, values from ".env.local" win over those in ".env".
func loadImportProjectEnv() {
	envFiles := [...]string{".env.local", ".env"}
	for i := range envFiles {
		loadImportEnvFile(envFiles[i])
	}
}
// loadImportEnvFile reads KEY=VALUE lines from the file at path and exports
// them into the process environment.
//
// Blank lines, lines starting with "#", lines without "=", and lines with an
// empty key are skipped. Variables that already exist in the environment are
// left untouched. A file that cannot be read is skipped silently.
func loadImportEnvFile(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		return
	}
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}
		value = strings.TrimSpace(value)
		// Remove exactly one pair of matching surrounding quotes. Trimming every
		// quote character from both ends would also eat quotes that belong to
		// the value itself, e.g. "opts='x'" would lose its closing single quote.
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}
		if _, exists := os.LookupEnv(key); exists {
			continue
		}
		// Best effort: a variable that cannot be set is skipped like any other
		// unusable line.
		_ = os.Setenv(key, value)
	}
}
// runTencentSubscriptionImport fetches the raw catalog through
// fetchTencentCatalogContent, parses it, converts it into subscription plan
// rows and then either reports or stores them.
//
// In dry-run mode it writes a single summary line to out and returns. Otherwise
// it requires a non-nil db, upserts the plans, counts the rows in
// subscription_plan, writes the summary to out and persists the same summary
// through writeTencentImportSummary.
//
// It returns an error when any step fails or when the catalog yields no plans.
func runTencentSubscriptionImport(cfg importTencentSubscriptionConfig, db *sql.DB, out io.Writer) error {
	raw, err := fetchTencentCatalogContent(fetchTencentCatalogConfig{
		URL:     cfg.URL,
		DryRun:  cfg.DryRun,
		Timeout: cfg.Timeout,
		Fixture: cfg.Fixture,
	}, &http.Client{Timeout: cfg.Timeout})
	if err != nil {
		return err
	}
	catalog, err := parseTencentCatalog(raw)
	if err != nil {
		return err
	}

	plans := buildSubscriptionPlans(catalog, cfg.URL)
	// Both summaries below read plans[0]; fail with a clear error instead of
	// panicking when the catalog contained no plans.
	if len(plans) == 0 {
		return fmt.Errorf("tencent catalog contains no subscription plans")
	}

	if cfg.DryRun {
		_, err = fmt.Fprintf(
			out,
			"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s dry_run=true\n",
			catalog.UpdatedAt,
			len(plans),
			plans[0].ProviderName,
			plans[0].OperatorName,
		)
		return err
	}

	if db == nil {
		return fmt.Errorf("db is required when dry-run=false")
	}
	if err := upsertSubscriptionPlans(db, plans); err != nil {
		return err
	}

	var tableRows int
	if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil {
		return fmt.Errorf("count subscription_plan: %w", err)
	}
	summary := fmt.Sprintf(
		"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n",
		catalog.UpdatedAt,
		len(plans),
		plans[0].ProviderName,
		plans[0].OperatorName,
		tableRows,
	)
	if _, err := io.WriteString(out, summary); err != nil {
		return err
	}
	return writeTencentImportSummary(summary)
}
// buildSubscriptionPlans turns every plan of the parsed catalog into a
// subscriptionPlanRow. Models are matched to a plan by series name; they
// supply the plan's context window (the largest among them) and its model
// scope (their IDs as JSON). sourceURL is recorded on every row.
func buildSubscriptionPlans(catalog tencentCatalog, sourceURL string) []subscriptionPlanRow {
	// Index the models by series once so each plan needs a single lookup.
	seriesModels := map[string][]tencentModel{}
	for _, m := range catalog.Models {
		seriesModels[m.Series] = append(seriesModels[m.Series], m)
	}

	rows := make([]subscriptionPlanRow, 0, len(catalog.Plans))
	for _, p := range catalog.Plans {
		related := seriesModels[p.Series]

		// Attributes that are the same for every Tencent Cloud plan.
		row := subscriptionPlanRow{
			ProviderName:    "Tencent",
			ProviderCN:      "腾讯",
			ProviderCountry: "CN",
			OperatorName:    "Tencent Cloud",
			OperatorCN:      "腾讯云",
			OperatorCountry: "CN",
			OperatorType:    "cloud",
			Currency:        "CNY",
			PriceUnit:       "CNY/month",
			QuotaUnit:       "tokens/month",
			SourceURL:       sourceURL,
		}

		// Attributes derived from the plan, its models and the catalog.
		row.PlanFamily = inferPlanFamily(p.Series)
		row.PlanCode = slugifyPlanCode(p.Series, p.Tier)
		row.PlanName = fmt.Sprintf("%s %s", p.Series, p.Tier)
		row.Tier = p.Tier
		row.BillingCycle = normalizeBillingCycle(p.BillingCycle)
		row.ListPrice = parsePlanPrice(p.Price)
		row.QuotaValue = parseQuotaValue(p.Quota)
		row.ContextWindow = maxContextWindow(related)
		row.PlanScope = p.Series
		row.ModelScope = encodeModelScope(related)
		row.PublishedAt = catalog.UpdatedAt
		row.EffectiveDate = extractEffectiveDate(catalog.UpdatedAt)
		row.Notes = strings.TrimSpace(p.Scene)

		rows = append(rows, row)
	}
	return rows
}
// inferPlanFamily classifies a plan series: any series whose name contains
// "coding plan" (case-insensitively) is a "coding_plan"; everything else is a
// "token_plan".
func inferPlanFamily(series string) string {
	family := "token_plan"
	if strings.Contains(strings.ToLower(series), "coding plan") {
		family = "coding_plan"
	}
	return family
}
// slugifyPlanCode builds the lower-case, hyphen-separated plan code for a
// series/tier pair, e.g. ("Hy Token Plan", "Pro") -> "hy-token-plan-pro".
//
// The two known series names are mapped to fixed prefixes; any other series is
// slugified as written. Spaces, "/", "_", "." and ":" become hyphens,
// parentheses are dropped, runs of hyphens collapse to a single hyphen and
// leading/trailing hyphens are removed.
func slugifyPlanCode(series string, tier string) string {
	seriesCode := strings.TrimSpace(series)
	switch seriesCode {
	case "通用 Token Plan":
		seriesCode = "token-plan"
	case "Hy Token Plan":
		seriesCode = "hy-token-plan"
	}

	raw := strings.ToLower(strings.TrimSpace(seriesCode + "-" + tier))

	// NOTE(review): the previous replacer carried two empty-string patterns that
	// replaced nothing. Full-width parentheses are handled here on the
	// assumption that this is what those entries were meant to be - confirm.
	raw = strings.NewReplacer(
		" ", "-", "/", "-", "_", "-", ".", "-", ":", "-",
		"(", "", ")", "", "（", "", "）", "",
	).Replace(raw)

	// A Replacer makes a single pass over its input, so hyphens it produces
	// (e.g. from " - " or ": ") must be collapsed in a separate step.
	for strings.Contains(raw, "--") {
		raw = strings.ReplaceAll(raw, "--", "-")
	}
	return strings.Trim(raw, "-")
}
// normalizeBillingCycle maps any billing-cycle text containing "月" to
// "monthly"; other values are returned with surrounding whitespace removed.
func normalizeBillingCycle(raw string) string {
	if !strings.ContainsRune(raw, '月') {
		return strings.TrimSpace(raw)
	}
	return "monthly"
}
// parsePlanPrice extracts the numeric price from text such as "39.9元/月".
// Whitespace around the text and between the number and the "元/月" suffix is
// ignored. It returns 0 when no number can be parsed.
func parsePlanPrice(raw string) float64 {
	// Trim before removing the suffix: trailing whitespace would otherwise stop
	// TrimSuffix from matching and the whole price would parse as 0.
	value := strings.TrimSuffix(strings.TrimSpace(raw), "元/月")
	price, err := strconv.ParseFloat(strings.TrimSpace(value), 64)
	if err != nil {
		// Covers both syntax errors and out-of-range values (which ParseFloat
		// reports together with ±Inf).
		return 0
	}
	return price
}
// planQuotaTokensPattern matches a decimal number, an optional Chinese
// magnitude suffix (万 = 10^4, 亿 = 10^8) and the literal "Tokens".
// It is compiled once instead of on every parseQuotaValue call.
var planQuotaTokensPattern = regexp.MustCompile(`([\d.]+)\s*([万亿]?)\s*Tokens`)

// parseQuotaValue converts quota text such as "500万 Tokens" or "3.5亿Tokens"
// into a token count. It returns 0 when the text does not contain a quota in
// that form or the number cannot be parsed.
func parseQuotaValue(raw string) int64 {
	matches := planQuotaTokensPattern.FindStringSubmatch(raw)
	if len(matches) != 3 {
		return 0
	}
	base, err := strconv.ParseFloat(matches[1], 64)
	if err != nil {
		return 0
	}
	switch matches[2] {
	case "万":
		base *= 10000
	case "亿":
		base *= 100000000
	}
	// Round to the nearest integer instead of truncating: decimal inputs are not
	// exactly representable, e.g. 1.15 * 1e8 evaluates to 114999999.99999998.
	// The pattern admits no sign, so base is never negative here.
	return int64(base + 0.5)
}
// maxContextWindow returns the largest ContextLength among models, or 0 when
// models is empty or no model has a positive context length.
func maxContextWindow(models []tencentModel) int {
	widest := 0
	for i := range models {
		if n := models[i].ContextLength; n > widest {
			widest = n
		}
	}
	return widest
}
// encodeModelScope returns the model IDs of models, in order, as a JSON array
// string. An empty or nil slice encodes as "[]".
func encodeModelScope(models []tencentModel) string {
	ids := make([]string, len(models))
	for i := range models {
		ids[i] = models[i].ModelID
	}
	// The marshal error is discarded, as before; the input is a plain []string.
	encoded, _ := json.Marshal(ids)
	return string(encoded)
}
// extractEffectiveDate returns the leading "YYYY-MM-DD"-sized prefix of
// updatedAt. When updatedAt is shorter than such a date, today's date (local
// time) is returned instead. The prefix itself is not validated.
func extractEffectiveDate(updatedAt string) string {
	const dateLayout = "2006-01-02"
	if len(updatedAt) < len(dateLayout) {
		return time.Now().Format(dateLayout)
	}
	return updatedAt[:len(dateLayout)]
}
// upsertSubscriptionPlans writes plans into subscription_plan, updating rows
// that already exist for the same (provider_id, plan_code, effective_date).
//
// The provider and operator are resolved (and created if missing) from the
// first plan and shared by all rows. The plan upserts run in one transaction,
// so a failure leaves subscription_plan unchanged; the provider/operator rows
// are created outside that transaction and are not rolled back.
//
// An empty plans slice is a no-op. Each plan's PublishedAt must be in
// "2006-01-02 15:04:05" form and EffectiveDate in "2006-01-02" form.
func upsertSubscriptionPlans(db *sql.DB, plans []subscriptionPlanRow) error {
	// Nothing to write; this also guards the plans[0] lookups below.
	if len(plans) == 0 {
		return nil
	}

	providerID, err := ensureModelProvider(db, plans[0])
	if err != nil {
		return err
	}
	operatorID, err := ensureOperator(db, plans[0])
	if err != nil {
		return err
	}

	const upsertPlan = `INSERT INTO subscription_plan (
provider_id, operator_id, plan_family, plan_code, plan_name, tier,
billing_cycle, currency, list_price, price_unit, quota_value, quota_unit,
context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes
) VALUES (
$1, $2, $3, $4, $5, $6,
$7, $8, $9, $10, $11, $12,
$13, $14, $15, $16, $17, $18, $19
)
ON CONFLICT (provider_id, plan_code, effective_date)
DO UPDATE SET
operator_id = EXCLUDED.operator_id,
plan_family = EXCLUDED.plan_family,
plan_name = EXCLUDED.plan_name,
tier = EXCLUDED.tier,
billing_cycle = EXCLUDED.billing_cycle,
currency = EXCLUDED.currency,
list_price = EXCLUDED.list_price,
price_unit = EXCLUDED.price_unit,
quota_value = EXCLUDED.quota_value,
quota_unit = EXCLUDED.quota_unit,
context_window = EXCLUDED.context_window,
plan_scope = EXCLUDED.plan_scope,
model_scope = EXCLUDED.model_scope,
source_url = EXCLUDED.source_url,
published_at = EXCLUDED.published_at,
notes = EXCLUDED.notes,
updated_at = CURRENT_TIMESTAMP`

	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("begin subscription_plan transaction: %w", err)
	}
	// After a successful Commit this Rollback only reports sql.ErrTxDone, so its
	// result is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	for _, plan := range plans {
		publishedAt, err := time.Parse("2006-01-02 15:04:05", plan.PublishedAt)
		if err != nil {
			return fmt.Errorf("parse published_at for %s: %w", plan.PlanCode, err)
		}
		effectiveDate, err := time.Parse("2006-01-02", plan.EffectiveDate)
		if err != nil {
			return fmt.Errorf("parse effective_date for %s: %w", plan.PlanCode, err)
		}
		_, err = tx.Exec(
			upsertPlan,
			providerID, operatorID, plan.PlanFamily, plan.PlanCode, plan.PlanName, plan.Tier,
			plan.BillingCycle, plan.Currency, plan.ListPrice, plan.PriceUnit, plan.QuotaValue, plan.QuotaUnit,
			nullIfZero(plan.ContextWindow), plan.PlanScope, plan.ModelScope, plan.SourceURL, publishedAt, effectiveDate, plan.Notes,
		)
		if err != nil {
			return fmt.Errorf("upsert subscription_plan %s: %w", plan.PlanCode, err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit subscription_plan upserts: %w", err)
	}
	return nil
}
// ensureModelProvider returns the id of the model_provider row named
// plan.ProviderName, inserting an 'active' row with the plan's provider
// attributes when none exists yet.
func ensureModelProvider(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
	var id int64

	lookupErr := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, plan.ProviderName).Scan(&id)
	switch lookupErr {
	case nil:
		return id, nil
	case sql.ErrNoRows:
		// Not registered yet: leave the switch and insert it below.
	default:
		return 0, lookupErr
	}

	const insertProvider = `INSERT INTO model_provider (name, name_cn, country, website, status)
VALUES ($1, $2, $3, $4, 'active')
RETURNING id`
	insertErr := db.QueryRow(
		insertProvider,
		plan.ProviderName, plan.ProviderCN, plan.ProviderCountry, "https://cloud.tencent.com",
	).Scan(&id)
	return id, insertErr
}
// ensureOperator returns the id of the operator row named plan.OperatorName,
// inserting an 'active' row with the plan's operator attributes when none
// exists yet.
func ensureOperator(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
	var id int64

	lookupErr := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, plan.OperatorName).Scan(&id)
	switch lookupErr {
	case nil:
		return id, nil
	case sql.ErrNoRows:
		// Not registered yet: leave the switch and insert it below.
	default:
		return 0, lookupErr
	}

	const insertOperator = `INSERT INTO operator (name, name_cn, country, website, description, status, type)
VALUES ($1, $2, $3, $4, $5, 'active', $6)
RETURNING id`
	insertErr := db.QueryRow(
		insertOperator,
		plan.OperatorName, plan.OperatorCN, plan.OperatorCountry, "https://cloud.tencent.com",
		"Tencent Cloud subscription plans", plan.OperatorType,
	).Scan(&id)
	return id, insertErr
}
// nullIfZero returns nil for 0 and the value itself otherwise, so that a zero
// value is passed to the database as NULL.
func nullIfZero(value int) any {
	if value != 0 {
		return value
	}
	return nil
}
// writeTencentImportSummary stores summary in
// reports/verification/tencent_subscription_import_latest.txt (relative to the
// working directory), creating the directory if needed and replacing any
// previous file.
func writeTencentImportSummary(summary string) error {
	const (
		summaryDir  = "reports/verification"
		summaryPath = "reports/verification/tencent_subscription_import_latest.txt"
	)
	err := os.MkdirAll(summaryDir, 0755)
	if err == nil {
		err = os.WriteFile(summaryPath, []byte(summary), 0644)
	}
	return err
}