This commit is contained in:
420
scripts/import_tencent_subscription.go
Normal file
420
scripts/import_tencent_subscription.go
Normal file
@@ -0,0 +1,420 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
|
||||
// importTencentSubscriptionConfig bundles the command-line options that drive
// a single import run.
type importTencentSubscriptionConfig struct {
	// URL is the catalog location handed to the fetcher; it is also stored
	// on every imported plan as its source URL.
	URL string
	// Fixture is the path of a local HTML/text sample used for offline
	// imports; empty when none was supplied on the command line.
	Fixture string
	// DryRun makes the run parse and print a summary without writing to
	// the database.
	DryRun bool
	// Timeout is the HTTP client timeout used while fetching the catalog.
	Timeout time.Duration
}
|
||||
|
||||
// subscriptionPlanRow is one flattened subscription plan, ready to be written
// to the subscription_plan table together with the provider and operator it
// belongs to.
type subscriptionPlanRow struct {
	// Model provider identity, used to look up or create the
	// model_provider row.
	ProviderName    string
	ProviderCN      string
	ProviderCountry string

	// Operator identity, used to look up or create the operator row.
	OperatorName    string
	OperatorCN      string
	OperatorCountry string
	OperatorType    string

	// Plan identity.
	PlanFamily string
	PlanCode   string
	PlanName   string
	Tier       string

	// Pricing.
	BillingCycle string
	Currency     string
	ListPrice    float64
	PriceUnit    string

	// Quota granted by the plan.
	QuotaValue int64
	QuotaUnit  string

	// ContextWindow is stored as NULL when it is zero.
	ContextWindow int
	PlanScope     string
	// ModelScope is a JSON-encoded array of model IDs.
	ModelScope string

	// Provenance.
	SourceURL string
	// PublishedAt is parsed with the layout "2006-01-02 15:04:05" before
	// it is stored.
	PublishedAt string
	// EffectiveDate is parsed with the layout "2006-01-02" before it is
	// stored.
	EffectiveDate string
	Notes         string
}
|
||||
|
||||
func main() {
|
||||
loadImportProjectEnv()
|
||||
|
||||
var rawURL string
|
||||
var fixturePath string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&rawURL, "url", defaultTencentCatalogURL, "腾讯云公开目录 URL")
|
||||
flag.StringVar(&fixturePath, "fixture", "", "本地 HTML/Text 样例文件,优先用于离线导入")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", int(defaultTencentCatalogTimeout/time.Second), "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := importTencentSubscriptionConfig{
|
||||
URL: rawURL,
|
||||
Fixture: fixturePath,
|
||||
DryRun: dryRun,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
dsn := os.Getenv("DATABASE_URL")
|
||||
if dsn == "" {
|
||||
dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
|
||||
}
|
||||
db, err = sql.Open("postgres", dsn)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runTencentSubscriptionImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_tencent_subscription: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func loadImportProjectEnv() {
|
||||
for _, path := range []string{".env.local", ".env"} {
|
||||
loadImportEnvFile(path)
|
||||
}
|
||||
}
|
||||
|
||||
// loadImportEnvFile reads KEY=VALUE pairs from the file at path and exports
// them into the process environment.
//
// The load is best-effort: an unreadable file is silently ignored. Blank
// lines, lines starting with '#', lines without '=' and lines with an empty
// key are skipped. A leading "export " is accepted so shell-sourceable files
// work. One pair of matching surrounding quotes (single or double) is removed
// from the value; unbalanced quote characters are kept because they are part
// of the value. Variables that already exist in the environment are never
// overwritten.
func loadImportEnvFile(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		// Env files are optional.
		return
	}

	for _, line := range strings.Split(string(data), "\n") {
		// TrimSpace also drops the '\r' left behind by CRLF line endings.
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if strings.HasPrefix(line, "export ") {
			line = strings.TrimSpace(strings.TrimPrefix(line, "export "))
		}

		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}

		value = strings.TrimSpace(value)
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}

		if _, exists := os.LookupEnv(key); exists {
			continue
		}
		// Best-effort: a value the OS rejects is skipped instead of
		// aborting the import.
		_ = os.Setenv(key, value)
	}
}
|
||||
|
||||
func runTencentSubscriptionImport(cfg importTencentSubscriptionConfig, db *sql.DB, out io.Writer) error {
|
||||
raw, err := fetchTencentCatalogContent(fetchTencentCatalogConfig{
|
||||
URL: cfg.URL,
|
||||
DryRun: cfg.DryRun,
|
||||
Timeout: cfg.Timeout,
|
||||
Fixture: cfg.Fixture,
|
||||
}, &http.Client{Timeout: cfg.Timeout})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
catalog, err := parseTencentCatalog(raw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
plans := buildSubscriptionPlans(catalog, cfg.URL)
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(
|
||||
out,
|
||||
"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s dry_run=true\n",
|
||||
catalog.UpdatedAt,
|
||||
len(plans),
|
||||
plans[0].ProviderName,
|
||||
plans[0].OperatorName,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
|
||||
if err := upsertSubscriptionPlans(db, plans); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM subscription_plan`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count subscription_plan: %w", err)
|
||||
}
|
||||
|
||||
summary := fmt.Sprintf(
|
||||
"source=tencent-subscription-import updated_at=%s plans=%d provider=%s operator=%s table_rows=%d dry_run=false\n",
|
||||
catalog.UpdatedAt,
|
||||
len(plans),
|
||||
plans[0].ProviderName,
|
||||
plans[0].OperatorName,
|
||||
tableRows,
|
||||
)
|
||||
if _, err := io.WriteString(out, summary); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeTencentImportSummary(summary); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildSubscriptionPlans(catalog tencentCatalog, sourceURL string) []subscriptionPlanRow {
|
||||
modelsBySeries := make(map[string][]tencentModel)
|
||||
for _, model := range catalog.Models {
|
||||
modelsBySeries[model.Series] = append(modelsBySeries[model.Series], model)
|
||||
}
|
||||
|
||||
plans := make([]subscriptionPlanRow, 0, len(catalog.Plans))
|
||||
for _, plan := range catalog.Plans {
|
||||
models := modelsBySeries[plan.Series]
|
||||
plans = append(plans, subscriptionPlanRow{
|
||||
ProviderName: "Tencent",
|
||||
ProviderCN: "腾讯",
|
||||
ProviderCountry: "CN",
|
||||
OperatorName: "Tencent Cloud",
|
||||
OperatorCN: "腾讯云",
|
||||
OperatorCountry: "CN",
|
||||
OperatorType: "cloud",
|
||||
PlanFamily: inferPlanFamily(plan.Series),
|
||||
PlanCode: slugifyPlanCode(plan.Series, plan.Tier),
|
||||
PlanName: fmt.Sprintf("%s %s", plan.Series, plan.Tier),
|
||||
Tier: plan.Tier,
|
||||
BillingCycle: normalizeBillingCycle(plan.BillingCycle),
|
||||
Currency: "CNY",
|
||||
ListPrice: parsePlanPrice(plan.Price),
|
||||
PriceUnit: "CNY/month",
|
||||
QuotaValue: parseQuotaValue(plan.Quota),
|
||||
QuotaUnit: "tokens/month",
|
||||
ContextWindow: maxContextWindow(models),
|
||||
PlanScope: plan.Series,
|
||||
ModelScope: encodeModelScope(models),
|
||||
SourceURL: sourceURL,
|
||||
PublishedAt: catalog.UpdatedAt,
|
||||
EffectiveDate: extractEffectiveDate(catalog.UpdatedAt),
|
||||
Notes: strings.TrimSpace(plan.Scene),
|
||||
})
|
||||
}
|
||||
return plans
|
||||
}
|
||||
|
||||
// inferPlanFamily classifies a series name: "coding_plan" when the name
// contains "coding plan" (case-insensitively), "token_plan" otherwise.
func inferPlanFamily(series string) string {
	family := "token_plan"
	if strings.Contains(strings.ToLower(series), "coding plan") {
		family = "coding_plan"
	}
	return family
}
|
||||
|
||||
// slugifyPlanCode builds the lower-case, dash-separated plan code for a
// series/tier pair, e.g. ("Hy Token Plan", "Pro (Max)") yields
// "hy-token-plan-pro-max".
//
// The two known series names are mapped to fixed prefixes; any other series
// is slugified as written. Spaces, '/', '_', '.' and ':' become dashes,
// ASCII and full-width parentheses are dropped, runs of dashes are collapsed
// to a single dash and leading/trailing dashes are removed.
func slugifyPlanCode(series string, tier string) string {
	seriesCode := strings.TrimSpace(series)
	switch seriesCode {
	case "通用 Token Plan":
		seriesCode = "token-plan"
	case "Hy Token Plan":
		seriesCode = "hy-token-plan"
	}

	replacer := strings.NewReplacer(
		" ", "-",
		"/", "-",
		"_", "-",
		".", "-",
		":", "-",
		"(", "",
		")", "",
		"（", "",
		"）", "",
	)
	code := replacer.Replace(strings.ToLower(strings.TrimSpace(seriesCode + "-" + tier)))

	// A Replacer makes a single pass and never rescans its own output, so
	// dashes produced above (e.g. from " / ") must be collapsed separately.
	for strings.Contains(code, "--") {
		code = strings.ReplaceAll(code, "--", "-")
	}
	return strings.Trim(code, "-")
}
|
||||
|
||||
// normalizeBillingCycle maps any billing-cycle text containing "月" to
// "monthly"; other values are returned with surrounding whitespace removed.
func normalizeBillingCycle(raw string) string {
	if !strings.Contains(raw, "月") {
		return strings.TrimSpace(raw)
	}
	return "monthly"
}
|
||||
|
||||
// parsePlanPrice extracts the numeric amount from a price label such as
// "39.9元/月". Whitespace around the label and between the number and the
// "元/月" suffix is ignored. It returns 0 when the remaining text is not a
// valid number.
func parsePlanPrice(raw string) float64 {
	// Trim before removing the suffix: trailing whitespace would otherwise
	// hide the suffix and make the whole label unparseable.
	value := strings.TrimSpace(raw)
	value = strings.TrimSpace(strings.TrimSuffix(value, "元/月"))

	price, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return 0
	}
	return price
}
|
||||
|
||||
// subscriptionQuotaPattern matches a quota such as "1.5亿 Tokens": a decimal
// number, an optional Chinese magnitude (万 = 1e4, 亿 = 1e8) and the literal
// "Tokens". It is compiled once instead of on every call.
var subscriptionQuotaPattern = regexp.MustCompile(`([\d.]+)\s*([万亿]?)\s*Tokens`)

// parseQuotaValue converts a quota label such as "1.5亿 Tokens" into an
// absolute token count. It returns 0 when raw contains no recognizable quota
// or the number cannot be parsed.
func parseQuotaValue(raw string) int64 {
	matches := subscriptionQuotaPattern.FindStringSubmatch(raw)
	if len(matches) != 3 {
		return 0
	}

	base, err := strconv.ParseFloat(matches[1], 64)
	if err != nil {
		return 0
	}
	switch matches[2] {
	case "万":
		base *= 10000
	case "亿":
		base *= 100000000
	}

	// Round to the nearest integer instead of truncating: binary floating
	// point turns e.g. 0.57*10000 into 5699.999999999999. The pattern
	// admits no sign, so base is never negative here.
	return int64(base + 0.5)
}
|
||||
|
||||
func maxContextWindow(models []tencentModel) int {
|
||||
max := 0
|
||||
for _, model := range models {
|
||||
if model.ContextLength > max {
|
||||
max = model.ContextLength
|
||||
}
|
||||
}
|
||||
return max
|
||||
}
|
||||
|
||||
func encodeModelScope(models []tencentModel) string {
|
||||
ids := make([]string, 0, len(models))
|
||||
for _, model := range models {
|
||||
ids = append(ids, model.ModelID)
|
||||
}
|
||||
data, _ := json.Marshal(ids)
|
||||
return string(data)
|
||||
}
|
||||
|
||||
// extractEffectiveDate returns the first ten bytes of updatedAt (the
// "YYYY-MM-DD" part of a timestamp). When updatedAt is shorter than that, it
// falls back to today's date in local time. The prefix is not validated.
func extractEffectiveDate(updatedAt string) string {
	const layout = "2006-01-02"
	if len(updatedAt) < len(layout) {
		return time.Now().Format(layout)
	}
	return updatedAt[:len(layout)]
}
|
||||
|
||||
func upsertSubscriptionPlans(db *sql.DB, plans []subscriptionPlanRow) error {
|
||||
providerID, err := ensureModelProvider(db, plans[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
operatorID, err := ensureOperator(db, plans[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, plan := range plans {
|
||||
publishedAt, err := time.Parse("2006-01-02 15:04:05", plan.PublishedAt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse published_at for %s: %w", plan.PlanCode, err)
|
||||
}
|
||||
effectiveDate, err := time.Parse("2006-01-02", plan.EffectiveDate)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse effective_date for %s: %w", plan.PlanCode, err)
|
||||
}
|
||||
|
||||
_, err = db.Exec(
|
||||
`INSERT INTO subscription_plan (
|
||||
provider_id, operator_id, plan_family, plan_code, plan_name, tier,
|
||||
billing_cycle, currency, list_price, price_unit, quota_value, quota_unit,
|
||||
context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6,
|
||||
$7, $8, $9, $10, $11, $12,
|
||||
$13, $14, $15, $16, $17, $18, $19
|
||||
)
|
||||
ON CONFLICT (provider_id, plan_code, effective_date)
|
||||
DO UPDATE SET
|
||||
operator_id = EXCLUDED.operator_id,
|
||||
plan_family = EXCLUDED.plan_family,
|
||||
plan_name = EXCLUDED.plan_name,
|
||||
tier = EXCLUDED.tier,
|
||||
billing_cycle = EXCLUDED.billing_cycle,
|
||||
currency = EXCLUDED.currency,
|
||||
list_price = EXCLUDED.list_price,
|
||||
price_unit = EXCLUDED.price_unit,
|
||||
quota_value = EXCLUDED.quota_value,
|
||||
quota_unit = EXCLUDED.quota_unit,
|
||||
context_window = EXCLUDED.context_window,
|
||||
plan_scope = EXCLUDED.plan_scope,
|
||||
model_scope = EXCLUDED.model_scope,
|
||||
source_url = EXCLUDED.source_url,
|
||||
published_at = EXCLUDED.published_at,
|
||||
notes = EXCLUDED.notes,
|
||||
updated_at = CURRENT_TIMESTAMP`,
|
||||
providerID, operatorID, plan.PlanFamily, plan.PlanCode, plan.PlanName, plan.Tier,
|
||||
plan.BillingCycle, plan.Currency, plan.ListPrice, plan.PriceUnit, plan.QuotaValue, plan.QuotaUnit,
|
||||
nullIfZero(plan.ContextWindow), plan.PlanScope, plan.ModelScope, plan.SourceURL, publishedAt, effectiveDate, plan.Notes,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("upsert subscription_plan %s: %w", plan.PlanCode, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func ensureModelProvider(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
|
||||
var providerID int64
|
||||
err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, plan.ProviderName).Scan(&providerID)
|
||||
if err == nil {
|
||||
return providerID, nil
|
||||
}
|
||||
if err != sql.ErrNoRows {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
err = db.QueryRow(
|
||||
`INSERT INTO model_provider (name, name_cn, country, website, status)
|
||||
VALUES ($1, $2, $3, $4, 'active')
|
||||
RETURNING id`,
|
||||
plan.ProviderName, plan.ProviderCN, plan.ProviderCountry, "https://cloud.tencent.com",
|
||||
).Scan(&providerID)
|
||||
return providerID, err
|
||||
}
|
||||
|
||||
func ensureOperator(db *sql.DB, plan subscriptionPlanRow) (int64, error) {
|
||||
var operatorID int64
|
||||
err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, plan.OperatorName).Scan(&operatorID)
|
||||
if err == nil {
|
||||
return operatorID, nil
|
||||
}
|
||||
if err != sql.ErrNoRows {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
err = db.QueryRow(
|
||||
`INSERT INTO operator (name, name_cn, country, website, description, status, type)
|
||||
VALUES ($1, $2, $3, $4, $5, 'active', $6)
|
||||
RETURNING id`,
|
||||
plan.OperatorName, plan.OperatorCN, plan.OperatorCountry, "https://cloud.tencent.com",
|
||||
"Tencent Cloud subscription plans", plan.OperatorType,
|
||||
).Scan(&operatorID)
|
||||
return operatorID, err
|
||||
}
|
||||
|
||||
// nullIfZero converts 0 to nil so that it is stored as SQL NULL; any other
// value is passed through unchanged.
func nullIfZero(value int) any {
	if value != 0 {
		return value
	}
	return nil
}
|
||||
|
||||
// writeTencentImportSummary stores summary as the latest import report under
// reports/verification (relative to the working directory), creating the
// directory when needed and replacing any previous report.
func writeTencentImportSummary(summary string) error {
	const (
		reportDir  = "reports/verification"
		reportPath = reportDir + "/tencent_subscription_import_latest.txt"
	)

	err := os.MkdirAll(reportDir, 0755)
	if err == nil {
		err = os.WriteFile(reportPath, []byte(summary), 0644)
	}
	return err
}
|
||||
Reference in New Issue
Block a user