//go:build llm_script package main import ( "database/sql" "encoding/json" "fmt" "html" "io" "net/http" "os" "regexp" "sort" "strconv" "strings" "time" _ "github.com/lib/pq" ) const subscriptionFetchMaxAttempts = 3 type subscriptionImportRecord struct { ProviderName string ProviderNameCn string ProviderCountry string ProviderWebsite string OperatorName string OperatorNameCn string OperatorCountry string OperatorWebsite string OperatorType string PlanFamily string PlanCode string PlanName string Tier string BillingCycle string Currency string ListPrice float64 PriceUnit string QuotaValue int64 QuotaUnit string ContextWindow int PlanScope string ModelScope []string SourceURL string PublishedAt string EffectiveDate string Notes string PublishedAtKnown bool } func loadSubscriptionImportEnv() { for _, path := range []string{".env.local", ".env"} { loadSubscriptionEnvFile(path) } } func loadSubscriptionEnvFile(path string) { data, err := os.ReadFile(path) if err != nil { return } for _, line := range strings.Split(string(data), "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue } key, value, ok := strings.Cut(line, "=") if !ok { continue } key = strings.TrimSpace(key) value = strings.Trim(strings.TrimSpace(value), `"'`) if key == "" { continue } if _, exists := os.LookupEnv(key); exists { continue } _ = os.Setenv(key, value) } } func subscriptionImportDB() (*sql.DB, error) { dsn := os.Getenv("DATABASE_URL") if dsn == "" { dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" } return sql.Open("postgres", dsn) } func fetchSubscriptionPage(url string, fixture string, client *http.Client) (string, error) { if fixture != "" { data, err := os.ReadFile(fixture) if err != nil { return "", fmt.Errorf("read fixture %s: %w", fixture, err) } return string(data), nil } body, err := fetchSubscriptionPageWithRetry(url, client) if err == nil { return body, nil } if markdownURL, ok := markdownFallbackURL(url, err); ok { return fetchSubscriptionPageWithRetry(markdownURL, client) } return "", err } func fetchSubscriptionPageWithRetry(url string, client *http.Client) (string, error) { var lastErr error for attempt := 1; attempt <= subscriptionFetchMaxAttempts; attempt++ { body, retryable, err := fetchSubscriptionPageOnce(url, client) if err == nil { return body, nil } lastErr = err if !retryable || attempt == subscriptionFetchMaxAttempts { return "", err } time.Sleep(time.Duration(attempt) * 200 * time.Millisecond) } return "", lastErr } func fetchSubscriptionPageOnce(url string, client *http.Client) (string, bool, error) { req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { return "", false, err } req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") resp, err := client.Do(req) if err != nil { return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("fetch %s: %w", url, err) } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { retryable := resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusBadGateway || resp.StatusCode == http.StatusServiceUnavailable || resp.StatusCode == http.StatusGatewayTimeout return "", retryable, fmt.Errorf("fetch %s: unexpected status %d", url, resp.StatusCode) } body, err := io.ReadAll(resp.Body) if err != nil { return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("read %s: %w", url, err) } return normalizeSubscriptionPage(string(body)), false, nil } func markdownFallbackURL(url string, err error) (string, bool) { if strings.TrimSpace(url) == "" || err == nil { return "", false } lower := strings.ToLower(err.Error()) if !strings.Contains(lower, "status 403") && !strings.Contains(lower, "forbidden") { return "", false } if strings.HasSuffix(url, ".md") { return "", false } return strings.TrimRight(url, "/") + ".md", true } func isRetriableSubscriptionFetchError(err error) bool { if err == nil { return false } lower := strings.ToLower(err.Error()) for _, marker := range []string{ "eof", "timeout", "temporarily unavailable", "transport closed", "connection reset", "connection refused", "tls handshake timeout", "i/o timeout", "too many requests", "no such host", "forbidden", "status 403", } { if strings.Contains(lower, marker) { return true } } return false } func normalizeSubscriptionPage(raw string) string { text := raw scriptPattern := regexp.MustCompile(`(?is)]*>.*?`) stylePattern := regexp.MustCompile(`(?is)]*>.*?`) tagPattern := regexp.MustCompile(`(?is)<[^>]+>`) spacePattern := regexp.MustCompile(`[ \t]+`) text = scriptPattern.ReplaceAllString(text, "\n") text = stylePattern.ReplaceAllString(text, "\n") text = tagPattern.ReplaceAllString(text, "\n") text = html.UnescapeString(text) text = strings.ReplaceAll(text, "\r\n", "\n") text = strings.ReplaceAll(text, "\r", "\n") lines := strings.Split(text, "\n") cleaned := make([]string, 0, len(lines)) for _, line := range lines { line = spacePattern.ReplaceAllString(line, " ") line = strings.TrimSpace(line) if line == "" { continue } cleaned = append(cleaned, line) } return strings.Join(cleaned, "\n") } func publishedAtFromText(raw string) (string, bool) { patterns := []*regexp.Regexp{ regexp.MustCompile(`最近更新时间[::]\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})`), regexp.MustCompile(`更新时间[::]?\s*(\d{4}-\d{2}-\d{2})`), } for _, pattern := range patterns { matches := pattern.FindStringSubmatch(raw) if len(matches) != 2 { continue } if len(matches[1]) == len("2006-01-02") { return matches[1] + " 00:00:00", true } return matches[1], true } return time.Now().Format("2006-01-02 15:04:05"), false } func effectiveDateFromPublishedAt(publishedAt string) string { if len(publishedAt) >= len("2006-01-02") { return publishedAt[:10] } return time.Now().Format("2006-01-02") } func upsertSubscriptionImportRecords(db *sql.DB, records []subscriptionImportRecord) error { type snapshotKey struct { providerID int64 planCode string } historyKeys := make(map[snapshotKey]struct{}) for _, record := range records { providerID, err := ensureSubscriptionProvider(db, record) if err != nil { return err } operatorID, err := ensureSubscriptionOperator(db, record) if err != nil { return err } if !record.PublishedAtKnown { history, err := loadSubscriptionSnapshotHistory(db, providerID, record.PlanCode) if err != nil { return err } if _, err := reuseExistingSnapshotDates(&record, history); err != nil { return err } } publishedAt, err := time.Parse("2006-01-02 15:04:05", record.PublishedAt) if err != nil { return fmt.Errorf("parse published_at for %s: %w", record.PlanCode, err) } effectiveDate, err := time.Parse("2006-01-02", record.EffectiveDate) if err != nil { return fmt.Errorf("parse effective_date for %s: %w", record.PlanCode, err) } modelScopeRaw, err := json.Marshal(record.ModelScope) if err != nil { return fmt.Errorf("marshal model_scope for %s: %w", record.PlanCode, err) } _, err = db.Exec( `INSERT INTO subscription_plan ( provider_id, operator_id, plan_family, plan_code, plan_name, tier, billing_cycle, currency, list_price, price_unit, quota_value, quota_unit, context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19 ) ON CONFLICT (provider_id, plan_code, effective_date) DO UPDATE SET operator_id = EXCLUDED.operator_id, plan_family = EXCLUDED.plan_family, plan_name = EXCLUDED.plan_name, tier = EXCLUDED.tier, billing_cycle = EXCLUDED.billing_cycle, currency = EXCLUDED.currency, list_price = EXCLUDED.list_price, price_unit = EXCLUDED.price_unit, quota_value = EXCLUDED.quota_value, quota_unit = EXCLUDED.quota_unit, context_window = EXCLUDED.context_window, plan_scope = EXCLUDED.plan_scope, model_scope = EXCLUDED.model_scope, source_url = EXCLUDED.source_url, published_at = EXCLUDED.published_at, notes = EXCLUDED.notes, updated_at = CURRENT_TIMESTAMP`, providerID, operatorID, record.PlanFamily, record.PlanCode, record.PlanName, record.Tier, record.BillingCycle, record.Currency, record.ListPrice, record.PriceUnit, nullIfZeroInt64(record.QuotaValue), nullIfBlank(record.QuotaUnit), nullIfZeroIntCommon(record.ContextWindow), nullIfBlank(record.PlanScope), string(modelScopeRaw), record.SourceURL, publishedAt, effectiveDate, nullIfBlank(record.Notes), ) if err != nil { return fmt.Errorf("upsert subscription_plan %s: %w", record.PlanCode, err) } historyKeys[snapshotKey{providerID: providerID, planCode: record.PlanCode}] = struct{}{} } for key := range historyKeys { if err := compactSubscriptionSnapshotHistory(db, key.providerID, key.planCode); err != nil { return err } } return nil } type subscriptionSnapshotRow struct { ID int64 PlanName string Tier string BillingCycle string Currency string ListPrice float64 PriceUnit string QuotaValue int64 QuotaUnit string ContextWindow int PlanScope string ModelScope string SourceURL string Notes string PublishedAt time.Time EffectiveDate time.Time } func loadSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) ([]subscriptionSnapshotRow, error) { rows, err := db.Query( `SELECT id, plan_name, tier, billing_cycle, currency, list_price, price_unit, COALESCE(quota_value, 0), COALESCE(quota_unit, ''), COALESCE(context_window, 0), COALESCE(plan_scope, ''), model_scope, source_url, COALESCE(notes, ''), published_at, effective_date FROM subscription_plan WHERE provider_id = $1 AND plan_code = $2 ORDER BY effective_date DESC, published_at DESC NULLS LAST, id DESC`, providerID, planCode, ) if err != nil { return nil, fmt.Errorf("load subscription snapshot history %s: %w", planCode, err) } defer rows.Close() history := make([]subscriptionSnapshotRow, 0) for rows.Next() { row := subscriptionSnapshotRow{} if err := rows.Scan( &row.ID, &row.PlanName, &row.Tier, &row.BillingCycle, &row.Currency, &row.ListPrice, &row.PriceUnit, &row.QuotaValue, &row.QuotaUnit, &row.ContextWindow, &row.PlanScope, &row.ModelScope, &row.SourceURL, &row.Notes, &row.PublishedAt, &row.EffectiveDate, ); err != nil { return nil, fmt.Errorf("scan subscription snapshot history %s: %w", planCode, err) } history = append(history, row) } if err := rows.Err(); err != nil { return nil, fmt.Errorf("iterate subscription snapshot history %s: %w", planCode, err) } return history, nil } func reuseExistingSnapshotDates(record *subscriptionImportRecord, history []subscriptionSnapshotRow) (bool, error) { if record == nil || record.PublishedAtKnown || len(history) == 0 { return false, nil } modelScopeRaw, err := json.Marshal(record.ModelScope) if err != nil { return false, fmt.Errorf("marshal model_scope for snapshot comparison %s: %w", record.PlanCode, err) } for _, existing := range history { if existing.PlanName != record.PlanName || existing.Tier != record.Tier || existing.BillingCycle != record.BillingCycle || existing.Currency != record.Currency || existing.ListPrice != record.ListPrice || existing.PriceUnit != record.PriceUnit || existing.QuotaValue != record.QuotaValue || existing.QuotaUnit != strings.TrimSpace(record.QuotaUnit) || existing.ContextWindow != record.ContextWindow || existing.PlanScope != strings.TrimSpace(record.PlanScope) || existing.ModelScope != string(modelScopeRaw) || existing.SourceURL != record.SourceURL || existing.Notes != strings.TrimSpace(record.Notes) { continue } record.PublishedAt = existing.PublishedAt.Format("2006-01-02 15:04:05") record.EffectiveDate = existing.EffectiveDate.Format("2006-01-02") return true, nil } return false, nil } func compactSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) error { history, err := loadSubscriptionSnapshotHistory(db, providerID, planCode) if err != nil { return err } for _, id := range redundantSnapshotRowIDs(history) { if _, err := db.Exec(`DELETE FROM subscription_plan WHERE id = $1`, id); err != nil { return fmt.Errorf("delete redundant subscription snapshot %d for %s: %w", id, planCode, err) } } return nil } func redundantSnapshotRowIDs(history []subscriptionSnapshotRow) []int64 { type signatureKey struct { PlanName string Tier string BillingCycle string Currency string ListPrice float64 PriceUnit string QuotaValue int64 QuotaUnit string ContextWindow int PlanScope string ModelScope string SourceURL string Notes string } type keptSnapshot struct { ID int64 EffectiveDate time.Time PublishedAt time.Time } makeKey := func(row subscriptionSnapshotRow) signatureKey { return signatureKey{ PlanName: row.PlanName, Tier: row.Tier, BillingCycle: row.BillingCycle, Currency: row.Currency, ListPrice: row.ListPrice, PriceUnit: row.PriceUnit, QuotaValue: row.QuotaValue, QuotaUnit: row.QuotaUnit, ContextWindow: row.ContextWindow, PlanScope: row.PlanScope, ModelScope: row.ModelScope, SourceURL: row.SourceURL, Notes: row.Notes, } } shouldReplace := func(current keptSnapshot, candidate subscriptionSnapshotRow) bool { if candidate.EffectiveDate.Before(current.EffectiveDate) { return true } if candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Before(current.PublishedAt) { return true } return candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Equal(current.PublishedAt) && candidate.ID < current.ID } keptBySignature := make(map[signatureKey]keptSnapshot) redundant := make([]int64, 0) for _, row := range history { key := makeKey(row) current, exists := keptBySignature[key] if !exists { keptBySignature[key] = keptSnapshot{ID: row.ID, EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt} continue } if shouldReplace(current, row) { redundant = append(redundant, current.ID) keptBySignature[key] = keptSnapshot{ID: row.ID, EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt} continue } redundant = append(redundant, row.ID) } sort.Slice(redundant, func(i, j int) bool { return redundant[i] < redundant[j] }) return redundant } func ensureSubscriptionProvider(db *sql.DB, record subscriptionImportRecord) (int64, error) { var providerID int64 err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, record.ProviderName).Scan(&providerID) if err == nil { return providerID, nil } if err != sql.ErrNoRows { return 0, err } err = db.QueryRow( `INSERT INTO model_provider (name, name_cn, country, website, status) VALUES ($1, $2, $3, $4, 'active') RETURNING id`, record.ProviderName, nullIfBlank(record.ProviderNameCn), record.ProviderCountry, nullIfBlank(record.ProviderWebsite), ).Scan(&providerID) return providerID, err } func ensureSubscriptionOperator(db *sql.DB, record subscriptionImportRecord) (int64, error) { var operatorID int64 err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, record.OperatorName).Scan(&operatorID) if err == nil { return operatorID, nil } if err != sql.ErrNoRows { return 0, err } err = db.QueryRow( `INSERT INTO operator (name, name_cn, country, website, description, status, type) VALUES ($1, $2, $3, $4, $5, 'active', $6) RETURNING id`, record.OperatorName, nullIfBlank(record.OperatorNameCn), record.OperatorCountry, nullIfBlank(record.OperatorWebsite), fmt.Sprintf("%s subscription import", record.OperatorName), record.OperatorType, ).Scan(&operatorID) return operatorID, err } func summarizeSubscriptionImport(records []subscriptionImportRecord, getter func(subscriptionImportRecord) string) string { counts := make(map[string]int) keys := make([]string, 0) for _, record := range records { key := getter(record) if _, exists := counts[key]; !exists { keys = append(keys, key) } counts[key]++ } sort.Strings(keys) parts := make([]string, 0, len(keys)) for _, key := range keys { parts = append(parts, fmt.Sprintf("%s:%d", key, counts[key])) } return strings.Join(parts, ",") } func nullIfBlank(value string) any { if strings.TrimSpace(value) == "" { return nil } return value } func nullIfZeroInt64(value int64) any { if value == 0 { return nil } return value } func nullIfZeroIntCommon(value int) any { if value == 0 { return nil } return value } func mustParseSubscriptionPrice(raw string) float64 { cleaned := strings.ReplaceAll(raw, ",", "") cleaned = strings.ReplaceAll(cleaned, " ", "") value, _ := strconv.ParseFloat(cleaned, 64) return value } func mustParseSubscriptionInt64(raw string) int64 { cleaned := strings.ReplaceAll(raw, ",", "") cleaned = strings.ReplaceAll(cleaned, " ", "") value, _ := strconv.ParseInt(cleaned, 10, 64) return value } func firstNonEmptyText(values ...string) string { for _, value := range values { if strings.TrimSpace(value) != "" { return value } } return "" } func parseDecimalMultiplier(raw string, unit int64) int64 { cleaned := strings.TrimSpace(strings.ReplaceAll(raw, " ", "")) value, _ := strconv.ParseFloat(cleaned, 64) return int64(value * float64(unit)) } func sliceSection(raw string, start string, end string) string { startIndex := strings.Index(raw, start) if startIndex < 0 { return "" } section := raw[startIndex+len(start):] if end != "" { if endIndex := strings.Index(section, end); endIndex >= 0 { section = section[:endIndex] } } return section }