629 lines
18 KiB
Go
629 lines
18 KiB
Go
|
|
//go:build llm_script
|
|||
|
|
|
|||
|
|
package main
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"database/sql"
|
|||
|
|
"encoding/json"
|
|||
|
|
"fmt"
|
|||
|
|
"html"
|
|||
|
|
"io"
|
|||
|
|
"net/http"
|
|||
|
|
"os"
|
|||
|
|
"regexp"
|
|||
|
|
"sort"
|
|||
|
|
"strconv"
|
|||
|
|
"strings"
|
|||
|
|
"time"
|
|||
|
|
|
|||
|
|
_ "github.com/lib/pq"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// subscriptionFetchMaxAttempts is the maximum number of HTTP attempts
// fetchSubscriptionPage makes for a single URL before giving up.
const subscriptionFetchMaxAttempts = 3
|
|||
|
|
|
|||
|
|
// subscriptionImportRecord is one subscription plan snapshot ready to be
// written to the database by upsertSubscriptionImportRecords.
type subscriptionImportRecord struct {
	// Provider fields: ProviderName is the lookup key in model_provider; the
	// others are only used when the provider row has to be created.
	ProviderName, ProviderNameCn, ProviderCountry, ProviderWebsite string

	// Operator fields: OperatorName is the lookup key in operator; the others
	// are only used when the operator row has to be created.
	OperatorName, OperatorNameCn, OperatorCountry, OperatorWebsite, OperatorType string

	// Plan identity and pricing, written to subscription_plan.
	PlanFamily, PlanCode, PlanName, Tier, BillingCycle, Currency string

	ListPrice float64
	PriceUnit string

	// QuotaValue and ContextWindow are stored as NULL when zero; QuotaUnit
	// and PlanScope are stored as NULL when blank.
	QuotaValue    int64
	QuotaUnit     string
	ContextWindow int
	PlanScope     string

	// ModelScope is JSON-encoded before it is stored.
	ModelScope []string
	SourceURL  string

	// PublishedAt uses the layout "2006-01-02 15:04:05" and EffectiveDate the
	// layout "2006-01-02"; both are parsed with those layouts on upsert.
	PublishedAt, EffectiveDate string

	// Notes is stored as NULL when blank.
	Notes string

	// PublishedAtKnown is false when PublishedAt is a fallback value; the
	// upsert then tries to reuse the dates of an identical stored snapshot.
	PublishedAtKnown bool
}
|
|||
|
|
|
|||
|
|
func loadSubscriptionImportEnv() {
|
|||
|
|
for _, path := range []string{".env.local", ".env"} {
|
|||
|
|
loadSubscriptionEnvFile(path)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// loadSubscriptionEnvFile reads KEY=VALUE pairs from the file at path and
// exports them into the process environment.
//
// Blank lines, lines starting with '#', lines without '=' and lines with an
// empty key are skipped. Keys and values are whitespace-trimmed, and any
// leading or trailing single or double quote characters are stripped from the
// value. Variables that already exist in the environment are left untouched.
// An unreadable file is silently ignored.
func loadSubscriptionEnvFile(path string) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		// Env files are optional; nothing to load.
		return
	}

	for _, rawLine := range strings.Split(string(contents), "\n") {
		entry := strings.TrimSpace(rawLine)
		if len(entry) == 0 || entry[0] == '#' {
			continue
		}

		sep := strings.Index(entry, "=")
		if sep < 0 {
			continue
		}

		name := strings.TrimSpace(entry[:sep])
		if name == "" {
			continue
		}
		if _, alreadySet := os.LookupEnv(name); alreadySet {
			continue
		}

		val := strings.Trim(strings.TrimSpace(entry[sep+1:]), `"'`)
		// Best effort: a failed Setenv leaves the variable unset.
		_ = os.Setenv(name, val)
	}
}
|
|||
|
|
|
|||
|
|
// subscriptionImportDB opens a "postgres" database handle using the DSN in
// the DATABASE_URL environment variable. When that variable is unset or
// empty, a built-in local unix-socket DSN is used instead.
//
// sql.Open does not necessarily establish a connection, so a nil error here
// does not prove the database is reachable.
func subscriptionImportDB() (*sql.DB, error) {
	const fallbackDSN = "postgres://long@/llm_intelligence?host=/var/run/postgresql"

	if configured := os.Getenv("DATABASE_URL"); configured != "" {
		return sql.Open("postgres", configured)
	}
	return sql.Open("postgres", fallbackDSN)
}
|
|||
|
|
|
|||
|
|
func fetchSubscriptionPage(url string, fixture string, client *http.Client) (string, error) {
|
|||
|
|
if fixture != "" {
|
|||
|
|
data, err := os.ReadFile(fixture)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", fmt.Errorf("read fixture %s: %w", fixture, err)
|
|||
|
|
}
|
|||
|
|
return string(data), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var lastErr error
|
|||
|
|
for attempt := 1; attempt <= subscriptionFetchMaxAttempts; attempt++ {
|
|||
|
|
body, retryable, err := fetchSubscriptionPageOnce(url, client)
|
|||
|
|
if err == nil {
|
|||
|
|
return body, nil
|
|||
|
|
}
|
|||
|
|
lastErr = err
|
|||
|
|
if !retryable || attempt == subscriptionFetchMaxAttempts {
|
|||
|
|
return "", err
|
|||
|
|
}
|
|||
|
|
time.Sleep(time.Duration(attempt) * 200 * time.Millisecond)
|
|||
|
|
}
|
|||
|
|
return "", lastErr
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func fetchSubscriptionPageOnce(url string, client *http.Client) (string, bool, error) {
|
|||
|
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", false, err
|
|||
|
|
}
|
|||
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36")
|
|||
|
|
req.Header.Set("Accept", "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8")
|
|||
|
|
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
|
|||
|
|
|
|||
|
|
resp, err := client.Do(req)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("fetch %s: %w", url, err)
|
|||
|
|
}
|
|||
|
|
defer resp.Body.Close()
|
|||
|
|
|
|||
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|||
|
|
retryable := resp.StatusCode == http.StatusForbidden ||
|
|||
|
|
resp.StatusCode == http.StatusTooManyRequests ||
|
|||
|
|
resp.StatusCode == http.StatusBadGateway ||
|
|||
|
|
resp.StatusCode == http.StatusServiceUnavailable ||
|
|||
|
|
resp.StatusCode == http.StatusGatewayTimeout
|
|||
|
|
return "", retryable, fmt.Errorf("fetch %s: unexpected status %d", url, resp.StatusCode)
|
|||
|
|
}
|
|||
|
|
body, err := io.ReadAll(resp.Body)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("read %s: %w", url, err)
|
|||
|
|
}
|
|||
|
|
return normalizeSubscriptionPage(string(body)), false, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// isRetriableSubscriptionFetchError reports whether err looks like a
// transient fetch failure. The decision is purely textual: the lower-cased
// error message is searched for a fixed list of substrings. A nil error is
// never retryable.
func isRetriableSubscriptionFetchError(err error) bool {
	if err == nil {
		return false
	}

	markers := [...]string{
		"eof",
		"timeout",
		"temporarily unavailable",
		"transport closed",
		"connection reset",
		"connection refused",
		"tls handshake timeout",
		"i/o timeout",
		"too many requests",
		"no such host",
		"forbidden",
		"status 403",
	}

	message := strings.ToLower(err.Error())
	for i := 0; i < len(markers); i++ {
		if strings.Index(message, markers[i]) >= 0 {
			return true
		}
	}
	return false
}
|
|||
|
|
|
|||
|
|
// Patterns used by normalizeSubscriptionPage. They are compiled once at
// package initialisation rather than on every call.
var (
	subscriptionPageScriptPattern = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	subscriptionPageStylePattern  = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	subscriptionPageTagPattern    = regexp.MustCompile(`(?is)<[^>]+>`)
	subscriptionPageSpacePattern  = regexp.MustCompile(`[ \t]+`)
)

// normalizeSubscriptionPage reduces an HTML document to plain text lines.
//
// <script> and <style> elements are dropped together with their content,
// every remaining tag is replaced by a line break, and HTML entities are
// decoded afterwards (so an escaped "&lt;b&gt;" survives as literal text).
// Within each line, runs of spaces and tabs collapse to one space and the
// line is trimmed; empty lines are removed. The result is the surviving lines
// joined with "\n".
func normalizeSubscriptionPage(raw string) string {
	text := subscriptionPageScriptPattern.ReplaceAllString(raw, "\n")
	text = subscriptionPageStylePattern.ReplaceAllString(text, "\n")
	text = subscriptionPageTagPattern.ReplaceAllString(text, "\n")
	text = html.UnescapeString(text)

	// Normalise CRLF and bare CR so that splitting on "\n" sees every line.
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	lines := strings.Split(text, "\n")
	cleaned := make([]string, 0, len(lines))
	for _, line := range lines {
		line = strings.TrimSpace(subscriptionPageSpacePattern.ReplaceAllString(line, " "))
		if line == "" {
			continue
		}
		cleaned = append(cleaned, line)
	}
	return strings.Join(cleaned, "\n")
}
|
|||
|
|
|
|||
|
|
// subscriptionPublishedAtPatterns are tried in order by publishedAtFromText.
// The labels are Chinese for "last updated time" and "updated time". The
// colon after the label may be an ASCII ':' or a full-width colon (U+FF1A),
// written as \x{FF1A} so the pattern survives tools that normalise
// full-width punctuation.
var subscriptionPublishedAtPatterns = []*regexp.Regexp{
	regexp.MustCompile(`最近更新时间[:\x{FF1A}]\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})`),
	regexp.MustCompile(`更新时间[:\x{FF1A}]?\s*(\d{4}-\d{2}-\d{2})`),
}

// publishedAtFromText extracts a publication timestamp from page text.
//
// It returns the timestamp in "2006-01-02 15:04:05" form and true when one of
// the known "updated at" labels is found; a date-only match is padded with
// " 00:00:00". When nothing matches it returns the current local time in the
// same layout and false, so callers can tell a real date from the fallback.
// The matched digits are not validated as a real calendar date here.
func publishedAtFromText(raw string) (string, bool) {
	const dateOnlyLayout = "2006-01-02"

	for _, pattern := range subscriptionPublishedAtPatterns {
		matches := pattern.FindStringSubmatch(raw)
		if len(matches) != 2 {
			continue
		}
		stamp := matches[1]
		if len(stamp) == len(dateOnlyLayout) {
			return stamp + " 00:00:00", true
		}
		return stamp, true
	}
	return time.Now().Format("2006-01-02 15:04:05"), false
}
|
|||
|
|
|
|||
|
|
// effectiveDateFromPublishedAt returns the first ten bytes of publishedAt,
// i.e. the date part of a "2006-01-02 15:04:05" timestamp. Inputs shorter
// than ten bytes yield today's local date instead. The prefix is not checked
// for being a valid date.
func effectiveDateFromPublishedAt(publishedAt string) string {
	const dateLayout = "2006-01-02"

	if len(publishedAt) < len(dateLayout) {
		return time.Now().Format(dateLayout)
	}
	return publishedAt[:len(dateLayout)]
}
|
|||
|
|
|
|||
|
|
// upsertSubscriptionImportRecords writes every record into subscription_plan
// and then removes redundant snapshot rows for each plan that was written.
//
// For each record it resolves the provider and operator IDs (the ensure*
// helpers create missing rows). When the record's publish time is not known
// it tries to reuse the dates of an identical stored snapshot. The row is
// then inserted, or updated on a (provider_id, plan_code, effective_date)
// conflict.
//
// Statements are issued directly on db, not inside a transaction, and the
// function returns on the first error, so records processed before the
// failure stay written.
func upsertSubscriptionImportRecords(db *sql.DB, records []subscriptionImportRecord) error {
	// snapshotKey identifies one plan whose history needs compacting afterwards.
	type snapshotKey struct {
		providerID int64
		planCode   string
	}

	historyKeys := make(map[snapshotKey]struct{})
	for _, record := range records {
		providerID, err := ensureSubscriptionProvider(db, record)
		if err != nil {
			return err
		}
		operatorID, err := ensureSubscriptionOperator(db, record)
		if err != nil {
			return err
		}
		if !record.PublishedAtKnown {
			// record is the loop's copy, so the reused dates affect only this
			// iteration and never the caller's slice.
			history, err := loadSubscriptionSnapshotHistory(db, providerID, record.PlanCode)
			if err != nil {
				return err
			}
			if _, err := reuseExistingSnapshotDates(&record, history); err != nil {
				return err
			}
		}

		publishedAt, err := time.Parse("2006-01-02 15:04:05", record.PublishedAt)
		if err != nil {
			return fmt.Errorf("parse published_at for %s: %w", record.PlanCode, err)
		}
		effectiveDate, err := time.Parse("2006-01-02", record.EffectiveDate)
		if err != nil {
			return fmt.Errorf("parse effective_date for %s: %w", record.PlanCode, err)
		}

		modelScopeRaw, err := json.Marshal(record.ModelScope)
		if err != nil {
			return fmt.Errorf("marshal model_scope for %s: %w", record.PlanCode, err)
		}

		// Zero quota/context values and blank optional strings are stored as NULL.
		_, err = db.Exec(
			`INSERT INTO subscription_plan (
				provider_id, operator_id, plan_family, plan_code, plan_name, tier,
				billing_cycle, currency, list_price, price_unit, quota_value, quota_unit,
				context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes
			) VALUES (
				$1, $2, $3, $4, $5, $6,
				$7, $8, $9, $10, $11, $12,
				$13, $14, $15, $16, $17, $18, $19
			)
			ON CONFLICT (provider_id, plan_code, effective_date)
			DO UPDATE SET
				operator_id = EXCLUDED.operator_id,
				plan_family = EXCLUDED.plan_family,
				plan_name = EXCLUDED.plan_name,
				tier = EXCLUDED.tier,
				billing_cycle = EXCLUDED.billing_cycle,
				currency = EXCLUDED.currency,
				list_price = EXCLUDED.list_price,
				price_unit = EXCLUDED.price_unit,
				quota_value = EXCLUDED.quota_value,
				quota_unit = EXCLUDED.quota_unit,
				context_window = EXCLUDED.context_window,
				plan_scope = EXCLUDED.plan_scope,
				model_scope = EXCLUDED.model_scope,
				source_url = EXCLUDED.source_url,
				published_at = EXCLUDED.published_at,
				notes = EXCLUDED.notes,
				updated_at = CURRENT_TIMESTAMP`,
			providerID, operatorID, record.PlanFamily, record.PlanCode, record.PlanName, record.Tier,
			record.BillingCycle, record.Currency, record.ListPrice, record.PriceUnit, nullIfZeroInt64(record.QuotaValue), nullIfBlank(record.QuotaUnit),
			nullIfZeroIntCommon(record.ContextWindow), nullIfBlank(record.PlanScope), string(modelScopeRaw), record.SourceURL, publishedAt, effectiveDate, nullIfBlank(record.Notes),
		)
		if err != nil {
			return fmt.Errorf("upsert subscription_plan %s: %w", record.PlanCode, err)
		}
		historyKeys[snapshotKey{providerID: providerID, planCode: record.PlanCode}] = struct{}{}
	}

	// Compact once per distinct plan, after all records have been written.
	// Map iteration order is unspecified, so plans are compacted in no
	// particular order.
	for key := range historyKeys {
		if err := compactSubscriptionSnapshotHistory(db, key.providerID, key.planCode); err != nil {
			return err
		}
	}
	return nil
}
|
|||
|
|
|
|||
|
|
// subscriptionSnapshotRow is one stored subscription_plan row as read by
// loadSubscriptionSnapshotHistory. Nullable quota, context-window, scope and
// notes columns are read as zero values.
type subscriptionSnapshotRow struct {
	ID int64

	PlanName, Tier, BillingCycle, Currency string

	ListPrice     float64
	PriceUnit     string
	QuotaValue    int64
	QuotaUnit     string
	ContextWindow int

	// ModelScope holds the stored model_scope text exactly as the database
	// returned it.
	PlanScope, ModelScope, SourceURL, Notes string

	PublishedAt, EffectiveDate time.Time
}
|
|||
|
|
|
|||
|
|
func loadSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) ([]subscriptionSnapshotRow, error) {
|
|||
|
|
rows, err := db.Query(
|
|||
|
|
`SELECT
|
|||
|
|
id,
|
|||
|
|
plan_name,
|
|||
|
|
tier,
|
|||
|
|
billing_cycle,
|
|||
|
|
currency,
|
|||
|
|
list_price,
|
|||
|
|
price_unit,
|
|||
|
|
COALESCE(quota_value, 0),
|
|||
|
|
COALESCE(quota_unit, ''),
|
|||
|
|
COALESCE(context_window, 0),
|
|||
|
|
COALESCE(plan_scope, ''),
|
|||
|
|
model_scope,
|
|||
|
|
source_url,
|
|||
|
|
COALESCE(notes, ''),
|
|||
|
|
published_at,
|
|||
|
|
effective_date
|
|||
|
|
FROM subscription_plan
|
|||
|
|
WHERE provider_id = $1 AND plan_code = $2
|
|||
|
|
ORDER BY effective_date DESC, published_at DESC NULLS LAST, id DESC`,
|
|||
|
|
providerID, planCode,
|
|||
|
|
)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, fmt.Errorf("load subscription snapshot history %s: %w", planCode, err)
|
|||
|
|
}
|
|||
|
|
defer rows.Close()
|
|||
|
|
|
|||
|
|
history := make([]subscriptionSnapshotRow, 0)
|
|||
|
|
for rows.Next() {
|
|||
|
|
row := subscriptionSnapshotRow{}
|
|||
|
|
if err := rows.Scan(
|
|||
|
|
&row.ID,
|
|||
|
|
&row.PlanName,
|
|||
|
|
&row.Tier,
|
|||
|
|
&row.BillingCycle,
|
|||
|
|
&row.Currency,
|
|||
|
|
&row.ListPrice,
|
|||
|
|
&row.PriceUnit,
|
|||
|
|
&row.QuotaValue,
|
|||
|
|
&row.QuotaUnit,
|
|||
|
|
&row.ContextWindow,
|
|||
|
|
&row.PlanScope,
|
|||
|
|
&row.ModelScope,
|
|||
|
|
&row.SourceURL,
|
|||
|
|
&row.Notes,
|
|||
|
|
&row.PublishedAt,
|
|||
|
|
&row.EffectiveDate,
|
|||
|
|
); err != nil {
|
|||
|
|
return nil, fmt.Errorf("scan subscription snapshot history %s: %w", planCode, err)
|
|||
|
|
}
|
|||
|
|
history = append(history, row)
|
|||
|
|
}
|
|||
|
|
if err := rows.Err(); err != nil {
|
|||
|
|
return nil, fmt.Errorf("iterate subscription snapshot history %s: %w", planCode, err)
|
|||
|
|
}
|
|||
|
|
return history, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func reuseExistingSnapshotDates(record *subscriptionImportRecord, history []subscriptionSnapshotRow) (bool, error) {
|
|||
|
|
if record == nil || record.PublishedAtKnown || len(history) == 0 {
|
|||
|
|
return false, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
modelScopeRaw, err := json.Marshal(record.ModelScope)
|
|||
|
|
if err != nil {
|
|||
|
|
return false, fmt.Errorf("marshal model_scope for snapshot comparison %s: %w", record.PlanCode, err)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
for _, existing := range history {
|
|||
|
|
if existing.PlanName != record.PlanName ||
|
|||
|
|
existing.Tier != record.Tier ||
|
|||
|
|
existing.BillingCycle != record.BillingCycle ||
|
|||
|
|
existing.Currency != record.Currency ||
|
|||
|
|
existing.ListPrice != record.ListPrice ||
|
|||
|
|
existing.PriceUnit != record.PriceUnit ||
|
|||
|
|
existing.QuotaValue != record.QuotaValue ||
|
|||
|
|
existing.QuotaUnit != strings.TrimSpace(record.QuotaUnit) ||
|
|||
|
|
existing.ContextWindow != record.ContextWindow ||
|
|||
|
|
existing.PlanScope != strings.TrimSpace(record.PlanScope) ||
|
|||
|
|
existing.ModelScope != string(modelScopeRaw) ||
|
|||
|
|
existing.SourceURL != record.SourceURL ||
|
|||
|
|
existing.Notes != strings.TrimSpace(record.Notes) {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
record.PublishedAt = existing.PublishedAt.Format("2006-01-02 15:04:05")
|
|||
|
|
record.EffectiveDate = existing.EffectiveDate.Format("2006-01-02")
|
|||
|
|
return true, nil
|
|||
|
|
}
|
|||
|
|
return false, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func compactSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) error {
|
|||
|
|
history, err := loadSubscriptionSnapshotHistory(db, providerID, planCode)
|
|||
|
|
if err != nil {
|
|||
|
|
return err
|
|||
|
|
}
|
|||
|
|
for _, id := range redundantSnapshotRowIDs(history) {
|
|||
|
|
if _, err := db.Exec(`DELETE FROM subscription_plan WHERE id = $1`, id); err != nil {
|
|||
|
|
return fmt.Errorf("delete redundant subscription snapshot %d for %s: %w", id, planCode, err)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func redundantSnapshotRowIDs(history []subscriptionSnapshotRow) []int64 {
|
|||
|
|
type signatureKey struct {
|
|||
|
|
PlanName string
|
|||
|
|
Tier string
|
|||
|
|
BillingCycle string
|
|||
|
|
Currency string
|
|||
|
|
ListPrice float64
|
|||
|
|
PriceUnit string
|
|||
|
|
QuotaValue int64
|
|||
|
|
QuotaUnit string
|
|||
|
|
ContextWindow int
|
|||
|
|
PlanScope string
|
|||
|
|
ModelScope string
|
|||
|
|
SourceURL string
|
|||
|
|
Notes string
|
|||
|
|
}
|
|||
|
|
type keptSnapshot struct {
|
|||
|
|
ID int64
|
|||
|
|
EffectiveDate time.Time
|
|||
|
|
PublishedAt time.Time
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
makeKey := func(row subscriptionSnapshotRow) signatureKey {
|
|||
|
|
return signatureKey{
|
|||
|
|
PlanName: row.PlanName,
|
|||
|
|
Tier: row.Tier,
|
|||
|
|
BillingCycle: row.BillingCycle,
|
|||
|
|
Currency: row.Currency,
|
|||
|
|
ListPrice: row.ListPrice,
|
|||
|
|
PriceUnit: row.PriceUnit,
|
|||
|
|
QuotaValue: row.QuotaValue,
|
|||
|
|
QuotaUnit: row.QuotaUnit,
|
|||
|
|
ContextWindow: row.ContextWindow,
|
|||
|
|
PlanScope: row.PlanScope,
|
|||
|
|
ModelScope: row.ModelScope,
|
|||
|
|
SourceURL: row.SourceURL,
|
|||
|
|
Notes: row.Notes,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
shouldReplace := func(current keptSnapshot, candidate subscriptionSnapshotRow) bool {
|
|||
|
|
if candidate.EffectiveDate.Before(current.EffectiveDate) {
|
|||
|
|
return true
|
|||
|
|
}
|
|||
|
|
if candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Before(current.PublishedAt) {
|
|||
|
|
return true
|
|||
|
|
}
|
|||
|
|
return candidate.EffectiveDate.Equal(current.EffectiveDate) && candidate.PublishedAt.Equal(current.PublishedAt) && candidate.ID < current.ID
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
keptBySignature := make(map[signatureKey]keptSnapshot)
|
|||
|
|
redundant := make([]int64, 0)
|
|||
|
|
for _, row := range history {
|
|||
|
|
key := makeKey(row)
|
|||
|
|
current, exists := keptBySignature[key]
|
|||
|
|
if !exists {
|
|||
|
|
keptBySignature[key] = keptSnapshot{ID: row.ID, EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt}
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
if shouldReplace(current, row) {
|
|||
|
|
redundant = append(redundant, current.ID)
|
|||
|
|
keptBySignature[key] = keptSnapshot{ID: row.ID, EffectiveDate: row.EffectiveDate, PublishedAt: row.PublishedAt}
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
redundant = append(redundant, row.ID)
|
|||
|
|
}
|
|||
|
|
sort.Slice(redundant, func(i, j int) bool { return redundant[i] < redundant[j] })
|
|||
|
|
return redundant
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func ensureSubscriptionProvider(db *sql.DB, record subscriptionImportRecord) (int64, error) {
|
|||
|
|
var providerID int64
|
|||
|
|
err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, record.ProviderName).Scan(&providerID)
|
|||
|
|
if err == nil {
|
|||
|
|
return providerID, nil
|
|||
|
|
}
|
|||
|
|
if err != sql.ErrNoRows {
|
|||
|
|
return 0, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
err = db.QueryRow(
|
|||
|
|
`INSERT INTO model_provider (name, name_cn, country, website, status)
|
|||
|
|
VALUES ($1, $2, $3, $4, 'active')
|
|||
|
|
RETURNING id`,
|
|||
|
|
record.ProviderName, nullIfBlank(record.ProviderNameCn), record.ProviderCountry, nullIfBlank(record.ProviderWebsite),
|
|||
|
|
).Scan(&providerID)
|
|||
|
|
return providerID, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func ensureSubscriptionOperator(db *sql.DB, record subscriptionImportRecord) (int64, error) {
|
|||
|
|
var operatorID int64
|
|||
|
|
err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, record.OperatorName).Scan(&operatorID)
|
|||
|
|
if err == nil {
|
|||
|
|
return operatorID, nil
|
|||
|
|
}
|
|||
|
|
if err != sql.ErrNoRows {
|
|||
|
|
return 0, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
err = db.QueryRow(
|
|||
|
|
`INSERT INTO operator (name, name_cn, country, website, description, status, type)
|
|||
|
|
VALUES ($1, $2, $3, $4, $5, 'active', $6)
|
|||
|
|
RETURNING id`,
|
|||
|
|
record.OperatorName, nullIfBlank(record.OperatorNameCn), record.OperatorCountry, nullIfBlank(record.OperatorWebsite),
|
|||
|
|
fmt.Sprintf("%s subscription import", record.OperatorName), record.OperatorType,
|
|||
|
|
).Scan(&operatorID)
|
|||
|
|
return operatorID, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func summarizeSubscriptionImport(records []subscriptionImportRecord, getter func(subscriptionImportRecord) string) string {
|
|||
|
|
counts := make(map[string]int)
|
|||
|
|
keys := make([]string, 0)
|
|||
|
|
for _, record := range records {
|
|||
|
|
key := getter(record)
|
|||
|
|
if _, exists := counts[key]; !exists {
|
|||
|
|
keys = append(keys, key)
|
|||
|
|
}
|
|||
|
|
counts[key]++
|
|||
|
|
}
|
|||
|
|
sort.Strings(keys)
|
|||
|
|
|
|||
|
|
parts := make([]string, 0, len(keys))
|
|||
|
|
for _, key := range keys {
|
|||
|
|
parts = append(parts, fmt.Sprintf("%s:%d", key, counts[key]))
|
|||
|
|
}
|
|||
|
|
return strings.Join(parts, ",")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// nullIfBlank returns nil when value is empty or whitespace-only, so the
// database driver stores NULL. Otherwise it returns value unchanged (not
// trimmed).
func nullIfBlank(value string) any {
	if len(strings.TrimSpace(value)) > 0 {
		return value
	}
	return nil
}
|
|||
|
|
|
|||
|
|
// nullIfZeroInt64 returns nil for 0, so the database driver stores NULL, and
// the int64 value itself otherwise.
func nullIfZeroInt64(value int64) any {
	if value != 0 {
		return value
	}
	return nil
}
|
|||
|
|
|
|||
|
|
// nullIfZeroIntCommon returns nil for 0, so the database driver stores NULL,
// and the int value itself otherwise.
func nullIfZeroIntCommon(value int) any {
	if value != 0 {
		return value
	}
	return nil
}
|
|||
|
|
|
|||
|
|
// mustParseSubscriptionPrice parses a price such as "1,299.50" after removing
// commas and spaces. Despite the "must" prefix it never panics: the parse
// error is discarded, so text that is not a number yields 0.
func mustParseSubscriptionPrice(raw string) float64 {
	digits := strings.NewReplacer(",", "", " ", "").Replace(raw)
	// Best effort by design: the error is intentionally ignored.
	price, _ := strconv.ParseFloat(digits, 64)
	return price
}
|
|||
|
|
|
|||
|
|
// mustParseSubscriptionInt64 parses a base-10 integer such as "1,000,000"
// after removing commas and spaces. Despite the "must" prefix it never
// panics: the parse error is discarded, so text that is not an integer
// yields 0.
func mustParseSubscriptionInt64(raw string) int64 {
	digits := strings.NewReplacer(",", "", " ", "").Replace(raw)
	// Best effort by design: the error is intentionally ignored.
	number, _ := strconv.ParseInt(digits, 10, 64)
	return number
}
|
|||
|
|
|
|||
|
|
// firstNonEmptyText returns the first argument that is not empty or
// whitespace-only, exactly as passed (untrimmed), or "" when there is none.
func firstNonEmptyText(values ...string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
|
|||
|
|
|
|||
|
|
// parseDecimalMultiplier parses raw as a decimal number (spaces removed,
// surrounding whitespace trimmed), multiplies it by unit and returns the
// product rounded to the nearest integer, halves away from zero.
//
// Rounding matters because decimal fractions are not exact in binary
// floating point: 0.29*100 evaluates to 28.999999999999996, which plain
// truncation would turn into 28 instead of 29. Text that cannot be parsed
// yields 0.
func parseDecimalMultiplier(raw string, unit int64) int64 {
	cleaned := strings.TrimSpace(strings.ReplaceAll(raw, " ", ""))
	value, err := strconv.ParseFloat(cleaned, 64)
	if err != nil {
		return 0
	}

	scaled := value * float64(unit)
	if scaled < 0 {
		return int64(scaled - 0.5)
	}
	return int64(scaled + 0.5)
}
|
|||
|
|
|
|||
|
|
// sliceSection returns the part of raw that follows the first occurrence of
// start, cut off just before the first subsequent occurrence of end.
//
// It returns "" when start does not occur. When end is empty or does not
// occur after start, everything after start is returned.
func sliceSection(raw string, start string, end string) string {
	_, tail, found := strings.Cut(raw, start)
	if !found {
		return ""
	}
	if end == "" {
		return tail
	}
	// Cut returns tail itself as head when end is absent.
	head, _, _ := strings.Cut(tail, end)
	return head
}
|