//go:build llm_script package main import ( "database/sql" "encoding/json" "fmt" "log" "os" "strings" "time" _ "github.com/lib/pq" ) type RawData struct { Zhipu []struct { Model string `json:"model"` Context string `json:"context"` InputPrice string `json:"inputPrice"` OutputPrice string `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"zhipu"` Baidu []struct { Model string `json:"model"` Type string `json:"type"` InputPrice *float64 `json:"inputPrice"` OutputPrice *float64 `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"baidu"` } type ModelPricing struct { ModelID string ModelName string ProviderName string ProviderCountry string OperatorName string OperatorType string Region string Currency string InputPrice float64 OutputPrice float64 ContextLength int IsFree bool SourceURL string ReleaseDate string Modality string SceneTags []string } func releaseDateValue(raw string) time.Time { if strings.TrimSpace(raw) == "" { return time.Now() } parsed, err := time.Parse("2006-01-02", raw) if err != nil { return time.Now() } return parsed } func parseZhipuPrice(s string) float64 { // Extract price from strings like "6元", "免费", "限时免费" if strings.Contains(s, "免费") { return 0 } var f float64 fmt.Sscanf(s, "%f", &f) return f } func extractContextLength(context string) int { if strings.Contains(context, "1M") || strings.Contains(context, "1000K") { return 1000000 } if strings.Contains(context, "200K") { return 200000 } if strings.Contains(context, "128K") { return 128000 } if strings.Contains(context, "32K") { return 32000 } if strings.Contains(context, "8K") { return 8000 } if strings.Contains(context, "262144") || strings.Contains(context, "256K") { return 262144 } if strings.Contains(context, "8192") { return 8192 } return 0 } func main() { dsn := os.Getenv("DATABASE_URL") if dsn == "" { dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" } db, err := sql.Open("postgres", dsn) if err != nil { log.Fatal(err) } defer db.Close() // Read raw data data, err := os.ReadFile("/tmp/phase2_raw_data.json") if err != nil { log.Fatal("Failed to read raw data:", err) } var raw RawData if err := json.Unmarshal(data, &raw); err != nil { log.Fatal("Failed to parse raw data:", err) } var prices []ModelPricing batchID := "manual-seed" // Process Baidu data modelPrices := make(map[string]map[string]float64) // model -> type -> price for _, b := range raw.Baidu { if modelPrices[b.Model] == nil { modelPrices[b.Model] = make(map[string]float64) } if b.InputPrice != nil { if strings.Contains(b.Type, "输入") { modelPrices[b.Model]["input"] = *b.InputPrice * 1000000 // Convert to per 1M } if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.InputPrice * 1000000 } } if b.OutputPrice != nil { if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.OutputPrice * 1000000 } } } for model, pricesMap := range modelPrices { prices = append(prices, ModelPricing{ ModelID: "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")), ModelName: model, ProviderName: "Baidu", ProviderCountry: "CN", OperatorName: "Baidu Qianfan", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: pricesMap["input"], OutputPrice: pricesMap["output"], IsFree: pricesMap["input"] == 0 && pricesMap["output"] == 0, SourceURL: "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya", Modality: "text", }) } log.Printf("Parsed %d unique models from Baidu", len(prices)) // Save to database for _, p := range prices { // Find or create provider var providerID int64 err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id", p.ProviderName, p.ProviderCountry, "", ).Scan(&providerID) } if err != nil { log.Printf("Provider error: %v", err) continue } // Find or create operator var operatorID int64 err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id", p.OperatorName, p.ProviderCountry, ).Scan(&operatorID) } if err != nil { log.Printf("Operator error: %v", err) continue } // Find or create model var modelID int64 err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID) if err == sql.ErrNoRows { err = db.QueryRow( `INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id, source_url, release_date) VALUES ($1, $2, $3, $4, $5, 'active', $6, $7, $8, $9) RETURNING id`, p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, p.SourceURL, releaseDateValue(p.ReleaseDate), ).Scan(&modelID) } if err != nil { log.Printf("Model error: %v", err) continue } _, _ = db.Exec( `UPDATE models SET source_url = COALESCE(NULLIF(source_url, ''), $2), release_date = COALESCE(release_date, $3), updated_at = CURRENT_TIMESTAMP WHERE id = $1`, modelID, p.SourceURL, releaseDateValue(p.ReleaseDate), ) // Insert pricing sourceType := p.OperatorType freeQuota := "" freeLimitations := "[]" rateLimit := "{}" if p.IsFree { sourceType = "free_tier" freeQuota = "Imported free-tier pricing entry" freeLimitations = `["See source_url for current quota and policy"]` } _, err = db.Exec( `INSERT INTO region_pricing (model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit) VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12) ON CONFLICT (model_id, operator_id, region, currency, effective_date) DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok, output_price_per_mtok = EXCLUDED.output_price_per_mtok, is_free = EXCLUDED.is_free, source_type = EXCLUDED.source_type, free_quota = EXCLUDED.free_quota, free_limitations = EXCLUDED.free_limitations, rate_limit = EXCLUDED.rate_limit, updated_at = CURRENT_TIMESTAMP`, modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL, sourceType, freeQuota, freeLimitations, rateLimit, ) if err != nil { log.Printf("Pricing error for %s: %v", p.ModelID, err) continue } } log.Printf("Successfully imported %d models into database", len(prices)) }