feat(import): enrich baidu and bytedance release metadata

This commit is contained in:
phamnazage-jpg
2026-05-13 22:37:37 +08:00
parent bb5a1ff9e5
commit 92c9a40f4b
6 changed files with 279 additions and 21 deletions

View File

@@ -49,22 +49,71 @@ type ModelPricing struct {
ContextLength int
IsFree bool
SourceURL string
ModelSourceURL string
ReleaseDate string
Modality string
SceneTags []string
}
// releaseDateValue converts a "2006-01-02"-formatted (YYYY-MM-DD) string into
// the value passed as the release_date SQL parameter.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so
// each changed line appears twice. The first form of each pair (time.Time
// result, time.Now() fallback) appears to be the removed version; the second
// (any result, nil for blank or unparseable input) appears to be its
// replacement.
func releaseDateValue(raw string) time.Time {
func releaseDateValue(raw string) any {
// Whitespace-only input is treated the same as no date at all.
if strings.TrimSpace(raw) == "" {
return time.Now()
return nil
}
// raw itself (not the trimmed copy) is parsed, so surrounding whitespace
// makes the parse fail and takes the error branch below.
parsed, err := time.Parse("2006-01-02", raw)
if err != nil {
return time.Now()
return nil
}
return parsed
}
// baiduModelMetadata is a single prefix-keyed rule carrying release metadata
// that is copied onto matching models.
type baiduModelMetadata struct {
	Prefix         string // compared against the start of the lower-cased model ID
	ReleaseDate    string // copied to the model's ReleaseDate when non-empty
	ModelSourceURL string // copied to the model's ModelSourceURL when non-empty
}
// baiduModelMetadataRules holds the release date and source article for each
// known Baidu ERNIE model ID prefix.
//
// Order matters: enrichBaiduModelMetadata applies only the first rule whose
// Prefix matches, so the "-turbo" prefixes must stay ahead of the shorter
// prefixes they extend.
var baiduModelMetadataRules = []baiduModelMetadata{
	{Prefix: "baidu-ernie-4.5-turbo", ReleaseDate: "2025-04-25", ModelSourceURL: "https://cloud.baidu.com/article/3887765"},
	{Prefix: "baidu-ernie-x1-turbo", ReleaseDate: "2025-04-25", ModelSourceURL: "https://cloud.baidu.com/article/3887765"},
	{Prefix: "baidu-ernie-4.5", ReleaseDate: "2025-03-16", ModelSourceURL: "https://cloud.baidu.com/article/3835921"},
	{Prefix: "baidu-ernie-x1", ReleaseDate: "2025-03-16", ModelSourceURL: "https://cloud.baidu.com/article/3835921"},
}
// enrichBaiduModelMetadata returns model with release metadata filled in from
// baiduModelMetadataRules.
//
// The model ID is lower-cased and compared against each rule's Prefix in slice
// order; only the first matching rule is applied. Non-empty rule fields
// overwrite the corresponding model fields. If ModelSourceURL is still empty
// afterwards (no rule matched, or the matching rule carries no URL), it falls
// back to the model's SourceURL.
func enrichBaiduModelMetadata(model ModelPricing) ModelPricing {
	normalizedID := strings.ToLower(model.ModelID)
	for _, rule := range baiduModelMetadataRules {
		if !strings.HasPrefix(normalizedID, rule.Prefix) {
			continue
		}
		if rule.ReleaseDate != "" {
			model.ReleaseDate = rule.ReleaseDate
		}
		if rule.ModelSourceURL != "" {
			model.ModelSourceURL = rule.ModelSourceURL
		}
		// First match wins. Leave the loop rather than returning so the
		// SourceURL fallback below also covers a rule that has no URL.
		break
	}
	if model.ModelSourceURL == "" {
		model.ModelSourceURL = model.SourceURL
	}
	return model
}
func parseZhipuPrice(s string) float64 {
// Extract price from strings like "6元", "免费", "限时免费"
if strings.Contains(s, "免费") {
@@ -148,7 +197,7 @@ func main() {
}
for model, pricesMap := range modelPrices {
prices = append(prices, ModelPricing{
prices = append(prices, enrichBaiduModelMetadata(ModelPricing{
ModelID: "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")),
ModelName: model,
ProviderName: "Baidu",
@@ -162,7 +211,7 @@ func main() {
IsFree: pricesMap["input"] == 0 && pricesMap["output"] == 0,
SourceURL: "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya",
Modality: "text",
})
}))
}
log.Printf("Parsed %d unique models from Baidu", len(prices))
@@ -201,11 +250,11 @@ func main() {
var modelID int64
err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
if err == sql.ErrNoRows {
err = db.QueryRow(
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id, source_url, release_date)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7, $8, $9) RETURNING id`,
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, p.SourceURL, releaseDateValue(p.ReleaseDate),
).Scan(&modelID)
err = db.QueryRow(
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id, source_url, release_date)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7, $8, $9) RETURNING id`,
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate),
).Scan(&modelID)
}
if err != nil {
log.Printf("Model error: %v", err)
@@ -217,7 +266,7 @@ func main() {
release_date = COALESCE(release_date, $3),
updated_at = CURRENT_TIMESTAMP
WHERE id = $1`,
modelID, p.SourceURL, releaseDateValue(p.ReleaseDate),
modelID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate),
)
// Insert pricing
@@ -254,3 +303,12 @@ func main() {
log.Printf("Successfully imported %d models into database", len(prices))
}
// firstNonEmpty returns the first argument that is not the empty string. It
// returns "" when every argument is empty or when called with no arguments.
// Whitespace-only strings count as non-empty.
func firstNonEmpty(values ...string) string {
	for _, candidate := range values {
		if candidate == "" {
			continue
		}
		return candidate
	}
	return ""
}