Files
llm-intelligence/scripts/bedrock_pricing_lib.go
phamnazage-jpg 958245537a feat(imports): add real pricing and subscription collectors
Add plan catalog and subscription schema support, seed baselines, and real importers for core domestic subscriptions plus stable official pricing sources.

This commit also hardens the shared fetch layers so the importers can support live collection and database writes instead of relying on manual placeholders alone.
2026-05-15 22:32:57 +08:00

324 lines
9.0 KiB
Go

//go:build llm_script
package main
import (
"fmt"
"regexp"
"strings"
)
// defaultBedrockPricingURL is the Amazon Bedrock pricing page. Records built
// in this file use it as both their SourceURL and their ModelSourceURL.
const defaultBedrockPricingURL = "https://aws.amazon.com/bedrock/pricing/"
// Patterns used to pull pricing tables out of the Bedrock pricing page HTML.
// All of them use (?s) so that "." also matches newlines inside an element.
var (
	// bedrockRegionPattern captures the text that follows a bold
	// "Region:" / "Regions:" label inside a <p><b>…</b></p> paragraph.
	bedrockRegionPattern = regexp.MustCompile(`(?s)<p><b>Regions?:&nbsp;([^<]+)</b></p>`)
	// bedrockTablePattern captures the inner HTML of each <table> element.
	bedrockTablePattern = regexp.MustCompile(`(?s)<table[^>]*>(.*?)</table>`)
	// bedrockRowPattern captures the inner HTML of each <tr> element. The
	// opening tag may carry attributes (e.g. <tr class="…">), matching what the
	// table and cell patterns already accept; the \s guard keeps other tag
	// names that merely start with "tr" from being treated as rows.
	bedrockRowPattern = regexp.MustCompile(`(?s)<tr(?:\s[^>]*)?>(.*?)</tr>`)
	// bedrockCellPattern captures the inner HTML of each <td> or <th> cell.
	bedrockCellPattern = regexp.MustCompile(`(?s)<t[dh][^>]*>(.*?)</t[dh]>`)
)
// parseBedrockPricingCatalog turns the raw HTML of the Bedrock pricing page
// into pricing records.
//
// It first narrows the input to the span between the "Model_Pricing" heading
// and the "Pricing_examples" heading (using the whole document when that span
// is blank), parses every provider block found there, and, if the HTML tables
// produced nothing, retries against the cleaned text of the same span.
// An error is returned when neither strategy yields a record.
func parseBedrockPricingCatalog(raw string) ([]officialPricingRecord, error) {
	scope := extractBetween(raw, `<h3 id="Model_Pricing"`, `<h2 id="Pricing_examples"`)
	if strings.TrimSpace(scope) == "" {
		scope = raw
	}

	records := make([]officialPricingRecord, 0)
	for _, provider := range splitBedrockProviderBlocks(scope) {
		records = append(records, parseBedrockProviderBlock(provider.providerLabel, provider.content)...)
	}
	if len(records) > 0 {
		return records, nil
	}

	// No table rows were recognised; fall back to scanning the cleaned text.
	records = append(records, parseBedrockPricingTextFallback(cleanHTMLText(scope))...)
	if len(records) == 0 {
		return nil, fmt.Errorf("no bedrock pricing rows found")
	}
	return records, nil
}
// parseBedrockProviderBlock extracts pricing records from one provider
// section of the page.
//
// providerLabel is the section heading text and raw is the section HTML.
// Only tables that mention "Price per 1M input tokens" and contain a "$" are
// considered. Each table is attributed to the last region label that starts
// before it ("global" when there is none), and a model is emitted at most once
// per region within the block.
func parseBedrockProviderBlock(providerLabel string, raw string) []officialPricingRecord {
	provider := normalizeBedrockProvider(providerLabel)
	nameCn, country, website := providerMetadata(provider)

	regionSpans := bedrockRegionPattern.FindAllStringSubmatchIndex(raw, -1)
	tableSpans := bedrockTablePattern.FindAllStringSubmatchIndex(raw, -1)

	out := make([]officialPricingRecord, 0)
	emitted := make(map[string]struct{})
	for _, table := range tableSpans {
		body := raw[table[2]:table[3]]
		isPricingTable := strings.Contains(body, "Price per 1M input tokens") && strings.Contains(body, "$")
		if !isPricingTable {
			continue
		}

		// Matches come back in document order, so scanning backwards finds the
		// nearest region label that starts before this table.
		region := "global"
		for i := len(regionSpans) - 1; i >= 0; i-- {
			if label := regionSpans[i]; label[0] < table[0] {
				region = cleanHTMLText(raw[label[2]:label[3]])
				break
			}
		}

		for _, row := range parseBedrockTableRows(body) {
			key := region + "|" + row.ModelName
			if _, dup := emitted[key]; dup {
				continue
			}
			emitted[key] = struct{}{}
			out = append(out, officialPricingRecord{
				ModelID:         normalizeExternalID("bedrock", provider, row.ModelName),
				ModelName:       row.ModelName,
				ProviderName:    provider,
				ProviderNameCn:  nameCn,
				ProviderCountry: country,
				ProviderWebsite: website,
				OperatorName:    "Amazon Bedrock",
				OperatorNameCn:  "Amazon Bedrock",
				OperatorCountry: "US",
				OperatorWebsite: "https://aws.amazon.com/bedrock/",
				OperatorType:    "cloud",
				Region:          region,
				Currency:        "USD",
				InputPrice:      row.InputPrice,
				OutputPrice:     row.OutputPrice,
				SourceURL:       defaultBedrockPricingURL,
				ModelSourceURL:  defaultBedrockPricingURL,
				DateConfidence:  "unknown",
				DateSourceKind:  "official_pricing",
				Modality:        detectModality(row.ModelName),
				IsFree:          false,
			})
		}
	}
	return out
}
// bedrockProviderBlock is one provider section of the pricing page, as
// produced by splitBedrockProviderBlocks.
type bedrockProviderBlock struct {
// providerLabel is the cleaned text of the section's <h2> heading.
providerLabel string
// content is the raw HTML of the section, starting at its <h2> tag.
content string
}
// splitBedrockProviderBlocks cuts raw into sections, one per `<h2 id="` heading.
//
// Each section runs from its heading to the next such heading (or to the end
// of raw). A section is dropped when its heading has no closing </h2>, when the
// opening tag does not end before that closing tag, or when the cleaned
// heading text is blank. Anything before the first heading is ignored.
func splitBedrockProviderBlocks(raw string) []bedrockProviderBlock {
	const headingOpen = `<h2 id="`

	// Collect the byte offset of every heading opener.
	var starts []int
	cursor := 0
	for {
		rel := strings.Index(raw[cursor:], headingOpen)
		if rel < 0 {
			break
		}
		starts = append(starts, cursor+rel)
		cursor += rel + len(headingOpen)
	}

	sections := make([]bedrockProviderBlock, 0, len(starts))
	for i, from := range starts {
		to := len(raw)
		if i < len(starts)-1 {
			to = starts[i+1]
		}
		section := raw[from:to]

		closeAt := strings.Index(section, "</h2>")
		if closeAt == -1 {
			continue
		}
		tagEnd := strings.Index(section, ">")
		if tagEnd == -1 || tagEnd >= closeAt {
			continue
		}

		heading := cleanHTMLText(section[tagEnd+1 : closeAt])
		if strings.TrimSpace(heading) == "" {
			continue
		}
		sections = append(sections, bedrockProviderBlock{providerLabel: heading, content: section})
	}
	return sections
}
// extractBetween returns the part of raw that starts at the first occurrence
// of startMarker and stops just before the next occurrence of endMarker.
//
// The returned text includes startMarker and excludes endMarker. It returns ""
// when startMarker is absent, and everything from startMarker to the end of
// raw when endMarker is empty or does not occur afterwards.
func extractBetween(raw string, startMarker string, endMarker string) string {
	start := strings.Index(raw, startMarker)
	if start == -1 {
		return ""
	}
	segment := raw[start:]
	if endMarker == "" {
		return segment
	}
	// Look for the end marker only after the start marker itself; otherwise an
	// end marker that is a substring of the start marker (e.g. start
	// `<h2 id="a">`, end `<h2`) would match inside it and truncate the result.
	bodyStart := len(startMarker)
	end := strings.Index(segment[bodyStart:], endMarker)
	if end == -1 {
		return segment
	}
	return segment[:bodyStart+end]
}
// bedrockPriceRow holds the values parseBedrockTableRows extracts from one
// table row.
type bedrockPriceRow struct {
// ModelName is the cleaned text of the row's first cell.
ModelName string
// InputPrice is the price firstDollarPrice reads from the row's second cell.
InputPrice float64
// OutputPrice is the price firstDollarPrice reads from the third cell, or
// from the sixth cell when the row has one and it contains a "$".
OutputPrice float64
}
// parseBedrockTableRows converts the inner HTML of one pricing table into
// price rows.
//
// A row is skipped when it has fewer than three cells, when its first cell
// contains "models" (case-insensitive; this is how header rows are
// recognised), or when no dollar price can be read from the input or output
// cell. The input price comes from the second cell. The output price comes
// from the third cell unless the row has at least six cells and the sixth
// contains a "$", in which case the sixth is used.
// NOTE(review): the sixth-column rule presumably targets wider table layouts
// with extra pricing columns — confirm against the live page.
func parseBedrockTableRows(tableHTML string) []bedrockPriceRow {
	result := make([]bedrockPriceRow, 0)
	for _, rowMatch := range bedrockRowPattern.FindAllStringSubmatch(tableHTML, -1) {
		cellMatches := bedrockCellPattern.FindAllStringSubmatch(rowMatch[1], -1)
		if len(cellMatches) < 3 {
			continue
		}

		texts := make([]string, len(cellMatches))
		for i, cell := range cellMatches {
			texts[i] = cleanHTMLText(cell[1])
		}
		if strings.Contains(strings.ToLower(texts[0]), "models") {
			continue
		}

		outputText := texts[2]
		if len(texts) >= 6 && strings.Contains(texts[5], "$") {
			outputText = texts[5]
		}

		input, inputOK := firstDollarPrice(texts[1])
		if !inputOK {
			continue
		}
		output, outputOK := firstDollarPrice(outputText)
		if !outputOK {
			continue
		}

		result = append(result, bedrockPriceRow{
			ModelName:   texts[0],
			InputPrice:  input,
			OutputPrice: output,
		})
	}
	return result
}
// normalizeBedrockProvider maps a provider heading from the pricing page to
// the provider name used in records.
//
// Surrounding whitespace is ignored. A few headings are rewritten
// ("Amazon Nova" → "Amazon", "Kimi" → "Moonshot AI", "OpenAI OSS Models" →
// "OpenAI", "Z AI" → "Zhipu AI"); every other heading is returned trimmed but
// otherwise unchanged. Matching is case-sensitive.
func normalizeBedrockProvider(raw string) string {
	label := strings.TrimSpace(raw)
	switch label {
	case "Amazon Nova":
		return "Amazon"
	case "Kimi":
		return "Moonshot AI"
	case "OpenAI OSS Models":
		return "OpenAI"
	case "Z AI":
		return "Zhipu AI"
	}
	return label
}
// Patterns used by the plain-text fallback parser.
var (
	// bedrockTextProviderHeaderPattern finds a "<provider> models Price per 1M
	// input tokens" table header and captures the provider label. "Prce" is
	// accepted as well as "Price".
	bedrockTextProviderHeaderPattern = regexp.MustCompile(`([A-Za-z][A-Za-z0-9 .&-]+)\s+models\s+Pr(?:i)?ce per 1M input tokens`)
	// bedrockTextRowPattern finds a "<model> $<input> $<output>" row and
	// captures the model text and the two numeric amounts.
	bedrockTextRowPattern = regexp.MustCompile(`([A-Za-z0-9 .:+-]+?)\s+\$\s*([0-9.]+)\s+\$\s*([0-9.]+)`)
)
// parseBedrockPricingTextFallback extracts pricing records from text when the
// HTML table parser found nothing.
//
// raw is split at every provider header matched by
// bedrockTextProviderHeaderPattern; each piece runs from one header to the
// next (or to the end of raw). The region is looked up in the text just before
// the header and defaults to "global". Within a piece every
// "<model> $<input> $<output>" match becomes a record, emitted at most once
// per provider/region/model combination across the whole input.
// NOTE(review): each piece begins with the header text itself, and the model
// capture accepts letters, digits and spaces, so the first row of a piece may
// absorb header words into its model name depending on how the caller's text
// is laid out — verify against real page output.
func parseBedrockPricingTextFallback(raw string) []officialPricingRecord {
	headers := bedrockTextProviderHeaderPattern.FindAllStringSubmatchIndex(raw, -1)

	out := make([]officialPricingRecord, 0)
	emitted := make(map[string]struct{})
	for idx, header := range headers {
		if len(header) < 4 {
			continue
		}
		pieceStart, pieceEnd := header[0], len(raw)
		if idx < len(headers)-1 {
			pieceEnd = headers[idx+1][0]
		}

		region := normalizeBedrockRegionText(findBedrockTextRegion(raw, pieceStart))
		provider := normalizeBedrockProvider(raw[header[2]:header[3]])
		nameCn, country, website := providerMetadata(provider)

		for _, fields := range bedrockTextRowPattern.FindAllStringSubmatch(raw[pieceStart:pieceEnd], -1) {
			if len(fields) != 4 {
				continue
			}
			model := strings.TrimSpace(fields[1])
			key := provider + "|" + region + "|" + model
			if _, dup := emitted[key]; dup {
				continue
			}
			emitted[key] = struct{}{}

			out = append(out, officialPricingRecord{
				ModelID:         normalizeExternalID("bedrock", provider, model),
				ModelName:       model,
				ProviderName:    provider,
				ProviderNameCn:  nameCn,
				ProviderCountry: country,
				ProviderWebsite: website,
				OperatorName:    "Amazon Bedrock",
				OperatorNameCn:  "Amazon Bedrock",
				OperatorCountry: "US",
				OperatorWebsite: "https://aws.amazon.com/bedrock/",
				OperatorType:    "cloud",
				Region:          region,
				Currency:        "USD",
				InputPrice:      mustParseSubscriptionPrice(fields[2]),
				OutputPrice:     mustParseSubscriptionPrice(fields[3]),
				SourceURL:       defaultBedrockPricingURL,
				ModelSourceURL:  defaultBedrockPricingURL,
				DateConfidence:  "unknown",
				DateSourceKind:  "official_pricing",
				Modality:        detectModality(model),
			})
		}
	}
	return out
}
// findBedrockTextRegion returns the region text that precedes a provider
// header in the plain-text page.
//
// It inspects at most the 300 bytes of raw that end at headerStart, takes
// whatever follows the last "Regions:" or "Region:" label in that window, and
// cuts the result at the first " Priority ", " Flex ", " Batch " or " models "
// it contains. It returns "" when the window holds no region label.
func findBedrockTextRegion(raw string, headerStart int) string {
	windowStart := headerStart - 300
	if windowStart < 0 {
		windowStart = 0
	}
	window := raw[windowStart:headerStart]

	// Pick whichever label occurs last; the plural form wins a tie, which can
	// only happen when neither is present.
	label, at := "Regions:", strings.LastIndex(window, "Regions:")
	if singular := strings.LastIndex(window, "Region:"); singular > at {
		label, at = "Region:", singular
	}
	if at == -1 {
		return ""
	}

	region := strings.TrimSpace(window[at+len(label):])
	for _, stop := range [...]string{" Priority ", " Flex ", " Batch ", " models "} {
		cut := strings.Index(region, stop)
		if cut == -1 {
			continue
		}
		region = strings.TrimSpace(region[:cut])
	}
	return region
}
// normalizeBedrockRegionText tidies a region string taken from page text.
//
// Surrounding whitespace and one trailing comma are removed and inner runs of
// whitespace are collapsed to single spaces. When nothing is left — including
// when the input was only a comma — it returns "global" so that records never
// carry an empty region.
func normalizeBedrockRegionText(raw string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(raw), ",")
	normalized := strings.Join(strings.Fields(trimmed), " ")
	if normalized == "" {
		return "global"
	}
	return normalized
}