Files
llm-intelligence/scripts/bedrock_pricing_lib.go

324 lines
9.0 KiB
Go
Raw Normal View History

//go:build llm_script
package main
import (
"fmt"
"regexp"
"strings"
)
// defaultBedrockPricingURL is the public Amazon Bedrock pricing page. It is
// stamped on parsed records as both the source URL and the model source URL.
const defaultBedrockPricingURL = "https://aws.amazon.com/bedrock/pricing/"

// Patterns used to pull pricing data out of the pricing page's raw HTML.
// Every pattern sets (?s) so that "." also matches newlines.
var (
	// bedrockRegionPattern captures the text of a
	// "<p><b>Region(s):&nbsp;...</b></p>" paragraph.
	bedrockRegionPattern = regexp.MustCompile(`(?s)<p><b>Regions?:&nbsp;([^<]+)</b></p>`)
	// bedrockTablePattern captures the inner HTML of each <table>.
	bedrockTablePattern = regexp.MustCompile(`(?s)<table[^>]*>(.*?)</table>`)
	// bedrockRowPattern captures the inner HTML of each <tr>. The opening tag
	// may carry attributes (as the table and cell patterns already allow), so
	// rows such as <tr class="..."> are not silently skipped.
	bedrockRowPattern = regexp.MustCompile(`(?s)<tr(?:\s[^>]*)?>(.*?)</tr>`)
	// bedrockCellPattern captures the inner HTML of each <td> or <th> cell.
	bedrockCellPattern = regexp.MustCompile(`(?s)<t[dh][^>]*>(.*?)</t[dh]>`)
)
// parseBedrockPricingCatalog converts the raw HTML of the Bedrock pricing page
// into pricing records.
//
// It first narrows the input to the span between the "Model_Pricing" heading
// and the "Pricing_examples" heading, falling back to the whole input when
// that span is blank. Records are then collected per provider block; if the
// HTML tables yield nothing, the tag-stripped text of the section is parsed
// instead. An error is returned when neither strategy finds a pricing row.
func parseBedrockPricingCatalog(raw string) ([]officialPricingRecord, error) {
	section := extractBetween(raw, `<h3 id="Model_Pricing"`, `<h2 id="Pricing_examples"`)
	if strings.TrimSpace(section) == "" {
		section = raw
	}

	records := make([]officialPricingRecord, 0)
	for _, block := range splitBedrockProviderBlocks(section) {
		records = append(records, parseBedrockProviderBlock(block.providerLabel, block.content)...)
	}
	if len(records) > 0 {
		return records, nil
	}

	// Structured parsing found nothing; retry against the plain text.
	records = append(records, parseBedrockPricingTextFallback(cleanHTMLText(section))...)
	if len(records) == 0 {
		return nil, fmt.Errorf("no bedrock pricing rows found")
	}
	return records, nil
}
// parseBedrockProviderBlock extracts pricing records from one provider's chunk
// of pricing-page HTML.
//
// providerLabel is the provider heading text; it is normalized before being
// used for metadata lookup and model IDs. raw is the HTML belonging to that
// provider. Only tables that mention "Price per 1M input tokens" and contain a
// "$" are considered. Each table is attributed to the closest region paragraph
// that starts before it ("global" when there is none), and a model is emitted
// at most once per region within the block.
func parseBedrockProviderBlock(providerLabel string, raw string) []officialPricingRecord {
	provider := normalizeBedrockProvider(providerLabel)
	providerCn, country, website := providerMetadata(provider)

	regionSpans := bedrockRegionPattern.FindAllStringSubmatchIndex(raw, -1)
	tableSpans := bedrockTablePattern.FindAllStringSubmatchIndex(raw, -1)

	out := make([]officialPricingRecord, 0)
	emitted := make(map[string]struct{})
	for _, span := range tableSpans {
		tableHTML := raw[span[2]:span[3]]
		isPricingTable := strings.Contains(tableHTML, "Price per 1M input tokens") &&
			strings.Contains(tableHTML, "$")
		if !isPricingTable {
			continue
		}

		// Matches are in document order, so scanning backwards finds the last
		// region paragraph that begins before this table.
		region := "global"
		for i := len(regionSpans) - 1; i >= 0; i-- {
			if rs := regionSpans[i]; rs[0] < span[0] {
				region = cleanHTMLText(raw[rs[2]:rs[3]])
				break
			}
		}

		for _, row := range parseBedrockTableRows(tableHTML) {
			key := region + "|" + row.ModelName
			if _, dup := emitted[key]; dup {
				continue
			}
			emitted[key] = struct{}{}
			out = append(out, officialPricingRecord{
				ModelID:         normalizeExternalID("bedrock", provider, row.ModelName),
				ModelName:       row.ModelName,
				ProviderName:    provider,
				ProviderNameCn:  providerCn,
				ProviderCountry: country,
				ProviderWebsite: website,
				OperatorName:    "Amazon Bedrock",
				OperatorNameCn:  "Amazon Bedrock",
				OperatorCountry: "US",
				OperatorWebsite: "https://aws.amazon.com/bedrock/",
				OperatorType:    "cloud",
				Region:          region,
				Currency:        "USD",
				InputPrice:      row.InputPrice,
				OutputPrice:     row.OutputPrice,
				SourceURL:       defaultBedrockPricingURL,
				ModelSourceURL:  defaultBedrockPricingURL,
				DateConfidence:  "unknown",
				DateSourceKind:  "official_pricing",
				Modality:        detectModality(row.ModelName),
				IsFree:          false,
			})
		}
	}
	return out
}
// bedrockProviderBlock is one heading-delimited chunk of the pricing page, as
// produced by splitBedrockProviderBlocks.
type bedrockProviderBlock struct {
// providerLabel is the cleaned text found inside the chunk's <h2> heading.
providerLabel string
// content is the raw HTML of the chunk, starting at its <h2> tag and running
// up to the next <h2 id="..."> tag or the end of the input.
content string
}
// splitBedrockProviderBlocks cuts raw HTML into chunks, one per `<h2 id="`
// heading. Each chunk runs from its heading to the next such heading (or the
// end of raw) and is labelled with the cleaned heading text. Chunks whose
// heading has no closing </h2>, no ">" before that closing tag, or a blank
// label are dropped.
func splitBedrockProviderBlocks(raw string) []bedrockProviderBlock {
	const headingOpen = `<h2 id="`

	// Collect the offset of every heading opener.
	var starts []int
	for pos := 0; ; {
		rel := strings.Index(raw[pos:], headingOpen)
		if rel < 0 {
			break
		}
		starts = append(starts, pos+rel)
		pos += rel + len(headingOpen)
	}

	blocks := make([]bedrockProviderBlock, 0, len(starts))
	for i, start := range starts {
		stop := len(raw)
		if i < len(starts)-1 {
			stop = starts[i+1]
		}
		chunk := raw[start:stop]

		closeAt := strings.Index(chunk, "</h2>")
		tagEnd := strings.Index(chunk, ">")
		if closeAt < 0 || tagEnd < 0 || tagEnd >= closeAt {
			continue
		}

		// The label is whatever sits between the end of the opening tag and
		// the closing </h2>.
		label := cleanHTMLText(chunk[tagEnd+1 : closeAt])
		if strings.TrimSpace(label) == "" {
			continue
		}
		blocks = append(blocks, bedrockProviderBlock{providerLabel: label, content: chunk})
	}
	return blocks
}
// extractBetween returns the part of raw that starts at the first occurrence
// of startMarker (inclusive) and ends just before the next occurrence of
// endMarker (exclusive).
//
// It returns "" when startMarker is absent. When endMarker is empty or does
// not occur after the start marker, everything from startMarker to the end of
// raw is returned.
func extractBetween(raw string, startMarker string, endMarker string) string {
	start := strings.Index(raw, startMarker)
	if start == -1 {
		return ""
	}
	segment := raw[start:]
	if endMarker == "" {
		return segment
	}
	// Search for the end marker only past the start marker. Otherwise an end
	// marker that is a prefix of (or equal to) the start marker would match at
	// offset 0 and produce an empty result that looks like "start not found".
	bodyStart := len(startMarker)
	end := strings.Index(segment[bodyStart:], endMarker)
	if end == -1 {
		return segment
	}
	return segment[:bodyStart+end]
}
// bedrockPriceRow is one priced model parsed from an HTML pricing table row by
// parseBedrockTableRows.
type bedrockPriceRow struct {
// ModelName is the cleaned text of the row's first cell.
ModelName string
// InputPrice is the dollar amount parsed from the row's second cell.
InputPrice float64
// OutputPrice is the dollar amount parsed from the row's third cell, or from
// the sixth cell when the row has at least six cells and the sixth contains "$".
OutputPrice float64
}
// parseBedrockTableRows reads the rows of one pricing table and returns the
// ones that carry a model name plus an input and an output price.
//
// Rows with fewer than three cells, rows whose first cell mentions "models"
// (case-insensitively; these are header rows), and rows whose price cells do
// not yield a dollar amount are skipped. The input price comes from the second
// cell. The output price comes from the third cell, unless the row has at
// least six cells and the sixth contains "$", in which case the sixth is used.
func parseBedrockTableRows(tableHTML string) []bedrockPriceRow {
	result := make([]bedrockPriceRow, 0)
	for _, rowMatch := range bedrockRowPattern.FindAllStringSubmatch(tableHTML, -1) {
		cellMatches := bedrockCellPattern.FindAllStringSubmatch(rowMatch[1], -1)
		if len(cellMatches) < 3 {
			continue
		}

		texts := make([]string, len(cellMatches))
		for i, m := range cellMatches {
			texts[i] = cleanHTMLText(m[1])
		}
		if strings.Contains(strings.ToLower(texts[0]), "models") {
			continue
		}

		outputText := texts[2]
		if len(texts) >= 6 && strings.Contains(texts[5], "$") {
			outputText = texts[5]
		}

		in, inOK := firstDollarPrice(texts[1])
		if !inOK {
			continue
		}
		out, outOK := firstDollarPrice(outputText)
		if !outOK {
			continue
		}
		result = append(result, bedrockPriceRow{
			ModelName:   texts[0],
			InputPrice:  in,
			OutputPrice: out,
		})
	}
	return result
}
// normalizeBedrockProvider maps a provider heading from the pricing page to
// the provider name used on records. Surrounding whitespace is ignored. Only
// the headings that differ from their provider name need an explicit case;
// every other label (Anthropic, Cohere, DeepSeek, Meta, Mistral AI,
// Moonshot AI, NVIDIA, Qwen, Writer, and anything unrecognized) is returned
// as-is after trimming.
func normalizeBedrockProvider(raw string) string {
	label := strings.TrimSpace(raw)
	switch label {
	case "Amazon Nova":
		return "Amazon"
	case "Kimi":
		return "Moonshot AI"
	case "OpenAI OSS Models":
		return "OpenAI"
	case "Z AI":
		return "Zhipu AI"
	}
	return label
}
// bedrockTextProviderHeaderPattern finds a provider table header in
// tag-stripped page text: a label (captured) followed by "models" and
// "Price per 1M input tokens". The "i" in "Price" is optional, so "Prce" also
// matches.
var bedrockTextProviderHeaderPattern = regexp.MustCompile(`([A-Za-z][A-Za-z0-9 .&-]+)\s+models\s+Pr(?:i)?ce per 1M input tokens`)
// bedrockTextRowPattern finds a pricing row in tag-stripped page text: a model
// name (group 1) followed by two "$"-prefixed numbers, which the text fallback
// parser uses as the input price (group 2) and output price (group 3).
var bedrockTextRowPattern = regexp.MustCompile(`([A-Za-z0-9 .:+-]+?)\s+\$\s*([0-9.]+)\s+\$\s*([0-9.]+)`)
// parseBedrockPricingTextFallback extracts pricing records from tag-stripped
// page text, for use when the HTML table parser finds nothing.
//
// The text is split at every provider header match; each piece runs to the
// next header (or the end of raw). The region for a piece is looked up in the
// text just before its header and defaults to "global". Within a piece, each
// "<model> $<input> $<output>" match becomes a record, emitted at most once
// per provider, region and model name.
func parseBedrockPricingTextFallback(raw string) []officialPricingRecord {
	headers := bedrockTextProviderHeaderPattern.FindAllStringSubmatchIndex(raw, -1)
	out := make([]officialPricingRecord, 0)
	emitted := make(map[string]struct{})

	for i, hdr := range headers {
		if len(hdr) < 4 {
			continue
		}
		blockStart, blockEnd := hdr[0], len(raw)
		if i+1 < len(headers) {
			blockEnd = headers[i+1][0]
		}

		region := normalizeBedrockRegionText(findBedrockTextRegion(raw, blockStart))
		provider := normalizeBedrockProvider(raw[hdr[2]:hdr[3]])
		providerCn, country, website := providerMetadata(provider)

		for _, m := range bedrockTextRowPattern.FindAllStringSubmatch(raw[blockStart:blockEnd], -1) {
			if len(m) != 4 {
				continue
			}
			model := strings.TrimSpace(m[1])
			key := provider + "|" + region + "|" + model
			if _, dup := emitted[key]; dup {
				continue
			}
			emitted[key] = struct{}{}

			out = append(out, officialPricingRecord{
				ModelID:         normalizeExternalID("bedrock", provider, model),
				ModelName:       model,
				ProviderName:    provider,
				ProviderNameCn:  providerCn,
				ProviderCountry: country,
				ProviderWebsite: website,
				OperatorName:    "Amazon Bedrock",
				OperatorNameCn:  "Amazon Bedrock",
				OperatorCountry: "US",
				OperatorWebsite: "https://aws.amazon.com/bedrock/",
				OperatorType:    "cloud",
				Region:          region,
				Currency:        "USD",
				InputPrice:      mustParseSubscriptionPrice(m[2]),
				OutputPrice:     mustParseSubscriptionPrice(m[3]),
				SourceURL:       defaultBedrockPricingURL,
				ModelSourceURL:  defaultBedrockPricingURL,
				DateConfidence:  "unknown",
				DateSourceKind:  "official_pricing",
				Modality:        detectModality(model),
			})
		}
	}
	return out
}
// findBedrockTextRegion looks for a region label in the text immediately
// before a provider header.
//
// raw is the tag-stripped page text and headerStart is the byte offset where
// the header begins. Only the 300 bytes preceding headerStart are inspected.
// The text following the last "Regions:" or "Region:" marker in that window is
// returned, cut off at the earliest of the stop words " Priority ", " Flex ",
// " Batch " or " models ". It returns "" when no marker is found or when
// headerStart is not a positive offset; an offset past the end of raw is
// clamped to len(raw).
func findBedrockTextRegion(raw string, headerStart int) string {
	const lookback = 300

	if headerStart > len(raw) {
		headerStart = len(raw)
	}
	if headerStart <= 0 {
		return ""
	}
	windowStart := headerStart - lookback
	if windowStart < 0 {
		windowStart = 0
	}
	window := raw[windowStart:headerStart]

	// Use whichever marker spelling occurs last in the window.
	marker := "Regions:"
	at := strings.LastIndex(window, marker)
	if singular := strings.LastIndex(window, "Region:"); singular > at {
		at, marker = singular, "Region:"
	}
	if at == -1 {
		return ""
	}

	region := strings.TrimSpace(window[at+len(marker):])

	// Cut at the earliest stop word. Finding every stop word in the same
	// untruncated string matters: truncating one word at a time strips the
	// trailing space that an adjacent, earlier stop word needs in order to
	// match (e.g. "US East Flex Priority ..." would keep "Flex").
	cut := len(region)
	for _, stop := range []string{" Priority ", " Flex ", " Batch ", " models "} {
		if idx := strings.Index(region, stop); idx != -1 && idx < cut {
			cut = idx
		}
	}
	return strings.TrimSpace(region[:cut])
}
// normalizeBedrockRegionText tidies a region label taken from page text: it
// trims surrounding whitespace, drops one trailing comma, and collapses runs
// of whitespace into single spaces. It returns "global" when nothing is left,
// including when the input consisted only of whitespace and a comma.
func normalizeBedrockRegionText(raw string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(raw), ",")
	normalized := strings.Join(strings.Fields(trimmed), " ")
	// Check emptiness after normalizing so a bare "," cannot produce an empty
	// region name.
	if normalized == "" {
		return "global"
	}
	return normalized
}