324 lines
9.0 KiB
Go
324 lines
9.0 KiB
Go
|
|
//go:build llm_script
|
||
|
|
|
||
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"fmt"
|
||
|
|
"regexp"
|
||
|
|
"strings"
|
||
|
|
)
|
||
|
|
|
||
|
|
// defaultBedrockPricingURL is the Amazon Bedrock pricing page address. The
// parsers in this file record it as both the source URL and the model source
// URL of every pricing record they produce.
const defaultBedrockPricingURL = "https://aws.amazon.com/bedrock/pricing/"
|
||
|
|
|
||
|
|
// Regular expressions used to pick apart the HTML of the Bedrock pricing page.
// All of them enable (?s) so that "." also matches newlines inside a tag body.
var (
	// bedrockRegionPattern captures the region text of a
	// "<p><b>Region: ...</b></p>" (or "Regions: ...") paragraph.
	bedrockRegionPattern = regexp.MustCompile(`(?s)<p><b>Regions?: ([^<]+)</b></p>`)

	// bedrockTablePattern captures the inner HTML of each <table> element.
	bedrockTablePattern = regexp.MustCompile(`(?s)<table[^>]*>(.*?)</table>`)

	// bedrockRowPattern captures the inner HTML of each <tr> element.
	// Attributes on the opening tag are tolerated, in line with the table and
	// cell patterns, so rows such as <tr class="..."> are not silently dropped.
	// The \b keeps the pattern from matching other tags that merely start
	// with "tr".
	bedrockRowPattern = regexp.MustCompile(`(?s)<tr\b[^>]*>(.*?)</tr>`)

	// bedrockCellPattern captures the inner HTML of each <td> or <th> cell.
	bedrockCellPattern = regexp.MustCompile(`(?s)<t[dh][^>]*>(.*?)</t[dh]>`)
)
|
||
|
|
|
||
|
|
func parseBedrockPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||
|
|
section := extractBetween(raw, `<h3 id="Model_Pricing"`, `<h2 id="Pricing_examples"`)
|
||
|
|
if strings.TrimSpace(section) == "" {
|
||
|
|
section = raw
|
||
|
|
}
|
||
|
|
|
||
|
|
blocks := splitBedrockProviderBlocks(section)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
for _, block := range blocks {
|
||
|
|
records = append(records, parseBedrockProviderBlock(block.providerLabel, block.content)...)
|
||
|
|
}
|
||
|
|
if len(records) == 0 {
|
||
|
|
records = append(records, parseBedrockPricingTextFallback(cleanHTMLText(section))...)
|
||
|
|
}
|
||
|
|
if len(records) == 0 {
|
||
|
|
return nil, fmt.Errorf("no bedrock pricing rows found")
|
||
|
|
}
|
||
|
|
return records, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
func parseBedrockProviderBlock(providerLabel string, raw string) []officialPricingRecord {
|
||
|
|
providerName := normalizeBedrockProvider(providerLabel)
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
|
|
|
||
|
|
regionMatches := bedrockRegionPattern.FindAllStringSubmatchIndex(raw, -1)
|
||
|
|
tables := bedrockTablePattern.FindAllStringSubmatchIndex(raw, -1)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
seenModelRegion := make(map[string]struct{})
|
||
|
|
for _, tableIndex := range tables {
|
||
|
|
tableHTML := raw[tableIndex[2]:tableIndex[3]]
|
||
|
|
if !strings.Contains(tableHTML, "Price per 1M input tokens") || !strings.Contains(tableHTML, "$") {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
region := "global"
|
||
|
|
for _, regionIndex := range regionMatches {
|
||
|
|
if regionIndex[0] < tableIndex[0] {
|
||
|
|
region = cleanHTMLText(raw[regionIndex[2]:regionIndex[3]])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
rows := parseBedrockTableRows(tableHTML)
|
||
|
|
for _, row := range rows {
|
||
|
|
dedupeKey := strings.Join([]string{region, row.ModelName}, "|")
|
||
|
|
if _, exists := seenModelRegion[dedupeKey]; exists {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
record := officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("bedrock", providerName, row.ModelName),
|
||
|
|
ModelName: row.ModelName,
|
||
|
|
ProviderName: providerName,
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "Amazon Bedrock",
|
||
|
|
OperatorNameCn: "Amazon Bedrock",
|
||
|
|
OperatorCountry: "US",
|
||
|
|
OperatorWebsite: "https://aws.amazon.com/bedrock/",
|
||
|
|
OperatorType: "cloud",
|
||
|
|
Region: region,
|
||
|
|
Currency: "USD",
|
||
|
|
InputPrice: row.InputPrice,
|
||
|
|
OutputPrice: row.OutputPrice,
|
||
|
|
SourceURL: defaultBedrockPricingURL,
|
||
|
|
ModelSourceURL: defaultBedrockPricingURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(row.ModelName),
|
||
|
|
}
|
||
|
|
record.IsFree = false
|
||
|
|
seenModelRegion[dedupeKey] = struct{}{}
|
||
|
|
records = append(records, record)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return records
|
||
|
|
}
|
||
|
|
|
||
|
|
// bedrockProviderBlock is one heading-delimited section of the pricing page,
// as produced by splitBedrockProviderBlocks.
type bedrockProviderBlock struct {
	// providerLabel is the cleaned text of the section's <h2> heading.
	providerLabel string
	// content is the raw HTML of the section, starting at its <h2> opener.
	content string
}
|
||
|
|
|
||
|
|
func splitBedrockProviderBlocks(raw string) []bedrockProviderBlock {
|
||
|
|
marker := `<h2 id="`
|
||
|
|
indices := make([]int, 0)
|
||
|
|
for offset := 0; ; {
|
||
|
|
next := strings.Index(raw[offset:], marker)
|
||
|
|
if next == -1 {
|
||
|
|
break
|
||
|
|
}
|
||
|
|
indices = append(indices, offset+next)
|
||
|
|
offset += next + len(marker)
|
||
|
|
}
|
||
|
|
blocks := make([]bedrockProviderBlock, 0, len(indices))
|
||
|
|
for i, start := range indices {
|
||
|
|
end := len(raw)
|
||
|
|
if i+1 < len(indices) {
|
||
|
|
end = indices[i+1]
|
||
|
|
}
|
||
|
|
chunk := raw[start:end]
|
||
|
|
h2End := strings.Index(chunk, "</h2>")
|
||
|
|
if h2End == -1 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
openEnd := strings.Index(chunk, ">")
|
||
|
|
if openEnd == -1 || openEnd >= h2End {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
label := cleanHTMLText(chunk[openEnd+1 : h2End])
|
||
|
|
if strings.TrimSpace(label) == "" {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
blocks = append(blocks, bedrockProviderBlock{
|
||
|
|
providerLabel: label,
|
||
|
|
content: chunk,
|
||
|
|
})
|
||
|
|
}
|
||
|
|
return blocks
|
||
|
|
}
|
||
|
|
|
||
|
|
// extractBetween returns the part of raw that starts at the first occurrence
// of startMarker and ends just before the next occurrence of endMarker.
//
// The start marker is included in the result; the end marker is not. It
// returns "" when startMarker is absent. When endMarker is empty, or does not
// occur after the start marker, everything from the start marker to the end
// of raw is returned.
func extractBetween(raw string, startMarker string, endMarker string) string {
	start := strings.Index(raw, startMarker)
	if start == -1 {
		return ""
	}
	segment := raw[start:]
	if endMarker == "" {
		return segment
	}

	// Look for the end marker only after the start marker. Otherwise an end
	// marker that equals, or is a prefix of, the start marker would match at
	// offset 0 and produce an empty result.
	searchFrom := len(startMarker)
	end := strings.Index(segment[searchFrom:], endMarker)
	if end == -1 {
		return segment
	}
	return segment[:searchFrom+end]
}
|
||
|
|
|
||
|
|
// bedrockPriceRow is one priced model parsed from a pricing table row by
// parseBedrockTableRows.
type bedrockPriceRow struct {
	// ModelName is the cleaned text of the row's first cell.
	ModelName string
	// InputPrice is the dollar amount parsed from the input-price cell.
	InputPrice float64
	// OutputPrice is the dollar amount parsed from the output-price cell.
	OutputPrice float64
}
|
||
|
|
|
||
|
|
func parseBedrockTableRows(tableHTML string) []bedrockPriceRow {
|
||
|
|
rows := bedrockRowPattern.FindAllStringSubmatch(tableHTML, -1)
|
||
|
|
parsed := make([]bedrockPriceRow, 0)
|
||
|
|
for _, row := range rows {
|
||
|
|
cells := bedrockCellPattern.FindAllStringSubmatch(row[1], -1)
|
||
|
|
if len(cells) < 3 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
values := make([]string, 0, len(cells))
|
||
|
|
for _, cell := range cells {
|
||
|
|
values = append(values, cleanHTMLText(cell[1]))
|
||
|
|
}
|
||
|
|
if strings.Contains(strings.ToLower(values[0]), "models") {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
modelName := values[0]
|
||
|
|
inputCell := values[1]
|
||
|
|
outputCell := values[2]
|
||
|
|
if len(values) >= 6 && strings.Contains(strings.ToLower(values[5]), "$") {
|
||
|
|
outputCell = values[5]
|
||
|
|
}
|
||
|
|
inputPrice, ok := firstDollarPrice(inputCell)
|
||
|
|
if !ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
outputPrice, ok := firstDollarPrice(outputCell)
|
||
|
|
if !ok {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
parsed = append(parsed, bedrockPriceRow{
|
||
|
|
ModelName: modelName,
|
||
|
|
InputPrice: inputPrice,
|
||
|
|
OutputPrice: outputPrice,
|
||
|
|
})
|
||
|
|
}
|
||
|
|
return parsed
|
||
|
|
}
|
||
|
|
|
||
|
|
// normalizeBedrockProvider maps a provider heading from the pricing page to
// the provider name used in pricing records.
//
// Surrounding whitespace is ignored. Headings with a dedicated alias are
// rewritten; every other heading, known or not, is returned trimmed but
// otherwise unchanged.
func normalizeBedrockProvider(raw string) string {
	label := strings.TrimSpace(raw)
	switch label {
	case "Amazon Nova":
		return "Amazon"
	case "Kimi":
		return "Moonshot AI"
	case "OpenAI OSS Models":
		return "OpenAI"
	case "Z AI":
		return "Zhipu AI"
	case "Anthropic", "Cohere", "DeepSeek", "Meta", "Mistral AI",
		"Moonshot AI", "NVIDIA", "Qwen", "Writer":
		// Known providers whose heading already is the canonical name.
		return label
	default:
		return label
	}
}
|
||
|
|
|
||
|
|
// Patterns used by the plain-text fallback parser, which works on page text
// after the HTML has been stripped.
var (
	// bedrockTextProviderHeaderPattern matches a table header such as
	// "<provider> models Price per 1M input tokens" and captures the provider
	// text. The "i" in "Price" is optional, so "Prce" is accepted as well.
	bedrockTextProviderHeaderPattern = regexp.MustCompile(`([A-Za-z][A-Za-z0-9 .&-]+)\s+models\s+Pr(?:i)?ce per 1M input tokens`)

	// bedrockTextRowPattern matches "<model> $<number> $<number>" and
	// captures the model text and the two numbers.
	bedrockTextRowPattern = regexp.MustCompile(`([A-Za-z0-9 .:+-]+?)\s+\$\s*([0-9.]+)\s+\$\s*([0-9.]+)`)
)
|
||
|
|
|
||
|
|
func parseBedrockPricingTextFallback(raw string) []officialPricingRecord {
|
||
|
|
matches := bedrockTextProviderHeaderPattern.FindAllStringSubmatchIndex(raw, -1)
|
||
|
|
records := make([]officialPricingRecord, 0)
|
||
|
|
seen := make(map[string]struct{})
|
||
|
|
for i, match := range matches {
|
||
|
|
if len(match) < 4 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
start := match[0]
|
||
|
|
end := len(raw)
|
||
|
|
if i+1 < len(matches) {
|
||
|
|
end = matches[i+1][0]
|
||
|
|
}
|
||
|
|
block := raw[start:end]
|
||
|
|
region := normalizeBedrockRegionText(findBedrockTextRegion(raw, start))
|
||
|
|
providerName := normalizeBedrockProvider(raw[match[2]:match[3]])
|
||
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||
|
|
rows := bedrockTextRowPattern.FindAllStringSubmatch(block, -1)
|
||
|
|
for _, row := range rows {
|
||
|
|
if len(row) != 4 {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
modelName := strings.TrimSpace(row[1])
|
||
|
|
key := strings.Join([]string{providerName, region, modelName}, "|")
|
||
|
|
if _, exists := seen[key]; exists {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
seen[key] = struct{}{}
|
||
|
|
records = append(records, officialPricingRecord{
|
||
|
|
ModelID: normalizeExternalID("bedrock", providerName, modelName),
|
||
|
|
ModelName: modelName,
|
||
|
|
ProviderName: providerName,
|
||
|
|
ProviderNameCn: providerNameCn,
|
||
|
|
ProviderCountry: providerCountry,
|
||
|
|
ProviderWebsite: providerWebsite,
|
||
|
|
OperatorName: "Amazon Bedrock",
|
||
|
|
OperatorNameCn: "Amazon Bedrock",
|
||
|
|
OperatorCountry: "US",
|
||
|
|
OperatorWebsite: "https://aws.amazon.com/bedrock/",
|
||
|
|
OperatorType: "cloud",
|
||
|
|
Region: region,
|
||
|
|
Currency: "USD",
|
||
|
|
InputPrice: mustParseSubscriptionPrice(row[2]),
|
||
|
|
OutputPrice: mustParseSubscriptionPrice(row[3]),
|
||
|
|
SourceURL: defaultBedrockPricingURL,
|
||
|
|
ModelSourceURL: defaultBedrockPricingURL,
|
||
|
|
DateConfidence: "unknown",
|
||
|
|
DateSourceKind: "official_pricing",
|
||
|
|
Modality: detectModality(modelName),
|
||
|
|
})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return records
|
||
|
|
}
|
||
|
|
|
||
|
|
// findBedrockTextRegion looks for a "Regions:" or "Region:" label in the 300
// bytes of raw that precede headerStart and returns the text that follows the
// last such label, up to headerStart.
//
// The result is trimmed and then cut at " Priority ", " Flex ", " Batch " and
// " models ", applied in that order to the progressively shortened text. It
// returns "" when neither label appears in the window. headerStart must be a
// valid offset into raw.
func findBedrockTextRegion(raw string, headerStart int) string {
	const lookback = 300

	from := headerStart - lookback
	if from < 0 {
		from = 0
	}
	window := raw[from:headerStart]

	// Use whichever label occurs last; if both are absent the plural label is
	// kept and the position stays -1.
	label, at := "Regions:", strings.LastIndex(window, "Regions:")
	if singular := strings.LastIndex(window, "Region:"); singular > at {
		label, at = "Region:", singular
	}
	if at == -1 {
		return ""
	}

	region := strings.TrimSpace(window[at+len(label):])
	for _, stop := range [...]string{" Priority ", " Flex ", " Batch ", " models "} {
		if cut := strings.Index(region, stop); cut != -1 {
			region = strings.TrimSpace(region[:cut])
		}
	}
	return region
}
|
||
|
|
|
||
|
|
// normalizeBedrockRegionText tidies region text taken from the pricing page.
//
// Surrounding whitespace and a single trailing comma are removed and every
// internal run of whitespace is collapsed to one space. When nothing is left
// after that clean-up the region defaults to "global". The default is applied
// after normalization so that input consisting only of a comma does not
// produce an empty region.
func normalizeBedrockRegionText(raw string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(raw), ",")
	normalized := strings.Join(strings.Fields(trimmed), " ")
	if normalized == "" {
		return "global"
	}
	return normalized
}
|