Files
llm-intelligence/scripts/azure_openai_pricing_lib.go

226 lines
6.4 KiB
Go
Raw Normal View History

//go:build llm_script
package main
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
"strings"
)
// defaultAzureOpenAIPricingURL is the default Azure Retail Prices API query:
// API version 2023-01-01-preview, prices in USD, filtered to products whose
// productName contains "OpenAI". parseAzureOpenAIPricingCatalog also stamps it
// on every record it emits as the source URL.
const defaultAzureOpenAIPricingURL = "https://prices.azure.com/api/retail/prices?api-version=2023-01-01-preview&currencyCode='USD'&$filter=contains(productName,'OpenAI')"
// azureRetailPriceResponse is the JSON envelope of one retail pricing page.
// fetchAzureOpenAIPricingCatalog also reuses it as the container for the
// aggregated, multi-page result that it re-encodes for the parser.
type azureRetailPriceResponse struct {
	// Items holds the price entries carried by this page.
	Items []azureRetailPriceItem `json:"Items"`
	// NextPageLink is the URL of the following page; pagination stops once it
	// is blank.
	NextPageLink string `json:"NextPageLink"`
}
// azureRetailPriceItem is a single price entry decoded from the retail pricing
// payload. Only the fields declared here are decoded; any other keys in the
// payload are ignored by encoding/json.
type azureRetailPriceItem struct {
	// CurrencyCode is the price currency; the parser falls back to "USD" when
	// it is blank.
	CurrencyCode string `json:"currencyCode"`
	// RetailPrice is decoded but not read by the code in this file.
	RetailPrice float64 `json:"retailPrice"`
	// UnitPrice is the price the parser records (scaled when UnitOfMeasure is
	// "1K").
	UnitPrice float64 `json:"unitPrice"`
	// Location becomes the record region; the parser falls back to "global"
	// when it is blank.
	Location string `json:"location"`
	// MeterName is the source of the normalized model name and is also
	// searched for input/output and exclusion markers.
	MeterName string `json:"meterName"`
	// ProductName must contain "openai" and must not contain "media"
	// (case-insensitive) for the item to be accepted.
	ProductName string `json:"productName"`
	// SkuName is searched for input/output and exclusion markers.
	SkuName string `json:"skuName"`
	// ServiceName must equal "Foundry Models" for the item to be accepted.
	ServiceName string `json:"serviceName"`
	// UnitOfMeasure is compared case-insensitively with "1K" to decide whether
	// the price is multiplied by 1000.
	UnitOfMeasure string `json:"unitOfMeasure"`
	// Type must equal "Consumption" for the item to be accepted.
	Type string `json:"type"`
	// ArmSkuName is searched for input/output and exclusion markers.
	ArmSkuName string `json:"armSkuName"`
	// ArmRegionName is decoded but not read by the code in this file.
	ArmRegionName string `json:"armRegionName"`
	// IsPrimaryMeter is decoded from the "isPrimaryMeterRegion" key; it is not
	// read by the code in this file.
	IsPrimaryMeter bool `json:"isPrimaryMeterRegion"`
}
// azurePricingPair accumulates the input and output price found for one
// model/region/currency combination while the catalog is being parsed. A price
// left at zero means that direction has not been seen.
type azurePricingPair struct {
	// Identity of the combination the prices belong to.
	ModelName, Region, Currency string
	// Prices collected so far for each direction.
	InputPrice, OutputPrice float64
}
// azureKindPattern matches, case-insensitively and as a whole word, one of the
// input/output direction tokens (inp, inpt, input, out, outp, outpt, output,
// opt). classifyAzureRetailPrice rejects any item whose combined SKU/meter name
// contains none of them.
var azureKindPattern = regexp.MustCompile(`(?i)\b(inp|inpt|input|out|outp|outpt|output|opt)\b`)
// fetchAzureOpenAIPricingCatalog returns the Azure OpenAI pricing catalog as a
// JSON string.
//
// When fixture is non-blank the call is handed unchanged to fetchRawPricingPage
// and its result is returned as-is, without pagination. Otherwise the function
// starts at url, follows each page's NextPageLink until it is blank, and
// returns one JSON document containing the items of every page.
//
// An error is returned when a page URL repeats (pagination loop), when a page
// cannot be fetched or decoded, or when the aggregate cannot be encoded.
func fetchAzureOpenAIPricingCatalog(url string, fixture string, client *http.Client) (string, error) {
	if len(strings.TrimSpace(fixture)) > 0 {
		return fetchRawPricingPage(url, fixture, client)
	}

	var merged azureRetailPriceResponse
	visited := make(map[string]bool)
	for pageURL := url; strings.TrimSpace(pageURL) != ""; {
		// Refuse to request the same URL twice so a self-referencing
		// NextPageLink cannot spin forever.
		if visited[pageURL] {
			return "", fmt.Errorf("azure retail pricing pagination loop detected: %s", pageURL)
		}
		visited[pageURL] = true

		body, fetchErr := fetchRawPricingPage(pageURL, "", client)
		if fetchErr != nil {
			return "", fetchErr
		}

		current := azureRetailPriceResponse{}
		if decodeErr := json.Unmarshal([]byte(body), &current); decodeErr != nil {
			return "", fmt.Errorf("unmarshal azure retail pricing page: %w", decodeErr)
		}

		merged.Items = append(merged.Items, current.Items...)
		pageURL = current.NextPageLink
	}

	encoded, encodeErr := json.Marshal(merged)
	if encodeErr != nil {
		return "", fmt.Errorf("marshal azure retail pricing aggregate: %w", encodeErr)
	}
	return string(encoded), nil
}
// parseAzureOpenAIPricingCatalog converts a raw Azure retail pricing JSON
// payload into official pricing records, one per model/region/currency
// combination that has both a non-zero input and a non-zero output price.
//
// Items rejected by classifyAzureRetailPrice are skipped. A blank location
// defaults to "global" and a blank currency to "USD". When several items map to
// the same combination and direction, the last one in the payload wins.
//
// Records are returned in the order in which their combination first appears
// in the payload, so the same input always produces the same output order.
//
// An error is returned when raw is not valid JSON for the expected envelope or
// when no complete input/output pair is found.
func parseAzureOpenAIPricingCatalog(raw string) ([]officialPricingRecord, error) {
	var response azureRetailPriceResponse
	if err := json.Unmarshal([]byte(raw), &response); err != nil {
		return nil, fmt.Errorf("unmarshal azure retail pricing: %w", err)
	}

	pairs := make(map[string]*azurePricingPair)
	// order keeps the pair keys in first-seen order. Ranging over the map
	// itself would emit records in a different order on every run.
	var order []string
	for _, item := range response.Items {
		kind, modelName, ok := classifyAzureRetailPrice(item)
		if !ok {
			continue
		}
		region := strings.TrimSpace(item.Location)
		if region == "" {
			region = "global"
		}
		currency := strings.TrimSpace(item.CurrencyCode)
		if currency == "" {
			currency = "USD"
		}
		key := strings.Join([]string{modelName, region, currency}, "|")
		pair := pairs[key]
		if pair == nil {
			pair = &azurePricingPair{
				ModelName: modelName,
				Region:    region,
				Currency:  currency,
			}
			pairs[key] = pair
			order = append(order, key)
		}
		price := item.UnitPrice
		// Prices quoted per "1K" units are scaled by 1000; this appears to put
		// every price on a per-million basis (TODO confirm the intended unit).
		if strings.EqualFold(strings.TrimSpace(item.UnitOfMeasure), "1K") {
			price *= 1000
		}
		if kind == "input" {
			pair.InputPrice = price
		} else {
			pair.OutputPrice = price
		}
	}

	records := make([]officialPricingRecord, 0, len(order))
	providerNameCn, providerCountry, providerWebsite := providerMetadata("OpenAI")
	for _, key := range order {
		pair := pairs[key]
		// A zero price means that direction was never seen (or was free);
		// incomplete pairs are dropped.
		if pair.InputPrice == 0 || pair.OutputPrice == 0 {
			continue
		}
		record := officialPricingRecord{
			ModelID:         normalizeExternalID("azure-openai", pair.ModelName),
			ModelName:       pair.ModelName,
			ProviderName:    "OpenAI",
			ProviderNameCn:  providerNameCn,
			ProviderCountry: providerCountry,
			ProviderWebsite: providerWebsite,
			OperatorName:    "Microsoft Azure",
			OperatorNameCn:  "微软 Azure",
			OperatorCountry: "US",
			OperatorWebsite: "https://azure.microsoft.com",
			OperatorType:    "cloud",
			Region:          pair.Region,
			Currency:        pair.Currency,
			InputPrice:      pair.InputPrice,
			OutputPrice:     pair.OutputPrice,
			SourceURL:       defaultAzureOpenAIPricingURL,
			ModelSourceURL:  defaultAzureOpenAIPricingURL,
			DateConfidence:  "unknown",
			DateSourceKind:  "official_pricing",
			Modality:        detectModality(pair.ModelName),
		}
		record.IsFree = false
		records = append(records, record)
	}
	if len(records) == 0 {
		return nil, fmt.Errorf("no azure openai token prices found")
	}
	return records, nil
}
// classifyAzureRetailPrice decides whether a retail price item should be
// treated as a token price and, if so, reports its direction and model name.
//
// It returns (kind, modelName, true) with kind "input" or "output", or
// ("", "", false) when the item is rejected. An item is accepted only when:
//   - its service name is "Foundry Models" and its type is "Consumption";
//   - its product name contains "openai" and not "media" (case-insensitive);
//   - the lowercased, space-joined SKU name, meter name and ARM SKU name match
//     azureKindPattern and contain none of the excluded markers;
//   - normalizeAzureModelName produces a non-empty model name.
func classifyAzureRetailPrice(item azureRetailPriceItem) (string, string, bool) {
	if item.ServiceName != "Foundry Models" {
		return "", "", false
	}
	if item.Type != "Consumption" {
		return "", "", false
	}

	product := strings.ToLower(item.ProductName)
	mentionsOpenAI := strings.Contains(product, "openai")
	mentionsMedia := strings.Contains(product, "media")
	if mentionsMedia || !mentionsOpenAI {
		return "", "", false
	}

	label := item.SkuName + " " + item.MeterName + " " + item.ArmSkuName
	label = strings.ToLower(strings.TrimSpace(label))
	if !azureKindPattern.MatchString(label) {
		return "", "", false
	}

	// Substrings that disqualify an item. They look like markers for batch,
	// cached, fine-tuning, hosting, audio, image, realtime and tool meters;
	// the exact meaning of the abbreviations is not established here.
	// NOTE(review): "ft " and "rt " also match any word that merely ends in
	// those letters — confirm this is intended.
	excluded := [...]string{
		"batch",
		"cache",
		"cchd",
		"prty",
		" pp ",
		"hosting",
		"training",
		" ft ",
		"ft ",
		" mdl ",
		"grdr",
		"file-search",
		"code-interpreter",
		"session",
		"transcribe",
		" aud ",
		"audio",
		" img ",
		"image",
		"voice",
		"rt ",
		"realtime",
		"tool",
	}
	for i := range excluded {
		if strings.Contains(label, excluded[i]) {
			return "", "", false
		}
	}

	model := normalizeAzureModelName(item)
	if model == "" {
		return "", "", false
	}

	// Anything that carries an input marker is an input price; every other
	// item that got this far is treated as an output price.
	isInput := strings.Contains(label, "inp") || strings.Contains(label, "input") || strings.Contains(label, "inpt")
	if isInput {
		return "input", model, true
	}
	return "output", model, true
}
// Patterns used by normalizeAzureModelName, compiled once at package scope
// rather than on every call.
var (
	// azureMeterKindSuffixPattern matches the first whitespace-preceded
	// input/output token in a meter name together with everything after it.
	azureMeterKindSuffixPattern = regexp.MustCompile(`(?i)\s+(inp|inpt|input|out|outp|outpt|output|opt)\b.*$`)
	// azureWhitespaceRunPattern matches one or more consecutive whitespace
	// characters.
	azureWhitespaceRunPattern = regexp.MustCompile(`\s+`)
)

// normalizeAzureModelName derives a display model name from item.MeterName.
//
// The meter name is trimmed and lowercased, "-" and "_" become spaces, and the
// first input/output token plus everything after it is removed. A name that
// starts with a digit is prefixed with "gpt ", runs of whitespace collapse to a
// single space, and a leading "gpt " is rewritten to "GPT-"; any other name has
// its first character upper-cased. For example "gpt-4o-mini Inp glbl Tokens"
// becomes "GPT-4o mini". An empty string is returned when nothing is left.
func normalizeAzureModelName(item azureRetailPriceItem) string {
	base := strings.ToLower(strings.TrimSpace(item.MeterName))
	base = strings.NewReplacer("-", " ", "_", " ").Replace(base)
	base = azureMeterKindSuffixPattern.ReplaceAllString(base, "")
	base = strings.TrimSpace(base)
	if base == "" {
		return ""
	}
	// A leading ASCII digit marks a name with the family prefix omitted
	// (e.g. "4o 0806"), so the prefix is restored.
	if base[0] >= '0' && base[0] <= '9' {
		base = "gpt " + base
	}
	base = azureWhitespaceRunPattern.ReplaceAllString(base, " ")
	if strings.HasPrefix(base, "gpt ") {
		return "GPT-" + strings.TrimSpace(strings.TrimPrefix(base, "gpt "))
	}
	// Upper-case the whole first rune, not just the first byte, so a
	// multi-byte leading character is not corrupted.
	firstLen := len(base)
	for i := range base {
		if i > 0 {
			firstLen = i
			break
		}
	}
	return strings.ToUpper(base[:firstLen]) + base[firstLen:]
}