feat(audit): add pricing signature guards and reporting
Add snapshot, signature, and drift guard support for Vertex AI, Cloudflare Workers AI, and Perplexity API, backed by a queryable audit table and recent-window view. This commit also wires the audit query layer into daily signal materialization and report generation so structure drift becomes a first-class signal instead of a log-only artifact.
This commit is contained in:
277
scripts/vertex_pricing_lib.go
Normal file
277
scripts/vertex_pricing_lib.go
Normal file
@@ -0,0 +1,277 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// defaultVertexPricingURL is the pricing page URL that the parsers in this
// file record on every emitted record as both SourceURL and ModelSourceURL.
const defaultVertexPricingURL = "https://cloud.google.com/gemini-enterprise-agent-platform/generative-ai/pricing"
|
||||
|
||||
// Regular expressions used to carve the pricing page HTML into headings,
// tables, rows and cells. All of them are case-insensitive and let "." span
// newlines, and every opening tag may carry attributes.
var (
	// vertexRowPattern captures the inner HTML of one table row. The opening
	// tag may carry attributes (<tr class="...">); requiring whitespace before
	// them keeps unrelated tags such as <track> from matching.
	vertexRowPattern = regexp.MustCompile(`(?is)<tr(?:\s[^>]*)?>(.*?)</tr>`)

	// vertexCellPattern captures the inner HTML of one <td> or <th> cell.
	// Requiring whitespace before attributes keeps <thead> from being treated
	// as a header cell.
	vertexCellPattern = regexp.MustCompile(`(?is)<t[dh](?:\s[^>]*)?>(.*?)</t[dh]>`)

	// vertexHeadingPattern captures the inner HTML of an h2-h4 heading.
	vertexHeadingPattern = regexp.MustCompile(`(?is)<h[2-4][^>]*>(.*?)</h[2-4]>`)

	// vertexTablePattern captures the inner HTML of a <table> element.
	vertexTablePattern = regexp.MustCompile(`(?is)<table[^>]*>(.*?)</table>`)

	// vertexStandardHeadingPattern matches an h2-h5 heading whose whole text is
	// "standard" (any case) or "标准".
	vertexStandardHeadingPattern = regexp.MustCompile(`(?is)<h[2-5][^>]*>\s*(standard|标准)\s*</h[2-5]>`)
)
|
||||
|
||||
func parseVertexPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
familyBlocks := splitVertexFamilyBlocks(raw)
|
||||
records := make([]officialPricingRecord, 0)
|
||||
if len(familyBlocks) > 0 {
|
||||
for _, block := range familyBlocks {
|
||||
tableHTML := extractVertexStandardTable(block)
|
||||
if strings.TrimSpace(tableHTML) == "" {
|
||||
continue
|
||||
}
|
||||
records = append(records, parseVertexStandardTable(tableHTML)...)
|
||||
}
|
||||
}
|
||||
if len(records) > 0 {
|
||||
return records, nil
|
||||
}
|
||||
records = parseVertexStandardTextBlocks(raw)
|
||||
if len(records) > 0 {
|
||||
return records, nil
|
||||
}
|
||||
if len(familyBlocks) == 0 {
|
||||
return nil, fmt.Errorf("unexpected vertex pricing content")
|
||||
}
|
||||
return nil, fmt.Errorf("no vertex standard pricing rows found")
|
||||
}
|
||||
|
||||
func parseVertexStandardTable(table string) []officialPricingRecord {
|
||||
rows := vertexRowPattern.FindAllStringSubmatch(table, -1)
|
||||
records := make([]officialPricingRecord, 0)
|
||||
currentModel := ""
|
||||
currentInput := 0.0
|
||||
|
||||
for _, row := range rows {
|
||||
cells := vertexCellPattern.FindAllStringSubmatch(row[1], -1)
|
||||
if len(cells) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
values := make([]string, 0, len(cells))
|
||||
for _, cell := range cells {
|
||||
values = append(values, cleanHTMLText(cell[1]))
|
||||
}
|
||||
if len(values) == 1 && !strings.Contains(values[0], "Model") {
|
||||
currentModel = values[0]
|
||||
currentInput = 0
|
||||
continue
|
||||
}
|
||||
if len(values) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
rowType := values[0]
|
||||
priceCell := values[1]
|
||||
if len(values) > 2 && strings.Contains(strings.ToLower(values[0]), "gemini") {
|
||||
currentModel = values[0]
|
||||
rowType = values[1]
|
||||
priceCell = values[2]
|
||||
}
|
||||
if strings.TrimSpace(currentModel) == "" || strings.EqualFold(currentModel, "Model") {
|
||||
continue
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(rowType, "Input (text"), strings.HasPrefix(rowType, "输入(文本"):
|
||||
price, ok := firstDollarPrice(priceCell)
|
||||
if ok {
|
||||
currentInput = price
|
||||
}
|
||||
case strings.HasPrefix(rowType, "Text output"), strings.HasPrefix(rowType, "文本输出"):
|
||||
outputPrice, ok := firstDollarPrice(priceCell)
|
||||
if !ok || currentInput == 0 {
|
||||
continue
|
||||
}
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("Google")
|
||||
record := officialPricingRecord{
|
||||
ModelID: normalizeExternalID("vertex", currentModel),
|
||||
ModelName: currentModel,
|
||||
ProviderName: "Google",
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "Google Cloud Vertex AI",
|
||||
OperatorNameCn: "Google Cloud Vertex AI",
|
||||
OperatorCountry: "US",
|
||||
OperatorWebsite: "https://cloud.google.com/vertex-ai",
|
||||
OperatorType: "cloud",
|
||||
Region: "global",
|
||||
Currency: "USD",
|
||||
InputPrice: currentInput,
|
||||
OutputPrice: outputPrice,
|
||||
SourceURL: defaultVertexPricingURL,
|
||||
ModelSourceURL: defaultVertexPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: detectModality(currentModel),
|
||||
}
|
||||
record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0
|
||||
records = append(records, record)
|
||||
}
|
||||
}
|
||||
return records
|
||||
}
|
||||
|
||||
func splitVertexFamilyBlocks(raw string) []string {
|
||||
indices := make([]int, 0)
|
||||
matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1)
|
||||
for _, match := range matches {
|
||||
label := cleanHTMLText(raw[match[2]:match[3]])
|
||||
if !strings.Contains(strings.ToLower(label), "gemini") {
|
||||
continue
|
||||
}
|
||||
indices = append(indices, match[0])
|
||||
}
|
||||
blocks := make([]string, 0, len(indices))
|
||||
for i, start := range indices {
|
||||
end := len(raw)
|
||||
if i+1 < len(indices) {
|
||||
end = indices[i+1]
|
||||
}
|
||||
blocks = append(blocks, raw[start:end])
|
||||
}
|
||||
return blocks
|
||||
}
|
||||
|
||||
func extractVertexStandardTable(raw string) string {
|
||||
heading := vertexStandardHeadingPattern.FindStringIndex(raw)
|
||||
if heading == nil {
|
||||
return ""
|
||||
}
|
||||
segment := raw[heading[1]:]
|
||||
table := vertexTablePattern.FindStringSubmatch(segment)
|
||||
if len(table) != 2 {
|
||||
return ""
|
||||
}
|
||||
return table[1]
|
||||
}
|
||||
|
||||
func parseVertexStandardTextBlocks(raw string) []officialPricingRecord {
|
||||
lines := htmlLines(raw)
|
||||
records := make([]officialPricingRecord, 0)
|
||||
currentModelParts := make([]string, 0)
|
||||
currentInput := 0.0
|
||||
inStandard := false
|
||||
|
||||
for _, line := range lines {
|
||||
lower := strings.ToLower(line)
|
||||
sectionTitle := normalizeVertexSectionTitle(lower)
|
||||
switch {
|
||||
case sectionTitle != "":
|
||||
inStandard = sectionTitle == "standard" || sectionTitle == "标准"
|
||||
currentModelParts = currentModelParts[:0]
|
||||
currentInput = 0
|
||||
continue
|
||||
case !inStandard:
|
||||
continue
|
||||
case strings.Contains(lower, "model type price"):
|
||||
continue
|
||||
case strings.Contains(line, "$"):
|
||||
modelName := strings.TrimSpace(strings.Join(currentModelParts, " "))
|
||||
if modelName == "" {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case strings.HasPrefix(lower, "input (text"), strings.HasPrefix(lower, "1m input text tokens"):
|
||||
if price, ok := firstDollarPrice(line); ok {
|
||||
currentInput = price
|
||||
}
|
||||
case strings.HasPrefix(lower, "text output"), strings.HasPrefix(lower, "1m output text tokens"):
|
||||
outputPrice, ok := firstDollarPrice(line)
|
||||
if !ok || currentInput == 0 {
|
||||
continue
|
||||
}
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata("Google")
|
||||
record := officialPricingRecord{
|
||||
ModelID: normalizeExternalID("vertex", modelName),
|
||||
ModelName: modelName,
|
||||
ProviderName: "Google",
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "Google Cloud Vertex AI",
|
||||
OperatorNameCn: "Google Cloud Vertex AI",
|
||||
OperatorCountry: "US",
|
||||
OperatorWebsite: "https://cloud.google.com/vertex-ai",
|
||||
OperatorType: "cloud",
|
||||
Region: "global",
|
||||
Currency: "USD",
|
||||
InputPrice: currentInput,
|
||||
OutputPrice: outputPrice,
|
||||
SourceURL: defaultVertexPricingURL,
|
||||
ModelSourceURL: defaultVertexPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_pricing",
|
||||
Modality: detectModality(modelName),
|
||||
}
|
||||
record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0
|
||||
records = append(records, record)
|
||||
currentModelParts = currentModelParts[:0]
|
||||
currentInput = 0
|
||||
}
|
||||
default:
|
||||
currentModelParts = append(currentModelParts, line)
|
||||
}
|
||||
}
|
||||
|
||||
return dedupeOfficialPricingRecords(records)
|
||||
}
|
||||
|
||||
// normalizeVertexSectionTitle reports whether line is one of the known pricing
// section titles. Leading "#" characters and surrounding whitespace are
// removed first; the stripped title is returned when it is recognised and ""
// otherwise. The comparison is exact, so callers pass lower-cased input.
func normalizeVertexSectionTitle(line string) string {
	candidate := strings.TrimSpace(strings.TrimLeft(line, "#"))

	knownTitles := [...]string{
		"standard", "标准",
		"priority", "优先级",
		"flex/batch", "灵活/批处理",
		"batch api",
		"live api",
	}
	for _, known := range knownTitles {
		if candidate == known {
			return candidate
		}
	}
	return ""
}
|
||||
|
||||
func htmlLines(raw string) []string {
|
||||
replacer := strings.NewReplacer(
|
||||
"<br>", "\n",
|
||||
"<br/>", "\n",
|
||||
"<br />", "\n",
|
||||
"</p>", "\n",
|
||||
"</div>", "\n",
|
||||
"</section>", "\n",
|
||||
"</tr>", "\n",
|
||||
"</td>", "\n",
|
||||
"</th>", "\n",
|
||||
"</li>", "\n",
|
||||
"</h1>", "\n",
|
||||
"</h2>", "\n",
|
||||
"</h3>", "\n",
|
||||
"</h4>", "\n",
|
||||
"</h5>", "\n",
|
||||
"</h6>", "\n",
|
||||
)
|
||||
withBreaks := replacer.Replace(raw)
|
||||
tagPattern := regexp.MustCompile(`(?is)<[^>]+>`)
|
||||
spacePattern := regexp.MustCompile(`[ \t]+`)
|
||||
cleaned := html.UnescapeString(withBreaks)
|
||||
cleaned = strings.ReplaceAll(cleaned, "\r\n", "\n")
|
||||
cleaned = strings.ReplaceAll(cleaned, "\r", "\n")
|
||||
cleaned = strings.ReplaceAll(cleaned, "\u00a0", " ")
|
||||
cleaned = tagPattern.ReplaceAllString(cleaned, "")
|
||||
rawLines := strings.Split(cleaned, "\n")
|
||||
lines := make([]string, 0, len(rawLines))
|
||||
for _, line := range rawLines {
|
||||
line = strings.TrimSpace(spacePattern.ReplaceAllString(line, " "))
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
lines = append(lines, line)
|
||||
}
|
||||
return lines
|
||||
}
|
||||
Reference in New Issue
Block a user