Add snapshot, signature, and drift guard support for Vertex AI, Cloudflare Workers AI, and Perplexity API, backed by a queryable audit table and recent-window view. This commit also wires the audit query layer into daily signal materialization and report generation so structure drift becomes a first-class signal instead of a log-only artifact.
174 lines
6.1 KiB
Go
174 lines
6.1 KiB
Go
//go:build llm_script
|
|
|
|
package main
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// vertexPricingStructureSignature is the JSON-serializable fingerprint of one
// captured page. buildVertexPricingStructureSignature fills the content-derived
// fields; writeVertexPricingSnapshotArtifacts adds the three omitempty
// provenance fields before the signature is written to disk.
//
// Field order is significant: encoding/json emits fields in declaration order.
type vertexPricingStructureSignature struct {
	// ByteSize is the length of the raw page text in bytes.
	ByteSize int `json:"byte_size"`

	// SHA256 is the hex-encoded SHA-256 of the raw page text.
	SHA256 string `json:"sha256"`

	// StructureSHA256 is the hex-encoded SHA-256 of the payload produced by
	// vertexStructureDigestPayload for this signature.
	StructureSHA256 string `json:"structure_sha256"`

	// NormalizedLineCount is the number of lines returned by htmlLines.
	NormalizedLineCount int `json:"normalized_line_count"`

	// TagCounts maps a lower-cased tag name to the number of opening tags
	// matched by vertexSignatureTagPattern.
	TagCounts map[string]int `json:"tag_counts"`

	// Headings holds the distinct, non-empty heading texts collected by
	// extractVertexSignatureHeadings, in document order.
	Headings []string `json:"headings"`

	// ContainsGemini reports whether the page contains "gemini", ignoring case.
	ContainsGemini bool `json:"contains_gemini"`

	// ContainsStandard reports whether some normalized line equals "standard",
	// ignoring case and surrounding whitespace.
	ContainsStandard bool `json:"contains_standard"`

	// ContainsPriceText reports whether the page contains "price", ignoring case.
	ContainsPriceText bool `json:"contains_price_text"`

	// ContainsTable reports whether at least one <table> tag was counted.
	ContainsTable bool `json:"contains_table"`

	// GeneratedAt is the capture time formatted as RFC 3339.
	GeneratedAt string `json:"generated_at,omitempty"`

	// SourceURL is the URL passed in by the caller of
	// writeVertexPricingSnapshotArtifacts.
	SourceURL string `json:"source_url,omitempty"`

	// SnapshotPath is the file path the raw page was written to.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
|
|
|
// vertexSignatureTagPattern matches the opening "<name" of a fixed set of
// structural tags, case-insensitively, and captures the tag name in group 1.
// The trailing \b keeps longer names that merely start with a listed tag
// (for example "<thead" or "<divider") from matching.
var vertexSignatureTagPattern = regexp.MustCompile(
	`(?is)<(html|body|section|div|table|tr|td|th|h1|h2|h3|h4|h5|h6|script|article)\b`,
)
|
|
|
|
func buildVertexPricingStructureSignature(raw string) vertexPricingStructureSignature {
|
|
lines := htmlLines(raw)
|
|
tagCounts := make(map[string]int)
|
|
matches := vertexSignatureTagPattern.FindAllStringSubmatch(raw, -1)
|
|
for _, match := range matches {
|
|
tagCounts[strings.ToLower(match[1])]++
|
|
}
|
|
|
|
headings := extractVertexSignatureHeadings(raw)
|
|
signature := vertexPricingStructureSignature{
|
|
ByteSize: len([]byte(raw)),
|
|
SHA256: sha256Hex(raw),
|
|
NormalizedLineCount: len(lines),
|
|
TagCounts: tagCounts,
|
|
Headings: headings,
|
|
ContainsGemini: strings.Contains(strings.ToLower(raw), "gemini"),
|
|
ContainsStandard: containsLine(lines, "standard"),
|
|
ContainsPriceText: strings.Contains(strings.ToLower(raw), "price"),
|
|
ContainsTable: tagCounts["table"] > 0,
|
|
}
|
|
signature.StructureSHA256 = sha256Hex(vertexStructureDigestPayload(signature))
|
|
return signature
|
|
}
|
|
|
|
func writeVertexPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (vertexPricingStructureSignature, error) {
|
|
if strings.TrimSpace(snapshotPath) == "" {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
|
}
|
|
if strings.TrimSpace(signaturePath) == "" {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
|
}
|
|
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
|
}
|
|
|
|
signature := buildVertexPricingStructureSignature(raw)
|
|
signature.GeneratedAt = now.Format(time.RFC3339)
|
|
signature.SourceURL = sourceURL
|
|
signature.SnapshotPath = snapshotPath
|
|
payload, err := json.MarshalIndent(signature, "", " ")
|
|
if err != nil {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
|
}
|
|
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
|
return vertexPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
|
}
|
|
return signature, nil
|
|
}
|
|
|
|
// resolveVertexPricingSnapshotPaths fills in whichever output path the caller
// left blank and returns (snapshotPath, signaturePath).
//
// A blank snapshotPath becomes
// logs/vertex-pricing-snapshots/vertex-pricing-<YYYYMMDD-HHMMSS>.html, using
// now for the timestamp. A blank signaturePath becomes the snapshot path with
// its extension replaced by ".signature.json". Paths that are not blank are
// returned unchanged.
func resolveVertexPricingSnapshotPaths(snapshotPath string, signaturePath string, now time.Time) (string, string) {
	if strings.TrimSpace(snapshotPath) == "" {
		fileStem := fmt.Sprintf("vertex-pricing-%s", now.Format("20060102-150405"))
		snapshotPath = filepath.Join("logs", "vertex-pricing-snapshots", fileStem) + ".html"
	}

	if strings.TrimSpace(signaturePath) != "" {
		return snapshotPath, signaturePath
	}

	// The generated stem contains no dot, so stripping the extension of a
	// generated snapshot path yields exactly the stem it was built from.
	withoutExt := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
	return snapshotPath, withoutExt + ".signature.json"
}
|
|
|
|
func extractVertexSignatureHeadings(raw string) []string {
|
|
matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1)
|
|
headings := make([]string, 0, len(matches))
|
|
seen := make(map[string]struct{})
|
|
for _, match := range matches {
|
|
heading := cleanHTMLText(raw[match[2]:match[3]])
|
|
if heading == "" {
|
|
continue
|
|
}
|
|
if _, exists := seen[heading]; exists {
|
|
continue
|
|
}
|
|
seen[heading] = struct{}{}
|
|
headings = append(headings, heading)
|
|
if len(headings) >= 12 {
|
|
break
|
|
}
|
|
}
|
|
return headings
|
|
}
|
|
|
|
func vertexStructureDigestPayload(signature vertexPricingStructureSignature) string {
|
|
type tagCount struct {
|
|
Name string `json:"name"`
|
|
Count int `json:"count"`
|
|
}
|
|
tagNames := make([]string, 0, len(signature.TagCounts))
|
|
for name := range signature.TagCounts {
|
|
tagNames = append(tagNames, name)
|
|
}
|
|
sort.Strings(tagNames)
|
|
tagCounts := make([]tagCount, 0, len(tagNames))
|
|
for _, name := range tagNames {
|
|
tagCounts = append(tagCounts, tagCount{Name: name, Count: signature.TagCounts[name]})
|
|
}
|
|
payload := struct {
|
|
NormalizedLineCount int `json:"normalized_line_count"`
|
|
TagCounts []tagCount `json:"tag_counts"`
|
|
Headings []string `json:"headings"`
|
|
ContainsGemini bool `json:"contains_gemini"`
|
|
ContainsStandard bool `json:"contains_standard"`
|
|
ContainsPriceText bool `json:"contains_price_text"`
|
|
ContainsTable bool `json:"contains_table"`
|
|
}{
|
|
NormalizedLineCount: signature.NormalizedLineCount,
|
|
TagCounts: tagCounts,
|
|
Headings: signature.Headings,
|
|
ContainsGemini: signature.ContainsGemini,
|
|
ContainsStandard: signature.ContainsStandard,
|
|
ContainsPriceText: signature.ContainsPriceText,
|
|
ContainsTable: signature.ContainsTable,
|
|
}
|
|
bytes, _ := json.Marshal(payload)
|
|
return string(bytes)
|
|
}
|
|
|
|
// sha256Hex returns the SHA-256 digest of raw as a 64-character lower-case
// hexadecimal string.
func sha256Hex(raw string) string {
	digest := sha256.Sum256([]byte(raw))
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
|
|
|
|
// containsLine reports whether any element of lines equals target once the
// element's surrounding whitespace is trimmed, comparing case-insensitively.
// target itself is compared as given and is not trimmed.
func containsLine(lines []string, target string) bool {
	for i := 0; i < len(lines); i++ {
		candidate := strings.TrimSpace(lines[i])
		if strings.EqualFold(candidate, target) {
			return true
		}
	}
	return false
}
|