Files
llm-intelligence/scripts/pricing_markdown_snapshot_lib.go
phamnazage-jpg 256975e10c feat(audit): add pricing signature guards and reporting
Add snapshot, signature, and drift guard support for Vertex AI, Cloudflare Workers AI, and Perplexity API, backed by a queryable audit table and recent-window view.

This commit also wires the audit query layer into daily signal materialization and report generation so structure drift becomes a first-class signal instead of a log-only artifact.
2026-05-15 22:34:22 +08:00

252 lines
7.9 KiB
Go

//go:build llm_script
package main
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// markdownPricingStructureSignature summarizes a markdown pricing page: byte
// level identity (size and content hash) plus a coarse structural fingerprint
// (headings, table header rows, keyword presence). It is serialized as JSON
// next to the snapshot it describes.
type markdownPricingStructureSignature struct {
// ByteSize is the length of the raw content in bytes.
ByteSize int `json:"byte_size"`
// SHA256 is the hex-encoded SHA-256 of the raw content.
SHA256 string `json:"sha256"`
// StructureSHA256 is the hex-encoded SHA-256 of the digest payload built by
// markdownPricingStructureDigestPayload (line count, headings, table headers
// and contains flags), so it ignores content outside those fields.
StructureSHA256 string `json:"structure_sha256"`
// NormalizedLineCount is the number of non-blank, whitespace-trimmed lines.
NormalizedLineCount int `json:"normalized_line_count"`
// Headings holds the distinct heading texts found, in document order.
Headings []string `json:"headings"`
// TableHeaders holds table header rows, i.e. lines starting with "|" that are
// immediately followed by a separator row.
TableHeaders []string `json:"table_headers"`
// Contains records, per caller-supplied label, whether the associated needle
// occurs in the raw content (compared case-insensitively).
Contains map[string]bool `json:"contains"`
// GeneratedAt is an RFC 3339 timestamp; set when artifacts are written.
GeneratedAt string `json:"generated_at,omitempty"`
// SourceURL is the URL the content was fetched from; set when artifacts are written.
SourceURL string `json:"source_url,omitempty"`
// SnapshotPath is the path of the snapshot file this signature describes.
SnapshotPath string `json:"snapshot_path,omitempty"`
}
// buildMarkdownPricingStructureSignature derives a structure signature from
// raw markdown pricing content.
//
// containsNeedles maps a label to a substring; the returned Contains map
// records, per label, whether that substring occurs in raw, compared
// case-insensitively. GeneratedAt, SourceURL and SnapshotPath are left empty
// for the caller to fill in.
func buildMarkdownPricingStructureSignature(raw string, containsNeedles map[string]string) markdownPricingStructureSignature {
	lines := markdownPricingLines(raw)

	// Lowercase the document once rather than once per needle.
	loweredRaw := strings.ToLower(raw)
	contains := make(map[string]bool, len(containsNeedles))
	for key, needle := range containsNeedles {
		contains[key] = strings.Contains(loweredRaw, strings.ToLower(needle))
	}

	signature := markdownPricingStructureSignature{
		ByteSize:            len(raw), // len of a string is already its byte length
		SHA256:              markdownPricingSHA256Hex(raw),
		NormalizedLineCount: len(lines),
		Headings:            extractMarkdownPricingHeadings(lines),
		TableHeaders:        extractMarkdownPricingTableHeaders(lines),
		Contains:            contains,
	}
	// The structure hash is derived from the fields populated above, so it has
	// to be computed last.
	signature.StructureSHA256 = markdownPricingSHA256Hex(markdownPricingStructureDigestPayload(signature))
	return signature
}
// writeMarkdownPricingSnapshotArtifacts persists raw to snapshotPath and its
// structure signature (as indented JSON) to signaturePath, creating parent
// directories as needed.
//
// Both paths are required. The returned signature carries GeneratedAt (now in
// RFC 3339), SourceURL and SnapshotPath in addition to the computed fields.
// On any failure the zero signature is returned together with a wrapped error;
// note the snapshot file may already have been written at that point.
func writeMarkdownPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time, containsNeedles map[string]string) (markdownPricingStructureSignature, error) {
	var none markdownPricingStructureSignature
	// fail wraps cause with the name of the step that produced it.
	fail := func(step string, cause error) (markdownPricingStructureSignature, error) {
		return none, fmt.Errorf("%s: %w", step, cause)
	}

	switch {
	case strings.TrimSpace(snapshotPath) == "":
		return none, fmt.Errorf("snapshot path is required")
	case strings.TrimSpace(signaturePath) == "":
		return none, fmt.Errorf("signature path is required")
	}

	if mkErr := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); mkErr != nil {
		return fail("mkdir snapshot dir", mkErr)
	}
	if mkErr := os.MkdirAll(filepath.Dir(signaturePath), 0o755); mkErr != nil {
		return fail("mkdir signature dir", mkErr)
	}
	if wrErr := os.WriteFile(snapshotPath, []byte(raw), 0o644); wrErr != nil {
		return fail("write snapshot", wrErr)
	}

	result := buildMarkdownPricingStructureSignature(raw, containsNeedles)
	result.GeneratedAt = now.Format(time.RFC3339)
	result.SourceURL = sourceURL
	result.SnapshotPath = snapshotPath

	encoded, encErr := json.MarshalIndent(result, "", " ")
	if encErr != nil {
		return fail("marshal signature", encErr)
	}
	if wrErr := os.WriteFile(signaturePath, encoded, 0o644); wrErr != nil {
		return fail("write signature", wrErr)
	}
	return result, nil
}
// resolveMarkdownPricingSnapshotPaths fills in whichever of snapshotPath and
// signaturePath is blank and returns both.
//
// A blank snapshotPath becomes "<snapshotDir>/<baseName>-<timestamp>.md",
// where snapshotDir defaults to "logs/<baseName>-snapshots" and the timestamp
// is now formatted as YYYYMMDD-HHMMSS. A blank signaturePath becomes the
// snapshot path with its extension replaced by ".signature.json". Values that
// contain only whitespace count as blank; non-blank values are returned as is.
func resolveMarkdownPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	needSnapshot := strings.TrimSpace(snapshotPath) == ""
	needSignature := strings.TrimSpace(signaturePath) == ""

	if needSnapshot {
		dir := snapshotDir
		if strings.TrimSpace(dir) == "" {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stem := filepath.Join(dir, baseName+"-"+now.Format("20060102-150405"))
		if needSignature {
			signaturePath = stem + ".signature.json"
		}
		return stem + ".md", signaturePath
	}

	if needSignature {
		// Swap the snapshot's extension (if any) for the signature suffix.
		ext := filepath.Ext(snapshotPath)
		signaturePath = strings.TrimSuffix(snapshotPath, ext) + ".signature.json"
	}
	return snapshotPath, signaturePath
}
// readMarkdownPricingStructureSignature loads a JSON-encoded signature from
// path. A read failure is returned unwrapped; a decode failure is wrapped with
// the path. In both cases the zero signature is returned.
func readMarkdownPricingStructureSignature(path string) (markdownPricingStructureSignature, error) {
	var none markdownPricingStructureSignature

	contents, err := os.ReadFile(path)
	if err != nil {
		return none, err
	}

	decoded := markdownPricingStructureSignature{}
	if err = json.Unmarshal(contents, &decoded); err != nil {
		// Discard whatever was partially decoded.
		return none, fmt.Errorf("unmarshal signature %s: %w", path, err)
	}
	return decoded, nil
}
// hasMarkdownPricingStructureSignature reports whether signature carries any
// computed content: a positive byte size, a non-blank hash, or at least one
// heading, table header or contains entry. The metadata fields (GeneratedAt,
// SourceURL, SnapshotPath) are not considered.
func hasMarkdownPricingStructureSignature(signature markdownPricingStructureSignature) bool {
	if signature.ByteSize > 0 {
		return true
	}
	for _, digest := range []string{signature.StructureSHA256, signature.SHA256} {
		if strings.TrimSpace(digest) != "" {
			return true
		}
	}
	return len(signature.Headings)+len(signature.TableHeaders)+len(signature.Contains) > 0
}
// markdownPricingLines splits raw into lines, treating "\r\n", "\r" and "\n"
// as line breaks, trims surrounding whitespace from each line and drops the
// ones that end up empty. The result is never nil.
func markdownPricingLines(raw string) []string {
	// Splitting on every CR or LF is enough: the empty pieces produced by
	// "\r\n" pairs or blank lines are discarded anyway.
	segments := strings.FieldsFunc(raw, func(r rune) bool {
		return r == '\n' || r == '\r'
	})

	kept := make([]string, 0, len(segments))
	for _, segment := range segments {
		if cleaned := strings.TrimSpace(segment); cleaned != "" {
			kept = append(kept, cleaned)
		}
	}
	return kept
}
// extractMarkdownPricingHeadings collects the text of lines that start with
// "#", with the leading "#" run and surrounding whitespace removed. Empty and
// repeated headings are skipped, document order is kept, and at most 12
// headings are returned. The result is never nil.
func extractMarkdownPricingHeadings(lines []string) []string {
	const maxHeadings = 12

	collected := make([]string, 0, maxHeadings)
	recorded := make(map[string]bool)
	for idx := 0; idx < len(lines) && len(collected) < maxHeadings; idx++ {
		current := lines[idx]
		if !strings.HasPrefix(current, "#") {
			continue
		}
		title := strings.TrimSpace(strings.TrimLeft(current, "#"))
		if title == "" || recorded[title] {
			continue
		}
		recorded[title] = true
		collected = append(collected, title)
	}
	return collected
}
func extractMarkdownPricingTableHeaders(lines []string) []string {
headers := make([]string, 0, 6)
for i, line := range lines {
if !strings.HasPrefix(line, "|") {
continue
}
if i+1 >= len(lines) || !isMarkdownSnapshotTableSeparator(splitMarkdownSnapshotTableRow(lines[i+1])) {
continue
}
headers = append(headers, line)
if len(headers) >= 6 {
break
}
}
return headers
}
// markdownPricingStructureDigestPayload renders the structural fields of
// signature (normalized line count, headings, table headers and contains
// flags) as a compact JSON string suitable for hashing. Contains entries are
// emitted as a list sorted by name so the output does not depend on map
// iteration order. Headings and TableHeaders are encoded as given, so a nil
// slice ("null") and an empty slice ("[]") produce different payloads.
func markdownPricingStructureDigestPayload(signature markdownPricingStructureSignature) string {
	type flag struct {
		Name  string `json:"name"`
		Value bool   `json:"value"`
	}
	type digestInput struct {
		NormalizedLineCount int      `json:"normalized_line_count"`
		Headings            []string `json:"headings"`
		TableHeaders        []string `json:"table_headers"`
		Contains            []flag   `json:"contains"`
	}

	flags := make([]flag, 0, len(signature.Contains))
	for name, present := range signature.Contains {
		flags = append(flags, flag{Name: name, Value: present})
	}
	// Map keys are unique, so ordering by name alone is deterministic.
	sort.Slice(flags, func(a, b int) bool { return flags[a].Name < flags[b].Name })

	// The payload holds only ints, strings and bools; the Marshal error is
	// deliberately ignored, as this function has no error return.
	encoded, _ := json.Marshal(digestInput{
		NormalizedLineCount: signature.NormalizedLineCount,
		Headings:            signature.Headings,
		TableHeaders:        signature.TableHeaders,
		Contains:            flags,
	})
	return string(encoded)
}
// markdownPricingSHA256Hex returns the lowercase hex encoding of the SHA-256
// digest of raw.
func markdownPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash documents that Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
// splitMarkdownSnapshotTableRow splits a markdown table row into its cells.
// Surrounding whitespace and one leading and one trailing "|" are removed
// before splitting on "|"; each cell is whitespace-trimmed. It returns nil
// when nothing remains after stripping the outer pipes.
func splitMarkdownSnapshotTableRow(line string) []string {
	inner := strings.TrimSpace(line)
	inner = strings.TrimSuffix(strings.TrimPrefix(inner, "|"), "|")
	if len(inner) == 0 {
		return nil
	}

	cells := strings.Split(inner, "|")
	for idx := range cells {
		cells[idx] = strings.TrimSpace(cells[idx])
	}
	return cells
}
// isMarkdownSnapshotTableSeparator reports whether parts are the cells of a
// markdown table delimiter row (the "|---|:--:|" line under a header).
//
// Every cell, after trimming whitespace, must be one or more hyphens with an
// optional alignment colon at the start and/or the end. An empty cell list,
// an empty cell, a cell without any hyphen (":" or "::") and a cell with a
// colon in the middle ("-:-") are all rejected.
func isMarkdownSnapshotTableSeparator(parts []string) bool {
	if len(parts) == 0 {
		return false
	}
	for _, part := range parts {
		cell := strings.TrimSpace(part)
		// Alignment markers: at most one colon on each side.
		cell = strings.TrimPrefix(cell, ":")
		cell = strings.TrimSuffix(cell, ":")
		// What is left must be a non-empty run of hyphens.
		if cell == "" || strings.Trim(cell, "-") != "" {
			return false
		}
	}
	return true
}
// copyFileCommon copies the contents of src to dst, creating dst's parent
// directories (mode 0755) if needed. The source is read fully into memory
// first, so nothing is created when it cannot be read. dst is written with
// mode 0644 regardless of the source file's mode. Errors are returned as
// produced by the os package.
func copyFileCommon(src string, dst string) error {
	contents, readErr := os.ReadFile(src)
	if readErr != nil {
		return readErr
	}

	parent := filepath.Dir(dst)
	if mkdirErr := os.MkdirAll(parent, 0o755); mkdirErr != nil {
		return mkdirErr
	}

	if writeErr := os.WriteFile(dst, contents, 0o644); writeErr != nil {
		return writeErr
	}
	return nil
}