feat(intraday): monitor DeepSeek official page drift
This commit is contained in:
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
@@ -0,0 +1,183 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// deepseekPricingStructureSignature is the JSON-serialisable fingerprint of one
// fetched page. The first group of fields is derived from the page content by
// buildDeepSeekPricingStructureSignature; the trailing omitempty fields are
// bookkeeping filled in by writeDeepSeekPricingSnapshotArtifacts.
type deepseekPricingStructureSignature struct {
	// ByteSize is the length of the raw page in bytes.
	ByteSize int `json:"byte_size"`
	// SHA256 is the hex SHA-256 of the raw page.
	SHA256 string `json:"sha256"`
	// StructureSHA256 is the hex SHA-256 of the digest payload built from
	// Title, MetaDescription, CommitID, CanonicalURL and Contains.
	StructureSHA256 string `json:"structure_sha256"`

	// Title is the cleaned text of the first <title> element.
	Title string `json:"title"`
	// MetaDescription is the content of the first description meta tag.
	MetaDescription string `json:"meta_description"`
	// CommitID is the content of the first commit-id meta tag.
	CommitID string `json:"commit_id"`
	// CanonicalURL is the content of the first og:url meta tag.
	CanonicalURL string `json:"canonical_url"`
	// Contains records, per needle name, whether the needle occurs in the page.
	Contains map[string]bool `json:"contains"`

	// GeneratedAt is an RFC 3339 timestamp set when the artifacts are written.
	GeneratedAt string `json:"generated_at,omitempty"`
	// SourceURL is the caller-supplied URL the page was fetched from.
	SourceURL string `json:"source_url,omitempty"`
	// SnapshotPath is where the raw page was stored.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
||||
|
||||
// deepseekPricingContainsNeedles maps a signature key to the substring that is
// searched for (case-insensitively) in the fetched page. Each key becomes an
// entry in the signature's Contains map.
var deepseekPricingContainsNeedles = map[string]string{
	"api_docs":  "api",
	"deepseek":  "deepseek",
	"developer": "developer resources",
	"platform":  "platform",
	"pricing":   "pricing",
}
|
||||
|
||||
// Patterns used to pull page metadata out of raw HTML. All of them are compiled
// once at package scope. The meta patterns expect the identifying attribute
// (name/property) to appear before the content attribute inside the tag.
var (
	// deepseekPricingTitleRe captures the inner markup of the first <title>.
	deepseekPricingTitleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)

	// deepseekPricingMetaDescRe captures the content of <meta name="description">.
	deepseekPricingMetaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)

	// deepseekPricingCommitRe captures the content of <meta name="commit-id">.
	deepseekPricingCommitRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`)

	// deepseekPricingCanonicalRe captures the content of <meta property="og:url">.
	deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']`)

	// deepseekPricingHTMLTagRe matches any single HTML tag.
	deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
)
|
||||
|
||||
func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature {
|
||||
title := firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw)
|
||||
meta := firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw)
|
||||
commitID := firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw)
|
||||
canonicalURL := firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw)
|
||||
contains := make(map[string]bool, len(deepseekPricingContainsNeedles))
|
||||
lower := strings.ToLower(raw)
|
||||
for key, needle := range deepseekPricingContainsNeedles {
|
||||
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
||||
}
|
||||
signature := deepseekPricingStructureSignature{
|
||||
ByteSize: len([]byte(raw)),
|
||||
SHA256: deepseekPricingSHA256Hex(raw),
|
||||
Title: title,
|
||||
MetaDescription: meta,
|
||||
CommitID: commitID,
|
||||
CanonicalURL: canonicalURL,
|
||||
Contains: contains,
|
||||
}
|
||||
signature.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(signature))
|
||||
return signature
|
||||
}
|
||||
|
||||
func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) {
|
||||
if strings.TrimSpace(snapshotPath) == "" {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
||||
}
|
||||
if strings.TrimSpace(signaturePath) == "" {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
||||
}
|
||||
signature := buildDeepSeekPricingStructureSignature(raw)
|
||||
signature.GeneratedAt = now.Format(time.RFC3339)
|
||||
signature.SourceURL = sourceURL
|
||||
signature.SnapshotPath = snapshotPath
|
||||
payload, err := json.MarshalIndent(signature, "", " ")
|
||||
if err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
// resolveDeepSeekPricingSnapshotPaths fills in whichever of the two output
// paths the caller left blank and returns (snapshotPath, signaturePath).
//
//   - A blank snapshotPath becomes <snapshotDir>/<baseName>-<YYYYMMDD-HHMMSS>.html,
//     where a blank snapshotDir defaults to logs/<baseName>-snapshots.
//   - A blank signaturePath becomes the snapshot path with its extension
//     replaced by ".signature.json".
//
// Paths that are already set are returned unchanged. "Blank" means empty after
// trimming whitespace.
func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	if isBlank(snapshotPath) {
		dir := snapshotDir
		if isBlank(dir) {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stamp := now.Format("20060102-150405")
		snapshotPath = filepath.Join(dir, baseName+"-"+stamp) + ".html"
	}

	if isBlank(signaturePath) {
		stem := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = stem + ".signature.json"
	}

	return snapshotPath, signaturePath
}
|
||||
|
||||
func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return deepseekPricingStructureSignature{}, err
|
||||
}
|
||||
var signature deepseekPricingStructureSignature
|
||||
if err := json.Unmarshal(data, &signature); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool {
|
||||
return signature.ByteSize > 0 ||
|
||||
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
||||
strings.TrimSpace(signature.SHA256) != "" ||
|
||||
strings.TrimSpace(signature.Title) != "" ||
|
||||
strings.TrimSpace(signature.CommitID) != "" ||
|
||||
len(signature.Contains) > 0
|
||||
}
|
||||
|
||||
func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string {
|
||||
type containsEntry struct {
|
||||
Name string `json:"name"`
|
||||
Value bool `json:"value"`
|
||||
}
|
||||
keys := make([]string, 0, len(signature.Contains))
|
||||
for key := range signature.Contains {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
entries := make([]containsEntry, 0, len(keys))
|
||||
for _, key := range keys {
|
||||
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
||||
}
|
||||
payload := struct {
|
||||
Title string `json:"title"`
|
||||
MetaDescription string `json:"meta_description"`
|
||||
CommitID string `json:"commit_id"`
|
||||
CanonicalURL string `json:"canonical_url"`
|
||||
Contains []containsEntry `json:"contains"`
|
||||
}{
|
||||
Title: signature.Title,
|
||||
MetaDescription: signature.MetaDescription,
|
||||
CommitID: signature.CommitID,
|
||||
CanonicalURL: signature.CanonicalURL,
|
||||
Contains: entries,
|
||||
}
|
||||
bytes, _ := json.Marshal(payload)
|
||||
return string(bytes)
|
||||
}
|
||||
|
||||
// deepseekPricingSHA256Hex returns the SHA-256 digest of raw as a lowercase
// hexadecimal string.
func deepseekPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string {
|
||||
match := re.FindStringSubmatch(raw)
|
||||
if len(match) < 2 {
|
||||
return ""
|
||||
}
|
||||
text := deepseekPricingHTMLTagRe.ReplaceAllString(match[1], " ")
|
||||
text = strings.ReplaceAll(text, "&", "&")
|
||||
text = strings.ReplaceAll(text, " ", " ")
|
||||
text = strings.Join(strings.Fields(text), " ")
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
Reference in New Issue
Block a user