//go:build llm_script && !scripts_pkg
package main
import (
"context"
"database/sql"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"regexp"
"strings"
"time"
)
// Default source locations for the SenseNova pricing import.
const (
// defaultSensenovaDocsURL is the default value of the -docs-url flag. It is
// also recorded on every parsed record as OperatorWebsite and SourceURL.
defaultSensenovaDocsURL = "https://platform.sensenova.cn/docs"
// defaultSensenovaModelsURL is the default value of the -models-url flag.
defaultSensenovaModelsURL = "https://www.sensenova.cn/models"
)
// sensenovaPricingImportConfig carries the command-line settings of one import
// run; main fills it in from the parsed flags.
type sensenovaPricingImportConfig struct {
// DocsURL is the docs page rendered through headless Chromium.
DocsURL string
// ModelsURL is the models page whose chunk scripts are scanned.
ModelsURL string
// Fixture is an optional file path; when non-blank the file is read and split
// instead of fetching anything over the network.
Fixture string
// DryRun makes the run print a summary without touching the database.
DryRun bool
// Timeout bounds both the Chromium render and the HTTP client used for the
// models page.
Timeout time.Duration
}
// sensenovaPricingFixture holds the two raw inputs the catalog parser needs.
type sensenovaPricingFixture struct {
// DocsHTML is the DOM dump of the docs page, or the fixture segment before the
// split marker.
DocsHTML string
// ModelsText is the models-page script bundle that carries the free-beta
// wording, or the fixture segment after the split marker.
ModelsText string
}
// sensenovaPricingDocModel describes one model entry taken from the docs page.
// NOTE(review): nothing in the visible part of this file references this type;
// confirm it is still used before relying on it.
type sensenovaPricingDocModel struct {
// ModelName is the model's display name.
ModelName string
// ModelID is the model's identifier.
ModelID string
// QuotaPer5Hour presumably mirrors the "每5小时N次" call limit that the
// overview patterns capture — TODO confirm against the code that fills it.
QuotaPer5Hour int
}
// Marker and compiled patterns used to slice the fixture file, the models page
// shell and the rendered docs DOM.
var (
// sensenovaFixtureSplitMarker separates the docs segment from the models
// bundle segment inside a fixture file (see splitSensenovaFixture).
sensenovaFixtureSplitMarker = "\n===SENSENOVA_MODELS_BUNDLE===\n"
// sensenovaOverviewCardPattern is the first pattern tried against the docs
// DOM. It has three capture groups: a text run, the digits of "每5小时N次",
// and the text run following "MODEL ID".
// NOTE(review): the literal contains bare `]*>` fragments with no opening
// `<tag[^>` part, which suggests markup was stripped from this file at some
// point — compare with version control before trusting or editing it.
sensenovaOverviewCardPattern = regexp.MustCompile(`(?s)
]*>([^<]+)
.*?调用次数限制]*>每5小时([0-9]+)次
.*?MODEL ID]*>([^<]+)`)
// sensenovaModelsScriptPattern captures the src of script tags that point at
// /_next/static/chunks/*.js, either with a leading prefix or root-relative.
sensenovaModelsScriptPattern = regexp.MustCompile(`src="([^"]+/_next/static/chunks/[^"]+\.js|/_next/static/chunks/[^"]+\.js)"`)
// sensenovaPricingZeroPattern matches a "pricing" JSON object whose prompt,
// completion, image and request values are all the string "0", in that order.
sensenovaPricingZeroPattern = regexp.MustCompile(`(?s)"pricing"\s*:\s*\{\s*"prompt"\s*:\s*"0"\s*,\s*"completion"\s*:\s*"0"\s*,\s*"image"\s*:\s*"0"\s*,\s*"request"\s*:\s*"0"`)
// sensenovaOverviewTableRowPattern is the fallback tried when the card pattern
// finds nothing. It has four capture groups.
// NOTE(review): as written the top-level `|` characters act as regex
// alternation and the `]*>` fragments have no opening bracket; this looks like
// the same markup loss as in the card pattern — confirm against the original.
sensenovaOverviewTableRowPattern = regexp.MustCompile(`(?s)]*>\s*| ]*>([^<]+) | \s*]*>.*?]*>([^<]+).*? | \s*]*>每5小时([0-9]+)次 | \s*]*>([^<]+) | \s*
`)
)
// main parses the command-line flags, opens the database unless -dry-run is
// set, runs the import and exits with status 1 on any failure.
//
// The database handle is closed explicitly rather than with defer: os.Exit
// terminates the process without running deferred calls, so a deferred Close
// would be skipped on the failure path.
func main() {
	loadSubscriptionImportEnv()

	var (
		docsURL        string
		modelsURL      string
		fixture        string
		dryRun         bool
		timeoutSeconds int
	)
	flag.StringVar(&docsURL, "docs-url", defaultSensenovaDocsURL, "商汤 SenseNova API 文档页")
	flag.StringVar(&modelsURL, "models-url", defaultSensenovaModelsURL, "商汤 SenseNova 模型页")
	flag.StringVar(&fixture, "fixture", "", "商汤 SenseNova 价格样例文件")
	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", 45, "请求超时(秒)")
	flag.Parse()

	cfg := sensenovaPricingImportConfig{
		DocsURL:   docsURL,
		ModelsURL: modelsURL,
		Fixture:   fixture,
		DryRun:    dryRun,
		Timeout:   time.Duration(timeoutSeconds) * time.Second,
	}

	// db stays nil for dry runs; runSensenovaPricingImport never touches it then.
	var db *sql.DB
	if !cfg.DryRun {
		var err error
		db, err = subscriptionImportDB()
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			os.Exit(1)
		}
	}

	runErr := runSensenovaPricingImport(cfg, db, os.Stdout)

	// Release the handle before deciding the exit status so it is closed on
	// both the success and the failure path.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
		}
	}

	if runErr != nil {
		fmt.Fprintf(os.Stderr, "import_sensenova_pricing: %v\n", runErr)
		os.Exit(1)
	}
}
// runSensenovaPricingImport loads the raw inputs, parses and de-duplicates the
// pricing records, then either prints a dry-run summary or upserts the records
// and prints a summary that includes the region_pricing row count.
//
// db may be nil only when cfg.DryRun is true. The summary line is written to
// out; the write error, if any, is returned.
func runSensenovaPricingImport(cfg sensenovaPricingImportConfig, db *sql.DB, out io.Writer) error {
	fixture, err := fetchSensenovaPricingFixture(cfg)
	if err != nil {
		return err
	}

	records, err := parseSensenovaPricingCatalog(fixture)
	if err != nil {
		return err
	}
	records = dedupeOfficialPricingRecords(records)
	total := len(records)
	if total == 0 {
		return fmt.Errorf("unexpected sensenova pricing content: no records")
	}
	// Every record carries the same operator, so the first one labels the run.
	operator := records[0].OperatorName

	if cfg.DryRun {
		_, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s dry_run=true\n", total, operator)
		return err
	}

	if db == nil {
		return fmt.Errorf("db is required when dry-run=false")
	}
	if err = upsertOfficialPricingRecords(db, records, "sensenova-pricing-import"); err != nil {
		return err
	}

	var tableRows int
	countRow := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`)
	if err = countRow.Scan(&tableRows); err != nil {
		return fmt.Errorf("count region_pricing: %w", err)
	}

	_, err = fmt.Fprintf(out, "source=sensenova-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", total, operator, tableRows)
	return err
}
// fetchSensenovaPricingFixture produces the docs HTML and models bundle text
// for one import run.
//
// When cfg.Fixture is non-blank the file at that path is read and split on the
// fixture marker. Otherwise the docs page is rendered with headless Chromium
// and the models page's chunk scripts are fetched over HTTP.
func fetchSensenovaPricingFixture(cfg sensenovaPricingImportConfig) (sensenovaPricingFixture, error) {
	var none sensenovaPricingFixture

	if fixturePath := cfg.Fixture; strings.TrimSpace(fixturePath) != "" {
		raw, readErr := os.ReadFile(fixturePath)
		if readErr != nil {
			return none, fmt.Errorf("read fixture %s: %w", fixturePath, readErr)
		}
		return splitSensenovaFixture(string(raw))
	}

	rendered, err := fetchRenderedPricingPageWithChromium(cfg.DocsURL, cfg.Timeout)
	if err != nil {
		return none, fmt.Errorf("fetch docs render: %w", err)
	}

	bundle, err := fetchSensenovaModelsBundle(cfg.ModelsURL, cfg.Timeout)
	if err != nil {
		return none, err
	}

	return sensenovaPricingFixture{DocsHTML: rendered, ModelsText: bundle}, nil
}
// splitSensenovaFixture cuts a fixture file's contents at the first occurrence
// of sensenovaFixtureSplitMarker. The text before the marker becomes DocsHTML
// and the text after it becomes ModelsText, both whitespace-trimmed.
//
// It returns an error when the marker is absent or when either trimmed segment
// is empty.
func splitSensenovaFixture(raw string) (sensenovaPricingFixture, error) {
	cut := strings.Index(raw, sensenovaFixtureSplitMarker)
	if cut < 0 {
		return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: missing models bundle marker")
	}

	head := strings.TrimSpace(raw[:cut])
	tail := strings.TrimSpace(raw[cut+len(sensenovaFixtureSplitMarker):])
	if head == "" || tail == "" {
		return sensenovaPricingFixture{}, fmt.Errorf("unexpected sensenova fixture: empty docs or models segment")
	}

	return sensenovaPricingFixture{DocsHTML: head, ModelsText: tail}, nil
}
// fetchSensenovaModelsBundle downloads the models page shell, collects the
// chunk script URLs referenced in it, and returns the text of the first chunk
// that contains the public-beta free wording.
//
// Chunks are tried in page order and each distinct URL is fetched at most
// once. A chunk that cannot be resolved or fetched is skipped so that one bad
// asset does not abort the scan. The most recent such failure is kept and
// wrapped into the final error, so a network problem is not reported as a
// plain "bundle not found".
//
// timeout is applied to the HTTP client used for every request.
func fetchSensenovaModelsBundle(modelsURL string, timeout time.Duration) (string, error) {
	client := &http.Client{Timeout: timeout}

	html, err := fetchRawPricingPage(modelsURL, "", client)
	if err != nil {
		return "", fmt.Errorf("fetch models page shell: %w", err)
	}

	scripts := sensenovaModelsScriptPattern.FindAllStringSubmatch(html, -1)
	if len(scripts) == 0 {
		return "", fmt.Errorf("unexpected sensenova models page: no chunk scripts found")
	}

	seen := make(map[string]struct{}, len(scripts))
	// lastErr remembers why the most recent chunk was skipped.
	var lastErr error
	for _, match := range scripts {
		if len(match) != 2 {
			continue
		}

		scriptURL, err := resolveSensenovaAssetURL(modelsURL, match[1])
		if err != nil {
			lastErr = fmt.Errorf("resolve chunk %q: %w", match[1], err)
			continue
		}
		if _, ok := seen[scriptURL]; ok {
			continue
		}
		seen[scriptURL] = struct{}{}

		bundle, err := fetchRawPricingPage(scriptURL, "", client)
		if err != nil {
			lastErr = fmt.Errorf("fetch chunk %s: %w", scriptURL, err)
			continue
		}
		if sensenovaBundleConfirmsFreeBeta(bundle) {
			return bundle, nil
		}
	}

	if lastErr != nil {
		return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found (last chunk error: %w)", lastErr)
	}
	return "", fmt.Errorf("unexpected sensenova models page: free-beta bundle not found")
}
// resolveSensenovaAssetURL turns assetPath into an absolute URL by resolving
// it as a reference against baseURL. It returns an error when either string
// fails to parse as a URL.
func resolveSensenovaAssetURL(baseURL string, assetPath string) (string, error) {
	base, err := url.Parse(baseURL)
	if err != nil {
		return "", err
	}

	// (*url.URL).Parse parses the reference and resolves it against the receiver.
	resolved, err := base.Parse(assetPath)
	if err != nil {
		return "", err
	}
	return resolved.String(), nil
}
// sensenovaBundleConfirmsFreeBeta reports whether raw contains both signals of
// the public-beta offer: a "free during beta" phrase and an "all models"
// phrase, each accepted in either its Chinese or its English wording.
func sensenovaBundleConfirmsFreeBeta(raw string) bool {
	if !strings.Contains(raw, "公测期完全免费开放") && !strings.Contains(raw, "free during public beta") {
		return false
	}
	if strings.Contains(raw, "所有模型完全开放") {
		return true
	}
	return strings.Contains(raw, "all models included")
}
// fetchRenderedPricingPageWithChromium runs a Chromium-compatible browser
// found in PATH against pageURL with --headless and --dump-dom, and returns
// whatever the process writes to standard output.
//
// The process is bound to a context that expires after timeout. A failure is
// reported as a render timeout only when the command itself returned an error
// and the context deadline has passed. Output from a command that finished
// successfully is kept even if the deadline elapses immediately afterwards.
// Empty output is treated as an error.
func fetchRenderedPricingPageWithChromium(pageURL string, timeout time.Duration) (string, error) {
	browserPath, err := lookupChromiumBinaryForSensenova()
	if err != nil {
		return "", err
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, browserPath,
		"--headless",
		"--no-sandbox",
		"--disable-gpu",
		"--virtual-time-budget=8000",
		"--dump-dom",
		pageURL,
	)
	// Browser diagnostics on stderr are dropped; only stdout is of interest.
	cmd.Stderr = io.Discard

	out, err := cmd.Output()
	if err != nil {
		// Classify as a timeout only when the run actually failed.
		if ctx.Err() == context.DeadlineExceeded {
			return "", fmt.Errorf("chromium render timeout after %s", timeout)
		}
		return "", fmt.Errorf("chromium dump-dom: %w", err)
	}
	if len(out) == 0 {
		return "", fmt.Errorf("chromium dump-dom returned empty output")
	}
	return string(out), nil
}
// lookupChromiumBinaryForSensenova returns the resolved path of the first
// browser executable found in PATH, trying chromium, chromium-browser,
// google-chrome and google-chrome-stable in that order. It returns an error
// when none of them can be found.
func lookupChromiumBinaryForSensenova() (string, error) {
	candidates := [...]string{"chromium", "chromium-browser", "google-chrome", "google-chrome-stable"}
	for i := 0; i < len(candidates); i++ {
		resolved, lookErr := exec.LookPath(candidates[i])
		if lookErr != nil {
			continue
		}
		return resolved, nil
	}
	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
}
// parseSensenovaPricingCatalog turns a fetched fixture into one free-tier
// pricing record per distinct model listed on the docs page.
//
// Before extracting anything it checks three preconditions, in order: the
// models bundle carries the public-beta free wording, the docs mention the
// list-models endpoint, and the docs contain an all-zero pricing object
// example. Model entries are then matched with the card pattern, falling back
// to the table-row pattern when the card pattern finds nothing.
//
// Entries without a name or id, and repeated ids, are skipped. A failure to
// extract a model's docs section aborts the whole parse. An error is also
// returned when no record survives.
func parseSensenovaPricingCatalog(fixture sensenovaPricingFixture) ([]officialPricingRecord, error) {
	docs := fixture.DocsHTML

	switch {
	case !sensenovaBundleConfirmsFreeBeta(fixture.ModelsText):
		return nil, fmt.Errorf("unexpected sensenova models bundle: missing public-beta free signal")
	case !strings.Contains(docs, "GET https://token.sensenova.cn/v1/models"):
		return nil, fmt.Errorf("unexpected sensenova docs content: missing list models endpoint")
	case !sensenovaPricingZeroPattern.MatchString(docs):
		return nil, fmt.Errorf("unexpected sensenova docs content: missing zero pricing object example")
	}

	overview := sensenovaOverviewCardPattern.FindAllStringSubmatch(docs, -1)
	if len(overview) == 0 {
		overview = sensenovaOverviewTableRowPattern.FindAllStringSubmatch(docs, -1)
		if len(overview) == 0 {
			return nil, fmt.Errorf("unexpected sensenova docs content: no model overview cards parsed")
		}
	}

	// SenseTime is the common case, so its metadata is looked up once up front.
	senseTimeCn, senseTimeCountry, senseTimeSite := providerMetadata("SenseTime")

	records := make([]officialPricingRecord, 0, len(overview))
	visited := make(map[string]struct{}, len(overview))
	for _, entry := range overview {
		modelName, modelID := normalizeSensenovaOverviewMatch(entry)
		if modelName == "" || modelID == "" {
			continue
		}
		if _, dup := visited[modelID]; dup {
			continue
		}
		visited[modelID] = struct{}{}

		sectionID := sensenovaSectionIDForModel(modelID)
		section, err := extractHTMLSectionByID(docs, sectionID)
		if err != nil {
			return nil, err
		}

		vendor := sensenovaProviderName(modelID)
		vendorCn, vendorCountry, vendorSite := senseTimeCn, senseTimeCountry, senseTimeSite
		if vendor != "SenseTime" {
			vendorCn, vendorCountry, vendorSite = providerMetadata(vendor)
		}

		record := officialPricingRecord{
			ModelID:         normalizeExternalID("sensenova", modelID),
			ModelName:       modelName,
			ProviderName:    vendor,
			ProviderNameCn:  vendorCn,
			ProviderCountry: vendorCountry,
			ProviderWebsite: vendorSite,
			OperatorName:    "SenseNova API",
			OperatorNameCn:  "日日新开放平台",
			OperatorCountry: "CN",
			OperatorWebsite: defaultSensenovaDocsURL,
			OperatorType:    "official",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      0,
			OutputPrice:     0,
			IsFree:          true,
			ContextLength:   sensenovaContextLength(modelID, section),
			SourceURL:       defaultSensenovaDocsURL,
			ModelSourceURL:  firstNonEmptyText(defaultSensenovaDocsURL+"#"+sectionID, defaultSensenovaDocsURL),
			DateConfidence:  "unknown",
			DateSourceKind:  "official_pricing",
			Modality:        sensenovaModality(modelID, section),
		}
		records = append(records, record)
	}

	if len(records) == 0 {
		return nil, fmt.Errorf("unexpected sensenova pricing content: empty records after parse")
	}
	return records, nil
}
// extractHTMLSectionByID is called by parseSensenovaPricingCatalog with the
// docs HTML and a section id; it returns the section text or an error.
// NOTE(review): the text of this definition looks damaged. The raw string
// opened on the first body line is never closed, and a loop header is fused
// into it. The loop that follows returns (modelName, candidate), so it is
// presumably the tail of normalizeSensenovaOverviewMatch, which
// parseSensenovaPricingCatalog calls but which has no visible definition.
// Restore both functions from version control before editing here.
func extractHTMLSectionByID(raw string, sectionID string) (string, error) {
marker := fmt.Sprintf(`= 2; i-- {
candidate := strings.TrimSpace(match[i])
if looksLikeSensenovaModelID(candidate) {
return modelName, candidate
}
}
return modelName, ""
}
// looksLikeSensenovaModelID reports whether value, after trimming surrounding
// whitespace and lower-casing, contains a hyphen and starts with "sensenova"
// or "deepseek".
func looksLikeSensenovaModelID(value string) bool {
	normalized := strings.ToLower(strings.TrimSpace(value))
	// An empty string has no hyphen, so this guard also rejects blank input.
	if !strings.Contains(normalized, "-") {
		return false
	}
	for _, prefix := range []string{"sensenova", "deepseek"} {
		if strings.HasPrefix(normalized, prefix) {
			return true
		}
	}
	return false
}