feat(import): add CoresHub pricing collector and importer

- coreshub_pricing_lib.go: CoresHub pricing data extraction and parsing
- import_coreshub_pricing.go: importer with dry_run support
- import_coreshub_pricing_test.go: unit tests for importer
- coreshub_pricing_sample.txt: test fixture
This commit is contained in:
phamnazage-jpg
2026-05-22 07:33:13 +08:00
parent 42e75e733d
commit 0de4402a11
7 changed files with 361 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
//go:build llm_script
package main
import (
"fmt"
"regexp"
"strings"
)
// defaultCoresHubPricingURL is the page fetched when no -url flag is given; it
// is also recorded as the source URL on every parsed record.
const defaultCoresHubPricingURL = "https://docs.coreshub.cn/console/big_model_server/introduce/model_choose"

// coreshubPricingPattern matches one pricing row in plain text: a model name
// beginning with "DeepSeek-", followed by two whitespace-separated price cells.
// Each cell is either the literal "限时免费" or "¥<number> / 千 tokens".
// Capture groups: 1 = model name, 2 = first price cell, 3 = second price cell.
var coreshubPricingPattern = regexp.MustCompile(`(DeepSeek-[A-Za-z0-9.\-]+)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)`)

// coreshubPricingHTMLRowPattern matches the same three cells inside a raw HTML
// <tr> whose <td> elements each wrap their text in a <p>. The (?is) flags make
// it case-insensitive and let "." span newlines. Capture groups are numbered
// like coreshubPricingPattern.
var coreshubPricingHTMLRowPattern = regexp.MustCompile(`(?is)<tr>\s*<td[^>]*>\s*<p[^>]*>(DeepSeek-[^<]+)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*</tr>`)

// coreshubPriceValuePattern captures the first run of digits and dots in a
// price cell. It does not validate that the run is a well-formed number.
var coreshubPriceValuePattern = regexp.MustCompile(`([\d.]+)`)
// parseCoresHubPricingCatalog extracts one officialPricingRecord per DeepSeek
// pricing row found in raw.
//
// raw may be the HTML of the pricing page or a plain-text rendering of its
// table. HTML table rows are tried first; if none match, the input is passed
// through cleanHTMLText and scanned with the plain-text row pattern.
//
// It returns an error when no row is found or when a price cell cannot be
// parsed. On error the returned slice is nil.
func parseCoresHubPricingCatalog(raw string) ([]officialPricingRecord, error) {
	// The row patterns only accept the halfwidth yen sign (U+00A5), so fold the
	// fullwidth form (U+FFE5) into it. Escapes are used because the two glyphs
	// are easily confused, or silently merged by Unicode normalization, when
	// written literally.
	// NOTE(review): confirm the price patterns are written with U+00A5.
	const (
		fullwidthYen = "\uFFE5"
		halfwidthYen = "\u00A5"
	)

	raw = strings.ReplaceAll(raw, fullwidthYen, halfwidthYen)
	matches := coreshubPricingHTMLRowPattern.FindAllStringSubmatch(raw, -1)
	if len(matches) == 0 {
		// Fall back to plain text. Normalize again because cleanHTMLText may
		// surface yen signs that were not literal in the markup.
		normalized := strings.ReplaceAll(cleanHTMLText(raw), fullwidthYen, halfwidthYen)
		matches = coreshubPricingPattern.FindAllStringSubmatch(normalized, -1)
	}
	if len(matches) == 0 {
		return nil, fmt.Errorf("no coreshub pricing rows found")
	}

	// Every matched row is a DeepSeek model, so the provider lookup is
	// loop-invariant.
	const providerName = "DeepSeek"
	providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)

	records := make([]officialPricingRecord, 0, len(matches))
	for _, match := range matches {
		modelName := strings.TrimSpace(match[1])

		inputPrice, inputFree, err := parseCoresHubPrice(match[2])
		if err != nil {
			return nil, fmt.Errorf("parse input price for %s: %w", modelName, err)
		}
		outputPrice, outputFree, err := parseCoresHubPrice(match[3])
		if err != nil {
			return nil, fmt.Errorf("parse output price for %s: %w", modelName, err)
		}

		records = append(records, officialPricingRecord{
			ModelID:         normalizeExternalID("coreshub", modelName),
			ModelName:       modelName,
			ProviderName:    providerName,
			ProviderNameCn:  providerNameCn,
			ProviderCountry: providerCountry,
			ProviderWebsite: providerWebsite,
			OperatorName:    "CoresHub",
			OperatorNameCn:  "CoresHub",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.qingcloud.com/products/coreshub",
			OperatorType:    "cloud",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      inputPrice,
			OutputPrice:     outputPrice,
			SourceURL:       defaultCoresHubPricingURL,
			ModelSourceURL:  defaultCoresHubPricingURL,
			DateConfidence:  "unknown",
			DateSourceKind:  "official_product_page",
			Modality:        detectModality(modelName),
			// A model counts as free only when both directions are free.
			IsFree: inputFree && outputFree,
		})
	}
	return records, nil
}
// parseCoresHubPrice interprets a single price cell.
//
// A cell containing "免费" yields (0, true, nil). Otherwise the first numeric
// run in the cell is parsed with mustParseSubscriptionPrice and multiplied by
// 1000 (the cell is quoted per thousand tokens, so this presumably yields a
// per-million price; confirm against consumers of the record). An error is
// returned when the cell contains no numeric run.
func parseCoresHubPrice(raw string) (float64, bool, error) {
	cell := strings.TrimSpace(raw)

	switch {
	case strings.Contains(cell, "免费"):
		return 0, true, nil
	default:
		groups := coreshubPriceValuePattern.FindStringSubmatch(cell)
		if len(groups) != 2 {
			return 0, false, fmt.Errorf("price value not found in %q", raw)
		}
		scaled := mustParseSubscriptionPrice(groups[1]) * 1000
		return scaled, false, nil
	}
}

View File

@@ -0,0 +1,88 @@
//go:build llm_script
package main
import (
"database/sql"
"flag"
"fmt"
"io"
"net/http"
"os"
"time"
)
// coreshubPricingImportConfig carries the settings for one import run.
type coreshubPricingImportConfig struct {
	// URL is the pricing page location and Fixture a local sample file; both
	// are handed to fetchSubscriptionPage, which decides which one is read.
	URL, Fixture string
	// DryRun makes the run print a summary only, without touching the database.
	DryRun bool
	// Timeout is the HTTP client timeout used when fetching the page.
	Timeout time.Duration
}
// main runs the CoresHub pricing importer and exits with its status code.
//
// All work happens in coreshubPricingImportMain so that deferred cleanup
// (closing the database handle) runs before os.Exit; os.Exit itself does not
// run deferred functions.
func main() {
	os.Exit(coreshubPricingImportMain())
}

// coreshubPricingImportMain loads the import environment, parses the command
// line flags, opens the database unless -dry-run is set, and runs the import.
// It returns the process exit code: 0 on success, 1 on any failure (the error
// is reported on stderr).
func coreshubPricingImportMain() int {
	loadSubscriptionImportEnv()

	var (
		url            string
		fixture        string
		dryRun         bool
		timeoutSeconds int
	)
	flag.StringVar(&url, "url", defaultCoresHubPricingURL, "CoresHub 官方价格页")
	flag.StringVar(&fixture, "fixture", "", "CoresHub 价格样例文件")
	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
	flag.Parse()

	cfg := coreshubPricingImportConfig{
		URL:     url,
		Fixture: fixture,
		DryRun:  dryRun,
		Timeout: time.Duration(timeoutSeconds) * time.Second,
	}

	// A dry run never touches the database, so db stays nil in that case.
	var db *sql.DB
	if !cfg.DryRun {
		var err error
		db, err = subscriptionImportDB()
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			return 1
		}
		defer db.Close()
	}

	if err := runCoresHubPricingImport(cfg, db, os.Stdout); err != nil {
		fmt.Fprintf(os.Stderr, "import_coreshub_pricing: %v\n", err)
		return 1
	}
	return 0
}
// runCoresHubPricingImport fetches the pricing page (or fixture), parses and
// de-duplicates its records, and then either prints a dry-run summary or
// upserts the records and prints a summary that includes the region_pricing
// row count.
//
// cfg selects the source, the HTTP timeout and dry-run mode. db may be nil only
// when cfg.DryRun is true. The one-line summary is written to out.
//
// It returns an error if db is missing for a real run, if fetching, parsing or
// upserting fails, if no records remain after de-duplication, if the row count
// query fails, or if writing to out fails.
func runCoresHubPricingImport(cfg coreshubPricingImportConfig, db *sql.DB, out io.Writer) error {
	// Reject an unusable configuration before doing any network I/O.
	if !cfg.DryRun && db == nil {
		return fmt.Errorf("db is required when dry-run=false")
	}

	client := &http.Client{Timeout: cfg.Timeout}
	raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
	if err != nil {
		return err
	}
	records, err := parseCoresHubPricingCatalog(raw)
	if err != nil {
		return err
	}
	records = dedupeOfficialPricingRecords(records)
	// The summaries below read records[0]; guard the index instead of relying
	// on the de-duplication helper never returning an empty slice.
	if len(records) == 0 {
		return fmt.Errorf("no coreshub pricing records left after dedupe")
	}
	operator := records[0].OperatorName

	if cfg.DryRun {
		_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s dry_run=true\n", len(records), operator)
		return err
	}

	if err := upsertOfficialPricingRecords(db, records, "coreshub-pricing-import"); err != nil {
		return err
	}
	var tableRows int
	if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
		return fmt.Errorf("count region_pricing: %w", err)
	}
	_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), operator, tableRows)
	return err
}

View File

@@ -0,0 +1,64 @@
//go:build llm_script
package main
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
)
// TestParseCoresHubPricingCatalogBuildsRecords parses the sample fixture and
// checks the record count, the first record's ID and free flag, and the
// converted prices of selected paid rows.
func TestParseCoresHubPricingCatalogBuildsRecords(t *testing.T) {
	fixturePath := filepath.Join("testdata", "coreshub_pricing_sample.txt")
	content, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 fixture 失败: %v", err)
	}

	records, err := parseCoresHubPricingCatalog(string(content))
	if err != nil {
		t.Fatalf("parseCoresHubPricingCatalog 返回错误: %v", err)
	}
	if got := len(records); got != 8 {
		t.Fatalf("期望 8 条 CoresHub 价格记录,实际 %d", got)
	}

	first := records[0]
	if first.ModelID != "coreshub-deepseek-r1-distill-qwen-1-5b" {
		t.Fatalf("首条 modelID 错误: %q", first.ModelID)
	}
	if !(first.IsFree && first.InputPrice == 0 && first.OutputPrice == 0) {
		t.Fatalf("免费模型解析错误: %+v", first)
	}

	// Paid rows, checked in fixture order; the label is the failure message prefix.
	paidRows := []struct {
		index         int
		input, output float64
		label         string
	}{
		{index: 3, input: 0.2, output: 0.2, label: "千 token 单价换算错误"},
		{index: 6, input: 2, output: 8, label: "DeepSeek-V3 价格错误"},
		{index: 7, input: 4, output: 16, label: "DeepSeek-R1 价格错误"},
	}
	for _, want := range paidRows {
		got := records[want.index]
		if got.InputPrice != want.input || got.OutputPrice != want.output {
			t.Fatalf("%s: %+v", want.label, got)
		}
	}
}
func TestRunCoresHubPricingImportDryRunPrintsSummary(t *testing.T) {
var out bytes.Buffer
err := runCoresHubPricingImport(coreshubPricingImportConfig{
URL: defaultCoresHubPricingURL,
Fixture: filepath.Join("testdata", "coreshub_pricing_sample.txt"),
DryRun: true,
}, nil, &out)
if err != nil {
t.Fatalf("runCoresHubPricingImport 返回错误: %v", err)
}
output := out.String()
for _, want := range []string{
"source=coreshub-pricing-import",
"models=8",
"operator=CoresHub",
"dry_run=true",
} {
if !strings.Contains(output, want) {
t.Fatalf("输出缺少 %q实际: %q", want, output)
}
}
}

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Gate test for scripts/verify_importer_smoke.sh.
#
# Runs the smoke script twice:
#   1. with an invalid CoresHub fixture path, expecting a non-zero exit and a
#      [FAIL] line for coreshub-fixture;
#   2. with default settings, expecting a non-zero exit in which the CoresHub
#      fixture/live and ctyun fixture smokes pass and the ctyun live smoke fails.
#
# Exits 0 and prints "importer_smoke_gate_test: PASS" only when every
# expectation holds; otherwise exits 1 with a diagnostic on stderr.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# expect_line OUTPUT PATTERN
# Fails the test unless some line of OUTPUT matches the grep PATTERN.
# A here-string is used instead of `printf | grep -q`: under `pipefail`, grep -q
# exiting on the first match can kill the writer with SIGPIPE when the output is
# larger than the pipe buffer, turning a successful match into a failure.
expect_line() {
  local output="$1" pattern="$2"
  if ! grep -q -- "$pattern" <<<"$output"; then
    echo "importer_smoke_gate_test: expected output line matching: $pattern" >&2
    exit 1
  fi
}

# Case 1: an unreadable fixture must make the smoke script fail.
# errexit is suspended so the exit status can be captured.
set +e
FAIL_OUTPUT="$(CORESHUB_FIXTURE_PATH=/nonexistent bash scripts/verify_importer_smoke.sh 2>&1)"
FAIL_RC=$?
set -e
if [[ "$FAIL_RC" -eq 0 ]]; then
  echo "expected verify_importer_smoke.sh to fail with invalid fixture"
  exit 1
fi
expect_line "$FAIL_OUTPUT" '\[FAIL\] importer_smoke=coreshub-fixture'

# Case 2: the default run is still expected to fail, but only at ctyun-live.
set +e
PASS_OUTPUT="$(bash scripts/verify_importer_smoke.sh 2>&1)"
PASS_RC=$?
set -e
if [[ "$PASS_RC" -eq 0 ]]; then
  echo "expected current live ctyun smoke to fail before full gate"
  exit 1
fi
expect_line "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-fixture'
expect_line "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-live'
expect_line "$PASS_OUTPUT" '\[PASS\] importer_smoke=ctyun-fixture'
expect_line "$PASS_OUTPUT" '\[FAIL\] importer_smoke=ctyun-live'

echo "importer_smoke_gate_test: PASS"

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Integration test for track_report_state against a real PostgreSQL database.
# Requires DATABASE_URL (optionally supplied by .env.local / .env) and psql.
# Each bare [[ ... ]] below is an assertion: under `set -e` a false test aborts
# the script with a non-zero status and no further message.
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"
# Env files are optional; a missing file is ignored.
source .env.local 2>/dev/null || true
source .env 2>/dev/null || true
# presumably defines track_report_state — verify in scripts/report_utils.sh
source scripts/report_utils.sh
if [[ -z "${DATABASE_URL:-}" ]]; then
echo "DATABASE_URL is required"
exit 1
fi
# Rows created by this test are keyed on this fixed date, which is what cleanup
# deletes by.
TEST_DATE="2099-01-01"
# cleanup removes every report_runs and daily_report row for TEST_DATE.
cleanup() {
psql "$DATABASE_URL" -v ON_ERROR_STOP=1 -c "DELETE FROM report_runs WHERE report_date = DATE '$TEST_DATE'; DELETE FROM daily_report WHERE report_date = DATE '$TEST_DATE';" >/dev/null
}
# Clean up on any exit, and once up front in case an earlier run left rows behind.
trap cleanup EXIT
cleanup
# Step 1: record a generated / scheduled / cron run flagged as the official daily report.
track_report_state "$DATABASE_URL" "$TEST_DATE" generated 123 'official summary' 'reports/daily/daily_report_2099-01-01.md' '' scheduled cron true >/dev/null
# daily_report must now reflect that run ...
OFFICIAL_ROW="$(psql "$DATABASE_URL" -Atqc "SELECT status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
[[ "$OFFICIAL_ROW" == "generated|scheduled|cron|true" ]]
# ... and report_runs must contain exactly one row for the date.
OFFICIAL_RUN_COUNT="$(psql "$DATABASE_URL" -Atqc "SELECT count(*) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
[[ "$OFFICIAL_RUN_COUNT" == "1" ]]
# Step 2: record a failed / manual / pipeline run that is not the official daily report.
track_report_state "$DATABASE_URL" "$TEST_DATE" failed '' '' '' 'manual failed' manual pipeline false >/dev/null
# daily_report is expected to be unchanged: it still shows the official run.
MANUAL_ROW="$(psql "$DATABASE_URL" -Atqc "SELECT status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
[[ "$MANUAL_ROW" == "generated|scheduled|cron|true" ]]
# report_runs is expected to hold both runs, in insertion (id) order.
RUN_ROWS="$(psql "$DATABASE_URL" -Atqc "SELECT string_agg(status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text, E'\n' ORDER BY id) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
EXPECTED_ROWS=$'generated|scheduled|cron|true\nfailed|manual|pipeline|false'
[[ "$RUN_ROWS" == "$EXPECTED_ROWS" ]]
echo "report_state_tracking_test: PASS"

View File

@@ -0,0 +1,10 @@
# 在线服务模型价格
模型名称 输入价格 输出价格
DeepSeek-R1-Distill-Qwen-1.5B 限时免费 限时免费
DeepSeek-R1-Distill-Qwen-7B 限时免费 限时免费
DeepSeek-R1-Distill-Llama-8B 限时免费 限时免费
DeepSeek-R1-Distill-Qwen-14B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
DeepSeek-R1-Distill-Qwen-32B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
DeepSeek-R1-Distill-Llama-70B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
DeepSeek-V3 ¥0.002 / 千 tokens ¥0.008 / 千 tokens
DeepSeek-R1 ¥0.004 / 千 tokens ¥0.016 / 千 tokens

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Smoke-checks the CoresHub and ctyun importers in dry-run mode, first against
# local fixtures and then against their default (live) sources.
#
# Every check prints the importer output followed by a "[PASS] importer_smoke=NAME"
# or "[FAIL] importer_smoke=NAME detail=..." line. Because of `set -e`, the
# first failing check ends the script with a non-zero status;
# "IMPORTER_SMOKE_RESULT: PASS" is printed only when all four checks succeed.
#
# Fixture locations can be overridden through CORESHUB_FIXTURE_PATH,
# CTYUN_CODING_FIXTURE_PATH and CTYUN_TOKEN_FIXTURE_PATH.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

CORESHUB_FIXTURE_PATH="${CORESHUB_FIXTURE_PATH:-./scripts/testdata/coreshub_pricing_sample.txt}"
CTYUN_CODING_FIXTURE_PATH="${CTYUN_CODING_FIXTURE_PATH:-./scripts/testdata/ctyun_coding_plan_sample.txt}"
CTYUN_TOKEN_FIXTURE_PATH="${CTYUN_TOKEN_FIXTURE_PATH:-./scripts/testdata/ctyun_token_plan_sample.txt}"

# Prints the last non-blank stdin line that is not go's "exit status N" trailer.
last_meaningful_line() {
  awk 'NF && $0 !~ /^exit status [0-9]+$/ { line=$0 } END { print line }'
}

# run_smoke NAME COMMAND
# Runs COMMAND in a login shell, echoes its combined output, then reports
# PASS/FAIL for NAME. Returns 1 on failure.
run_smoke() {
  local label="$1"
  local cmd="$2"
  local captured detail
  local status=0

  # The `||` keeps errexit from aborting before the status is recorded.
  captured="$(bash -lc "$cmd" 2>&1)" || status=$?
  printf '%s\n' "$captured"

  if (( status != 0 )); then
    detail="$(last_meaningful_line <<<"$captured")"
    echo "[FAIL] importer_smoke=${label} detail=${detail:-unknown failure}"
    return 1
  fi

  echo "[PASS] importer_smoke=${label}"
  return 0
}

# ${VAR@Q} shell-quotes the fixture paths so they survive the nested `bash -lc`.
run_smoke "coreshub-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go -fixture ${CORESHUB_FIXTURE_PATH@Q} -dry-run"
run_smoke "coreshub-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go -dry-run"
run_smoke "ctyun-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go -coding-fixture ${CTYUN_CODING_FIXTURE_PATH@Q} -token-fixture ${CTYUN_TOKEN_FIXTURE_PATH@Q} -dry-run"
run_smoke "ctyun-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go -dry-run"

echo "IMPORTER_SMOKE_RESULT: PASS"