feat(import): add CoresHub pricing collector and importer
- coreshub_pricing_lib.go: CoresHub pricing data extraction and parsing
- import_coreshub_pricing.go: importer with dry-run support
- import_coreshub_pricing_test.go: unit tests for the importer
- coreshub_pricing_sample.txt: test fixture
This commit is contained in:
81
scripts/coreshub_pricing_lib.go
Normal file
81
scripts/coreshub_pricing_lib.go
Normal file
@@ -0,0 +1,81 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// defaultCoresHubPricingURL is the CoresHub documentation page that lists the
// online-service model prices.
const defaultCoresHubPricingURL = "https://docs.coreshub.cn/console/big_model_server/introduce/model_choose"

// Patterns used to locate and decode pricing rows. Each row pattern captures
// the model name, the input price cell and the output price cell, in that
// order; a price cell is either the "limited-time free" marker or a
// "¥<number> / 千 tokens" amount.
var (
	// coreshubPricingPattern matches a row in whitespace-separated plain text.
	coreshubPricingPattern = regexp.MustCompile(`(DeepSeek-[A-Za-z0-9.\-]+)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)`)

	// coreshubPricingHTMLRowPattern matches the same row as an HTML <tr> whose
	// three <td> cells each wrap their text in a <p> element.
	coreshubPricingHTMLRowPattern = regexp.MustCompile(`(?is)<tr>\s*<td[^>]*>\s*<p[^>]*>(DeepSeek-[^<]+)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*</tr>`)

	// coreshubPriceValuePattern captures the first run of digits and dots in a
	// price cell.
	coreshubPriceValuePattern = regexp.MustCompile(`([\d.]+)`)
)
|
||||
|
||||
func parseCoresHubPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
raw = strings.ReplaceAll(raw, "¥", "¥")
|
||||
matches := coreshubPricingHTMLRowPattern.FindAllStringSubmatch(raw, -1)
|
||||
if len(matches) == 0 {
|
||||
normalized := cleanHTMLText(raw)
|
||||
normalized = strings.ReplaceAll(normalized, "¥", "¥")
|
||||
matches = coreshubPricingPattern.FindAllStringSubmatch(normalized, -1)
|
||||
}
|
||||
if len(matches) == 0 {
|
||||
return nil, fmt.Errorf("no coreshub pricing rows found")
|
||||
}
|
||||
|
||||
records := make([]officialPricingRecord, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
modelName := strings.TrimSpace(match[1])
|
||||
providerName := "DeepSeek"
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||||
inputPrice, inputFree, err := parseCoresHubPrice(match[2])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse input price for %s: %w", modelName, err)
|
||||
}
|
||||
outputPrice, outputFree, err := parseCoresHubPrice(match[3])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse output price for %s: %w", modelName, err)
|
||||
}
|
||||
record := officialPricingRecord{
|
||||
ModelID: normalizeExternalID("coreshub", modelName),
|
||||
ModelName: modelName,
|
||||
ProviderName: providerName,
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "CoresHub",
|
||||
OperatorNameCn: "CoresHub",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: "https://www.qingcloud.com/products/coreshub",
|
||||
OperatorType: "cloud",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: inputPrice,
|
||||
OutputPrice: outputPrice,
|
||||
SourceURL: defaultCoresHubPricingURL,
|
||||
ModelSourceURL: defaultCoresHubPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_product_page",
|
||||
Modality: detectModality(modelName),
|
||||
IsFree: inputFree && outputFree,
|
||||
}
|
||||
records = append(records, record)
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
func parseCoresHubPrice(raw string) (float64, bool, error) {
|
||||
value := strings.TrimSpace(raw)
|
||||
if strings.Contains(value, "免费") {
|
||||
return 0, true, nil
|
||||
}
|
||||
match := coreshubPriceValuePattern.FindStringSubmatch(value)
|
||||
if len(match) != 2 {
|
||||
return 0, false, fmt.Errorf("price value not found in %q", raw)
|
||||
}
|
||||
price := mustParseSubscriptionPrice(match[1]) * 1000
|
||||
return price, false, nil
|
||||
}
|
||||
88
scripts/import_coreshub_pricing.go
Normal file
88
scripts/import_coreshub_pricing.go
Normal file
@@ -0,0 +1,88 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// coreshubPricingImportConfig carries the settings for one importer run.
type coreshubPricingImportConfig struct {
	// URL is the pricing page address passed to fetchSubscriptionPage.
	URL string
	// Fixture is a sample-file path passed to fetchSubscriptionPage; empty
	// when no fixture was requested.
	Fixture string
	// DryRun makes the importer print its summary without touching the
	// database.
	DryRun bool
	// Timeout is applied to the HTTP client used for the fetch.
	Timeout time.Duration
}
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var dryRun bool
|
||||
var timeoutSeconds int
|
||||
|
||||
flag.StringVar(&url, "url", defaultCoresHubPricingURL, "CoresHub 官方价格页")
|
||||
flag.StringVar(&fixture, "fixture", "", "CoresHub 价格样例文件")
|
||||
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||
flag.Parse()
|
||||
|
||||
cfg := coreshubPricingImportConfig{
|
||||
URL: url,
|
||||
Fixture: fixture,
|
||||
DryRun: dryRun,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
}
|
||||
|
||||
var db *sql.DB
|
||||
var err error
|
||||
if !cfg.DryRun {
|
||||
db, err = subscriptionImportDB()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
}
|
||||
|
||||
if err := runCoresHubPricingImport(cfg, db, os.Stdout); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "import_coreshub_pricing: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runCoresHubPricingImport(cfg coreshubPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records, err := parseCoresHubPricingCatalog(raw)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
records = dedupeOfficialPricingRecords(records)
|
||||
if cfg.DryRun {
|
||||
_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||
return err
|
||||
}
|
||||
if db == nil {
|
||||
return fmt.Errorf("db is required when dry-run=false")
|
||||
}
|
||||
if err := upsertOfficialPricingRecords(db, records, "coreshub-pricing-import"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var tableRows int
|
||||
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||
return fmt.Errorf("count region_pricing: %w", err)
|
||||
}
|
||||
_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||
return err
|
||||
}
|
||||
64
scripts/import_coreshub_pricing_test.go
Normal file
64
scripts/import_coreshub_pricing_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCoresHubPricingCatalogBuildsRecords(t *testing.T) {
|
||||
raw, err := os.ReadFile(filepath.Join("testdata", "coreshub_pricing_sample.txt"))
|
||||
if err != nil {
|
||||
t.Fatalf("读取 fixture 失败: %v", err)
|
||||
}
|
||||
|
||||
records, err := parseCoresHubPricingCatalog(string(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("parseCoresHubPricingCatalog 返回错误: %v", err)
|
||||
}
|
||||
if len(records) != 8 {
|
||||
t.Fatalf("期望 8 条 CoresHub 价格记录,实际 %d", len(records))
|
||||
}
|
||||
if records[0].ModelID != "coreshub-deepseek-r1-distill-qwen-1-5b" {
|
||||
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||
}
|
||||
if !records[0].IsFree || records[0].InputPrice != 0 || records[0].OutputPrice != 0 {
|
||||
t.Fatalf("免费模型解析错误: %+v", records[0])
|
||||
}
|
||||
if records[3].InputPrice != 0.2 || records[3].OutputPrice != 0.2 {
|
||||
t.Fatalf("千 token 单价换算错误: %+v", records[3])
|
||||
}
|
||||
if records[6].InputPrice != 2 || records[6].OutputPrice != 8 {
|
||||
t.Fatalf("DeepSeek-V3 价格错误: %+v", records[6])
|
||||
}
|
||||
if records[7].InputPrice != 4 || records[7].OutputPrice != 16 {
|
||||
t.Fatalf("DeepSeek-R1 价格错误: %+v", records[7])
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunCoresHubPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
err := runCoresHubPricingImport(coreshubPricingImportConfig{
|
||||
URL: defaultCoresHubPricingURL,
|
||||
Fixture: filepath.Join("testdata", "coreshub_pricing_sample.txt"),
|
||||
DryRun: true,
|
||||
}, nil, &out)
|
||||
if err != nil {
|
||||
t.Fatalf("runCoresHubPricingImport 返回错误: %v", err)
|
||||
}
|
||||
output := out.String()
|
||||
for _, want := range []string{
|
||||
"source=coreshub-pricing-import",
|
||||
"models=8",
|
||||
"operator=CoresHub",
|
||||
"dry_run=true",
|
||||
} {
|
||||
if !strings.Contains(output, want) {
|
||||
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||
}
|
||||
}
|
||||
}
|
||||
35
scripts/importer_smoke_gate_test.sh
Executable file
35
scripts/importer_smoke_gate_test.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
#
# Gate test for scripts/verify_importer_smoke.sh. The smoke script is run
# twice and its exit status and [PASS]/[FAIL] markers are checked:
#   1. with CORESHUB_FIXTURE_PATH pointing at a missing file, the run must
#      fail and report the coreshub-fixture smoke as failed;
#   2. with default settings, the run is still expected to fail, with the
#      coreshub-fixture, coreshub-live and ctyun-fixture smokes passing and
#      the ctyun-live smoke failing.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# require_marker OUTPUT PATTERN
# Exits 1 with a diagnostic unless some line of OUTPUT matches the grep
# pattern PATTERN. The text is fed through a here-string rather than
# `printf | grep -q`: under pipefail, grep -q exiting at its first match can
# kill the writer with SIGPIPE and turn a successful match into a failure.
require_marker() {
  local output="$1" pattern="$2"
  if ! grep -q -- "$pattern" <<<"$output"; then
    echo "importer_smoke_gate_test: expected output to match: ${pattern}" >&2
    exit 1
  fi
}

# Run 1: invalid fixture path. `|| FAIL_RC=$?` records the status without
# letting errexit abort this script.
FAIL_RC=0
FAIL_OUTPUT="$(CORESHUB_FIXTURE_PATH=/nonexistent bash scripts/verify_importer_smoke.sh 2>&1)" || FAIL_RC=$?

if [[ "$FAIL_RC" -eq 0 ]]; then
  echo "expected verify_importer_smoke.sh to fail with invalid fixture"
  exit 1
fi

require_marker "$FAIL_OUTPUT" '\[FAIL\] importer_smoke=coreshub-fixture'

# Run 2: default settings.
PASS_RC=0
PASS_OUTPUT="$(bash scripts/verify_importer_smoke.sh 2>&1)" || PASS_RC=$?

if [[ "$PASS_RC" -eq 0 ]]; then
  echo "expected current live ctyun smoke to fail before full gate"
  exit 1
fi

require_marker "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-fixture'
require_marker "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-live'
require_marker "$PASS_OUTPUT" '\[PASS\] importer_smoke=ctyun-fixture'
require_marker "$PASS_OUTPUT" '\[FAIL\] importer_smoke=ctyun-live'

echo "importer_smoke_gate_test: PASS"
|
||||
41
scripts/report_state_tracking_test.sh
Executable file
41
scripts/report_state_tracking_test.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
#
# Integration test for track_report_state (sourced from
# scripts/report_utils.sh). It needs a database reachable through DATABASE_URL
# and works on the report date 2099-01-01, whose rows are deleted before the
# test starts and again when the script exits.
#
# Checked behavior:
#   - an official scheduled run creates the daily_report row and one
#     report_runs row;
#   - a later failed manual run leaves the daily_report row unchanged and
#     appends a second report_runs row.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Both env files are optional; report_utils.sh is required.
source .env.local 2>/dev/null || true
source .env 2>/dev/null || true
source scripts/report_utils.sh

[[ -n "${DATABASE_URL:-}" ]] || {
  echo "DATABASE_URL is required"
  exit 1
}

TEST_DATE="2099-01-01"

# SQL expression rendering one row as status|run_kind|trigger_source|is_official_daily.
RST_TEST_STATE_COLUMNS="status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text"

# rst_test_query SQL
# Runs SQL with psql in unaligned, tuples-only, quiet mode and prints the result.
rst_test_query() {
  psql "$DATABASE_URL" -Atqc "$1"
}

# Removes every row this test may have written for TEST_DATE.
cleanup() {
  psql "$DATABASE_URL" -v ON_ERROR_STOP=1 -c "DELETE FROM report_runs WHERE report_date = DATE '$TEST_DATE'; DELETE FROM daily_report WHERE report_date = DATE '$TEST_DATE';" >/dev/null
}
trap cleanup EXIT
cleanup

# Step 1: official scheduled run.
track_report_state "$DATABASE_URL" "$TEST_DATE" generated 123 'official summary' 'reports/daily/daily_report_2099-01-01.md' '' scheduled cron true >/dev/null

OFFICIAL_ROW="$(rst_test_query "SELECT ${RST_TEST_STATE_COLUMNS} FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
[[ "$OFFICIAL_ROW" == "generated|scheduled|cron|true" ]]

OFFICIAL_RUN_COUNT="$(rst_test_query "SELECT count(*) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
[[ "$OFFICIAL_RUN_COUNT" == "1" ]]

# Step 2: failed manual run for the same date.
track_report_state "$DATABASE_URL" "$TEST_DATE" failed '' '' '' 'manual failed' manual pipeline false >/dev/null

# The daily_report row must still describe the official run.
MANUAL_ROW="$(rst_test_query "SELECT ${RST_TEST_STATE_COLUMNS} FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
[[ "$MANUAL_ROW" == "generated|scheduled|cron|true" ]]

# report_runs must hold both runs, in insertion (id) order.
RUN_ROWS="$(rst_test_query "SELECT string_agg(${RST_TEST_STATE_COLUMNS}, E'\n' ORDER BY id) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
EXPECTED_ROWS=$'generated|scheduled|cron|true\nfailed|manual|pipeline|false'
[[ "$RUN_ROWS" == "$EXPECTED_ROWS" ]]

echo "report_state_tracking_test: PASS"
|
||||
10
scripts/testdata/coreshub_pricing_sample.txt
vendored
Normal file
10
scripts/testdata/coreshub_pricing_sample.txt
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# 在线服务模型价格
|
||||
模型名称 输入价格 输出价格
|
||||
DeepSeek-R1-Distill-Qwen-1.5B 限时免费 限时免费
|
||||
DeepSeek-R1-Distill-Qwen-7B 限时免费 限时免费
|
||||
DeepSeek-R1-Distill-Llama-8B 限时免费 限时免费
|
||||
DeepSeek-R1-Distill-Qwen-14B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||
DeepSeek-R1-Distill-Qwen-32B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||
DeepSeek-R1-Distill-Llama-70B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||
DeepSeek-V3 ¥0.002 / 千 tokens ¥0.008 / 千 tokens
|
||||
DeepSeek-R1 ¥0.004 / 千 tokens ¥0.016 / 千 tokens
|
||||
42
scripts/verify_importer_smoke.sh
Executable file
42
scripts/verify_importer_smoke.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
#
# Smoke-runs the CoresHub and ctyun importers in -dry-run mode, once against
# fixture files and once without them, printing a [PASS]/[FAIL] line per run.
# The fixture locations can be overridden through the environment variables
# defaulted below.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Fall back to the bundled fixtures when a path is unset or empty.
: "${CORESHUB_FIXTURE_PATH:=./scripts/testdata/coreshub_pricing_sample.txt}"
: "${CTYUN_CODING_FIXTURE_PATH:=./scripts/testdata/ctyun_coding_plan_sample.txt}"
: "${CTYUN_TOKEN_FIXTURE_PATH:=./scripts/testdata/ctyun_token_plan_sample.txt}"
|
||||
|
||||
# last_meaningful_line
# Reads stdin and prints the last line that is neither blank nor of the form
# "exit status <number>". Prints an empty line when no such line exists.
last_meaningful_line() {
  awk '
    !NF || /^exit status [0-9]+$/ { next }
    { line = $0 }
    END { print line }
  '
}
|
||||
|
||||
# run_smoke NAME COMMAND
# Runs COMMAND through `bash -lc`, echoes everything it printed (stdout and
# stderr combined) and then reports the outcome:
#   exit 0     -> prints "[PASS] importer_smoke=NAME" and returns 0
#   otherwise  -> prints "[FAIL] importer_smoke=NAME detail=<last meaningful
#                 output line>" ("unknown failure" when there is none) and
#                 returns 1
run_smoke() {
  local name="$1"
  local command="$2"
  local output rc tail

  # Running the command as an `if` condition captures its status without
  # letting errexit abort the script.
  if output="$(bash -lc "$command" 2>&1)"; then
    rc=0
  else
    rc=$?
  fi

  printf '%s\n' "$output"

  if (( rc != 0 )); then
    tail="$(last_meaningful_line <<<"$output")"
    echo "[FAIL] importer_smoke=${name} detail=${tail:-unknown failure}"
    return 1
  fi

  echo "[PASS] importer_smoke=${name}"
  return 0
}
|
||||
|
||||
# Shared `go run` prefixes: build tag plus the source files of each importer.
CORESHUB_GO_RUN="go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go"
CTYUN_GO_RUN="go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go"

# Fixture paths are expanded with @Q so they stay a single shell word when
# run_smoke hands the command string to `bash -lc`.
run_smoke "coreshub-fixture" "${CORESHUB_GO_RUN} -fixture ${CORESHUB_FIXTURE_PATH@Q} -dry-run"
run_smoke "coreshub-live" "${CORESHUB_GO_RUN} -dry-run"
run_smoke "ctyun-fixture" "${CTYUN_GO_RUN} -coding-fixture ${CTYUN_CODING_FIXTURE_PATH@Q} -token-fixture ${CTYUN_TOKEN_FIXTURE_PATH@Q} -dry-run"
run_smoke "ctyun-live" "${CTYUN_GO_RUN} -dry-run"

# Reached only when every smoke above returned 0; under errexit the first
# failing run_smoke ends the script.
echo "IMPORTER_SMOKE_RESULT: PASS"
|
||||
Reference in New Issue
Block a user