feat(import): add CoresHub pricing collector and importer
- coreshub_pricing_lib.go: CoresHub pricing data extraction and parsing
- import_coreshub_pricing.go: importer with dry_run support
- import_coreshub_pricing_test.go: unit tests for importer
- coreshub_pricing_sample.txt: test fixture
This commit is contained in:
81
scripts/coreshub_pricing_lib.go
Normal file
81
scripts/coreshub_pricing_lib.go
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultCoresHubPricingURL is the CoresHub documentation page holding the
// model price table.
const defaultCoresHubPricingURL = "https://docs.coreshub.cn/console/big_model_server/introduce/model_choose"

// Patterns for locating DeepSeek pricing rows. In the two row patterns,
// capture group 1 is the model name, group 2 the input price cell and group 3
// the output price cell. A price cell is either "限时免费" or a "¥<number> / 千
// tokens" amount.
var (
	// coreshubPricingPattern matches a row in whitespace-separated plain text.
	coreshubPricingPattern = regexp.MustCompile(`(DeepSeek-[A-Za-z0-9.\-]+)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)`)

	// coreshubPricingHTMLRowPattern matches a <tr> whose three <td> cells each
	// wrap their text in a <p> element.
	coreshubPricingHTMLRowPattern = regexp.MustCompile(`(?is)<tr>\s*<td[^>]*>\s*<p[^>]*>(DeepSeek-[^<]+)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*</tr>`)

	// coreshubPriceValuePattern captures the first run of digits and dots in a
	// price cell.
	coreshubPriceValuePattern = regexp.MustCompile(`([\d.]+)`)
)
|
||||||
|
|
||||||
|
func parseCoresHubPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||||
|
raw = strings.ReplaceAll(raw, "¥", "¥")
|
||||||
|
matches := coreshubPricingHTMLRowPattern.FindAllStringSubmatch(raw, -1)
|
||||||
|
if len(matches) == 0 {
|
||||||
|
normalized := cleanHTMLText(raw)
|
||||||
|
normalized = strings.ReplaceAll(normalized, "¥", "¥")
|
||||||
|
matches = coreshubPricingPattern.FindAllStringSubmatch(normalized, -1)
|
||||||
|
}
|
||||||
|
if len(matches) == 0 {
|
||||||
|
return nil, fmt.Errorf("no coreshub pricing rows found")
|
||||||
|
}
|
||||||
|
|
||||||
|
records := make([]officialPricingRecord, 0, len(matches))
|
||||||
|
for _, match := range matches {
|
||||||
|
modelName := strings.TrimSpace(match[1])
|
||||||
|
providerName := "DeepSeek"
|
||||||
|
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||||||
|
inputPrice, inputFree, err := parseCoresHubPrice(match[2])
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse input price for %s: %w", modelName, err)
|
||||||
|
}
|
||||||
|
outputPrice, outputFree, err := parseCoresHubPrice(match[3])
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse output price for %s: %w", modelName, err)
|
||||||
|
}
|
||||||
|
record := officialPricingRecord{
|
||||||
|
ModelID: normalizeExternalID("coreshub", modelName),
|
||||||
|
ModelName: modelName,
|
||||||
|
ProviderName: providerName,
|
||||||
|
ProviderNameCn: providerNameCn,
|
||||||
|
ProviderCountry: providerCountry,
|
||||||
|
ProviderWebsite: providerWebsite,
|
||||||
|
OperatorName: "CoresHub",
|
||||||
|
OperatorNameCn: "CoresHub",
|
||||||
|
OperatorCountry: "CN",
|
||||||
|
OperatorWebsite: "https://www.qingcloud.com/products/coreshub",
|
||||||
|
OperatorType: "cloud",
|
||||||
|
Region: "CN",
|
||||||
|
Currency: "CNY",
|
||||||
|
InputPrice: inputPrice,
|
||||||
|
OutputPrice: outputPrice,
|
||||||
|
SourceURL: defaultCoresHubPricingURL,
|
||||||
|
ModelSourceURL: defaultCoresHubPricingURL,
|
||||||
|
DateConfidence: "unknown",
|
||||||
|
DateSourceKind: "official_product_page",
|
||||||
|
Modality: detectModality(modelName),
|
||||||
|
IsFree: inputFree && outputFree,
|
||||||
|
}
|
||||||
|
records = append(records, record)
|
||||||
|
}
|
||||||
|
return records, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseCoresHubPrice(raw string) (float64, bool, error) {
|
||||||
|
value := strings.TrimSpace(raw)
|
||||||
|
if strings.Contains(value, "免费") {
|
||||||
|
return 0, true, nil
|
||||||
|
}
|
||||||
|
match := coreshubPriceValuePattern.FindStringSubmatch(value)
|
||||||
|
if len(match) != 2 {
|
||||||
|
return 0, false, fmt.Errorf("price value not found in %q", raw)
|
||||||
|
}
|
||||||
|
price := mustParseSubscriptionPrice(match[1]) * 1000
|
||||||
|
return price, false, nil
|
||||||
|
}
|
||||||
88
scripts/import_coreshub_pricing.go
Normal file
88
scripts/import_coreshub_pricing.go
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// coreshubPricingImportConfig carries the settings of one CoresHub pricing
// import run.
type coreshubPricingImportConfig struct {
	// URL is the pricing page address handed to the page fetcher.
	URL string
	// Fixture is the path of a local sample file handed to the page fetcher;
	// it is empty by default.
	Fixture string
	// DryRun makes the run parse and print a summary without writing to the
	// database.
	DryRun bool
	// Timeout is the HTTP client timeout used for the fetch.
	Timeout time.Duration
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadSubscriptionImportEnv()
|
||||||
|
|
||||||
|
var url string
|
||||||
|
var fixture string
|
||||||
|
var dryRun bool
|
||||||
|
var timeoutSeconds int
|
||||||
|
|
||||||
|
flag.StringVar(&url, "url", defaultCoresHubPricingURL, "CoresHub 官方价格页")
|
||||||
|
flag.StringVar(&fixture, "fixture", "", "CoresHub 价格样例文件")
|
||||||
|
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
||||||
|
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
cfg := coreshubPricingImportConfig{
|
||||||
|
URL: url,
|
||||||
|
Fixture: fixture,
|
||||||
|
DryRun: dryRun,
|
||||||
|
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
var db *sql.DB
|
||||||
|
var err error
|
||||||
|
if !cfg.DryRun {
|
||||||
|
db, err = subscriptionImportDB()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := runCoresHubPricingImport(cfg, db, os.Stdout); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "import_coreshub_pricing: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runCoresHubPricingImport(cfg coreshubPricingImportConfig, db *sql.DB, out io.Writer) error {
|
||||||
|
client := &http.Client{Timeout: cfg.Timeout}
|
||||||
|
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
records, err := parseCoresHubPricingCatalog(raw)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
records = dedupeOfficialPricingRecords(records)
|
||||||
|
if cfg.DryRun {
|
||||||
|
_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if db == nil {
|
||||||
|
return fmt.Errorf("db is required when dry-run=false")
|
||||||
|
}
|
||||||
|
if err := upsertOfficialPricingRecords(db, records, "coreshub-pricing-import"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var tableRows int
|
||||||
|
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
||||||
|
return fmt.Errorf("count region_pricing: %w", err)
|
||||||
|
}
|
||||||
|
_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
||||||
|
return err
|
||||||
|
}
|
||||||
64
scripts/import_coreshub_pricing_test.go
Normal file
64
scripts/import_coreshub_pricing_test.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseCoresHubPricingCatalogBuildsRecords(t *testing.T) {
|
||||||
|
raw, err := os.ReadFile(filepath.Join("testdata", "coreshub_pricing_sample.txt"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("读取 fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
records, err := parseCoresHubPricingCatalog(string(raw))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parseCoresHubPricingCatalog 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
if len(records) != 8 {
|
||||||
|
t.Fatalf("期望 8 条 CoresHub 价格记录,实际 %d", len(records))
|
||||||
|
}
|
||||||
|
if records[0].ModelID != "coreshub-deepseek-r1-distill-qwen-1-5b" {
|
||||||
|
t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
|
||||||
|
}
|
||||||
|
if !records[0].IsFree || records[0].InputPrice != 0 || records[0].OutputPrice != 0 {
|
||||||
|
t.Fatalf("免费模型解析错误: %+v", records[0])
|
||||||
|
}
|
||||||
|
if records[3].InputPrice != 0.2 || records[3].OutputPrice != 0.2 {
|
||||||
|
t.Fatalf("千 token 单价换算错误: %+v", records[3])
|
||||||
|
}
|
||||||
|
if records[6].InputPrice != 2 || records[6].OutputPrice != 8 {
|
||||||
|
t.Fatalf("DeepSeek-V3 价格错误: %+v", records[6])
|
||||||
|
}
|
||||||
|
if records[7].InputPrice != 4 || records[7].OutputPrice != 16 {
|
||||||
|
t.Fatalf("DeepSeek-R1 价格错误: %+v", records[7])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunCoresHubPricingImportDryRunPrintsSummary(t *testing.T) {
|
||||||
|
var out bytes.Buffer
|
||||||
|
err := runCoresHubPricingImport(coreshubPricingImportConfig{
|
||||||
|
URL: defaultCoresHubPricingURL,
|
||||||
|
Fixture: filepath.Join("testdata", "coreshub_pricing_sample.txt"),
|
||||||
|
DryRun: true,
|
||||||
|
}, nil, &out)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runCoresHubPricingImport 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
output := out.String()
|
||||||
|
for _, want := range []string{
|
||||||
|
"source=coreshub-pricing-import",
|
||||||
|
"models=8",
|
||||||
|
"operator=CoresHub",
|
||||||
|
"dry_run=true",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(output, want) {
|
||||||
|
t.Fatalf("输出缺少 %q,实际: %q", want, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
35
scripts/importer_smoke_gate_test.sh
Executable file
35
scripts/importer_smoke_gate_test.sh
Executable file
@@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env bash
# Gate test for scripts/verify_importer_smoke.sh.
#
# Runs the smoke script twice: once with CORESHUB_FIXTURE_PATH pointing at a
# missing file, where the coreshub-fixture check must fail, and once with the
# default settings, where every check is expected to pass except ctyun-live.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# assert_output_contains OUTPUT PATTERN
# Exits 1 with a diagnostic on stderr unless some line of OUTPUT matches the
# grep PATTERN. A here-string is used instead of `printf | grep -q`: under
# pipefail, grep -q quitting at the first match can kill the writer with
# SIGPIPE and make the pipeline fail even though the pattern was found.
assert_output_contains() {
  local output="$1"
  local pattern="$2"

  if ! grep -q -- "$pattern" <<<"$output"; then
    echo "importer_smoke_gate_test: expected output to match: ${pattern}" >&2
    printf '%s\n' "$output" >&2
    exit 1
  fi
}

# Run 1: an unreadable CoresHub fixture must make the smoke script fail.
FAIL_RC=0
FAIL_OUTPUT="$(CORESHUB_FIXTURE_PATH=/nonexistent bash scripts/verify_importer_smoke.sh 2>&1)" || FAIL_RC=$?

if [[ "$FAIL_RC" -eq 0 ]]; then
  echo "expected verify_importer_smoke.sh to fail with invalid fixture"
  exit 1
fi

assert_output_contains "$FAIL_OUTPUT" '\[FAIL\] importer_smoke=coreshub-fixture'

# Run 2: default settings. The overall run is still expected to fail, but only
# at the final ctyun-live check.
PASS_RC=0
PASS_OUTPUT="$(bash scripts/verify_importer_smoke.sh 2>&1)" || PASS_RC=$?

if [[ "$PASS_RC" -eq 0 ]]; then
  echo "expected current live ctyun smoke to fail before full gate"
  exit 1
fi

assert_output_contains "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-fixture'
assert_output_contains "$PASS_OUTPUT" '\[PASS\] importer_smoke=coreshub-live'
assert_output_contains "$PASS_OUTPUT" '\[PASS\] importer_smoke=ctyun-fixture'
assert_output_contains "$PASS_OUTPUT" '\[FAIL\] importer_smoke=ctyun-live'

echo "importer_smoke_gate_test: PASS"
|
||||||
41
scripts/report_state_tracking_test.sh
Executable file
41
scripts/report_state_tracking_test.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
# Integration test for track_report_state (sourced from scripts/report_utils.sh).
#
# Runs against the database named by DATABASE_URL, using a far-future report
# date so the rows it creates can be removed again without touching other data.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Both env files are optional.
source .env.local 2>/dev/null || true
source .env 2>/dev/null || true
source scripts/report_utils.sh

if [[ -z "${DATABASE_URL:-}" ]]; then
  echo "DATABASE_URL is required"
  exit 1
fi

TEST_DATE="2099-01-01"

# cleanup removes every row this test may have written for TEST_DATE.
cleanup() {
  psql "$DATABASE_URL" -v ON_ERROR_STOP=1 -c "DELETE FROM report_runs WHERE report_date = DATE '$TEST_DATE'; DELETE FROM daily_report WHERE report_date = DATE '$TEST_DATE';" >/dev/null
}

# assert_eq LABEL EXPECTED ACTUAL
# Exits 1 with the expected and actual values on stderr when they differ, so
# that a failed check does not abort the script silently under `set -e`.
assert_eq() {
  local label="$1"
  local expected="$2"
  local actual="$3"

  if [[ "$actual" != "$expected" ]]; then
    printf 'report_state_tracking_test: %s mismatch\n  expected: %s\n  actual:   %s\n' \
      "$label" "$expected" "$actual" >&2
    exit 1
  fi
}

# daily_report_row prints "status|run_kind|trigger_source|is_official_daily" for
# the daily_report row of TEST_DATE.
daily_report_row() {
  psql "$DATABASE_URL" -Atqc "SELECT status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text FROM daily_report WHERE report_date = DATE '$TEST_DATE';"
}

trap cleanup EXIT
# Start from a clean slate in case an earlier run left rows behind.
cleanup

# Step 1: an official scheduled run is recorded in both tables.
track_report_state "$DATABASE_URL" "$TEST_DATE" generated 123 'official summary' 'reports/daily/daily_report_2099-01-01.md' '' scheduled cron true >/dev/null

OFFICIAL_ROW="$(daily_report_row)"
assert_eq "daily_report after official run" "generated|scheduled|cron|true" "$OFFICIAL_ROW"

OFFICIAL_RUN_COUNT="$(psql "$DATABASE_URL" -Atqc "SELECT count(*) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
assert_eq "report_runs count after official run" "1" "$OFFICIAL_RUN_COUNT"

# Step 2: a later non-official failed run must leave the daily_report row
# unchanged while still being appended to report_runs.
track_report_state "$DATABASE_URL" "$TEST_DATE" failed '' '' '' 'manual failed' manual pipeline false >/dev/null

MANUAL_ROW="$(daily_report_row)"
assert_eq "daily_report after manual run" "generated|scheduled|cron|true" "$MANUAL_ROW"

RUN_ROWS="$(psql "$DATABASE_URL" -Atqc "SELECT string_agg(status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text, E'\n' ORDER BY id) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
EXPECTED_ROWS=$'generated|scheduled|cron|true\nfailed|manual|pipeline|false'
assert_eq "report_runs history" "$EXPECTED_ROWS" "$RUN_ROWS"

echo "report_state_tracking_test: PASS"
|
||||||
10
scripts/testdata/coreshub_pricing_sample.txt
vendored
Normal file
10
scripts/testdata/coreshub_pricing_sample.txt
vendored
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# 在线服务模型价格
|
||||||
|
模型名称 输入价格 输出价格
|
||||||
|
DeepSeek-R1-Distill-Qwen-1.5B 限时免费 限时免费
|
||||||
|
DeepSeek-R1-Distill-Qwen-7B 限时免费 限时免费
|
||||||
|
DeepSeek-R1-Distill-Llama-8B 限时免费 限时免费
|
||||||
|
DeepSeek-R1-Distill-Qwen-14B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||||
|
DeepSeek-R1-Distill-Qwen-32B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||||
|
DeepSeek-R1-Distill-Llama-70B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
|
||||||
|
DeepSeek-V3 ¥0.002 / 千 tokens ¥0.008 / 千 tokens
|
||||||
|
DeepSeek-R1 ¥0.004 / 千 tokens ¥0.016 / 千 tokens
|
||||||
42
scripts/verify_importer_smoke.sh
Executable file
42
scripts/verify_importer_smoke.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
# Smoke checks for the CoresHub and ctyun importers, run from the repository
# root in dry-run mode.

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Fixture locations; each can be overridden through the environment.
: "${CORESHUB_FIXTURE_PATH:=./scripts/testdata/coreshub_pricing_sample.txt}"
: "${CTYUN_CODING_FIXTURE_PATH:=./scripts/testdata/ctyun_coding_plan_sample.txt}"
: "${CTYUN_TOKEN_FIXTURE_PATH:=./scripts/testdata/ctyun_token_plan_sample.txt}"
|
||||||
|
|
||||||
|
# last_meaningful_line reads stdin and prints the last line that is neither
# blank nor of the form "exit status N". Prints an empty line when there is no
# such line.
last_meaningful_line() {
  awk '
    NF && $0 !~ /^exit status [0-9]+$/ { line = $0 }
    END { print line }
  '
}
|
||||||
|
|
||||||
|
# run_smoke NAME COMMAND
# Runs COMMAND in a login bash shell, echoes its combined stdout/stderr, then
# prints a "[PASS] importer_smoke=NAME" line and returns 0, or a
# "[FAIL] importer_smoke=NAME detail=..." line and returns 1. The detail is the
# last meaningful output line of the command.
run_smoke() {
  local label="$1" cmd="$2"
  local captured status detail

  # errexit is suspended so that a failing command can be reported instead of
  # aborting the script inside the command substitution.
  set +e
  captured="$(bash -lc "$cmd" 2>&1)"
  status=$?
  set -e

  printf '%s\n' "$captured"

  if (( status != 0 )); then
    detail="$(last_meaningful_line <<<"$captured")"
    echo "[FAIL] importer_smoke=${label} detail=${detail:-unknown failure}"
    return 1
  fi

  echo "[PASS] importer_smoke=${label}"
  return 0
}
|
||||||
|
|
||||||
|
# Shared pieces of the `go run` command lines used below.
go_run="go run -tags llm_script"
coreshub_sources="./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go"
ctyun_sources="./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go"

# Each importer is checked against its fixture and then without one (the
# "-live" checks). Under `set -e` the first failing check ends the script, so
# the final PASS line is only reached when all four succeed.
run_smoke "coreshub-fixture" "${go_run} ${coreshub_sources} -fixture ${CORESHUB_FIXTURE_PATH@Q} -dry-run"
run_smoke "coreshub-live" "${go_run} ${coreshub_sources} -dry-run"
run_smoke "ctyun-fixture" "${go_run} ${ctyun_sources} -coding-fixture ${CTYUN_CODING_FIXTURE_PATH@Q} -token-fixture ${CTYUN_TOKEN_FIXTURE_PATH@Q} -dry-run"
run_smoke "ctyun-live" "${go_run} ${ctyun_sources} -dry-run"

echo "IMPORTER_SMOKE_RESULT: PASS"
|
||||||
Reference in New Issue
Block a user