From 8d1312203f3359b88fb69e2116c1792a8aea1f24 Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Fri, 22 May 2026 07:33:38 +0800 Subject: [PATCH] feat(import): extend CTYun subscription collector - ctyun_subscription_lib.go: extend CTYun subscription data extraction - import_ctyun_subscription_test.go: update tests for CTYun - ctyun_token_plan_sample.txt: updated test fixture --- scripts/ctyun_subscription_lib.go | 201 ++++++++++++++++++- scripts/import_ctyun_subscription_test.go | 36 +++- scripts/testdata/ctyun_token_plan_sample.txt | 57 +++--- 3 files changed, 251 insertions(+), 43 deletions(-) diff --git a/scripts/ctyun_subscription_lib.go b/scripts/ctyun_subscription_lib.go index 9392358..83a79eb 100644 --- a/scripts/ctyun_subscription_lib.go +++ b/scripts/ctyun_subscription_lib.go @@ -133,10 +133,196 @@ func parseCTYunCodingPlan(raw string, publishedAt string) ([]subscriptionImportR } func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) { + if records, ok := parseCTYunTokenPlanNormalizedLayout(raw, publishedAt); ok { + return records, nil + } + if records, ok := parseCTYunTokenPlanCardLayout(raw, publishedAt); ok { + return records, nil + } + return parseCTYunTokenPlanLegacyLayout(raw, publishedAt) +} + +func parseCTYunTokenPlanNormalizedLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) { + lines := strings.Split(raw, "\n") + codeByTier := map[string]string{ + "基础版": "basic", + "专业版": "pro", + "旗舰版": "flagship", + "轻享版": "starter", + "畅享版": "plus", + "尊享版": "vip", + } + + records := make([]subscriptionImportRecord, 0, 6) + for i := 0; i < len(lines); i++ { + line := strings.TrimSpace(lines[i]) + if !strings.HasPrefix(line, "Token Plan") { + continue + } + rawTier := strings.TrimSpace(strings.TrimPrefix(line, "Token Plan")) + tierCode, ok := codeByTier[rawTier] + if !ok { + continue + } + + j := i + 1 + block := make([]string, 0, 12) + for ; j < len(lines); j++ { + next := strings.TrimSpace(lines[j]) + if strings.HasPrefix(next, "Token Plan") { + break + } + if next != "" { + block = append(block, next) + } + } + model := "" + quota := "" + price := "" + notesParts := make([]string, 0, 4) + for k := 0; k < len(block); k++ { + item := block[k] + switch { + case strings.HasPrefix(item, "支持模型:"): + model = strings.TrimSpace(strings.TrimPrefix(item, "支持模型:")) + case strings.Contains(item, "Tokens"): + quota = strings.TrimSpace(strings.TrimSuffix(item, "Tokens")) + case regexp.MustCompile(`^[0-9]+$`).MatchString(item) && k+2 < len(block) && regexp.MustCompile(`^\.[0-9]+$`).MatchString(block[k+1]) && block[k+2] == "元/个/月": + price = item + block[k+1] + case item == "产品优势", item == "立即订购", strings.HasPrefix(item, "支持工具:"), strings.HasPrefix(item, "已抢购"), strings.HasSuffix(item, "用户"), item == "展开更多", item == "免费领取Tokens": + continue + default: + notesParts = append(notesParts, item) + } + } + if model == "" || quota == "" || price == "" { + continue + } + notes := "天翼云大模型 AI 专项活动页套餐。" + if len(notesParts) > 0 { + notes = strings.Join(notesParts, ";") + } + records = append(records, subscriptionImportRecord{ + ProviderName: "Telecom", + ProviderNameCn: "中国电信", + ProviderCountry: "CN", + ProviderWebsite: "https://www.ctyun.cn", + OperatorName: "CTYun", + OperatorNameCn: "天翼云", + OperatorCountry: "CN", + OperatorWebsite: "https://www.ctyun.cn", + OperatorType: "cloud", + PlanFamily: "token_plan", + PlanCode: "ctyun-token-plan-" + tierCode, + PlanName: "天翼云 Token Plan " + rawTier, + Tier: rawTier, + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(price), + PriceUnit: "CNY/month", + QuotaValue: parseChineseTokenQuota(quota), + QuotaUnit: "tokens/month", + PlanScope: "Token Plan", + ModelScope: []string{model}, + SourceURL: defaultCTYunTokenPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: notes, + }) + i = j - 1 + } + if len(records) == 0 { + return nil, false + } + return records, true +} + +func parseCTYunTokenPlanCardLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) { + cardPattern := regexp.MustCompile(`(?s)(.*?)
`).FindStringSubmatch(body) + if len(modelMatch) != 2 { + return nil, false + } + quotaMatch := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?亿|[0-9]+万)Tokens`).FindStringSubmatch(body) + if len(quotaMatch) != 2 { + return nil, false + } + priceMatch := regexp.MustCompile(`]*>\s*([0-9]+)\s*\s*]*>\s*\.([0-9]+)\s*\s*]*>元/个/月`).FindStringSubmatch(body) + if len(priceMatch) != 3 { + return nil, false + } + notes := "天翼云大模型 AI 专项活动页套餐。" + if featureLines := regexp.MustCompile(`card-content-gou-content"[^>]*>([^<]+)`).FindAllStringSubmatch(body, -1); len(featureLines) > 0 { + parts := make([]string, 0, len(featureLines)) + for _, line := range featureLines { + text := strings.TrimSpace(line[1]) + if text == "" || strings.HasPrefix(text, "支持模型:") || strings.Contains(text, "Tokens") { + continue + } + parts = append(parts, text) + } + if len(parts) > 0 { + notes = strings.Join(parts, ";") + } + } + + records = append(records, subscriptionImportRecord{ + ProviderName: "Telecom", + ProviderNameCn: "中国电信", + ProviderCountry: "CN", + ProviderWebsite: "https://www.ctyun.cn", + OperatorName: "CTYun", + OperatorNameCn: "天翼云", + OperatorCountry: "CN", + OperatorWebsite: "https://www.ctyun.cn", + OperatorType: "cloud", + PlanFamily: "token_plan", + PlanCode: "ctyun-token-plan-" + tierCode, + PlanName: "天翼云 " + title, + Tier: rawTier, + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: mustParseSubscriptionPrice(priceMatch[1] + "." + priceMatch[2]), + PriceUnit: "CNY/month", + QuotaValue: parseChineseTokenQuota(quotaMatch[1]), + QuotaUnit: "tokens/month", + PlanScope: "Token Plan", + ModelScope: []string{strings.TrimSpace(modelMatch[1])}, + SourceURL: defaultCTYunTokenPlanURL, + PublishedAt: publishedAt, + EffectiveDate: effectiveDateFromPublishedAt(publishedAt), + Notes: notes, + }) + } + return records, true +} + +func parseCTYunTokenPlanLegacyLayout(raw string, publishedAt string) ([]subscriptionImportRecord, error) { pattern := regexp.MustCompile(`Token Plan ([^\n]+?)(\d+(?:\.\d+)?亿|\d+万)Tokens包[\s\S]*?支持模型:([^\n]+)[\s\S]*?(\d+\s*\.\s*\d+)\s*元/个`) matches := pattern.FindAllStringSubmatch(raw, -1) - if len(matches) != 6 { - return nil, fmt.Errorf("unexpected ctyun token plan count: %d", len(matches)) + if len(matches) == 0 { + return nil, fmt.Errorf("unexpected ctyun token plan count: 0") } codeByTier := map[string]string{ @@ -151,13 +337,12 @@ func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRe records := make([]subscriptionImportRecord, 0, len(matches)) for _, match := range matches { rawTier := strings.TrimSpace(match[1]) - tierCode := codeByTier[rawTier] + tierCode, ok := codeByTier[rawTier] + if !ok { + return nil, fmt.Errorf("unexpected ctyun token plan tier: %s", rawTier) + } quotaValue := parseChineseTokenQuota(match[2]) price := mustParseSubscriptionPrice(strings.ReplaceAll(match[4], " ", "")) - planName := "天翼云 Token Plan " + rawTier - if rawTier == "Lite" || rawTier == "Pro" || rawTier == "Max" { - planName = "天翼云 Token Plan " + rawTier - } records = append(records, subscriptionImportRecord{ ProviderName: "Telecom", ProviderNameCn: "中国电信", @@ -170,7 +355,7 @@ func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRe OperatorType: "cloud", PlanFamily: "token_plan", PlanCode: "ctyun-token-plan-" + tierCode, - PlanName: planName, + PlanName: "天翼云 Token Plan " + rawTier, Tier: rawTier, BillingCycle: "monthly", Currency: "CNY", diff --git a/scripts/import_ctyun_subscription_test.go b/scripts/import_ctyun_subscription_test.go index 45e49b9..7be1d91 100644 --- a/scripts/import_ctyun_subscription_test.go +++ b/scripts/import_ctyun_subscription_test.go @@ -24,8 +24,8 @@ func TestParseCTYunSubscriptionBuildsPlans(t *testing.T) { if err != nil { t.Fatalf("parseCTYunSubscriptionCatalog 失败: %v", err) } - if len(plans) != 9 { - t.Fatalf("期望 9 条天翼云套餐记录,实际 %d", len(plans)) + if len(plans) != 6 { + t.Fatalf("期望 6 条天翼云套餐记录,实际 %d", len(plans)) } if plans[0].PlanCode != "ctyun-coding-plan-lite-monthly" { @@ -34,14 +34,40 @@ func TestParseCTYunSubscriptionBuildsPlans(t *testing.T) { if plans[0].ListPrice != 49 { t.Fatalf("GLM Lite 月价错误: %v", plans[0].ListPrice) } - if plans[3].PlanCode != "ctyun-token-plan-lite" { + if plans[3].PlanCode != "ctyun-token-plan-basic" { t.Fatalf("首条 token planCode 错误: %q", plans[3].PlanCode) } - if plans[len(plans)-1].PlanCode != "ctyun-token-plan-vip" { + if plans[3].QuotaValue != 15000000 || plans[3].PriceUnit != "CNY/month" || plans[3].QuotaUnit != "tokens/month" { + t.Fatalf("基础版 token plan 解析错误: %+v", plans[3]) + } + if plans[len(plans)-1].PlanCode != "ctyun-token-plan-flagship" { t.Fatalf("末条 token planCode 错误: %q", plans[len(plans)-1].PlanCode) } } +func TestParseCTYunTokenPlanLegacyLayout(t *testing.T) { + legacy := `Token Plan Lite1500万Tokens包 +支持模型:GLM5 +39 .90 元/个 + +Token Plan 轻享包1000万Tokens包 +支持模型:Deepseek v3.2 +9 .90 元/个` + records, err := parseCTYunTokenPlan(legacy, "2026-05-18") + if err != nil { + t.Fatalf("legacy token plan 解析失败: %v", err) + } + if len(records) != 2 { + t.Fatalf("期望 2 条 legacy token 记录,实际 %d", len(records)) + } + if records[0].PlanCode != "ctyun-token-plan-lite" || records[0].PriceUnit != "CNY/pack" || records[0].QuotaUnit != "tokens/pack" { + t.Fatalf("legacy Lite 解析错误: %+v", records[0]) + } + if records[1].PlanCode != "ctyun-token-plan-starter" { + t.Fatalf("legacy 轻享包解析错误: %+v", records[1]) + } +} + func TestRunCTYunSubscriptionImportDryRunPrintsSummary(t *testing.T) { var out bytes.Buffer err := runCTYunSubscriptionImport(ctyunSubscriptionImportConfig{ @@ -56,7 +82,7 @@ func TestRunCTYunSubscriptionImportDryRunPrintsSummary(t *testing.T) { output := out.String() for _, want := range []string{ "source=ctyun-subscription-import", - "plans=9", + "plans=6", "provider=Telecom", "operator=CTYun", "dry_run=true", diff --git a/scripts/testdata/ctyun_token_plan_sample.txt b/scripts/testdata/ctyun_token_plan_sample.txt index 5fd3812..4400ce0 100644 --- a/scripts/testdata/ctyun_token_plan_sample.txt +++ b/scripts/testdata/ctyun_token_plan_sample.txt @@ -1,32 +1,29 @@ -# 天翼云大模型AI专项 +
+Token Plan 基础版 +
+
包月固定高额请求额度、模型免费畅用、全编码工具无缝接入
+
支持模型:GLM5
+
支持工具:TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等
+
1500万Tokens
+
39.90元/个/月
+
-Token Plan Lite1500万Tokens包 -面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。 -支持模型:GLM5 -支持工具:OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等 -39 .90 元/个 +
+Token Plan 专业版 +
+
包月固定高额请求额度、模型免费畅用、全编码工具无缝接入
+
支持模型:GLM5
+
支持工具:TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等
+
7000万Tokens
+
159.90元/个/月
+
-Token Plan Pro7000万Tokens包 -面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。 -支持模型:GLM5 -159 .90 元/个 - -Token Plan Max1.5亿Tokens包 -面向开发者/中小企业,适用于项目开发迭代,大幅提升编码效率与代码质量。 -支持模型:GLM5 -299 .90 元/个 - -Token Plan 轻享包1000万Tokens包 -适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。 -支持模型:Deepseek v3.2 -9 .90 元/个 - -Token Plan 畅享包4000万Tokens包 -适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。 -支持模型:Deepseek v3.2 -29 .90 元/个 - -Token Plan 尊享包8000万Tokens包 -适用于个人/家庭 API 及业务调用,有效解决按需单价高、预算难控等问题。 -支持模型:Deepseek v3.2 -49 .90 元/个 +
+Token Plan 旗舰版 +
+
包月固定高额请求额度、模型免费畅用、全编码工具无缝接入
+
支持模型:GLM5
+
支持工具:TeleClaw、OpenClaw、OpenCode、Cursor、Cline、Chatbox、Codebuddy、Trae等
+
1.5亿Tokens
+
299.90元/个/月
+