Files
llm-intelligence/scripts/ctyun_subscription_lib.go
phamnazage-jpg 8d1312203f feat(import): extend CTYun subscription collector
- ctyun_subscription_lib.go: extend CTYun subscription data extraction
- import_ctyun_subscription_test.go: update tests for CTYun
- ctyun_token_plan_sample.txt: updated test fixture
2026-05-22 07:33:38 +08:00

410 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//go:build llm_script
package main
import (
"fmt"
"regexp"
"strings"
)
// Default CTYun source pages. Each URL is recorded as the SourceURL of the
// subscription records built for the corresponding plan family.
const (
// defaultCTYunCodingPlanURL is the SourceURL stored on Coding Plan records.
defaultCTYunCodingPlanURL = "https://www.ctyun.cn/document/11061839/11092368"
// defaultCTYunTokenPlanURL is the SourceURL stored on Token Plan records.
defaultCTYunTokenPlanURL = "https://www.ctyun.cn/act/AI/zhuanxiang"
)
// parseCTYunSubscriptionCatalog turns the raw text of the CTYun coding plan
// page (codingRaw) and token plan page (tokenRaw) into one list of import
// records, coding plan records first.
//
// The publication timestamp is derived once from the first non-empty input and
// handed to both plan parsers; whether it was actually found is stamped on
// every returned record via PublishedAtKnown. The first parser error aborts the
// whole call.
func parseCTYunSubscriptionCatalog(codingRaw string, tokenRaw string) ([]subscriptionImportRecord, error) {
	stamp, stampKnown := publishedAtFromText(firstNonEmptyText(codingRaw, tokenRaw))

	coding, codingErr := parseCTYunCodingPlan(codingRaw, stamp)
	if codingErr != nil {
		return nil, codingErr
	}

	token, tokenErr := parseCTYunTokenPlan(tokenRaw, stamp)
	if tokenErr != nil {
		return nil, tokenErr
	}

	combined := append(coding, token...)
	for idx := 0; idx < len(combined); idx++ {
		combined[idx].PublishedAtKnown = stampKnown
	}
	return combined, nil
}
// parseCTYunCodingPlan builds the three monthly Coding Plan records (Lite, Pro,
// Max) from the text of the CTYun coding plan page.
//
// raw must mention both "GLM Lite" and "GLM Max", contain a price row with
// three "N元/月" columns, and contain at least three monthly prompt limits;
// otherwise an error is returned. Prices and limits are assigned to the tiers
// by position (first match is Lite, second Pro, third Max). publishedAt is
// copied onto every record and also passed to effectiveDateFromPublishedAt.
func parseCTYunCodingPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
	hasLite := strings.Contains(raw, "GLM Lite")
	hasMax := strings.Contains(raw, "GLM Max")
	if !(hasLite && hasMax) {
		return nil, fmt.Errorf("ctyun coding plan tiers not found")
	}

	prices := regexp.MustCompile(`包月价格\s+(\d+)元/月\s+(\d+)元/月\s+(\d+)元/月`).FindStringSubmatch(raw)
	if len(prices) != 4 {
		return nil, fmt.Errorf("ctyun coding plan monthly prices not found")
	}

	limits := regexp.MustCompile(`每月最多约([\d,]+)次prompts`).FindAllStringSubmatch(raw, -1)
	if len(limits) < 3 {
		return nil, fmt.Errorf("ctyun coding plan monthly limits not found")
	}

	models := extractCTYunCodingModels(raw)

	// Per-tier constants, listed in the same order as the price columns and
	// the prompt-limit matches on the page.
	tiers := [...]struct {
		tier     string
		planCode string
		planName string
		notes    string
	}{
		{
			tier:     "Lite",
			planCode: "ctyun-coding-plan-lite-monthly",
			planName: "天翼云 Coding Plan Lite月付",
			notes:    "每 5 小时约 80 次 prompts每周约 400 次 prompts。",
		},
		{
			tier:     "Pro",
			planCode: "ctyun-coding-plan-pro-monthly",
			planName: "天翼云 Coding Plan Pro月付",
			notes:    "每 5 小时约 400 次 prompts每周约 2,000 次 prompts。",
		},
		{
			tier:     "Max",
			planCode: "ctyun-coding-plan-max-monthly",
			planName: "天翼云 Coding Plan Max月付",
			notes:    "每 5 小时约 1,600 次 prompts每周约 8,000 次 prompts。",
		},
	}

	out := make([]subscriptionImportRecord, 0, len(tiers))
	for pos, spec := range tiers {
		out = append(out, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "coding_plan",
			PlanCode:        spec.planCode,
			PlanName:        spec.planName,
			Tier:            spec.tier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			// prices[0] is the whole match, so tier pos uses group pos+1.
			ListPrice:     mustParseSubscriptionPrice(prices[pos+1]),
			PriceUnit:     "CNY/month",
			QuotaValue:    mustParseSubscriptionInt64(limits[pos][1]),
			QuotaUnit:     "prompts/month",
			PlanScope:     "Coding Plan",
			ModelScope:    models,
			SourceURL:     defaultCTYunCodingPlanURL,
			PublishedAt:   publishedAt,
			EffectiveDate: effectiveDateFromPublishedAt(publishedAt),
			Notes:         spec.notes,
		})
	}
	return out, nil
}
// parseCTYunTokenPlan extracts Token Plan records from raw by trying the known
// page layouts in order: the normalized text layout, then the HTML card layout.
// The first layout that reports a match wins. If neither matches, the legacy
// layout parser is used and its result, including any error, is returned as is.
func parseCTYunTokenPlan(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
	preferredLayouts := []func(string, string) ([]subscriptionImportRecord, bool){
		parseCTYunTokenPlanNormalizedLayout,
		parseCTYunTokenPlanCardLayout,
	}
	for _, parseLayout := range preferredLayouts {
		if found, matched := parseLayout(raw, publishedAt); matched {
			return found, nil
		}
	}
	return parseCTYunTokenPlanLegacyLayout(raw, publishedAt)
}
// parseCTYunTokenPlanNormalizedLayout parses the line-oriented text rendering
// of the CTYun Token Plan page.
//
// A plan starts at a line of the form "Token Plan <tier>" whose tier is one of
// the known Chinese tier names, and extends up to the next line that starts
// with "Token Plan". Inside a plan:
//   - the model comes from the line prefixed with "支持模型:";
//   - the quota comes from a line containing "Tokens";
//   - the price comes from three consecutive lines: the integer part, the
//     ".fraction" part and the unit "元/个/月";
//   - known page-chrome lines (buttons, counters, tool lists) are dropped;
//   - every other line is concatenated into Notes.
//
// Plans missing a model, quota or price are skipped. The function returns
// (nil, false) when no complete plan was found, which lets the caller try a
// different layout.
func parseCTYunTokenPlanNormalizedLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
	// Compiled once per call instead of once per scanned line.
	priceIntPattern := regexp.MustCompile(`^[0-9]+$`)
	priceFracPattern := regexp.MustCompile(`^\.[0-9]+$`)

	lines := strings.Split(raw, "\n")
	codeByTier := map[string]string{
		"基础版": "basic",
		"专业版": "pro",
		"旗舰版": "flagship",
		"轻享版": "starter",
		"畅享版": "plus",
		"尊享版": "vip",
	}
	records := make([]subscriptionImportRecord, 0, 6)
	for i := 0; i < len(lines); i++ {
		line := strings.TrimSpace(lines[i])
		if !strings.HasPrefix(line, "Token Plan") {
			continue
		}
		rawTier := strings.TrimSpace(strings.TrimPrefix(line, "Token Plan"))
		tierCode, ok := codeByTier[rawTier]
		if !ok {
			continue
		}

		// Collect the non-empty lines that belong to this plan.
		j := i + 1
		block := make([]string, 0, 12)
		for ; j < len(lines); j++ {
			next := strings.TrimSpace(lines[j])
			if strings.HasPrefix(next, "Token Plan") {
				break
			}
			if next != "" {
				block = append(block, next)
			}
		}
		// Resume the outer scan at the next plan header (the loop's i++ lands
		// on j). Nothing between i and j starts with "Token Plan", so skipping
		// those lines cannot miss a plan.
		i = j - 1

		model := ""
		quota := ""
		price := ""
		notesParts := make([]string, 0, 4)
		for k := 0; k < len(block); k++ {
			item := block[k]
			switch {
			case item == "免费领取Tokens":
				// Promo button. It has to be tested before the generic
				// "Tokens" case below, which would otherwise treat it as the
				// quota line.
				continue
			case strings.HasPrefix(item, "支持模型:"):
				model = strings.TrimSpace(strings.TrimPrefix(item, "支持模型:"))
			case strings.Contains(item, "Tokens"):
				quota = strings.TrimSpace(strings.TrimSuffix(item, "Tokens"))
			case k+2 < len(block) && priceIntPattern.MatchString(item) && priceFracPattern.MatchString(block[k+1]) && block[k+2] == "元/个/月":
				price = item + block[k+1]
				// The fraction and unit lines are part of the price; consume
				// them so they do not end up in Notes.
				k += 2
			case item == "产品优势", item == "立即订购", strings.HasPrefix(item, "支持工具:"), strings.HasPrefix(item, "已抢购"), strings.HasSuffix(item, "用户"), item == "展开更多":
				continue
			default:
				notesParts = append(notesParts, item)
			}
		}
		if model == "" || quota == "" || price == "" {
			continue
		}

		notes := "天翼云大模型 AI 专项活动页套餐。"
		if len(notesParts) > 0 {
			notes = strings.Join(notesParts, "")
		}
		records = append(records, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "token_plan",
			PlanCode:        "ctyun-token-plan-" + tierCode,
			PlanName:        "天翼云 Token Plan " + rawTier,
			Tier:            rawTier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			ListPrice:       mustParseSubscriptionPrice(price),
			PriceUnit:       "CNY/month",
			QuotaValue:      parseChineseTokenQuota(quota),
			QuotaUnit:       "tokens/month",
			PlanScope:       "Token Plan",
			ModelScope:      []string{model},
			SourceURL:       defaultCTYunTokenPlanURL,
			PublishedAt:     publishedAt,
			EffectiveDate:   effectiveDateFromPublishedAt(publishedAt),
			Notes:           notes,
		})
	}
	if len(records) == 0 {
		return nil, false
	}
	return records, true
}
// parseCTYunTokenPlanCardLayout parses the HTML "card" rendering of the CTYun
// Token Plan page.
//
// Each card is located by its title span ("Token Plan <tier>") and runs up to
// the card's button wrapper. From the card body the function reads the
// supported model, the token quota, the price (integer and fraction live in
// two separate spans) and the feature bullet texts, which are concatenated
// into Notes.
//
// The parse is all-or-nothing: if no card is found, or any card has an unknown
// tier or lacks a model, quota or price, it returns (nil, false) so the caller
// can fall back to another layout.
func parseCTYunTokenPlanCardLayout(raw string, publishedAt string) ([]subscriptionImportRecord, bool) {
	cardPattern := regexp.MustCompile(`(?s)<span title="(Token Plan [^"]+)" class="card-header-title-text".*?</span>(.*?)<div class="card-btns-wrap"`)
	cards := cardPattern.FindAllStringSubmatch(raw, -1)
	if len(cards) == 0 {
		return nil, false
	}

	// The per-field patterns do not depend on the card, so compile them once
	// here rather than once per card inside the loop.
	modelPattern := regexp.MustCompile(`支持模型:([^<]+)</span>`)
	quotaPattern := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?亿|[0-9]+万)Tokens`)
	pricePattern := regexp.MustCompile(`<span class="price-new-big"[^>]*>\s*([0-9]+)\s*</span>\s*<span class="price-new-big"[^>]*>\s*\.([0-9]+)\s*</span>\s*<span class="price-new-unit"[^>]*>元/个/月</span>`)
	featurePattern := regexp.MustCompile(`card-content-gou-content"[^>]*>([^<]+)</span>`)

	codeByTier := map[string]string{
		"基础版": "basic",
		"专业版": "pro",
		"旗舰版": "flagship",
	}
	records := make([]subscriptionImportRecord, 0, len(cards))
	for _, card := range cards {
		title := strings.TrimSpace(card[1])
		body := card[2]
		rawTier := strings.TrimSpace(strings.TrimPrefix(title, "Token Plan "))
		tierCode, ok := codeByTier[rawTier]
		if !ok {
			return nil, false
		}
		modelMatch := modelPattern.FindStringSubmatch(body)
		if len(modelMatch) != 2 {
			return nil, false
		}
		quotaMatch := quotaPattern.FindStringSubmatch(body)
		if len(quotaMatch) != 2 {
			return nil, false
		}
		priceMatch := pricePattern.FindStringSubmatch(body)
		if len(priceMatch) != 3 {
			return nil, false
		}

		notes := "天翼云大模型 AI 专项活动页套餐。"
		if featureLines := featurePattern.FindAllStringSubmatch(body, -1); len(featureLines) > 0 {
			parts := make([]string, 0, len(featureLines))
			for _, line := range featureLines {
				text := strings.TrimSpace(line[1])
				// Model and quota bullets are already captured in dedicated
				// fields, so keep them out of the notes.
				if text == "" || strings.HasPrefix(text, "支持模型:") || strings.Contains(text, "Tokens") {
					continue
				}
				parts = append(parts, text)
			}
			if len(parts) > 0 {
				notes = strings.Join(parts, "")
			}
		}
		records = append(records, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "token_plan",
			PlanCode:        "ctyun-token-plan-" + tierCode,
			PlanName:        "天翼云 " + title,
			Tier:            rawTier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			ListPrice:       mustParseSubscriptionPrice(priceMatch[1] + "." + priceMatch[2]),
			PriceUnit:       "CNY/month",
			QuotaValue:      parseChineseTokenQuota(quotaMatch[1]),
			QuotaUnit:       "tokens/month",
			PlanScope:       "Token Plan",
			ModelScope:      []string{strings.TrimSpace(modelMatch[1])},
			SourceURL:       defaultCTYunTokenPlanURL,
			PublishedAt:     publishedAt,
			EffectiveDate:   effectiveDateFromPublishedAt(publishedAt),
			Notes:           notes,
		})
	}
	return records, true
}
// parseCTYunTokenPlanLegacyLayout parses the older text layout of the CTYun
// Token Plan page, where each plan reads "Token Plan <tier><quota>Tokens包",
// followed later by a "支持模型:" line and a decimal price ending in "元/个".
//
// It returns an error when no plan matches the pattern or when a matched tier
// name is not one of the known tiers. Records produced here are priced and
// quota'd per pack ("CNY/pack", "tokens/pack").
func parseCTYunTokenPlanLegacyLayout(raw string, publishedAt string) ([]subscriptionImportRecord, error) {
	pattern := regexp.MustCompile(`Token Plan ([^\n]+?)(\d+(?:\.\d+)?亿|\d+万)Tokens包[\s\S]*?支持模型:([^\n]+)[\s\S]*?(\d+\s*\.\s*\d+)\s*元/个`)
	matches := pattern.FindAllStringSubmatch(raw, -1)
	if len(matches) == 0 {
		return nil, fmt.Errorf("unexpected ctyun token plan count: 0")
	}
	codeByTier := map[string]string{
		"Lite": "lite",
		"Pro":  "pro",
		"Max":  "max",
		"轻享包":  "starter",
		"畅享包":  "plus",
		"尊享包":  "vip",
	}
	records := make([]subscriptionImportRecord, 0, len(matches))
	for _, match := range matches {
		rawTier := strings.TrimSpace(match[1])
		tierCode, ok := codeByTier[rawTier]
		if !ok {
			return nil, fmt.Errorf("unexpected ctyun token plan tier: %s", rawTier)
		}
		quotaValue := parseChineseTokenQuota(match[2])
		// The price group allows any \s (spaces, tabs, newlines) around the
		// decimal point, so strip every kind of whitespace, not only U+0020,
		// before handing the number to the parser.
		price := mustParseSubscriptionPrice(strings.Join(strings.Fields(match[4]), ""))
		records = append(records, subscriptionImportRecord{
			ProviderName:    "Telecom",
			ProviderNameCn:  "中国电信",
			ProviderCountry: "CN",
			ProviderWebsite: "https://www.ctyun.cn",
			OperatorName:    "CTYun",
			OperatorNameCn:  "天翼云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://www.ctyun.cn",
			OperatorType:    "cloud",
			PlanFamily:      "token_plan",
			PlanCode:        "ctyun-token-plan-" + tierCode,
			PlanName:        "天翼云 Token Plan " + rawTier,
			Tier:            rawTier,
			BillingCycle:    "monthly",
			Currency:        "CNY",
			ListPrice:       price,
			PriceUnit:       "CNY/pack",
			QuotaValue:      quotaValue,
			QuotaUnit:       "tokens/pack",
			PlanScope:       "Token Plan",
			ModelScope:      []string{strings.TrimSpace(match[3])},
			SourceURL:       defaultCTYunTokenPlanURL,
			PublishedAt:     publishedAt,
			EffectiveDate:   effectiveDateFromPublishedAt(publishedAt),
			Notes:           "天翼云大模型 AI 专项活动页套餐。",
		})
	}
	return records, nil
}
// parseChineseTokenQuota converts a token quota written with Chinese magnitude
// units into an absolute token count.
//
// Surrounding whitespace and embedded ASCII spaces are removed first, then an
// optional trailing "Tokens包" or "Tokens" label is stripped. A value
// containing "亿" is scaled by 100,000,000, one containing "万" by 10,000, and
// anything else is parsed as a plain integer. Number parsing is delegated to
// parseDecimalMultiplier and mustParseSubscriptionInt64.
func parseChineseTokenQuota(raw string) int64 {
	// Normalise whitespace before looking at the suffix; otherwise trailing
	// whitespace or a space inside the label would hide it from the trim.
	cleaned := strings.ReplaceAll(strings.TrimSpace(raw), " ", "")
	switch {
	case strings.HasSuffix(cleaned, "Tokens包"):
		cleaned = strings.TrimSuffix(cleaned, "Tokens包")
	case strings.HasSuffix(cleaned, "Tokens"):
		cleaned = strings.TrimSuffix(cleaned, "Tokens")
	}

	switch {
	case strings.Contains(cleaned, "亿"):
		return parseDecimalMultiplier(strings.TrimSuffix(cleaned, "亿"), 100000000)
	case strings.Contains(cleaned, "万"):
		return parseDecimalMultiplier(strings.TrimSuffix(cleaned, "万"), 10000)
	default:
		return mustParseSubscriptionInt64(cleaned)
	}
}
// extractCTYunCodingModels returns the model names listed on the coding plan
// page: every non-blank, whitespace-trimmed line that follows a line equal to
// "支持模型", up to (not including) the first line equal to "用量限制".
//
// The stop line ends the scan even if the start line has not been seen yet. A
// repeated start line is not added to the result. The returned slice is never
// nil.
func extractCTYunCodingModels(raw string) []string {
	const (
		sectionStart = "支持模型"
		sectionEnd   = "用量限制"
	)

	found := make([]string, 0, 8)
	inSection := false
	for _, rawLine := range strings.Split(raw, "\n") {
		entry := strings.TrimSpace(rawLine)
		if entry == sectionEnd {
			break
		}
		if entry == sectionStart {
			inSection = true
			continue
		}
		if inSection && entry != "" {
			found = append(found, entry)
		}
	}
	return found
}