chore: prepare repository for publishing

This commit is contained in:
phamnazage-jpg
2026-05-13 14:42:45 +08:00
parent 55e506b2b5
commit 77e6610fd2
118 changed files with 27373 additions and 1009 deletions

View File

@@ -0,0 +1,331 @@
//go:build llm_script
package main
import (
"fmt"
"html"
"io"
"net/http"
"os"
"regexp"
"sort"
"strings"
"time"
)
const (
// defaultTencentCatalogURL is the Tencent Cloud documentation page that holds the catalog.
// NOTE(review): presumably used when no URL is configured; the consumer is not in this section — confirm.
defaultTencentCatalogURL = "https://cloud.tencent.com/document/product/1823/130060"
)
// defaultTencentCatalogTimeout is a default duration of twenty seconds.
// NOTE(review): presumably the HTTP timeout applied when none is configured; confirm where it is consumed.
var defaultTencentCatalogTimeout = 20 * time.Second
// fetchTencentCatalogConfig carries the settings used to obtain the raw catalog document.
type fetchTencentCatalogConfig struct {
// URL is the address requested with HTTP GET when Fixture is blank.
URL string
// DryRun: NOTE(review): presumably suppresses persisting results; confirm against the code that reads it.
DryRun bool
// Timeout: NOTE(review): presumably bounds the HTTP request; confirm where the client is configured.
Timeout time.Duration
// Fixture, when not blank, is the path of a local file whose contents are returned instead of fetching URL.
Fixture string
}
// tencentCatalog is the parsed result of one catalog document.
type tencentCatalog struct {
// UpdatedAt is the text that follows the "最近更新时间:" label on the first line carrying it.
UpdatedAt string
// Plans lists the subscription plans found under "### 套餐详情" sections.
Plans []tencentPlan
// Models lists the models found under "### 可用模型" sections.
Models []tencentModel
}
// tencentPlan describes one subscription plan, built from consecutive lines of the document.
type tencentPlan struct {
// Series is the series heading in effect when the plan was parsed.
Series string
// Tier is the line after the plan name with surrounding parentheses and spaces trimmed.
Tier string
// Quota is the line that contains "Tokens", stored verbatim.
Quota string
// Price is the line that contains "元/月", stored verbatim.
Price string
// BillingCycle is the line that contains "订阅月", stored verbatim.
BillingCycle string
// Scene is the optional line after the price; it stays empty when that line starts a section or another plan.
Scene string
}
// tencentModel describes one model entry, built from consecutive lines of the document.
type tencentModel struct {
// Series is the series heading in effect when the model was parsed.
Series string
// Name is the line immediately before the model identifier.
Name string
// ModelID is the identifier line (lowercase letters, digits, '.', '_' and '-').
ModelID string
// ContextLength is derived from the notes ("<n>K 上下文" / "<n>M 上下文", scaled by 1024); 0 when absent.
ContextLength int
// Notes holds the free-text lines that follow the identifier up to the next entry or heading.
Notes []string
}
// fetchTencentCatalogContent returns the raw catalog document as a string.
//
// When cfg.Fixture is not blank, the file at that path is read and returned
// and no network request is made. Otherwise cfg.URL is requested with HTTP
// GET through client, and any status other than 200 OK is reported as an
// error.
//
// A nil client is tolerated: a client bounded by cfg.Timeout is created
// instead of dereferencing nil. The status and body-read errors name the URL
// they relate to; errors from the file read and from the transport are
// returned unchanged because they already identify the path or URL.
func fetchTencentCatalogContent(cfg fetchTencentCatalogConfig, client *http.Client) (string, error) {
	if strings.TrimSpace(cfg.Fixture) != "" {
		data, err := os.ReadFile(cfg.Fixture)
		if err != nil {
			return "", err
		}
		return string(data), nil
	}

	if client == nil {
		// A zero cfg.Timeout leaves the request unbounded, as with any http.Client.
		client = &http.Client{Timeout: cfg.Timeout}
	}

	req, err := http.NewRequest(http.MethodGet, cfg.URL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", "llm-intelligence/tencent-catalog-fetcher")

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status %d from %s", resp.StatusCode, cfg.URL)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read response body from %s: %w", cfg.URL, err)
	}
	return string(body), nil
}
// parseTencentCatalog turns the raw catalog document into a tencentCatalog.
//
// The document is first reduced to trimmed, non-empty lines. The lines are
// then scanned once:
//   - the first line carrying the update label sets UpdatedAt;
//   - a series heading sets the current series and leaves any section;
//   - "### 套餐详情" and "### 可用模型" switch to plan or model parsing;
//   - inside a section each position is offered to the matching parser, and
//     on success the scan resumes after the lines that parser consumed.
//
// An error is returned, together with a zero catalog, when the update time,
// the plans or the models are missing.
func parseTencentCatalog(raw string) (tencentCatalog, error) {
	const (
		modeNone   = ""
		modePlans  = "plans"
		modeModels = "models"
	)

	var (
		result tencentCatalog
		series string
		mode   = modeNone
	)

	rows := normalizeTencentCatalogLines(raw)
	for idx := 0; idx < len(rows); idx++ {
		row := rows[idx]

		// Only the first update-time line counts; later ones fall through.
		if result.UpdatedAt == "" {
			if stamp := extractUpdatedAt(row); stamp != "" {
				result.UpdatedAt = stamp
				continue
			}
		}

		if heading := extractSeriesHeading(row); heading != "" {
			series, mode = heading, modeNone
			continue
		}

		if row == "### 套餐详情" {
			mode = modePlans
			continue
		}
		if row == "### 可用模型" {
			mode = modeModels
			continue
		}

		if mode == modePlans {
			if plan, last, ok := tryParseTencentPlan(rows, idx, series); ok {
				result.Plans = append(result.Plans, plan)
				// last is the final consumed line; the loop increment moves past it.
				idx = last
			}
		} else if mode == modeModels {
			if model, last, ok := tryParseTencentModel(rows, idx, series); ok {
				result.Models = append(result.Models, model)
				idx = last
			}
		}
	}

	switch {
	case result.UpdatedAt == "":
		return tencentCatalog{}, fmt.Errorf("catalog updated_at not found")
	case len(result.Plans) == 0:
		return tencentCatalog{}, fmt.Errorf("catalog plans not found")
	case len(result.Models) == 0:
		return tencentCatalog{}, fmt.Errorf("catalog models not found")
	}
	return result, nil
}
// normalizeTencentCatalogLines converts the raw document into a list of
// trimmed, non-empty text lines.
//
// Steps, in order: HTML entities are unescaped; a fixed set of line-ending
// tags (<br> variants and several closing tags) becomes a newline; every
// remaining tag is removed; CRLF and lone CR become LF; the text is split on
// LF and blank lines are dropped. The returned slice is never nil.
func normalizeTencentCatalogLines(raw string) []string {
	lineEndingTags := strings.NewReplacer(
		"<br>", "\n",
		"<br/>", "\n",
		"<br />", "\n",
		"</p>", "\n",
		"</div>", "\n",
		"</li>", "\n",
		"</tr>", "\n",
		"</td>", "\n",
		"</h1>", "\n",
		"</h2>", "\n",
		"</h3>", "\n",
		"</h4>", "\n",
		"</pre>", "\n",
		"</main>", "\n",
	)

	text := lineEndingTags.Replace(html.UnescapeString(raw))
	text = regexp.MustCompile(`<[^>]+>`).ReplaceAllString(text, "")
	// "\r\n" is listed first so a CRLF pair collapses to a single newline.
	text = strings.NewReplacer("\r\n", "\n", "\r", "\n").Replace(text)

	pieces := strings.Split(text, "\n")
	kept := make([]string, 0, len(pieces))
	for i := range pieces {
		if trimmed := strings.TrimSpace(pieces[i]); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return kept
}
// extractUpdatedAt returns the text that follows the "最近更新时间" label on
// line, with surrounding whitespace removed, or "" when line does not start
// with that label followed by a colon.
//
// Both the ASCII colon and the fullwidth colon (U+FF1A), which is the usual
// punctuation after a Chinese label, are accepted.
func extractUpdatedAt(line string) string {
	// "\uff1a" is the fullwidth colon, written as an escape so it survives re-encoding.
	for _, prefix := range []string{"最近更新时间:", "最近更新时间\uff1a"} {
		if strings.HasPrefix(line, prefix) {
			return strings.TrimSpace(strings.TrimPrefix(line, prefix))
		}
	}
	return ""
}
// extractSeriesHeading recognises a second-level heading ("## ...") that
// names a plan series and returns the series name, or "" for any other line.
//
// A heading qualifies when its text contains "Token Plan" or "Coding Plan".
// A trailing "套餐" is removed from the returned name and the result is
// trimmed.
func extractSeriesHeading(line string) string {
	const marker = "## "
	if !strings.HasPrefix(line, marker) {
		return ""
	}

	title := strings.TrimSpace(line[len(marker):])
	for _, keyword := range [...]string{"Token Plan", "Coding Plan"} {
		if strings.Contains(title, keyword) {
			return strings.TrimSpace(strings.TrimSuffix(title, "套餐"))
		}
	}
	return ""
}
// tryParseTencentPlan attempts to read one plan record beginning at
// lines[start].
//
// A record is five consecutive lines: a known plan name, a tier line, a line
// containing "订阅月" (billing cycle), a line containing "Tokens" (quota) and
// a line containing "元/月" (price). An optional sixth line is taken as the
// scene description unless it is a heading or the name of the next plan.
//
// It returns the plan, the index of the last line consumed, and true on
// success. On failure it returns a zero plan, start, and false.
//
// Both "## " and "### " headings end the record, so a series heading that
// directly follows the price line is left for the caller instead of being
// stored as Scene. Tier is stripped of ASCII and fullwidth parentheses.
func tryParseTencentPlan(lines []string, start int, series string) (tencentPlan, int, bool) {
	if start+4 >= len(lines) {
		return tencentPlan{}, start, false
	}
	if !isTencentPlanName(lines[start]) || !isTencentPlanTier(lines[start+1]) {
		return tencentPlan{}, start, false
	}

	// The three lines after the tier must carry these markers, in this order.
	for offset, marker := range []string{"订阅月", "Tokens", "元/月"} {
		if !strings.Contains(lines[start+2+offset], marker) {
			return tencentPlan{}, start, false
		}
	}

	plan := tencentPlan{
		Series: series,
		// "\uff08" and "\uff09" are the fullwidth parentheses.
		Tier:         strings.Trim(lines[start+1], "()\uff08\uff09 "),
		BillingCycle: lines[start+2],
		Quota:        lines[start+3],
		Price:        lines[start+4],
	}

	nextIndex := start + 4
	if start+5 < len(lines) {
		candidate := lines[start+5]
		isHeading := strings.HasPrefix(candidate, "## ") || strings.HasPrefix(candidate, "### ")
		if !isHeading && !isTencentPlanName(candidate) {
			plan.Scene = candidate
			nextIndex = start + 5
		}
	}
	return plan, nextIndex, true
}
// tryParseTencentModel attempts to read one model record beginning at
// lines[start].
//
// A record is a name line (any line that is not reserved) followed by a line
// that looks like a model identifier. The lines after the identifier are
// collected as notes until one of these is reached: a "## " or "### "
// heading, the start of a plan (plan name followed by a tier line), or the
// name line of the next model (a non-reserved line followed by an
// identifier).
//
// It returns the model, the index of the last line consumed, and true on
// success. On failure it returns a zero model, start, and false.
func tryParseTencentModel(lines []string, start int, series string) (tencentModel, int, bool) {
	idAt := start + 1
	if idAt >= len(lines) || !isTencentModelID(lines[idAt]) || isReservedTencentLine(lines[start]) {
		return tencentModel{}, start, false
	}

	// endsEntry reports whether lines[pos] belongs to whatever follows this model.
	endsEntry := func(pos int) bool {
		current := lines[pos]
		if strings.HasPrefix(current, "## ") || strings.HasPrefix(current, "### ") {
			return true
		}
		if pos+1 >= len(lines) {
			return false
		}
		following := lines[pos+1]
		if isTencentPlanName(current) && isTencentPlanTier(following) {
			return true
		}
		return isTencentModelID(following) && !isReservedTencentLine(current)
	}

	last := idAt
	notes := make([]string, 0, 4)
	for pos := idAt + 1; pos < len(lines) && !endsEntry(pos); pos++ {
		notes = append(notes, lines[pos])
		last = pos
	}

	entry := tencentModel{
		Series:        series,
		Name:          lines[start],
		ModelID:       lines[idAt],
		ContextLength: extractContextLength(strings.Join(notes, " ")),
		Notes:         notes,
	}
	return entry, last, true
}
// isTencentPlanName reports whether line is exactly one of the four known
// plan names.
func isTencentPlanName(line string) bool {
	for _, known := range [...]string{"体验套餐", "基础套餐", "进阶套餐", "专业套餐"} {
		if line == known {
			return true
		}
	}
	return false
}
// isTencentPlanTier reports whether line is a parenthesised tier label such
// as "(Lite)".
//
// The line must begin with an opening parenthesis and end with a closing
// one; the ASCII and the fullwidth forms (U+FF08, U+FF09) are both accepted.
// Comparing against empty strings, as this check previously did, matches
// every line and therefore validates nothing.
func isTencentPlanTier(line string) bool {
	opened := strings.HasPrefix(line, "(") || strings.HasPrefix(line, "\uff08")
	closed := strings.HasSuffix(line, ")") || strings.HasSuffix(line, "\uff09")
	return opened && closed
}
// isReservedTencentLine reports whether line is structural text that can
// never be a model name: any line starting with "#", or one of the fixed
// section titles.
func isReservedTencentLine(line string) bool {
	if strings.HasPrefix(line, "#") {
		return true
	}
	return line == "Token Plan 个人版套餐概览" || line == "套餐详情" || line == "可用模型"
}
// tencentModelIDPattern matches a whole line made of lowercase letters,
// digits, '.', '_' and '-', starting with a letter or digit. It is compiled
// once because isTencentModelID runs for every candidate line.
var tencentModelIDPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9._-]*$`)

// isTencentModelID reports whether line has the shape of a model identifier:
// non-empty, lowercase alphanumerics plus '.', '_' and '-', and not starting
// with punctuation.
func isTencentModelID(line string) bool {
	return tencentModelIDPattern.MatchString(line)
}
// tencentContextLengthPattern finds "<number><K|M> 上下文" with optional
// spaces, case-insensitively. The number may carry a fractional part so that
// "1.5M" is read as one value rather than as the trailing "5M". It is
// compiled once instead of on every call.
var tencentContextLengthPattern = regexp.MustCompile(`(?i)(\d+(?:\.\d+)?)\s*([KM])\s*上下文`)

// extractContextLength returns the context length, in tokens, announced by
// the first "<number>K 上下文" or "<number>M 上下文" phrase in text.
//
// K scales by 1024 and M by 1024*1024; a fractional result is truncated. It
// returns 0 when no such phrase exists, when the number cannot be parsed, or
// when the result would not fit in an int.
func extractContextLength(text string) int {
	matches := tencentContextLengthPattern.FindStringSubmatch(text)
	if len(matches) != 3 {
		return 0
	}

	var size float64
	if _, err := fmt.Sscanf(matches[1], "%g", &size); err != nil {
		return 0
	}

	switch strings.ToUpper(matches[2]) {
	case "K":
		size *= 1024
	case "M":
		size *= 1024 * 1024
	default:
		return 0
	}

	// Converting an out-of-range float to int is not well defined; refuse it.
	const tooLarge = float64(1 << 62)
	if size >= tooLarge {
		return 0
	}
	return int(size)
}
// formatSeriesSummary renders how many plans belong to each series as
// "name:count" pairs joined by commas, ordered by series name. It returns ""
// when plans is empty.
func formatSeriesSummary(plans []tencentPlan) string {
	tally := map[string]int{}
	for i := range plans {
		tally[plans[i].Series]++
	}

	names := make([]string, 0, len(tally))
	for name := range tally {
		names = append(names, name)
	}
	// Map iteration order is random; sorting keeps the summary stable.
	sort.Strings(names)

	var summary strings.Builder
	for i, name := range names {
		if i > 0 {
			summary.WriteString(",")
		}
		fmt.Fprintf(&summary, "%s:%d", name, tally[name])
	}
	return summary.String()
}