332 lines
7.5 KiB
Go
332 lines
7.5 KiB
Go
|
|
//go:build llm_script
|
|||
|
|
|
|||
|
|
package main
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"fmt"
|
|||
|
|
"html"
|
|||
|
|
"io"
|
|||
|
|
"net/http"
|
|||
|
|
"os"
|
|||
|
|
"regexp"
|
|||
|
|
"sort"
|
|||
|
|
"strings"
|
|||
|
|
"time"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
const (
	// defaultTencentCatalogURL points at a Tencent Cloud documentation page.
	// NOTE(review): presumably the fallback for fetchTencentCatalogConfig.URL
	// when no URL is supplied — confirm against the caller.
	defaultTencentCatalogURL = "https://cloud.tencent.com/document/product/1823/130060"
)
|
|||
|
|
|
|||
|
|
// defaultTencentCatalogTimeout is a 20 second duration.
// NOTE(review): presumably the fallback for fetchTencentCatalogConfig.Timeout —
// confirm against the caller; it is not referenced by the functions in this file.
var defaultTencentCatalogTimeout = 20 * time.Second
|
|||
|
|
|
|||
|
|
// fetchTencentCatalogConfig carries the settings for one catalog fetch.
type fetchTencentCatalogConfig struct {
	// URL is the address requested with HTTP GET when no fixture is set.
	URL string
	// DryRun is not read by the fetch/parse helpers in this file.
	// NOTE(review): presumably consulted by the caller to skip persistence — confirm.
	DryRun bool
	// Timeout is the time budget for the fetch.
	// NOTE(review): confirm where the caller applies it (e.g. on the http.Client).
	Timeout time.Duration
	// Fixture, when non-blank, is a file path whose contents are returned
	// instead of performing an HTTP request.
	Fixture string
}
|
|||
|
|
|
|||
|
|
// tencentCatalog is the parsed result of one catalog document.
type tencentCatalog struct {
	// UpdatedAt is the raw text following the "last updated" prefix line;
	// it is kept as a string and not parsed into a time value here.
	UpdatedAt string
	// Plans holds every plan entry recognised in a plan-details section.
	Plans []tencentPlan
	// Models holds every model entry recognised in an available-models section.
	Models []tencentModel
}
|
|||
|
|
|
|||
|
|
// tencentPlan is one subscription plan row extracted from the catalog text.
// All values are stored as the raw display strings found in the document.
type tencentPlan struct {
	// Series is the series heading the plan was found under.
	Series string
	// Tier is the parenthesised line following the plan name, with the
	// surrounding parentheses and spaces trimmed.
	Tier string
	// Quota is the line that mentions "Tokens".
	Quota string
	// Price is the line that mentions the per-month price unit.
	Price string
	// BillingCycle is the line that mentions the subscription month.
	BillingCycle string
	// Scene is the optional free-text line after the price; empty when absent.
	Scene string
}
|
|||
|
|
|
|||
|
|
// tencentModel is one model entry extracted from the catalog text.
type tencentModel struct {
	// Series is the series heading the model was found under.
	Series string
	// Name is the display-name line that precedes the model identifier.
	Name string
	// ModelID is the identifier line (lowercase letters, digits, '.', '_', '-').
	ModelID string
	// ContextLength is derived from a "<n>K"/"<n>M" context note in Notes;
	// it is 0 when no such note is present.
	ContextLength int
	// Notes are the free-text lines that follow the identifier.
	Notes []string
}
|
|||
|
|
|
|||
|
|
func fetchTencentCatalogContent(cfg fetchTencentCatalogConfig, client *http.Client) (string, error) {
|
|||
|
|
if strings.TrimSpace(cfg.Fixture) != "" {
|
|||
|
|
data, err := os.ReadFile(cfg.Fixture)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", err
|
|||
|
|
}
|
|||
|
|
return string(data), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
req, err := http.NewRequest(http.MethodGet, cfg.URL, nil)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", err
|
|||
|
|
}
|
|||
|
|
req.Header.Set("User-Agent", "llm-intelligence/tencent-catalog-fetcher")
|
|||
|
|
|
|||
|
|
resp, err := client.Do(req)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", err
|
|||
|
|
}
|
|||
|
|
defer resp.Body.Close()
|
|||
|
|
|
|||
|
|
if resp.StatusCode != http.StatusOK {
|
|||
|
|
return "", fmt.Errorf("unexpected status %d", resp.StatusCode)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
body, err := io.ReadAll(resp.Body)
|
|||
|
|
if err != nil {
|
|||
|
|
return "", err
|
|||
|
|
}
|
|||
|
|
return string(body), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func parseTencentCatalog(raw string) (tencentCatalog, error) {
|
|||
|
|
lines := normalizeTencentCatalogLines(raw)
|
|||
|
|
|
|||
|
|
var catalog tencentCatalog
|
|||
|
|
var currentSeries string
|
|||
|
|
var currentMode string
|
|||
|
|
|
|||
|
|
for i := 0; i < len(lines); i++ {
|
|||
|
|
line := lines[i]
|
|||
|
|
|
|||
|
|
if catalog.UpdatedAt == "" {
|
|||
|
|
if updatedAt := extractUpdatedAt(line); updatedAt != "" {
|
|||
|
|
catalog.UpdatedAt = updatedAt
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if series := extractSeriesHeading(line); series != "" {
|
|||
|
|
currentSeries = series
|
|||
|
|
currentMode = ""
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
switch line {
|
|||
|
|
case "### 套餐详情":
|
|||
|
|
currentMode = "plans"
|
|||
|
|
continue
|
|||
|
|
case "### 可用模型":
|
|||
|
|
currentMode = "models"
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
switch currentMode {
|
|||
|
|
case "plans":
|
|||
|
|
plan, nextIndex, ok := tryParseTencentPlan(lines, i, currentSeries)
|
|||
|
|
if ok {
|
|||
|
|
catalog.Plans = append(catalog.Plans, plan)
|
|||
|
|
i = nextIndex
|
|||
|
|
}
|
|||
|
|
case "models":
|
|||
|
|
model, nextIndex, ok := tryParseTencentModel(lines, i, currentSeries)
|
|||
|
|
if ok {
|
|||
|
|
catalog.Models = append(catalog.Models, model)
|
|||
|
|
i = nextIndex
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if catalog.UpdatedAt == "" {
|
|||
|
|
return tencentCatalog{}, fmt.Errorf("catalog updated_at not found")
|
|||
|
|
}
|
|||
|
|
if len(catalog.Plans) == 0 {
|
|||
|
|
return tencentCatalog{}, fmt.Errorf("catalog plans not found")
|
|||
|
|
}
|
|||
|
|
if len(catalog.Models) == 0 {
|
|||
|
|
return tencentCatalog{}, fmt.Errorf("catalog models not found")
|
|||
|
|
}
|
|||
|
|
return catalog, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// tencentCatalogTagPattern matches any remaining HTML tag. It is compiled once
// at package scope instead of on every call.
var tencentCatalogTagPattern = regexp.MustCompile(`<[^>]+>`)

// tencentCatalogBreakReplacer turns line-break tags and block-closing tags
// into newlines in a single pass over the text.
var tencentCatalogBreakReplacer = strings.NewReplacer(
	"<br>", "\n",
	"<br/>", "\n",
	"<br />", "\n",
	"</p>", "\n",
	"</div>", "\n",
	"</li>", "\n",
	"</tr>", "\n",
	"</td>", "\n",
	"</h1>", "\n",
	"</h2>", "\n",
	"</h3>", "\n",
	"</h4>", "\n",
	"</pre>", "\n",
	"</main>", "\n",
)

// normalizeTencentCatalogLines converts raw catalog text (HTML or plain text)
// into a slice of trimmed, non-empty lines.
//
// Steps, in order: HTML entities are unescaped; line-break and block-closing
// tags become newlines; every other tag is removed; CRLF and bare CR are
// normalised to LF; the text is split on LF, each line is whitespace-trimmed
// and blank lines are dropped. The returned slice is never nil.
func normalizeTencentCatalogLines(raw string) []string {
	text := tencentCatalogBreakReplacer.Replace(html.UnescapeString(raw))
	text = tencentCatalogTagPattern.ReplaceAllString(text, "")

	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	rawLines := strings.Split(text, "\n")
	lines := make([]string, 0, len(rawLines))
	for _, rawLine := range rawLines {
		if line := strings.TrimSpace(rawLine); line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}
|
|||
|
|
|
|||
|
|
// extractUpdatedAt returns the whitespace-trimmed text that follows the
// "last updated" prefix, or "" when the line does not start with that prefix.
func extractUpdatedAt(line string) string {
	const prefix = "最近更新时间:"
	if !strings.HasPrefix(line, prefix) {
		return ""
	}
	remainder := line[len(prefix):]
	return strings.TrimSpace(remainder)
}
|
|||
|
|
|
|||
|
|
// extractSeriesHeading recognises a level-two heading ("## ...") that names a
// Token Plan or Coding Plan series. It returns the heading text with a
// trailing "套餐" suffix and surrounding whitespace removed, or "" for any
// other line.
func extractSeriesHeading(line string) string {
	const marker = "## "
	if !strings.HasPrefix(line, marker) {
		return ""
	}

	title := strings.TrimSpace(line[len(marker):])
	for _, keyword := range [...]string{"Token Plan", "Coding Plan"} {
		if strings.Contains(title, keyword) {
			withoutSuffix := strings.TrimSuffix(title, "套餐")
			return strings.TrimSpace(withoutSuffix)
		}
	}
	return ""
}
|
|||
|
|
|
|||
|
|
func tryParseTencentPlan(lines []string, start int, series string) (tencentPlan, int, bool) {
|
|||
|
|
if start+4 >= len(lines) {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
if !isTencentPlanName(lines[start]) {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
if !isTencentPlanTier(lines[start+1]) {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
if !strings.Contains(lines[start+2], "订阅月") {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
if !strings.Contains(lines[start+3], "Tokens") {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
if !strings.Contains(lines[start+4], "元/月") {
|
|||
|
|
return tencentPlan{}, start, false
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
plan := tencentPlan{
|
|||
|
|
Series: series,
|
|||
|
|
Tier: strings.Trim(lines[start+1], "()() "),
|
|||
|
|
BillingCycle: lines[start+2],
|
|||
|
|
Quota: lines[start+3],
|
|||
|
|
Price: lines[start+4],
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
nextIndex := start + 4
|
|||
|
|
if start+5 < len(lines) && !strings.HasPrefix(lines[start+5], "### ") && !isTencentPlanName(lines[start+5]) {
|
|||
|
|
plan.Scene = lines[start+5]
|
|||
|
|
nextIndex = start + 5
|
|||
|
|
}
|
|||
|
|
return plan, nextIndex, true
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func tryParseTencentModel(lines []string, start int, series string) (tencentModel, int, bool) {
|
|||
|
|
if start+1 >= len(lines) {
|
|||
|
|
return tencentModel{}, start, false
|
|||
|
|
}
|
|||
|
|
if !isTencentModelID(lines[start+1]) {
|
|||
|
|
return tencentModel{}, start, false
|
|||
|
|
}
|
|||
|
|
if isReservedTencentLine(lines[start]) {
|
|||
|
|
return tencentModel{}, start, false
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
model := tencentModel{
|
|||
|
|
Series: series,
|
|||
|
|
Name: lines[start],
|
|||
|
|
ModelID: lines[start+1],
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
notes := make([]string, 0, 4)
|
|||
|
|
nextIndex := start + 1
|
|||
|
|
for i := start + 2; i < len(lines); i++ {
|
|||
|
|
line := lines[i]
|
|||
|
|
if strings.HasPrefix(line, "## ") || strings.HasPrefix(line, "### ") {
|
|||
|
|
break
|
|||
|
|
}
|
|||
|
|
if isTencentPlanName(line) && i+1 < len(lines) && isTencentPlanTier(lines[i+1]) {
|
|||
|
|
break
|
|||
|
|
}
|
|||
|
|
if i+1 < len(lines) && isTencentModelID(lines[i+1]) && !isReservedTencentLine(line) {
|
|||
|
|
break
|
|||
|
|
}
|
|||
|
|
notes = append(notes, line)
|
|||
|
|
nextIndex = i
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
model.Notes = notes
|
|||
|
|
model.ContextLength = extractContextLength(strings.Join(notes, " "))
|
|||
|
|
return model, nextIndex, true
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// isTencentPlanName reports whether line is exactly one of the four known
// plan names.
func isTencentPlanName(line string) bool {
	for _, known := range [...]string{"体验套餐", "基础套餐", "进阶套餐", "专业套餐"} {
		if line == known {
			return true
		}
	}
	return false
}
|
|||
|
|
|
|||
|
|
// isTencentPlanTier reports whether line is wrapped in parentheses, i.e. it
// begins with "(" and ends with ")".
func isTencentPlanTier(line string) bool {
	if !strings.HasPrefix(line, "(") {
		return false
	}
	return strings.HasSuffix(line, ")")
}
|
|||
|
|
|
|||
|
|
// isReservedTencentLine reports whether line is structural rather than
// content: any line starting with "#", or one of three fixed section titles.
func isReservedTencentLine(line string) bool {
	if strings.HasPrefix(line, "#") {
		return true
	}
	return line == "Token Plan 个人版套餐概览" ||
		line == "套餐详情" ||
		line == "可用模型"
}
|
|||
|
|
|
|||
|
|
// tencentModelIDPattern describes a model identifier: a lowercase letter or
// digit followed by any number of lowercase letters, digits, '.', '_' or '-'.
// It is compiled once at package scope because isTencentModelID is called for
// many lines during parsing.
var tencentModelIDPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9._-]*$`)

// isTencentModelID reports whether the whole line looks like a model
// identifier (see tencentModelIDPattern).
func isTencentModelID(line string) bool {
	return tencentModelIDPattern.MatchString(line)
}
|
|||
|
|
|
|||
|
|
// tencentContextLengthPattern matches "<digits>K" or "<digits>M" (either
// case, optional whitespace) directly followed by "上下文". It is compiled
// once at package scope instead of on every call.
var tencentContextLengthPattern = regexp.MustCompile(`(?i)(\d+)\s*([KM])\s*上下文`)

// extractContextLength finds the first context-size note in text and returns
// it as a token count, using 1024 for K and 1024*1024 for M.
//
// It returns 0 when no note is present, when the number cannot be parsed, or
// when the scaled value would not fit in an int.
func extractContextLength(text string) int {
	matches := tencentContextLengthPattern.FindStringSubmatch(text)
	if len(matches) != 3 {
		return 0
	}

	var value int
	if _, err := fmt.Sscanf(matches[1], "%d", &value); err != nil {
		// Digit runs too large for an int are rejected instead of silently
		// being treated as some other value.
		return 0
	}

	var multiplier int
	switch strings.ToUpper(matches[2]) {
	case "K":
		multiplier = 1024
	case "M":
		multiplier = 1024 * 1024
	default:
		return 0
	}

	// Refuse results that would wrap around on multiplication.
	const maxInt = int(^uint(0) >> 1)
	if value > maxInt/multiplier {
		return 0
	}
	return value * multiplier
}
|
|||
|
|
|
|||
|
|
func formatSeriesSummary(plans []tencentPlan) string {
|
|||
|
|
counts := make(map[string]int)
|
|||
|
|
for _, plan := range plans {
|
|||
|
|
counts[plan.Series]++
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
series := make([]string, 0, len(counts))
|
|||
|
|
for name := range counts {
|
|||
|
|
series = append(series, name)
|
|||
|
|
}
|
|||
|
|
sort.Strings(series)
|
|||
|
|
|
|||
|
|
parts := make([]string, 0, len(series))
|
|||
|
|
for _, name := range series {
|
|||
|
|
parts = append(parts, fmt.Sprintf("%s:%d", name, counts[name]))
|
|||
|
|
}
|
|||
|
|
return strings.Join(parts, ",")
|
|||
|
|
}
|