// Files: sub2api-cn-relay-manager/internal/probe/completion.go
//
// 124 lines, 3.2 KiB, Go

package probe
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)
// CompletionResult describes the outcome of one smoke completion request as
// produced by SmokeCompletion.
type CompletionResult struct {
	// Model is the (whitespace-trimmed) model ID the request was sent with.
	Model string
	// HTTPStatus is the status code of the HTTP response.
	HTTPStatus int
	// LatencyMs is the time in milliseconds between sending the request and
	// the HTTP client returning the response.
	LatencyMs int64
	// Classification names the endpoint flavour that was exercised:
	// "chat_completions" or "responses".
	Classification string
	// Error is empty for 2xx responses and "unexpected_status_<code>" otherwise.
	Error string
}
// ResolveSmokeModel picks the model ID to use for a smoke completion.
//
// Candidates are considered in this order:
//  1. entries of RecommendModels(requested, rawModels) that the profile allows;
//  2. non-blank entries of rawModels that the profile allows;
//  3. the first non-blank entry of rawModels, used as a best-effort fallback
//     even though the profile did not allow it.
//
// The chosen model is returned exactly as it appears in its source slice (it
// is not trimmed). The recommended list is returned on every path, including
// the error path. An error is returned only when no candidate exists at all.
func ResolveSmokeModel(requested []string, rawModels []string, profile *CapabilityProfile) (string, []string, error) {
	recommended := RecommendModels(requested, rawModels)
	for _, candidate := range recommended {
		if profileAllowsSmoke(profile, candidate) {
			return candidate, recommended, nil
		}
	}

	// Remember the first usable raw model while scanning so the fallback does
	// not depend on rawModels[0] happening to be non-blank.
	fallback := ""
	for _, rawModel := range rawModels {
		if strings.TrimSpace(rawModel) == "" {
			continue
		}
		if profileAllowsSmoke(profile, rawModel) {
			return rawModel, recommended, nil
		}
		if fallback == "" {
			fallback = rawModel
		}
	}
	if fallback != "" {
		return fallback, recommended, nil
	}
	return "", recommended, fmt.Errorf("no smoke model available")
}
// SmokeCompletion sends one minimal completion request ("ping") to the gateway
// at baseURL and reports how it responded.
//
// By default it POSTs a chat-completions payload to /v1/chat/completions. When
// profile is non-nil and profile.TransportProfile.SupportsOpenAIResponses is
// set, it POSTs a Responses-style payload to /v1/responses instead. The request
// URL is built with joinGatewayPath. A non-blank apiKey is sent as a Bearer
// token.
//
// A non-nil error is returned when model is blank, or when the request cannot
// be built or sent. Any HTTP response, including non-2xx, yields a non-nil
// *CompletionResult and a nil error; for non-2xx statuses the result's Error
// field is set to "unexpected_status_<code>".
func SmokeCompletion(ctx context.Context, baseURL, apiKey, model string, profile *CapabilityProfile) (*CompletionResult, error) {
	const (
		// requestTimeout bounds the whole exchange, including reading the body.
		requestTimeout = 15 * time.Second
		// maxDrainBytes caps how much of the response body is read before
		// closing, so an unexpectedly large body cannot stall the probe.
		maxDrainBytes = 64 << 10
	)

	model = strings.TrimSpace(model)
	if model == "" {
		return nil, fmt.Errorf("model is required")
	}

	path := "/v1/chat/completions"
	classification := "chat_completions"
	payload := map[string]any{
		"model": model,
		"messages": []map[string]string{
			{"role": "user", "content": "ping"},
		},
		"max_tokens":  8,
		"temperature": 0,
	}
	if profile != nil && profile.TransportProfile.SupportsOpenAIResponses {
		path = "/v1/responses"
		classification = "responses"
		payload = map[string]any{
			"model": model,
			"input": "ping",
		}
	}

	requestURL, err := joinGatewayPath(baseURL, path)
	if err != nil {
		return nil, fmt.Errorf("resolve smoke endpoint: %w", err)
	}

	var body bytes.Buffer
	if err := json.NewEncoder(&body).Encode(payload); err != nil {
		return nil, fmt.Errorf("encode smoke payload: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL, &body)
	if err != nil {
		return nil, fmt.Errorf("build smoke request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if token := strings.TrimSpace(apiKey); token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	startedAt := time.Now()
	resp, err := (&http.Client{Timeout: requestTimeout}).Do(req)
	if err != nil {
		return nil, fmt.Errorf("request smoke completion: %w", err)
	}
	defer func() {
		// Drain (bounded) before closing so the transport can reuse the
		// connection. Errors are ignored on purpose: the body content is not
		// used and the result below is already determined by the status code.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
		_ = resp.Body.Close()
	}()

	// Latency is captured here, before the deferred drain runs, so it reflects
	// the time until the client returned the response.
	result := &CompletionResult{
		Model:          model,
		HTTPStatus:     resp.StatusCode,
		LatencyMs:      time.Since(startedAt).Milliseconds(),
		Classification: classification,
	}
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		result.Error = fmt.Sprintf("unexpected_status_%d", resp.StatusCode)
	}
	return result, nil
}
// profileAllowsSmoke reports whether the capability profile permits using
// rawModel for a smoke completion.
//
// A nil profile, or one with no model profiles, allows every model. Otherwise
// the model profile whose trimmed RawModelID equals the trimmed rawModel
// decides via its SmokeChatOK flag. If there is no exact match, the first model
// profile whose CanonicalModelFamily equals CanonicalModelFamily(rawModel)
// decides. A model with no matching profile is not allowed.
func profileAllowsSmoke(profile *CapabilityProfile, rawModel string) bool {
	if profile == nil || len(profile.ModelProfiles) == 0 {
		return true
	}

	// Exact raw-ID matches take precedence so that a same-family sibling listed
	// earlier cannot override the verdict recorded for this specific model.
	targetRaw := strings.TrimSpace(rawModel)
	for _, modelProfile := range profile.ModelProfiles {
		if strings.TrimSpace(modelProfile.RawModelID) == targetRaw {
			return modelProfile.SmokeChatOK
		}
	}

	// NOTE(review): if CanonicalModelFamily can return "" for unrecognised
	// models, unrelated unknown models would match each other here — confirm.
	targetCanonical := CanonicalModelFamily(rawModel)
	for _, modelProfile := range profile.ModelProfiles {
		if modelProfile.CanonicalModelFamily == targetCanonical {
			return modelProfile.SmokeChatOK
		}
	}
	return false
}