feat(routing): add formal chat route endpoint

This commit is contained in:
phamnazage-jpg
2026-05-29 13:17:56 +08:00
parent 09b43ce2d8
commit ecdeedb103
4 changed files with 325 additions and 3 deletions

View File

@@ -52,6 +52,7 @@ type ActionSet struct {
AppendRouteStickyAudit func(context.Context, AppendRouteStickyAuditRequest) (RouteStickyAuditInfo, error)
ListRouteStickyAudit func(context.Context, ListRouteStickyAuditRequest) ([]RouteStickyAuditInfo, error)
ResolveRoute func(context.Context, ResolveRouteRequest) (ResolveRouteInfo, error)
RouteChatCompletions func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error)
ProxyRouteChatCompletions func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error)
SetStickyBinding func(context.Context, SetStickyBindingRequest) (StickyBindingInfo, error)
GetStickyBinding func(context.Context, GetStickyBindingRequest) (StickyBindingInfo, error)
@@ -403,6 +404,9 @@ func NewAPIHandlerWithAuth(adminAuth AdminAuthConfig, actions ActionSet) http.Ha
mux.Handle("POST /api/routing/resolve", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handleResolveRoute(w, r, actions.ResolveRoute)
})))
mux.Handle("POST /api/routing/chat/completions", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handleRouteChatCompletions(w, r, actions.RouteChatCompletions)
})))
mux.Handle("POST /api/routing/proxy/chat/completions", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handleProxyRouteChatCompletions(w, r, actions.ProxyRouteChatCompletions)
})))
@@ -1235,6 +1239,7 @@ func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRu
routeLogWriter := newLazyRouteLogWriter(sqliteDSN)
resolveRoute := buildResolveRouteAction(sqliteDSN, stickyRuntime, routeLogWriter)
proxyRouteChatCompletions := buildProxyRouteChatCompletionsAction(sqliteDSN, resolveRoute, routeLogWriter)
routeChatCompletions := buildRouteChatCompletionsAction(proxyRouteChatCompletions)
return ActionSet{
CreateBatchImportRun: buildCreateBatchImportRunAction(sqliteDSN),
ListBatchImportRuns: buildListBatchImportRunsAction(sqliteDSN),
@@ -1262,6 +1267,7 @@ func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRu
AppendRouteStickyAudit: buildAppendRouteStickyAuditAction(routeLogWriter, sqliteDSN),
ListRouteStickyAudit: buildListRouteStickyAuditAction(sqliteDSN),
ResolveRoute: resolveRoute,
RouteChatCompletions: routeChatCompletions,
ProxyRouteChatCompletions: proxyRouteChatCompletions,
SetStickyBinding: buildSetStickyBindingAction(stickyRuntime),
GetStickyBinding: buildGetStickyBindingAction(stickyRuntime),

View File

@@ -40,6 +40,22 @@ type ProxyRouteChatCompletionsRequest struct {
Sync bool `json:"sync,omitempty"`
}
type RouteChatCompletionsRequest struct {
RequestID string `json:"request_id,omitempty"`
LogicalGroupID string `json:"logical_group_id"`
Model string `json:"model"`
Scope string `json:"scope"`
SubjectID string `json:"subject_id"`
UserKey string `json:"user_key,omitempty"`
ConversationKey string `json:"conversation_key,omitempty"`
GatewayAPIKey string `json:"gateway_api_key,omitempty"`
SubscriptionUserID string `json:"subscription_user_id,omitempty"`
Messages []ChatCompletionMessage `json:"messages,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
Sync bool `json:"sync,omitempty"`
}
type ChatCompletionMessage struct {
Role string `json:"role"`
Content string `json:"content"`
@@ -68,6 +84,33 @@ type RouteChatCompletionsForwardInfo struct {
Response any `json:"response,omitempty"`
}
type RouteChatCompletionsResult struct {
RequestID string `json:"request_id"`
Backend string `json:"backend"`
LogicalGroupID string `json:"logical_group_id"`
Model string `json:"model"`
Scope string `json:"scope"`
SubjectID string `json:"subject_id"`
StickyKey string `json:"sticky_key"`
StickyHit bool `json:"sticky_hit"`
StickyAction string `json:"sticky_action"`
FallbackUsed bool `json:"fallback_used,omitempty"`
SelectedRoute RouteChatCompletionsRouteInfo `json:"selected_route"`
Forward RouteChatCompletionsForwardInfo `json:"forward"`
}
type RouteChatCompletionsRouteInfo struct {
RouteID string `json:"route_id"`
RouteName string `json:"route_name,omitempty"`
ShadowHostID string `json:"shadow_host_id"`
ShadowGroupID string `json:"shadow_group_id"`
ShadowModel string `json:"shadow_model,omitempty"`
Priority int `json:"priority"`
Weight int `json:"weight"`
BoundAt string `json:"bound_at,omitempty"`
ExpiresAt string `json:"expires_at,omitempty"`
}
func handleProxyRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error)) {
if fn == nil {
writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "proxy-route-chat-completions action is not configured"})
@@ -86,6 +129,24 @@ func handleProxyRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn
writeJSON(w, http.StatusOK, result)
}
func handleRouteChatCompletions(w http.ResponseWriter, r *http.Request, fn func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error)) {
if fn == nil {
writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "route-chat-completions action is not configured"})
return
}
var req RouteChatCompletionsRequest
if err := decodeJSON(r, &req); err != nil {
writeHTTPError(w, err)
return
}
result, err := fn(r.Context(), req)
if err != nil {
writeHTTPError(w, classifyError(err))
return
}
writeJSON(w, http.StatusOK, result)
}
func buildProxyRouteChatCompletionsAction(
sqliteDSN string,
resolveRoute func(context.Context, ResolveRouteRequest) (ResolveRouteInfo, error),
@@ -166,6 +227,59 @@ func buildProxyRouteChatCompletionsAction(
}
}
func buildRouteChatCompletionsAction(
proxyRouteChatCompletions func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error),
) func(context.Context, RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) {
return func(ctx context.Context, req RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) {
result, err := proxyRouteChatCompletions(ctx, ProxyRouteChatCompletionsRequest{
RequestID: req.RequestID,
LogicalGroupID: req.LogicalGroupID,
PublicModel: req.Model,
Scope: req.Scope,
SubjectID: req.SubjectID,
UserKey: req.UserKey,
ConversationKey: req.ConversationKey,
GatewayAPIKey: req.GatewayAPIKey,
SubscriptionUserID: req.SubscriptionUserID,
Messages: req.Messages,
MaxTokens: req.MaxTokens,
Temperature: req.Temperature,
Sync: req.Sync,
})
if err != nil {
return RouteChatCompletionsResult{}, err
}
return routeChatCompletionsResultFromProxy(result), nil
}
}
func routeChatCompletionsResultFromProxy(result ProxyRouteChatCompletionsResult) RouteChatCompletionsResult {
return RouteChatCompletionsResult{
RequestID: result.Resolve.RequestID,
Backend: result.Resolve.Backend,
LogicalGroupID: result.Resolve.LogicalGroupID,
Model: result.Resolve.PublicModel,
Scope: result.Resolve.Scope,
SubjectID: result.Resolve.SubjectID,
StickyKey: result.Resolve.StickyKey,
StickyHit: result.Resolve.StickyHit,
StickyAction: result.Resolve.StickyAction,
FallbackUsed: result.Resolve.FallbackUsed,
SelectedRoute: RouteChatCompletionsRouteInfo{
RouteID: result.Resolve.RouteID,
RouteName: result.Resolve.RouteName,
ShadowHostID: result.Resolve.ShadowHostID,
ShadowGroupID: result.Resolve.ShadowGroupID,
ShadowModel: result.Resolve.ShadowModel,
Priority: result.Resolve.Priority,
Weight: result.Resolve.Weight,
BoundAt: result.Resolve.BoundAt,
ExpiresAt: result.Resolve.ExpiresAt,
},
Forward: result.Forward,
}
}
func appendProxyRouteDecisionLog(
ctx context.Context,
writerSource *lazyRouteLogWriter,

View File

@@ -70,6 +70,68 @@ func TestAPIProxyRouteChatCompletionsReturnsResolveAndForward(t *testing.T) {
assertJSONContains(t, response.Body().Bytes(), "forward.upstream_status", float64(200))
}
func TestAPIRouteChatCompletionsReturnsFormalResult(t *testing.T) {
t.Parallel()
handler := NewAPIHandler("secret-token", ActionSet{
RouteChatCompletions: func(_ context.Context, req RouteChatCompletionsRequest) (RouteChatCompletionsResult, error) {
if req.LogicalGroupID != "gpt-shared" {
t.Fatalf("LogicalGroupID = %q, want gpt-shared", req.LogicalGroupID)
}
if req.Model != "gpt-5.4" {
t.Fatalf("Model = %q, want gpt-5.4", req.Model)
}
return RouteChatCompletionsResult{
RequestID: "req-route-1",
Backend: "memory",
LogicalGroupID: req.LogicalGroupID,
Model: req.Model,
Scope: req.Scope,
SubjectID: req.SubjectID,
StickyKey: "lg:gpt-shared:m:gpt-5.4:conv:conv-1",
StickyHit: false,
StickyAction: "bind",
FallbackUsed: true,
SelectedRoute: RouteChatCompletionsRouteInfo{
RouteID: "asxs",
RouteName: "ASXS",
ShadowHostID: "remote43",
ShadowGroupID: "9",
ShadowModel: "gpt-5.4",
Priority: 10,
Weight: 100,
},
Forward: RouteChatCompletionsForwardInfo{
OK: true,
HostID: "remote43",
HostBaseURL: "https://sub2api.example.com",
ShadowGroupID: "9",
ShadowModel: "gpt-5.4",
UpstreamPath: "/v1/chat/completions",
UpstreamStatus: 200,
LatencyMS: 12,
ContentType: "application/json",
},
}, nil
},
})
request := httptestRequest(t, http.MethodPost, "/api/routing/chat/completions", map[string]any{
"logical_group_id": "gpt-shared",
"model": "gpt-5.4",
"scope": "conversation",
"subject_id": "conv-1",
"gateway_api_key": "gateway-key",
"sync": true,
}, "secret-token")
response := httptestRecorder(handler, request)
assertStatusCode(t, response, http.StatusOK)
assertJSONContains(t, response.Body().Bytes(), "selected_route.route_id", "asxs")
assertJSONContains(t, response.Body().Bytes(), "selected_route.shadow_model", "gpt-5.4")
assertJSONContains(t, response.Body().Bytes(), "fallback_used", true)
assertJSONContains(t, response.Body().Bytes(), "forward.upstream_status", float64(200))
}
func TestNewActionSetProxyRouteChatCompletionsFlow(t *testing.T) {
t.Parallel()
@@ -225,6 +287,144 @@ func TestNewActionSetProxyRouteChatCompletionsFlow(t *testing.T) {
}
}
func TestNewActionSetRouteChatCompletionsFlow(t *testing.T) {
t.Parallel()
var (
gotAuthHeader string
gotModel string
gotPrompt string
)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/v1/chat/completions" {
t.Fatalf("URL.Path = %q, want /v1/chat/completions", r.URL.Path)
}
gotAuthHeader = r.Header.Get("Authorization")
var payload struct {
Model string `json:"model"`
Messages []struct {
Role string `json:"role"`
Content string `json:"content"`
} `json:"messages"`
}
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
t.Fatalf("json.Decode() error = %v", err)
}
gotModel = payload.Model
if len(payload.Messages) > 0 {
gotPrompt = payload.Messages[0].Content
}
writeJSON(w, http.StatusOK, map[string]any{
"id": "chatcmpl_route",
"object": "chat.completion",
"choices": []map[string]any{
{
"index": 0,
"message": map[string]any{
"role": "assistant",
"content": "pong-route",
},
},
},
})
}))
defer server.Close()
dsn := "file:" + filepath.ToSlash(filepath.Join(t.TempDir(), "route-formal.db")) + "?_busy_timeout=5000"
actions := NewActionSet(dsn)
ctx := context.Background()
store, err := sqlite.Open(ctx, dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
defer store.Close()
if _, err := store.Hosts().Create(ctx, sqlite.Host{
HostID: "remote43",
BaseURL: server.URL,
HostVersion: "0.1.126",
AuthType: "apikey",
AuthToken: "host-admin-token",
}); err != nil {
t.Fatalf("Hosts().Create() error = %v", err)
}
if _, err := actions.CreateLogicalGroup(ctx, CreateLogicalGroupRequest{
LogicalGroupID: "gpt-shared",
DisplayName: "GPT Shared",
Status: "active",
RoutePolicy: "priority",
StickyMode: "conversation_preferred",
ConversationTTLSeconds: 1200,
UserModelTTLSeconds: 600,
FailoverThreshold: 2,
CooldownSeconds: 300,
}); err != nil {
t.Fatalf("CreateLogicalGroup() error = %v", err)
}
if _, err := actions.CreateLogicalGroupModel(ctx, CreateLogicalGroupModelRequest{
LogicalGroupID: "gpt-shared",
PublicModel: "gpt-5.4",
Status: "active",
}); err != nil {
t.Fatalf("CreateLogicalGroupModel() error = %v", err)
}
if _, err := actions.CreateLogicalGroupRoute(ctx, CreateLogicalGroupRouteRequest{
LogicalGroupID: "gpt-shared",
RouteID: "asxs",
Name: "ASXS",
Status: "active",
Priority: 10,
ShadowGroupID: "gpt-shared__asxs",
ShadowHostID: "remote43",
UpstreamBaseURLHint: "https://api.asxs.top/v1",
}); err != nil {
t.Fatalf("CreateLogicalGroupRoute() error = %v", err)
}
if _, err := actions.CreateLogicalGroupRouteModel(ctx, CreateLogicalGroupRouteModelRequest{
LogicalGroupID: "gpt-shared",
RouteID: "asxs",
PublicModel: "gpt-5.4",
ShadowModel: "gpt-5.4",
Status: "active",
}); err != nil {
t.Fatalf("CreateLogicalGroupRouteModel() error = %v", err)
}
result, err := actions.RouteChatCompletions(ctx, RouteChatCompletionsRequest{
RequestID: "req-route-1",
LogicalGroupID: "gpt-shared",
Model: "gpt-5.4",
Scope: "conversation",
SubjectID: "conv-1",
GatewayAPIKey: "gateway-key",
Sync: true,
})
if err != nil {
t.Fatalf("RouteChatCompletions() error = %v", err)
}
if gotAuthHeader != "Bearer gateway-key" {
t.Fatalf("Authorization header = %q, want Bearer gateway-key", gotAuthHeader)
}
if gotModel != "gpt-5.4" {
t.Fatalf("forwarded model = %q, want gpt-5.4", gotModel)
}
if gotPrompt != "ping" {
t.Fatalf("forwarded prompt = %q, want ping", gotPrompt)
}
if result.SelectedRoute.RouteID != "asxs" || result.SelectedRoute.ShadowModel != "gpt-5.4" {
t.Fatalf("SelectedRoute = %+v, want asxs route with shadow model gpt-5.4", result.SelectedRoute)
}
if !result.Forward.OK || result.Forward.UpstreamStatus != http.StatusOK {
t.Fatalf("Forward = %+v, want successful 200 forward", result.Forward)
}
if result.Model != "gpt-5.4" || result.RequestID != "req-route-1" {
t.Fatalf("RouteChatCompletions() = %+v, want model gpt-5.4 and request id req-route-1", result)
}
}
func TestNewActionSetProxyRouteChatCompletionsManagedSubscriptionFlow(t *testing.T) {
t.Parallel()

View File

@@ -32,6 +32,7 @@ type ResolveRouteInfo struct {
StickyKey string `json:"sticky_key"`
StickyHit bool `json:"sticky_hit"`
StickyAction string `json:"sticky_action"`
FallbackUsed bool `json:"fallback_used,omitempty"`
RouteID string `json:"route_id"`
RouteName string `json:"route_name,omitempty"`
ShadowGroupID string `json:"shadow_group_id"`
@@ -116,7 +117,7 @@ func buildResolveRouteAction(sqliteDSN string, stickyRuntime stickyStoreRuntime,
return ResolveRouteInfo{}, err
}
} else {
info := resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, binding, requestID, true, "hit")
info := resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, binding, requestID, true, "hit", false)
if err := writer.AppendStickyAudit(ctx, routing.RouteStickyAuditEvent{
StickyKey: stickyKey,
StickyKeyType: req.Scope,
@@ -222,7 +223,7 @@ func buildResolveRouteAction(sqliteDSN string, stickyRuntime stickyStoreRuntime,
return ResolveRouteInfo{}, err
}
}
return resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, stored, requestID, false, "bind"), nil
return resolveRouteInfoFromBinding(stickyRuntime.backend, stickyKey, req.Scope, req.SubjectID, candidate, stored, requestID, false, "bind", selection.fallbackUsed), nil
}
}
@@ -399,7 +400,7 @@ func resolveStickyTTL(group sqlite.LogicalGroup, scope string) (time.Duration, e
}
}
func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string, subjectID string, candidate resolvedRouteCandidate, binding routing.StickyBinding, requestID string, stickyHit bool, stickyAction string) ResolveRouteInfo {
func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string, subjectID string, candidate resolvedRouteCandidate, binding routing.StickyBinding, requestID string, stickyHit bool, stickyAction string, fallbackUsed bool) ResolveRouteInfo {
return ResolveRouteInfo{
RequestID: requestID,
Backend: backend,
@@ -410,6 +411,7 @@ func resolveRouteInfoFromBinding(backend string, stickyKey string, scope string,
StickyKey: stickyKey,
StickyHit: stickyHit,
StickyAction: stickyAction,
FallbackUsed: fallbackUsed,
RouteID: candidate.route.RouteID,
RouteName: candidate.route.Name,
ShadowGroupID: candidate.route.ShadowGroupID,