Files
sub2api-cn-relay-manager/internal/reconcile/service.go

639 lines
20 KiB
Go

package reconcile
import (
"context"
"encoding/json"
"fmt"
"regexp"
"strings"
"sub2api-cn-relay-manager/internal/access"
"sub2api-cn-relay-manager/internal/host/sub2api"
"sub2api-cn-relay-manager/internal/pack"
"sub2api-cn-relay-manager/internal/store/sqlite"
)
// String values shared by the reconcile flow: access modes, access-closure
// statuses, import batch statuses, and per-account validation statuses.
const (
// Access modes, compared against an access closure's ClosureType.
accessModeSubscription = "subscription"
accessModeSelfService = "self_service"
// Access statuses. The two "ready" values are the healthy status for the
// matching access mode; "broken" is recorded when a gateway recheck fails.
accessStatusSubscriptionReady = "subscription_ready"
accessStatusSelfServiceReady = "self_service_ready"
accessStatusBroken = "broken"
// Import batch statuses that Reconcile accepts as a baseline.
batchStatusSucceeded = "succeeded"
batchStatusPartial = "partially_succeeded"
// Per-account validation statuses produced by accountValidationStatus.
accountStatusPassed = "passed"
accountStatusWarning = "warning"
accountStatusFailed = "failed"
)
// nonSlugPattern matches every run of characters outside [a-z0-9]. resourceSlug
// replaces each such run with a single hyphen.
var nonSlugPattern = regexp.MustCompile(`[^a-z0-9]+`)
// Request describes one reconcile run: which host to inspect and which
// pack/provider pair the managed resources belong to.
type Request struct {
// HostID identifies the stored host record; required.
HostID string
// HostBaseURL must be non-blank; Reconcile only validates its presence.
HostBaseURL string
// AccessProbeAPIKey is optional. When blank, the gateway access closure is
// not rechecked and its last stored status is reported instead.
AccessProbeAPIKey string
// Pack supplies the manifest used for the host compatibility check and the
// pack lookup.
Pack pack.LoadedPack
// Provider supplies the provider ID, the smoke-test model, and the resource
// name templates.
Provider pack.ProviderManifest
}
// Result is the outcome of a reconcile run.
type Result struct {
// BatchID is the ID of the import batch that was reconciled.
BatchID int64
// Status is "active", "drifted", or "degraded".
Status string
// MissingCount is the number of stored resources the host no longer reports.
MissingCount int
// ExtraCount is the number of live resources not in the stored set, after
// subtracting stale-noise accounts (never below zero).
ExtraCount int
// StaleNoiseCount is the number of live accounts classified as stale noise
// (see classifyHistoricalAccountNoise).
StaleNoiseCount int
// ProbeFailureCount is the number of accounts whose re-run probe failed.
ProbeFailureCount int
// AccessStatus is the access-closure status after the optional recheck.
AccessStatus string
// Summary is the same map that is persisted as JSON on the reconcile run.
Summary map[string]any
}
// accountProbeRerunSummary aggregates the result of re-probing every account
// of an import batch.
type accountProbeRerunSummary struct {
// Failures counts accounts whose validation status is "failed".
Failures int
// AccountIDs holds the trimmed, de-duplicated IDs of the probed accounts.
AccountIDs []string
// ResponsesCapabilitySuspect is set when at least one account that lists the
// smoke model returned a probe flagged by
// access.SuspectsOpenAIResponsesCapabilityMismatch.
ResponsesCapabilitySuspect bool
}
// Service reconciles stored managed resources against a live host.
type Service struct {
// store persists and loads packs, providers, hosts, batches, and run results.
store *sqlite.DB
// host is the adapter used to query and probe the remote host.
host sub2api.HostAdapter
}
// NewService returns a Service backed by the given store and host adapter.
// Neither argument is validated here; Reconcile rejects nil dependencies.
func NewService(store *sqlite.DB, host sub2api.HostAdapter) *Service {
	svc := new(Service)
	svc.store = store
	svc.host = host
	return svc
}
// Reconcile compares the resources recorded for the latest import batch of
// req.Provider on req.HostID with what the host currently reports, re-runs the
// account probes and (when an API key is supplied) the gateway access check,
// and stores the outcome as a new reconcile run.
//
// The resulting status is "drifted" when any resource is missing or
// unexpectedly present, "degraded" when the resources match but an account
// probe failed or the rechecked access closure is broken, and "active"
// otherwise.
//
// It returns a zero Result and an error when the service is not fully wired, a
// required request field is blank, the pack is incompatible with the host
// version, the latest batch is neither succeeded nor partially succeeded, or
// any store or host call fails.
func (s *Service) Reconcile(ctx context.Context, req Request) (Result, error) {
// Validate wiring and required request fields before touching the host.
if s == nil || s.store == nil {
return Result{}, fmt.Errorf("store is required")
}
if s.host == nil {
return Result{}, fmt.Errorf("host adapter is required")
}
if strings.TrimSpace(req.HostID) == "" {
return Result{}, fmt.Errorf("host_id is required")
}
if strings.TrimSpace(req.HostBaseURL) == "" {
return Result{}, fmt.Errorf("host_base_url is required")
}
hostVersion, err := s.host.GetHostVersion(ctx)
if err != nil {
return Result{}, fmt.Errorf("get host version: %w", err)
}
if err := pack.CheckHostCompatibility(req.Pack.Manifest, hostVersion); err != nil {
return Result{}, err
}
// Resolve the stored pack, provider, host, and latest import batch rows.
packRow, err := s.store.Packs().GetByPackID(ctx, req.Pack.Manifest.PackID)
if err != nil {
return Result{}, err
}
providerRow, err := s.store.Providers().GetByPackIDAndProviderID(ctx, packRow.ID, req.Provider.ProviderID)
if err != nil {
return Result{}, err
}
hostRow, err := s.store.Hosts().GetByHostID(ctx, req.HostID)
if err != nil {
return Result{}, err
}
batchRow, err := s.store.ImportBatches().GetLatestByProviderIDAndHostID(ctx, providerRow.ID, hostRow.ID)
if err != nil {
return Result{}, err
}
// Only a succeeded or partially succeeded batch is a usable baseline.
switch strings.TrimSpace(batchRow.BatchStatus) {
case batchStatusSucceeded, batchStatusPartial:
default:
return Result{}, fmt.Errorf("latest import batch is %s; run import again before reconcile", batchRow.BatchStatus)
}
storedResources, err := s.storedResourcesForReconcile(ctx, providerRow.ID, hostRow.ID, batchRow.ID)
if err != nil {
return Result{}, err
}
currentBatchResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchRow.ID)
if err != nil {
return Result{}, err
}
batchItems, err := s.store.ImportBatchItems().GetByBatchID(ctx, batchRow.ID)
if err != nil {
return Result{}, err
}
accessClosures, err := s.store.AccessClosures().GetByBatchID(ctx, batchRow.ID)
if err != nil {
return Result{}, err
}
// The live listing is scoped by the provider's resource names for the access
// mode of the most recent access closure.
snapshot, err := s.host.ListManagedResources(ctx, buildManagedResourceListRequest(req.Provider, accessClosureType(accessClosures)))
if err != nil {
return Result{}, fmt.Errorf("list managed resources: %w", err)
}
missing, extra := diffManagedResources(storedResources, snapshot)
// rawExtra keeps the unadjusted count for the summary; extra is reduced by
// the accounts classified as stale noise and floored at zero.
rawExtra := extra
staleNoiseAccounts := classifyHistoricalAccountNoise(currentBatchResources, snapshot.Accounts, suggestAccountNamePrefix(req.Provider))
if len(staleNoiseAccounts) > 0 {
extra -= len(staleNoiseAccounts)
if extra < 0 {
extra = 0
}
}
probeSummary, err := s.rerunAccountProbes(ctx, batchItems, req.Provider.SmokeTestModel)
if err != nil {
return Result{}, err
}
accessStatus, accessChecked, err := s.rerunAccessClosure(ctx, batchRow.ID, accessClosures, req.AccessProbeAPIKey, req.Provider.SmokeTestModel, probeSummary.AccountIDs, probeSummary.ResponsesCapabilitySuspect)
if err != nil {
return Result{}, err
}
// Drift takes precedence over degradation. A broken access status only
// degrades the run when it was actually rechecked in this run.
status := "active"
if missing > 0 || extra > 0 {
status = "drifted"
} else if probeSummary.Failures > 0 || (accessChecked && accessStatus == accessStatusBroken) {
status = "degraded"
}
summary := map[string]any{
"missing_count": missing,
"extra_count": extra,
"raw_extra_count": rawExtra,
"stale_noise_count": len(staleNoiseAccounts),
"stale_noise_accounts": staleNoiseAccounts,
"host_version": hostVersion,
"probe_failures": probeSummary.Failures,
"access_status": accessStatus,
"access_rechecked": accessChecked,
}
summaryJSON, err := json.Marshal(summary)
if err != nil {
return Result{}, fmt.Errorf("marshal reconcile summary: %w", err)
}
// Persist the run before reporting it to the caller.
if _, err := s.store.ReconcileRuns().Create(ctx, sqlite.ReconcileRun{BatchID: batchRow.ID, HostID: hostRow.ID, ProviderID: providerRow.ID, Status: status, SummaryJSON: string(summaryJSON)}); err != nil {
return Result{}, err
}
return Result{
BatchID: batchRow.ID,
Status: status,
MissingCount: missing,
ExtraCount: extra,
StaleNoiseCount: len(staleNoiseAccounts),
ProbeFailureCount: probeSummary.Failures,
AccessStatus: accessStatus,
Summary: summary,
}, nil
}
// rerunAccountProbes re-tests every account referenced by items against
// expectedModel. For each item it reads the account ID from the stored probe
// summary, probes the account, reloads its model list, overwrites the item's
// result, and appends an "account_smoke_rerun" probe result.
//
// It returns an empty summary when items is empty. Any decode, host, or store
// error aborts the loop immediately and returns a zero summary; items handled
// before the error have already been updated in the store.
func (s *Service) rerunAccountProbes(ctx context.Context, items []sqlite.ImportBatchItem, expectedModel string) (accountProbeRerunSummary, error) {
if len(items) == 0 {
return accountProbeRerunSummary{}, nil
}
summary := accountProbeRerunSummary{}
for _, item := range items {
accountID, err := accountIDFromProbeSummary(item.ProbeSummaryJSON)
if err != nil {
return accountProbeRerunSummary{}, fmt.Errorf("decode import batch item %d probe summary: %w", item.ID, err)
}
// An item without an account ID cannot be re-probed; treat it as an error.
if strings.TrimSpace(accountID) == "" {
return accountProbeRerunSummary{}, fmt.Errorf("import batch item %d missing account_id in probe summary", item.ID)
}
summary.AccountIDs = append(summary.AccountIDs, accountID)
probe, err := s.host.TestAccount(ctx, accountID, expectedModel)
if err != nil {
return accountProbeRerunSummary{}, fmt.Errorf("re-test account %s: %w", accountID, err)
}
models, err := s.host.GetAccountModels(ctx, accountID)
if err != nil {
return accountProbeRerunSummary{}, fmt.Errorf("reload account models %s: %w", accountID, err)
}
smokeModelSeen := hasModel(models, expectedModel)
status := accountValidationStatus(probe, smokeModelSeen)
// The capability-mismatch suspicion is only recorded for accounts that do
// list the smoke model.
if smokeModelSeen && access.SuspectsOpenAIResponsesCapabilityMismatch(probe) {
summary.ResponsesCapabilitySuspect = true
}
payload, err := json.Marshal(map[string]any{
"account_id": accountID,
"probe_ok": probe.OK,
"probe_status": probe.Status,
"probe_message": probe.Message,
"models": models,
"smoke_model_seen": smokeModelSeen,
"probe_advisory": status == accountStatusWarning,
"validation_status": status,
"reconcile_rerun": true,
})
if err != nil {
return accountProbeRerunSummary{}, fmt.Errorf("marshal probe rerun summary for %s: %w", accountID, err)
}
// The same payload is written to the batch item and to a new probe result.
if err := s.store.ImportBatchItems().UpdateResult(ctx, item.ID, status, string(payload)); err != nil {
return accountProbeRerunSummary{}, err
}
if _, err := s.store.ProbeResults().Create(ctx, sqlite.ProbeResult{BatchItemID: item.ID, ProbeType: "account_smoke_rerun", Status: status, SummaryJSON: string(payload)}); err != nil {
return accountProbeRerunSummary{}, err
}
// Only "failed" counts as a failure; "warning" is advisory.
if status == accountStatusFailed {
summary.Failures++
}
}
summary.AccountIDs = normalizedUniqueAccountIDs(summary.AccountIDs)
return summary, nil
}
// rerunAccessClosure re-validates gateway access for a batch and reports the
// access status, whether a live recheck was performed, and any error.
//
// With no access closures it returns ("not_configured", false). With a blank
// probeAPIKey it returns the last closure's stored status (or the healthy
// status derived from its type when the stored one is blank) and false.
// Otherwise it checks gateway access, and, when the model listing succeeds and
// contains expectedModel, a small completion request; the outcome is stored as
// a new access-closure record and returned with true.
func (s *Service) rerunAccessClosure(ctx context.Context, batchID int64, accessClosures []sqlite.AccessClosureRecord, probeAPIKey, expectedModel string, accountIDs []string, suspectResponsesCapabilityMismatch bool) (string, bool, error) {
if len(accessClosures) == 0 {
return "not_configured", false, nil
}
// NOTE(review): treats the last element as the most recent closure; this
// presumes GetByBatchID returns records oldest-first — confirm.
latest := accessClosures[len(accessClosures)-1]
status := firstNonEmpty(latest.Status, deriveHealthyAccessStatus(latest.ClosureType))
if strings.TrimSpace(probeAPIKey) == "" {
return status, false, nil
}
result, err := s.host.CheckGatewayAccess(ctx, sub2api.GatewayAccessCheckRequest{APIKey: probeAPIKey, ExpectedModel: expectedModel})
if err != nil {
return "", false, fmt.Errorf("re-check gateway access: %w", err)
}
// The completion probe only runs once the model listing looks healthy.
if result.OK && result.HasExpectedModel {
completionReq := sub2api.GatewayCompletionCheckRequest{
APIKey: probeAPIKey,
Model: expectedModel,
Prompt: "ping",
MaxTokens: 8,
}
completion, err := s.host.CheckGatewayCompletion(ctx, completionReq)
if err != nil {
return "", false, fmt.Errorf("re-check gateway completion: %w", err)
}
if access.ShouldAttemptOpenAIResponsesCapabilityRepair(suspectResponsesCapabilityMismatch, completion) {
// Best effort: a failed repair is ignored and the first completion
// result is kept; only a successful repair triggers a second probe.
if err := access.RepairOpenAIResponsesCapability(ctx, s.host, accountIDs); err == nil {
completion, err = s.host.CheckGatewayCompletion(ctx, completionReq)
if err != nil {
return "", false, fmt.Errorf("re-check gateway completion after capability repair: %w", err)
}
}
}
result.CompletionOK = completion.OK
result.CompletionStatus = completion.StatusCode
result.CompletionType = completion.ContentType
result.CompletionBody = completion.BodyPreview
}
if gatewayAccessReady(result) {
status = deriveHealthyAccessStatus(latest.ClosureType)
} else {
status = accessStatusBroken
}
payload, err := json.Marshal(map[string]any{
"status_code": result.StatusCode,
"ok": result.OK,
"has_expected_model": result.HasExpectedModel,
"models": result.Models,
"completion_ok": result.CompletionOK,
"completion_status": result.CompletionStatus,
"completion_type": result.CompletionType,
"completion_preview": result.CompletionBody,
"reconcile_rerun": true,
})
if err != nil {
return "", false, fmt.Errorf("marshal access rerun summary: %w", err)
}
// A new record is appended for every recheck; earlier records are kept.
if _, err := s.store.AccessClosures().Create(ctx, sqlite.AccessClosureRecord{BatchID: batchID, ClosureType: latest.ClosureType, Status: status, DetailsJSON: string(payload)}); err != nil {
return "", false, err
}
return status, true, nil
}
// normalizedUniqueAccountIDs trims every ID, discards blank ones, and removes
// duplicates while keeping first-seen order. The returned slice is never nil.
func normalizedUniqueAccountIDs(accountIDs []string) []string {
	unique := make([]string, 0, len(accountIDs))
	known := map[string]struct{}{}
	for i := range accountIDs {
		id := strings.TrimSpace(accountIDs[i])
		if id == "" {
			continue
		}
		if _, duplicate := known[id]; !duplicate {
			known[id] = struct{}{}
			unique = append(unique, id)
		}
	}
	return unique
}
// deriveHealthyAccessStatus maps an access-closure type to the status that
// represents a healthy closure of that type. Surrounding whitespace is
// ignored; any other closure type yields "unknown".
func deriveHealthyAccessStatus(closureType string) string {
	mode := strings.TrimSpace(closureType)
	if mode == accessModeSubscription {
		return accessStatusSubscriptionReady
	}
	if mode == accessModeSelfService {
		return accessStatusSelfServiceReady
	}
	return "unknown"
}
// DeriveHealthyAccessStatus is the exported form of deriveHealthyAccessStatus:
// it returns the healthy access status for closureType, or "unknown" when the
// type is not a recognized access mode.
func DeriveHealthyAccessStatus(closureType string) string {
return deriveHealthyAccessStatus(closureType)
}
// accessClosureType returns the trimmed closure type of the last record in
// accessClosures, or "" when there are no records.
func accessClosureType(accessClosures []sqlite.AccessClosureRecord) string {
	if count := len(accessClosures); count > 0 {
		return strings.TrimSpace(accessClosures[count-1].ClosureType)
	}
	return ""
}
// buildManagedResourceListRequest builds the host listing filter for a
// provider: the group and channel names for the given access mode plus the
// provider's account-name prefix. The plan name is only set in subscription
// mode.
func buildManagedResourceListRequest(provider pack.ProviderManifest, accessMode string) sub2api.ListManagedResourcesRequest {
	resolved := suggestResourceNamesForMode(provider, accessMode)

	var listReq sub2api.ListManagedResourcesRequest
	listReq.GroupName = resolved.Group
	listReq.ChannelName = resolved.Channel
	listReq.AccountNamePrefix = suggestAccountNamePrefix(provider)
	if strings.TrimSpace(accessMode) != accessModeSubscription {
		return listReq
	}
	listReq.PlanName = resolved.Plan
	return listReq
}
// accountIDFromProbeSummary extracts the trimmed "account_id" string from a
// JSON probe summary. A blank summary, a missing key, or a non-string value
// all yield "" with no error; malformed or non-object JSON yields the decode
// error.
func accountIDFromProbeSummary(summaryJSON string) (string, error) {
	if len(strings.TrimSpace(summaryJSON)) == 0 {
		return "", nil
	}
	var fields map[string]any
	decodeErr := json.Unmarshal([]byte(summaryJSON), &fields)
	if decodeErr != nil {
		return "", decodeErr
	}
	// Anything other than a JSON string is treated as "no account ID".
	id, isString := fields["account_id"].(string)
	if !isString {
		return "", nil
	}
	return strings.TrimSpace(id), nil
}
// AccountIDFromProbeSummary is the exported form of accountIDFromProbeSummary:
// it returns the trimmed "account_id" string found in summaryJSON, "" when the
// summary is blank or has no string account ID, or the JSON decode error.
func AccountIDFromProbeSummary(summaryJSON string) (string, error) {
return accountIDFromProbeSummary(summaryJSON)
}
// storedResourcesForReconcile returns the stored resources a reconcile run
// should expect on the host: every resource of the batch, followed by the
// group, channel, and plan resources recorded for the provider/host pair.
// Entries with a blank type or host resource ID are dropped, and duplicates
// (same trimmed type and ID) keep their first occurrence.
func (s *Service) storedResourcesForReconcile(ctx context.Context, providerID, hostID, batchID int64) ([]sqlite.ManagedResource, error) {
	batchResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchID)
	if err != nil {
		return nil, err
	}
	providerResources, err := s.store.ManagedResources().ListByProviderIDAndHostID(ctx, providerID, hostID)
	if err != nil {
		return nil, err
	}

	capacity := len(batchResources) + len(providerResources)
	result := make([]sqlite.ManagedResource, 0, capacity)
	visited := make(map[string]bool, capacity)
	add := func(candidate sqlite.ManagedResource) {
		kind := strings.TrimSpace(candidate.ResourceType)
		id := strings.TrimSpace(candidate.HostResourceID)
		if kind == "" || id == "" {
			return
		}
		if key := kind + ":" + id; !visited[key] {
			visited[key] = true
			result = append(result, candidate)
		}
	}

	for _, candidate := range batchResources {
		add(candidate)
	}
	// Accounts from other batches are deliberately not merged in; only the
	// shared container resources are.
	for _, candidate := range providerResources {
		kind := strings.TrimSpace(candidate.ResourceType)
		if kind == "group" || kind == "channel" || kind == "plan" {
			add(candidate)
		}
	}
	return result, nil
}
// diffManagedResources compares stored resources with a live host snapshot and
// returns (missing, extra): the number of stored resources absent from the
// snapshot and the number of snapshot resources absent from the stored set.
// Resources are matched by trimmed type and trimmed ID.
//
// Only the types the snapshot can report ("group", "channel", "plan",
// "account") are compared. Stored resources of any other type are skipped:
// the snapshot has no data for them, and indexing them would write to a nil
// map and panic.
func diffManagedResources(stored []sqlite.ManagedResource, snapshot sub2api.ManagedResourceSnapshot) (int, int) {
	live := map[string]map[string]struct{}{
		"group":   make(map[string]struct{}),
		"channel": make(map[string]struct{}),
		"plan":    make(map[string]struct{}),
		"account": make(map[string]struct{}),
	}
	for _, resource := range snapshot.Groups {
		live["group"][strings.TrimSpace(resource.ID)] = struct{}{}
	}
	for _, resource := range snapshot.Channels {
		live["channel"][strings.TrimSpace(resource.ID)] = struct{}{}
	}
	for _, resource := range snapshot.Plans {
		live["plan"][strings.TrimSpace(resource.ID)] = struct{}{}
	}
	for _, resource := range snapshot.Accounts {
		live["account"][strings.TrimSpace(resource.ID)] = struct{}{}
	}

	storedByType := map[string]map[string]struct{}{
		"group":   make(map[string]struct{}),
		"channel": make(map[string]struct{}),
		"plan":    make(map[string]struct{}),
		"account": make(map[string]struct{}),
	}
	for _, resource := range stored {
		ids, known := storedByType[strings.TrimSpace(resource.ResourceType)]
		if !known {
			// Unknown resource type: not part of the snapshot, so it can be
			// neither missing nor extra.
			continue
		}
		ids[strings.TrimSpace(resource.HostResourceID)] = struct{}{}
	}

	missing := 0
	extra := 0
	for resourceType, storedIDs := range storedByType {
		liveIDs := live[resourceType]
		for id := range storedIDs {
			if _, ok := liveIDs[id]; !ok {
				missing++
			}
		}
		for id := range liveIDs {
			if _, ok := storedIDs[id]; !ok {
				extra++
			}
		}
	}
	return missing, extra
}
// DiffManagedResources is the exported form of diffManagedResources: it
// returns, in order, the count of stored resources missing from snapshot and
// the count of snapshot resources that are not in stored.
func DiffManagedResources(stored []sqlite.ManagedResource, snapshot sub2api.ManagedResourceSnapshot) (int, int) {
return diffManagedResources(stored, snapshot)
}
// classifyHistoricalAccountNoise returns the live accounts that are not part
// of the current batch and whose name starts with accountNamePrefix (any name
// qualifies when the prefix is blank). Accounts with a blank ID are ignored.
// IDs and names in the result are trimmed, and the result is never nil.
func classifyHistoricalAccountNoise(currentBatchResources []sqlite.ManagedResource, snapshotAccounts []sub2api.NamedResource, accountNamePrefix string) []sub2api.NamedResource {
	inBatch := map[string]struct{}{}
	for _, stored := range currentBatchResources {
		if strings.TrimSpace(stored.ResourceType) == "account" {
			storedID := strings.TrimSpace(stored.HostResourceID)
			if storedID != "" {
				inBatch[storedID] = struct{}{}
			}
		}
	}

	wantPrefix := strings.TrimSpace(accountNamePrefix)
	noise := make([]sub2api.NamedResource, 0)
	for _, live := range snapshotAccounts {
		liveID := strings.TrimSpace(live.ID)
		if liveID == "" {
			continue
		}
		if _, current := inBatch[liveID]; current {
			continue
		}
		liveName := strings.TrimSpace(live.Name)
		if wantPrefix == "" || strings.HasPrefix(liveName, wantPrefix) {
			noise = append(noise, sub2api.NamedResource{ID: liveID, Name: liveName})
		}
	}
	return noise
}
// firstNonEmpty returns the first value that is non-blank after trimming
// whitespace, in its trimmed form, or "" when every value is blank.
func firstNonEmpty(values ...string) string {
	for i := range values {
		candidate := strings.TrimSpace(values[i])
		if candidate == "" {
			continue
		}
		return candidate
	}
	return ""
}
// hasModel reports whether models contains an entry whose ID equals target,
// comparing both sides with surrounding whitespace removed.
func hasModel(models []sub2api.AccountModel, target string) bool {
	wanted := strings.TrimSpace(target)
	for i := range models {
		if strings.TrimSpace(models[i].ID) == wanted {
			return true
		}
	}
	return false
}
// gatewayAccessReady reports whether a gateway check is fully healthy: the
// access check succeeded, the expected model was listed, and the completion
// probe succeeded.
func gatewayAccessReady(result sub2api.GatewayAccessResult) bool {
	if !result.OK || !result.HasExpectedModel {
		return false
	}
	return result.CompletionOK
}
// accountValidationStatus derives an account's validation status from its
// probe result. An account that does not list the smoke model always fails;
// otherwise a successful probe passes, an advisory probe failure is a warning,
// and any other probe failure fails.
func accountValidationStatus(probe sub2api.ProbeResult, smokeModelSeen bool) string {
	switch {
	case !smokeModelSeen:
		return accountStatusFailed
	case probe.OK:
		return accountStatusPassed
	case isAdvisoryAccountProbeFailure(probe):
		return accountStatusWarning
	default:
		return accountStatusFailed
	}
}
// isAdvisoryAccountProbeFailure reports whether a failed probe should be
// treated as a warning rather than a hard failure. The lower-cased, trimmed
// probe message qualifies when it looks transient, when it contains
// "api returned 403: forbidden", or when it mentions the responses API
// together with one of the known "test endpoint limitation" phrases.
// Successful probes and empty messages never qualify.
func isAdvisoryAccountProbeFailure(probe sub2api.ProbeResult) bool {
	if probe.OK {
		return false
	}
	text := strings.ToLower(strings.TrimSpace(probe.Message))
	switch {
	case text == "":
		return false
	case isTransientAccountProbeFailure(text):
		return true
	case strings.Contains(text, "api returned 403: forbidden"):
		return true
	case !strings.Contains(text, "responses api"):
		return false
	}
	// Phrases the host uses when its test endpoint cannot exercise the
	// responses API even though the account itself is usable.
	hints := [...]string{
		"当前测试接口仅支持",
		"账号本身可正常使用",
		"please directly",
		"actual api",
	}
	for _, hint := range hints {
		if strings.Contains(text, hint) {
			return true
		}
	}
	return false
}
// isTransientAccountProbeFailure reports whether message describes a
// temporary upstream problem. It requires both a transient symptom (a 429 or
// 502-504 code, rate limiting, bad gateway, service unavailable, or timeout)
// and a context marker showing the symptom came from the upstream API.
// Matching is case-sensitive substring search; callers pass a lower-cased
// message.
func isTransientAccountProbeFailure(message string) bool {
	mentionsAny := func(fragments ...string) bool {
		for _, fragment := range fragments {
			if strings.Contains(message, fragment) {
				return true
			}
		}
		return false
	}

	hasSymptom := mentionsAny(
		"429",
		"rate limit",
		"too many requests",
		"502",
		"503",
		"504",
		"bad gateway",
		"service unavailable",
		"timeout",
	)
	if !hasSymptom {
		return false
	}
	return mentionsAny("api returned", "rate_limit", "upstream", "temporar", "retry")
}
// resourceNames holds the host-side names of the group, channel, and plan
// managed for a provider.
type resourceNames struct {
Group string
Channel string
Plan string
}
// suggestAccountNamePrefix returns the name prefix for a provider's accounts:
// the provider ID slug followed by a hyphen.
func suggestAccountNamePrefix(provider pack.ProviderManifest) string {
	return resourceSlug(provider.ProviderID) + "-"
}
// suggestResourceNames returns the base group, channel, and plan names for a
// provider. Each name comes from the matching template when that is non-blank
// and otherwise defaults to "crm-<slug>-group", "crm-<slug>-channel", or
// "crm-<slug>-plan", where <slug> is the provider ID slug.
func suggestResourceNames(provider pack.ProviderManifest) resourceNames {
	prefix := "crm-" + resourceSlug(provider.ProviderID)

	var names resourceNames
	names.Group = fallbackString(strings.TrimSpace(provider.GroupTemplate.Name), prefix+"-group")
	names.Channel = fallbackString(strings.TrimSpace(provider.ChannelTemplate.Name), prefix+"-channel")
	names.Plan = fallbackString(strings.TrimSpace(provider.PlanTemplate.Name), prefix+"-plan")
	return names
}
// suggestResourceNamesForMode returns the provider's resource names for an
// access mode. Recognized modes get a mode-specific suffix appended to each
// name; any other mode returns the base names unchanged.
func suggestResourceNamesForMode(provider pack.ProviderManifest, accessMode string) resourceNames {
	names := suggestResourceNames(provider)
	if suffix := accessModeResourceSuffix(accessMode); suffix != "" {
		names.Group = appendResourceNameSuffix(names.Group, suffix)
		names.Channel = appendResourceNameSuffix(names.Channel, suffix)
		names.Plan = appendResourceNameSuffix(names.Plan, suffix)
	}
	return names
}
// accessModeResourceSuffix returns the resource-name suffix for an access
// mode: "subscription" or "self-service". Surrounding whitespace is ignored,
// and unrecognized modes yield "".
func accessModeResourceSuffix(accessMode string) string {
	mode := strings.TrimSpace(accessMode)
	if mode == accessModeSubscription {
		return "subscription"
	}
	if mode == accessModeSelfService {
		return "self-service"
	}
	return ""
}
// appendResourceNameSuffix returns name with "-<suffix>" appended, after
// trimming both arguments. The trimmed name is returned unchanged when either
// argument is blank or the name already ends with "-<suffix>".
func appendResourceNameSuffix(name, suffix string) string {
	base := strings.TrimSpace(name)
	tail := strings.TrimSpace(suffix)
	if base == "" || tail == "" {
		return base
	}
	dashed := "-" + tail
	if strings.HasSuffix(base, dashed) {
		return base
	}
	return base + dashed
}
// resourceSlug converts raw into a lower-case slug: it is trimmed and
// lower-cased, every run of characters outside [a-z0-9] becomes a single
// hyphen, and leading and trailing hyphens are removed. When nothing remains
// the slug is "provider".
func resourceSlug(raw string) string {
	lowered := strings.ToLower(strings.TrimSpace(raw))
	hyphenated := nonSlugPattern.ReplaceAllString(lowered, "-")
	if cleaned := strings.Trim(hyphenated, "-"); cleaned != "" {
		return cleaned
	}
	return "provider"
}
// fallbackString returns the first value that is non-blank after trimming
// whitespace, in its trimmed form, or "" when every value is blank. It has the
// same contract as firstNonEmpty.
func fallbackString(values ...string) string {
	chosen := ""
	for idx := 0; idx < len(values) && chosen == ""; idx++ {
		chosen = strings.TrimSpace(values[idx])
	}
	return chosen
}