Files
sub2api-cn-relay-manager/internal/app/key_self_service_svc.go
phamnazage-jpg 4e2ee087fd feat(vNext.4): implement trusted-subject security chain for portal user key self-service
- Add portal_auth.go: Portal user session auth with HMAC-signed cookies
- Add /api/portal/session/{login,logout,state} endpoints
- Update nginx config template: cookie-to-header trusted proxy pattern
- Update frontend: sync CRM session on login/logout
- Add TRUSTED_SUBJECT_DEPLOY_GUIDE.md with remote43 deployment steps
- Update EXECUTION_BOARD.md: mark trusted-subject blocking issue as resolved

This implements the secure chain:
  Browser → Portal → nginx (cookie→header) → CRM (verify proxy secret)

Required remote43 actions:
1. Generate 64-char hex secret
2. Update .env.crm with TRUSTED_* config
3. Update nginx with cookie map and header injection
4. Restart services

Fixes EXECUTION_BOARD.md 2026-06-08 blocking issue
2026-06-09 07:48:03 +08:00

476 lines
20 KiB
Go

package app
import (
"context"
"crypto/rand"
"crypto/sha256"
"database/sql"
"encoding/hex"
"fmt"
"math/big"
"strconv"
"strings"
"time"
"sub2api-cn-relay-manager/internal/config"
"sub2api-cn-relay-manager/internal/host/sub2api"
"sub2api-cn-relay-manager/internal/metrics"
"sub2api-cn-relay-manager/internal/store/sqlite"
)
const (
	// keyIDAlphabet is the character set that random identifier suffixes are
	// drawn from (lowercase ASCII letters and digits).
	keyIDAlphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
	// defaultKeyRateLimitPerHour is the highest per-subject "create" counter
	// value still accepted within one hourly rate-limit window.
	defaultKeyRateLimitPerHour = 5
	// defaultKeyResetPerDay is the highest per-subject "reset" counter value
	// still accepted within one daily rate-limit window.
	defaultKeyResetPerDay = 2
)

// generateKeyID returns "key_" followed by 12 characters, each drawn
// independently and uniformly from keyIDAlphabet using crypto/rand.
//
// The function has no error return, so a failure of the system random source
// is reported by panicking with a descriptive message. Previously the error
// was discarded, which surfaced as an unexplained nil-pointer dereference on
// the returned *big.Int.
func generateKeyID() string {
	n := big.NewInt(int64(len(keyIDAlphabet)))
	b := make([]byte, 12)
	for i := range b {
		idx, err := rand.Int(rand.Reader, n)
		if err != nil {
			// An identifier built without real randomness must never be issued.
			panic("generate key id: read random source: " + err.Error())
		}
		b[i] = keyIDAlphabet[idx.Int64()]
	}
	return "key_" + string(b)
}
// resolveLogicalGroupHost loads the logical group identified by
// logicalGroupID, picks the first of its routes whose status is active (in the
// order the store lists them; presumably priority order, confirm against the
// store query), loads that route's shadow host and builds a sub2api client
// for it.
//
// On success it returns the group, the chosen route, the host row and the
// client. On failure every non-error result is its zero value and the error
// names the step that failed.
func resolveLogicalGroupHost(ctx context.Context, store *sqlite.DB, logicalGroupID string) (sqlite.LogicalGroup, sqlite.LogicalGroupRoute, sqlite.Host, *sub2api.Client, error) {
	// fail pairs err with zero values so every error path is a single return.
	fail := func(err error) (sqlite.LogicalGroup, sqlite.LogicalGroupRoute, sqlite.Host, *sub2api.Client, error) {
		return sqlite.LogicalGroup{}, sqlite.LogicalGroupRoute{}, sqlite.Host{}, nil, err
	}

	logicalGroup, err := store.LogicalGroups().GetByLogicalGroupID(ctx, logicalGroupID)
	if err != nil {
		return fail(fmt.Errorf("logical group %q: %w", logicalGroupID, err))
	}

	candidates, err := store.LogicalGroupRoutes().ListByLogicalGroupID(ctx, logicalGroupID)
	if err != nil {
		return fail(fmt.Errorf("list routes for %q: %w", logicalGroupID, err))
	}

	// An empty route list and a list without any active entry are reported
	// with the same error, so one scan covers both cases.
	chosen := -1
	for i := range candidates {
		if isActiveStatus(candidates[i].Status) {
			chosen = i
			break
		}
	}
	if chosen < 0 {
		return fail(fmt.Errorf("no active route for logical group %q", logicalGroupID))
	}
	route := candidates[chosen]

	host, err := store.Hosts().GetByHostID(ctx, route.ShadowHostID)
	if err != nil {
		return fail(fmt.Errorf("host %q: %w", route.ShadowHostID, err))
	}

	apiClient, err := newSub2APIClient(host.BaseURL, authFromStoredHost(host))
	if err != nil {
		return fail(fmt.Errorf("host client %q: %w", host.HostID, err))
	}
	return logicalGroup, route, host, apiClient, nil
}
// resolveShadowHostGroupID turns the route's shadow_group_id into the group ID
// used on the host.
//
// A trimmed value that parses as a base-10 int64 is returned unchanged.
// Any other value is treated as a group name and looked up through
// client.ListManagedResources; exactly one match is required. Zero matches
// and multiple matches are both errors.
func resolveShadowHostGroupID(ctx context.Context, client *sub2api.Client, route sqlite.LogicalGroupRoute) (string, error) {
	shadowID := strings.TrimSpace(route.ShadowGroupID)

	// Numeric values are already host group IDs.
	if _, parseErr := strconv.ParseInt(shadowID, 10, 64); parseErr == nil {
		return shadowID, nil
	}

	// Non-numeric values are resolved by name against the host.
	listing, err := client.ListManagedResources(ctx, sub2api.ListManagedResourcesRequest{GroupName: shadowID})
	if err != nil {
		return "", fmt.Errorf("list managed groups for %q: %w", shadowID, err)
	}

	switch matches := len(listing.Groups); {
	case matches == 1:
		return listing.Groups[0].ID, nil
	case matches > 1:
		return "", fmt.Errorf("multiple host groups matched shadow_group_id %q", shadowID)
	default:
		return "", fmt.Errorf("shadow group %q not found on host", shadowID)
	}
}
// ensureSubjectHasAccess calls client.EnsureSubscriptionAccess for the trimmed
// subjectSelector on hostGroupID and returns the API key carried by the
// response, with surrounding whitespace removed.
//
// It returns an error when the host call fails or when the returned key is
// empty after trimming.
func ensureSubjectHasAccess(ctx context.Context, client *sub2api.Client, subjectSelector, hostGroupID string) (apiKey string, err error) {
	request := sub2api.EnsureSubscriptionAccessRequest{
		UserSelector: strings.TrimSpace(subjectSelector),
		GroupID:      hostGroupID,
	}

	grant, ensureErr := client.EnsureSubscriptionAccess(ctx, request)
	if ensureErr != nil {
		return "", fmt.Errorf("ensure subscription access: %w", ensureErr)
	}

	if apiKey = strings.TrimSpace(grant.APIKey); apiKey == "" {
		return "", fmt.Errorf("managed subscription access returned empty api key")
	}
	return apiKey, nil
}
// buildManagedIdentitySelector composes a selector of the form
// "<subjectID>|key:<keyID>|rot:<random id>", with subjectID and keyID trimmed.
// The "rot:" segment is a fresh generateKeyID value, so two calls with the
// same arguments produce different selectors.
func buildManagedIdentitySelector(subjectID, keyID string) string {
	segments := []string{
		strings.TrimSpace(subjectID),
		"key:" + strings.TrimSpace(keyID),
		"rot:" + generateKeyID(),
	}
	return strings.Join(segments, "|")
}
// managedIdentitySelectorForRecord returns the selector to use on the host for
// an existing key record: the record's trimmed ManagedIdentitySelector when it
// is non-empty, otherwise the trimmed OwnerSubjectID. A nil record yields "".
func managedIdentitySelectorForRecord(rec *sqlite.UserKeyRecord) string {
	switch {
	case rec == nil:
		return ""
	case strings.TrimSpace(rec.ManagedIdentitySelector) != "":
		return strings.TrimSpace(rec.ManagedIdentitySelector)
	default:
		// No stored selector: fall back to the owning subject.
		return strings.TrimSpace(rec.OwnerSubjectID)
	}
}
// recordUserKeyFailure calls metrics.RecordUserKeyOperation with the given
// operation and result labels and then returns err unchanged, so a caller can
// record the outcome and return the error in a single statement.
func recordUserKeyFailure(operation, result string, err error) error {
metrics.RecordUserKeyOperation(operation, result)
return err
}
// buildUserKeyHandler returns a UserKeyHandler whose create, list, get, reset,
// pause, resume and delete callbacks work against the SQLite store at
// sqliteDSN. Only the first element of authCfg is used; when it is omitted the
// trusted-subject fields are left empty.
//
// Every callback opens its own store handle and closes it before returning.
// Mutating callbacks write the key change and its audit event in a single
// transaction and record a metric for every outcome they handle.
//
// Behaviour worth knowing before changing this code:
//   - create and reset obtain the key from the host BEFORE the local
//     transaction runs; if that transaction fails, no compensating host call
//     is made here.
//   - Rate-limit counters are incremented before the limit is checked, so
//     rejected attempts still count towards the window.
//   - delete only marks the local record "retired"; no host call is made in
//     this function.
//   - NOTE(review): the literal subject "admin" bypasses the ownership check
//     in get/reset/pause/resume/delete. Confirm that a portal user can never
//     be assigned that subject ID.
func buildUserKeyHandler(sqliteDSN string, authCfg ...config.UserKeyAuthConfig) *UserKeyHandler {
	var cfg config.UserKeyAuthConfig
	if len(authCfg) > 0 {
		cfg = authCfg[0]
	}

	// maskedPreview renders the stored/displayed form of a plaintext key:
	// "sk-****" plus the last four bytes. Keys of four bytes or fewer get no
	// suffix at all; slicing them would either panic (fewer than four bytes)
	// or expose the entire secret (exactly four bytes).
	maskedPreview := func(apiKey string) string {
		const visibleSuffix = 4
		if len(apiKey) <= visibleSuffix {
			return "sk-****"
		}
		return "sk-****" + apiKey[len(apiKey)-visibleSuffix:]
	}

	// mayAccess reports whether subjectID may operate on rec: either it owns
	// the record or it is the literal "admin" subject.
	mayAccess := func(rec *sqlite.UserKeyRecord, subjectID string) bool {
		return rec.OwnerSubjectID == subjectID || subjectID == "admin"
	}

	return &UserKeyHandler{
		TrustedSubjectHeader:     strings.TrimSpace(cfg.TrustedSubjectHeader),
		TrustedProxySecretHeader: strings.TrimSpace(cfg.TrustedProxySecretHeader),
		TrustedProxySecret:       strings.TrimSpace(cfg.TrustedProxySecret),

		// createFn provisions host access for a new key and stores its
		// fingerprint and masked preview. The plaintext key is only returned
		// to the caller; it is not written to the store.
		createFn: func(ctx context.Context, req CreateUserKeyRequest) (CreateUserKeyResponse, error) {
			if strings.TrimSpace(req.SubjectID) == "" {
				metrics.RecordUserKeyOperation("create", "unauthorized")
				return CreateUserKeyResponse{}, &httpError{StatusCode: 401, Code: "unauthorized", Message: "user credentials required"}
			}
			if strings.TrimSpace(req.LogicalGroupID) == "" {
				metrics.RecordUserKeyOperation("create", "bad_request")
				return CreateUserKeyResponse{}, &httpError{StatusCode: 400, Code: "bad_request", Message: "logical_group_id is required"}
			}
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "open_store_error", fmt.Errorf("open store: %w", err))
			}
			defer store.Close()

			// The layout keeps only the UTC hour, so the window key changes
			// once per hour.
			windowStart := time.Now().UTC().Format("2006-01-02T15:00:00Z")
			count, err := store.SubjectRateLimits().IncrementWindow(ctx, req.SubjectID, "create", windowStart)
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "rate_limit_store_error", fmt.Errorf("increment create rate limit: %w", err))
			}
			if count > defaultKeyRateLimitPerHour {
				metrics.RecordUserKeyOperation("create", "rate_limited")
				return CreateUserKeyResponse{}, &httpError{StatusCode: 429, Code: "rate_limited", Message: "create key rate limit exceeded"}
			}

			keyID := generateKeyID()
			managedIdentitySelector := buildManagedIdentitySelector(req.SubjectID, keyID)

			// Resolve logical group -> host -> group ID -> ensure subscription access.
			_, route, hostRow, client, err := resolveLogicalGroupHost(ctx, store, req.LogicalGroupID)
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "resolve_host_error", fmt.Errorf("resolve host for %q: %w", req.LogicalGroupID, err))
			}
			hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err))
			}
			apiKey, err := ensureSubjectHasAccess(ctx, client, managedIdentitySelector, hostGroupID)
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "ensure_access_error", fmt.Errorf("ensure access for %q: %w", req.LogicalGroupID, err))
			}

			fingerprint := "sha256:" + sha256Hex(apiKey)
			masked := maskedPreview(apiKey)
			err = store.WithTx(ctx, func(q *sqlite.Queries) error {
				if _, err := q.UserKeys.Create(ctx, sqlite.UserKeyRecord{
					KeyID:                   keyID,
					OwnerSubjectID:          req.SubjectID,
					ManagedIdentitySelector: managedIdentitySelector,
					KeyFingerprint:          fingerprint,
					MaskedPreview:           masked,
					DisplayName:             strings.TrimSpace(req.DisplayName),
					LogicalGroupID:          strings.TrimSpace(req.LogicalGroupID),
					AllowedModels:           req.AllowedModels,
					AdminStatus:             "active",
					QuotaStatus:             "ok",
				}); err != nil {
					return fmt.Errorf("create key: %w", err)
				}
				if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{
					EventID:        generateKeyID(),
					ActorSubjectID: req.SubjectID,
					ActorRole:      "user",
					TargetKeyID:    keyID,
					Action:         "create",
					Result:         "success",
					Reason:         "self service create via host " + hostRow.HostID,
				}); err != nil {
					return fmt.Errorf("audit create key: %w", err)
				}
				return nil
			})
			if err != nil {
				return CreateUserKeyResponse{}, recordUserKeyFailure("create", "db_tx_error", err)
			}

			metrics.RecordUserKeyOperation("create", "success")
			return CreateUserKeyResponse{
				Key: UserKeyMeta{
					KeyID:          keyID,
					MaskedPreview:  masked,
					DisplayName:    strings.TrimSpace(req.DisplayName),
					LogicalGroupID: strings.TrimSpace(req.LogicalGroupID),
					AllowedModels:  req.AllowedModels,
					AdminStatus:    "active",
					QuotaStatus:    "ok",
				},
				PlaintextKey: apiKey,
			}, nil
		},

		// listFn returns metadata for every key owned by subjectID.
		listFn: func(ctx context.Context, subjectID string) ([]UserKeyMeta, error) {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return nil, fmt.Errorf("open store: %w", err)
			}
			defer store.Close()
			records, err := store.UserKeys().ListByOwner(ctx, subjectID)
			if err != nil {
				return nil, fmt.Errorf("list keys: %w", err)
			}
			metas := make([]UserKeyMeta, len(records))
			for i, r := range records {
				metas[i] = UserKeyMeta{
					KeyID:          r.KeyID,
					MaskedPreview:  r.MaskedPreview,
					DisplayName:    r.DisplayName,
					LogicalGroupID: r.LogicalGroupID,
					AllowedModels:  r.AllowedModels,
					AdminStatus:    r.AdminStatus,
					QuotaStatus:    r.QuotaStatus,
					LastUsedAt:     r.LastUsedAt,
					CreatedAt:      r.CreatedAt,
					ExpiresAt:      r.ExpiresAt,
				}
			}
			return metas, nil
		},

		// getFn returns metadata for one key. A key the subject may not access
		// is reported with the same "not found" error text as in the other
		// callbacks.
		getFn: func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return UserKeyMeta{}, fmt.Errorf("open store: %w", err)
			}
			defer store.Close()
			rec, err := store.UserKeys().GetByID(ctx, keyID)
			if err != nil {
				return UserKeyMeta{}, fmt.Errorf("get key: %w", err)
			}
			if !mayAccess(rec, subjectID) {
				return UserKeyMeta{}, fmt.Errorf("key %q not found", keyID)
			}
			return UserKeyMeta{
				KeyID:          rec.KeyID,
				MaskedPreview:  rec.MaskedPreview,
				DisplayName:    rec.DisplayName,
				LogicalGroupID: rec.LogicalGroupID,
				AllowedModels:  rec.AllowedModels,
				AdminStatus:    rec.AdminStatus,
				QuotaStatus:    rec.QuotaStatus,
				LastUsedAt:     rec.LastUsedAt,
				CreatedAt:      rec.CreatedAt,
				ExpiresAt:      rec.ExpiresAt,
			}, nil
		},

		// resetFn issues a new secret for an existing key by requesting host
		// access under a freshly built selector, then stores the new selector,
		// fingerprint and preview and sets the status to "active".
		resetFn: func(ctx context.Context, keyID, subjectID string) (ResetUserKeyResponse, error) {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "open_store_error", fmt.Errorf("open store: %w", err))
			}
			defer store.Close()
			rec, err := store.UserKeys().GetByID(ctx, keyID)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "get_key_error", fmt.Errorf("get key: %w", err))
			}
			if !mayAccess(rec, subjectID) {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "not_found", fmt.Errorf("key %q not found", keyID))
			}

			// The layout keeps only the UTC date, so the window key changes
			// once per day. The counter is keyed by the acting subject.
			windowStart := time.Now().UTC().Format("2006-01-02T00:00:00Z")
			count, err := store.SubjectRateLimits().IncrementWindow(ctx, subjectID, "reset", windowStart)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "rate_limit_store_error", fmt.Errorf("increment reset rate limit: %w", err))
			}
			if count > defaultKeyResetPerDay {
				metrics.RecordUserKeyOperation("reset", "rate_limited")
				return ResetUserKeyResponse{}, &httpError{StatusCode: 429, Code: "rate_limited", Message: "reset key rate limit exceeded"}
			}

			// Re-resolve host access to get a fresh key.
			_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "resolve_host_error", fmt.Errorf("resolve host for %q: %w", rec.LogicalGroupID, err))
			}
			hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err))
			}
			managedIdentitySelector := buildManagedIdentitySelector(rec.OwnerSubjectID, keyID)
			newPlaintext, err := ensureSubjectHasAccess(ctx, client, managedIdentitySelector, hostGroupID)
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "ensure_access_error", fmt.Errorf("ensure access on reset for %q: %w", rec.LogicalGroupID, err))
			}

			hostFingerprint := "sha256:" + sha256Hex(newPlaintext)
			masked := maskedPreview(newPlaintext)
			err = store.WithTx(ctx, func(q *sqlite.Queries) error {
				if err := q.UserKeys.UpdateSecret(ctx, keyID, managedIdentitySelector, hostFingerprint, masked, "active"); err != nil {
					return fmt.Errorf("reset key: %w", err)
				}
				if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{
					EventID:        generateKeyID(),
					ActorSubjectID: subjectID,
					ActorRole:      "user",
					TargetKeyID:    keyID,
					Action:         "reset",
					Result:         "success",
					Reason:         "self service reset",
				}); err != nil {
					return fmt.Errorf("audit reset key: %w", err)
				}
				return nil
			})
			if err != nil {
				return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "db_tx_error", err)
			}

			metrics.RecordUserKeyOperation("reset", "success")
			return ResetUserKeyResponse{PlaintextKey: newPlaintext, MaskedPreview: masked, AdminStatus: "active"}, nil
		},

		// pauseFn pauses host access for the key's stored selector and then
		// marks the local record "paused". The returned metadata carries only
		// the key ID, masked preview and new status.
		pauseFn: func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "open_store_error", fmt.Errorf("open store: %w", err))
			}
			defer store.Close()
			rec, err := store.UserKeys().GetByID(ctx, keyID)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "get_key_error", fmt.Errorf("get key: %w", err))
			}
			if !mayAccess(rec, subjectID) {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "not_found", fmt.Errorf("key %q not found", keyID))
			}
			_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "resolve_host_error", fmt.Errorf("resolve host for pause %q: %w", rec.LogicalGroupID, err))
			}
			hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for pause %q: %w", route.ShadowGroupID, err))
			}
			if err := client.PauseManagedSubscriptionAccess(ctx, managedIdentitySelectorForRecord(rec), hostGroupID); err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "pause_access_error", fmt.Errorf("pause managed subscription access: %w", err))
			}
			err = store.WithTx(ctx, func(q *sqlite.Queries) error {
				if err := q.UserKeys.UpdateStatus(ctx, keyID, "paused"); err != nil {
					return fmt.Errorf("pause key: %w", err)
				}
				if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{
					EventID:        generateKeyID(),
					ActorSubjectID: subjectID,
					ActorRole:      "user",
					TargetKeyID:    keyID,
					Action:         "pause",
					Result:         "success",
					Reason:         strings.TrimSpace(reason),
				}); err != nil {
					return fmt.Errorf("audit pause key: %w", err)
				}
				return nil
			})
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("pause", "db_tx_error", err)
			}
			metrics.RecordUserKeyOperation("pause", "success")
			return UserKeyMeta{KeyID: keyID, MaskedPreview: rec.MaskedPreview, AdminStatus: "paused"}, nil
		},

		// resumeFn resumes host access for the key's stored selector and then
		// marks the local record "active". The returned metadata carries only
		// the key ID, masked preview and new status.
		resumeFn: func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "open_store_error", fmt.Errorf("open store: %w", err))
			}
			defer store.Close()
			rec, err := store.UserKeys().GetByID(ctx, keyID)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "get_key_error", fmt.Errorf("get key: %w", err))
			}
			if !mayAccess(rec, subjectID) {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "not_found", fmt.Errorf("key %q not found", keyID))
			}
			_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "resolve_host_error", fmt.Errorf("resolve host for resume %q: %w", rec.LogicalGroupID, err))
			}
			hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for resume %q: %w", route.ShadowGroupID, err))
			}
			if err := client.ResumeManagedSubscriptionAccess(ctx, managedIdentitySelectorForRecord(rec), hostGroupID); err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "resume_access_error", fmt.Errorf("resume managed subscription access: %w", err))
			}
			err = store.WithTx(ctx, func(q *sqlite.Queries) error {
				if err := q.UserKeys.UpdateStatus(ctx, keyID, "active"); err != nil {
					return fmt.Errorf("resume key: %w", err)
				}
				if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{
					EventID:        generateKeyID(),
					ActorSubjectID: subjectID,
					ActorRole:      "user",
					TargetKeyID:    keyID,
					Action:         "resume",
					Result:         "success",
					Reason:         "self service resume",
				}); err != nil {
					return fmt.Errorf("audit resume key: %w", err)
				}
				return nil
			})
			if err != nil {
				return UserKeyMeta{}, recordUserKeyFailure("resume", "db_tx_error", err)
			}
			metrics.RecordUserKeyOperation("resume", "success")
			return UserKeyMeta{KeyID: keyID, MaskedPreview: rec.MaskedPreview, AdminStatus: "active"}, nil
		},

		// deleteFn soft-deletes a key by setting its status to "retired" and
		// writing an audit event.
		deleteFn: func(ctx context.Context, keyID, subjectID string) error {
			store, err := sqlite.Open(ctx, sqliteDSN)
			if err != nil {
				return recordUserKeyFailure("delete", "open_store_error", fmt.Errorf("open store: %w", err))
			}
			defer store.Close()
			rec, err := store.UserKeys().GetByID(ctx, keyID)
			if err != nil {
				return recordUserKeyFailure("delete", "get_key_error", fmt.Errorf("get key: %w", err))
			}
			if !mayAccess(rec, subjectID) {
				return recordUserKeyFailure("delete", "not_found", fmt.Errorf("key %q not found", keyID))
			}
			err = store.WithTx(ctx, func(q *sqlite.Queries) error {
				if err := q.UserKeys.UpdateStatus(ctx, keyID, "retired"); err != nil {
					// Matched on message text rather than errors.Is, so this
					// also catches a store error that embeds the sql.ErrNoRows
					// text without wrapping it.
					if strings.Contains(err.Error(), sql.ErrNoRows.Error()) {
						return fmt.Errorf("key %q not found", keyID)
					}
					return fmt.Errorf("retire key: %w", err)
				}
				if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{
					EventID:        generateKeyID(),
					ActorSubjectID: subjectID,
					ActorRole:      "user",
					TargetKeyID:    keyID,
					Action:         "delete",
					Result:         "success",
					Reason:         "self service retire",
				}); err != nil {
					return fmt.Errorf("audit retire key: %w", err)
				}
				return nil
			})
			if err != nil {
				return recordUserKeyFailure("delete", "db_tx_error", err)
			}
			metrics.RecordUserKeyOperation("delete", "success")
			return nil
		},
	}
}
// sha256Hex returns the SHA-256 digest of s as a lowercase hexadecimal string.
func sha256Hex(s string) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	hasher.Write([]byte(s))
	return hex.EncodeToString(hasher.Sum(nil))
}