From 4e2ee087fd5286f6e5cd9d9d0287b0e6ec147d53 Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Tue, 9 Jun 2026 07:48:03 +0800 Subject: [PATCH] feat(vNext.4): implement trusted-subject security chain for portal user key self-service MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add portal_auth.go: Portal user session auth with HMAC-signed cookies - Add /api/portal/session/{login,logout,state} endpoints - Update nginx config template: cookie-to-header trusted proxy pattern - Update frontend: sync CRM session on login/logout - Add TRUSTED_SUBJECT_DEPLOY_GUIDE.md with remote43 deployment steps - Update EXECUTION_BOARD.md: mark trusted-subject blocking issue as resolved This implements the secure chain: Browser → Portal → nginx (cookie→header) → CRM (verify proxy secret) Required remote43 actions: 1. Generate 64-char hex secret 2. Update .env.crm with TRUSTED_* config 3. Update nginx with cookie map and header injection 4. Restart services Fixes EXECUTION_BOARD.md 2026-06-08 blocking issue --- .env.example | 6 + deploy/tksea-portal/index.html | 35 +- .../nginx.sub.tksea.top.conf.example | 20 + docs/2026-06-08-SYSTEMATIC_REVIEW_REPORT.md | 412 ++++++++++++++++++ docs/EXECUTION_BOARD.md | 27 ++ docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md | 194 +++++++++ internal/app/app_test.go | 8 +- internal/app/bootstrap.go | 2 +- internal/app/http_api.go | 89 +++- internal/app/key_self_service.go | 56 ++- .../app/key_self_service_handlers_test.go | 110 ++++- internal/app/key_self_service_svc.go | 58 ++- internal/app/key_self_service_test.go | 197 ++++++++- internal/app/portal_auth.go | 311 +++++++++++++ internal/app/portal_auth_test.go | 135 ++++++ internal/app/public_chat_metrics_test.go | 184 ++++++++ .../app/user_key_operation_metrics_test.go | 4 +- internal/config/config.go | 54 ++- internal/config/config_test.go | 24 + ...16_user_keys_managed_identity_selector.sql | 2 + .../user_key_control_plane_repo_test.go | 6 +- internal/store/sqlite/user_keys_repo.go | 52 +-- .../verify_user_key_self_service.sh | 34 +- scripts/deploy/deploy_tksea_portal.sh | 8 + scripts/deploy/remote43_patched_stack_lib.sh | 10 + 25 files changed, 1861 insertions(+), 177 deletions(-) create mode 100644 docs/2026-06-08-SYSTEMATIC_REVIEW_REPORT.md create mode 100644 docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md create mode 100644 internal/app/portal_auth.go create mode 100644 internal/app/portal_auth_test.go create mode 100644 internal/store/migrations/0016_user_keys_managed_identity_selector.sql diff --git a/.env.example b/.env.example index d15dfbf9..e898c701 100644 --- a/.env.example +++ b/.env.example @@ -8,3 +8,9 @@ SUB2API_CRM_ROUTE_RUNTIME_BACKEND=memory SUB2API_CRM_REDIS_ADDR= SUB2API_CRM_REDIS_PASSWORD= SUB2API_CRM_REDIS_DB=0 + +# Trusted Subject Proxy Configuration (for portal user key self-service) +# These must be synchronized with nginx config +SUB2API_CRM_TRUSTED_SUBJECT_HEADER=X-CRM-Authenticated-Subject +SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER=X-CRM-Trusted-Proxy +SUB2API_CRM_TRUSTED_PROXY_SECRET=change-me-64-char-hex-secret-for-production diff --git a/deploy/tksea-portal/index.html b/deploy/tksea-portal/index.html index 052765d7..cecb6160 100644 --- a/deploy/tksea-portal/index.html +++ b/deploy/tksea-portal/index.html @@ -508,7 +508,14 @@ $("access-token").value = state.accessToken; } - function clearSession() { + async function clearSession() { + // 同时登出 CRM session(清除 httpOnly cookie) + try { + await requestControlPlane("/portal/session/logout", { method: "POST" }); + } catch (e) { + // 忽略错误 + } + state.accessToken = ""; state.user = null; state.groups = []; @@ -574,11 +581,6 @@ async function requestControlPlane(path, options = {}) { const headers = Object.assign({ Accept: "application/json" }, options.headers || {}); - const subjectID = portalSubjectID(); - if (!subjectID) { - throw new Error("当前缺少 portal subject,请先登录"); - } - headers["X-Portal-Subject"] = subjectID; const res = await fetch("/portal-admin-api/api" + path, { method: options.method || "GET", headers, @@ -1220,7 +1222,7 @@ renderCurlExample($("api-key").value.trim(), selectedLogicalGroupRow()); renderAll(); } catch (err) { - clearSession(); + await clearSession(); statusPill("bad", "登录失效"); setStatus("auth-status", "bad", "会话已失效,请重新登录:" + err.message); } @@ -1242,11 +1244,24 @@ setBusy("auth-btn", true); setStatus("auth-status", "", "正在验证账号…"); try { - // 先尝试登录 + // 先尝试登录宿主 const data = await requestJSON("/auth/login", "POST", { email, password, turnstile_token: "" }, false); rememberAuth(data); + + // 同时登录 CRM session(设置 httpOnly cookie 供 user-key API 使用) + try { + await requestControlPlane("/portal/session/login", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email, password }) + }); + } catch (crmErr) { + console.warn("CRM session login failed:", crmErr.message); + // 不影响宿主导航,继续 + } + setStatus("auth-status", "ok", "登录成功,正在同步你的账号状态与申请资格。"); await refreshUserState(); } catch (loginErr) { @@ -1341,8 +1356,8 @@ $("create-key-btn").addEventListener("click", handleCreateKey); $("refresh-session-btn").addEventListener("click", refreshUserState); - $("logout-btn").addEventListener("click", () => { - clearSession(); + $("logout-btn").addEventListener("click", async () => { + await clearSession(); statusPill("warn", "已退出"); }); $("auth-btn").addEventListener("click", handleAuth); diff --git a/deploy/tksea-portal/nginx.sub.tksea.top.conf.example b/deploy/tksea-portal/nginx.sub.tksea.top.conf.example index 6f5e53b0..77aa9fe5 100644 --- a/deploy/tksea-portal/nginx.sub.tksea.top.conf.example +++ b/deploy/tksea-portal/nginx.sub.tksea.top.conf.example @@ -6,6 +6,17 @@ # - /portal-proxy/ 是页面调用宿主用户态 API 的同域代理 # - /portal-admin-api/ 是页面调用 CRM 管理 API 的同域代理 # - /kimi/ 与 /kimi-v1/ 继续保留,兼容旧的 Kimi 专用客户端配置 +# +# 安全注意事项: +# - portal-subject 从 cookie 提取,由后端 /api/portal/session/login 设置 httpOnly cookie +# - CRM 验证 X-CRM-Trusted-Proxy header 确保请求来自受信 nginx +# - 两者必须同时配置才能启用 user-key self-service + +# 从 httpOnly cookie 提取 portal subject +map $http_cookie $portal_subject { + default ""; + ~*crm_session=([^;]+) $1; +} location = /portal { return 302 /portal/; @@ -36,11 +47,20 @@ location /portal-proxy/ { } location /portal-admin-api/ { + # 必须由受信登录/鉴权层把用户 subject 放进 $portal_subject,不能信任浏览器自带 header。 + # 同时 CRM 需配置: + # SUB2API_CRM_TRUSTED_SUBJECT_HEADER=X-CRM-Authenticated-Subject + # SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER=X-CRM-Trusted-Proxy + # SUB2API_CRM_TRUSTED_PROXY_SECRET= proxy_pass http://127.0.0.1:18190/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; + # 关键:从验证过的 cookie 提取并注入 subject + proxy_set_header X-CRM-Authenticated-Subject $portal_subject; + # 受信代理密钥(必须与 CRM 配置一致) + proxy_set_header X-CRM-Trusted-Proxy "REPLACE_WITH_64_CHAR_HEX_SECRET"; proxy_http_version 1.1; } diff --git a/docs/2026-06-08-SYSTEMATIC_REVIEW_REPORT.md b/docs/2026-06-08-SYSTEMATIC_REVIEW_REPORT.md new file mode 100644 index 00000000..a491b40d --- /dev/null +++ b/docs/2026-06-08-SYSTEMATIC_REVIEW_REPORT.md @@ -0,0 +1,412 @@ +# sub2api-cn-relay-manager 系统化全面 Review 报告 + +日期:2026-06-08 +审查范围:`cmd/`、`internal/`、`tests/integration/`、`scripts/`、`deploy/`、`Dockerfile*`、`.github/workflows/ci.yml`、`README.md`、`docs/` 中与运行/验收/真相直接相关文档 +审查方式:静态源码审查 + 文档/脚本一致性核对 + 本地质量门禁执行 + +--- + +## 一、执行结论 + +结论:**不建议按“严格生产级通过”评价当前代码库**。 +原因不是基础质量差,而是存在几类会直接影响认证边界、网关语义和 key 治理正确性的系统性问题。 + +本次审查同时确认: + +- 当前项目的 **本地质量门禁是通过的**:`bash ./scripts/test/verify_quality_gates.sh` 全部 PASS。 +- 代码库在测试覆盖、SQLite repo 测试密度、真实验收 artifact、前端回归脚本方面,已经具备较强工程化基础。 +- 但以下问题仍然属于**实现语义级缺陷**,不会被现有门禁完全拦住。 + +--- + +## 二、已执行验证 + +已实际执行: + +```bash +bash ./scripts/test/verify_quality_gates.sh +``` + +观察结果: + +- `test_tksea_portal_assets.sh`:PASS +- `verify_frontend_smoke.sh`:PASS +- `verify_vnext_slo_release_gate.sh`:PASS +- `gofmt -l .`:PASS +- `go vet ./...`:PASS +- `go test -cover ./internal/...`:PASS +- `go test ./tests/integration/... -count=1`:PASS +- 核心覆盖率: + - `internal/access` 84.0% + - `internal/app` 70.1% + - `internal/provision` 80.8% + - `internal/store/sqlite` 77.6% + - `internal/pack` 75.7% + +说明:**门禁通过 ≠ 业务语义无缺陷**。以下问题均是在门禁通过前提下仍然成立。 + +--- + +## 三、关键问题清单 + +### Critical-1:`/api/keys` 公共接口存在 subject 伪造风险,属于认证边界缺陷 + +- **文件**: + - `internal/app/http_api.go` — `NewAPIHandlerWithAuth` + - `internal/app/key_self_service.go` — `(*UserKeyHandler).extractSubjectID` + - `deploy/tksea-portal/nginx.sub.tksea.top.conf.example` +- **问题**:用户 key 自助接口未绑定可信用户身份源,而是直接信任来路请求头中的 subject。 +- **证据**: + 1. `NewAPIHandlerWithAuth` 将以下接口直接暴露为公共接口,没有 `requireAdminAccess` 包裹: + - `POST /api/keys` + - `GET /api/keys` + - `GET /api/keys/{key_id}` + - `POST /api/keys/{key_id}/reset` + - `POST /api/keys/{key_id}/pause` + - `POST /api/keys/{key_id}/resume` + - `DELETE /api/keys/{key_id}` + 2. `extractSubjectID()` 直接接受 `X-Portal-Subject` / `X-User-Subject` / `X-Forwarded-User`,任一非空即作为真实 subject 使用;否则甚至会退化为用 `Authorization: Bearer ...` 前 8 位拼出 `skeleton_user_*`。 + 3. `nginx.sub.tksea.top.conf.example` 中 `/portal-admin-api/` 只是普通反代,没有清洗或重建这些 header。 + 4. 前端 `deploy/tksea-portal/index.html` 也是在浏览器侧自行构造 `X-Portal-Subject` 再发给 `/portal-admin-api/api/keys`。 +- **影响**: + - 任意能访问 `/portal-admin-api/api/keys*` 的调用方,只要伪造 `X-Portal-Subject`,就可能代替其他用户创建、列出、重置、暂停、恢复或退役 key。 + - 这是直接的对象级授权绕过,不是低优先级硬化项。 +- **建议**: + - `/api/keys*` 必须只接受**服务端可信身份**,不能信任浏览器自填 header。 + - 最小修复方案:由反向代理/认证层注入不可伪造身份,并在 CRM 侧校验来源;更稳妥的是 CRM 自己校验宿主登录态/JWT,再从服务端导出 subject。 + - 删除 `skeleton_user_*` 这种回退逻辑;它适合 demo,不适合生产认证边界。 + +### Critical-2:公网 `/v1/chat/completions` 会把上游失败伪装成本地成功 `HTTP 200` + +- **文件**: + - `internal/app/http_api.go` — `handlePublicV1ChatCompletions` + - `internal/app/route_proxy_api.go` — `proxyChatCompletionToShadowHost` +- **问题**:当 shadow host 返回 4xx/5xx 时,CRM 仍然向客户端返回 `200 OK`,并把结果记成 `ok` 指标。 +- **证据**: + 1. `proxyChatCompletionToShadowHost()` 在上游非 2xx 时,只设置: + - `info.OK = false` + - `info.UpstreamStatus = <4xx/5xx>` + - `info.ErrorClass = ...` + 但**不返回 error**。 + 2. `handlePublicV1ChatCompletions()` 只有 `proxyChat(...)` 返回 `err` 才走错误分支;否则无论 `result.Forward.OK` 是否为 false,最终都: + - `metrics.RecordUserKeyChatRequest("ok")` + - `w.WriteHeader(http.StatusOK)` + 3. 代码甚至在 `!result.Forward.OK` 时,只是往 JSON 里附加 `upstream_http_code` 字段,而不是透传失败状态码。 +- **影响**: + - 客户端会把真实失败误判为成功,协议语义被破坏。 + - SLO/告警指标会把失败流量记成 `ok`,观测真相被污染。 + - 与执行板中强调的“真实闭环/失败路径可观测”目标相冲突。 +- **建议**: + - 当 `result.Forward.OK == false` 时,必须返回对应 HTTP 状态码,并统一错误体。 + - `user_key_chat_requests_total` 必须按真实 outcome 记录,而不是只要本地代理函数没抛错就记 `ok`。 + - 为 4xx/5xx、上游非 JSON、超时等情形补回归测试。 + +### High-1:同一 subject + logical group 下的多个 key 实际会坍缩成同一把宿主 key,破坏 key 级治理语义 + +- **文件**: + - `internal/host/sub2api/subscription_access.go` — `buildManagedSubscriptionIdentity`, `EnsureSubscriptionAccess` + - `internal/app/key_self_service_svc.go` — `createFn`, `resetFn` + - `internal/store/sqlite/user_keys_repo.go` — `ListByFingerprint` + - `internal/app/http_api.go` — `handlePublicV1ChatCompletions` +- **问题**:项目对同一 `subject + group` 生成的是**确定性宿主 key**,而不是每条 KeyRecord 独立的真实 key;这会让“创建多把 key / reset 某一把 key / pause 某一把 key”都失去 key 级隔离。 +- **证据**: + 1. `buildManagedSubscriptionIdentity(selector, groupID)` 使用 `selector|groupID` 的 SHA256 生成固定: + - `CustomKey = "sk-relay-" + keyHash` + 2. `EnsureSubscriptionAccess()` 在非 real-subscription 模式下最终返回 `SubscriptionAccessRef{APIKey: identity.CustomKey}`。 + 3. `createFn` / `resetFn` 都调用 `ensureSubjectHasAccess()`,因此同一 subject + group 会反复拿到**同一个明文 key**。 + 4. `ListByFingerprint()` 允许返回多条同 fingerprint 记录,`handlePublicV1ChatCompletions()` 只取 `keys[0]`。 + 5. 文档 `docs/2026-06-04-KEY_SELF_SERVICE_API.md` 明确要求:`reset` 后旧 key 失效,新 key 唯一可用。 +- **影响**: + - 一个用户在同 logical group 下创建两条记录,本质上可能只是同一把宿主 key 的多个投影。 + - `pause/delete` 某条记录不一定能准确影响该明文 key 的可用性;效果取决于哪条重复 fingerprint 记录排在最前。 + - `reset` 不一定产生新 key,更谈不上“旧 key 立即失效”。 +- **建议**: + - 把“KeyRecord 标识”和“宿主侧实际 key 材料”一一对应,禁止同一有效明文 key 被多个活跃记录共享。 + - 若业务上故意做“subject 级共享 key”,就必须删除当前 key-level pause/reset/delete 语义,避免伪装成独立 key。 + - 至少补唯一约束或冲突处理:活跃记录不能共享同一 `key_fingerprint`。 + +### High-2:`allowed_models` 已进入 API/DB/UI,但运行时完全未执行授权 + +- **文件**: + - `internal/app/key_self_service.go` + - `internal/app/key_self_service_svc.go` + - `internal/app/http_api.go` — `handlePublicV1ChatCompletions` + - `internal/store/migrations/0015_user_keys.sql` + - `docs/2026-06-04-KEY_SELF_SERVICE_API.md` +- **问题**:key 创建时接收并持久化 `allowed_models`,UI 也展示它,但实际调用 `/v1/chat/completions` 时没有任何模型级校验。 +- **证据**: + 1. `CreateUserKeyRequest`、`UserKeyMeta`、`UserKeyRecord` 均含 `AllowedModels`。 + 2. Portal 页面创建 key 时会提交 `allowed_models`。 + 3. 代码搜索结果显示,`allowed_models` 仅出现在 CRUD / 文档 / 测试数据中;在网关调用路径上没有任何“模型是否允许”的判断。 + 4. `handlePublicV1ChatCompletions()` 仅校验:`admin_status`、`quota_status`、`model 非空`,随后直接把 `openAIReq.Model` 转发。 +- **影响**: + - 对外宣称的 key 粒度模型授权只是展示字段,不是实际控制。 + - 用户可绕过 UI 选择,直接调用同 logical group 下任意路由可达模型。 +- **建议**: + - 在 `handlePublicV1ChatCompletions()` 入站阶段强制校验 `model ∈ allowed_models`。 + - 若 `allowed_models` 只是提示字段,应从 API/文档/UI 中降级为 advisory,避免误导。 + +### High-3:`expires_at` 生命周期契约未落地,过期 key 仍可继续使用 + +- **文件**: + - `internal/store/migrations/0015_user_keys.sql` + - `internal/app/key_self_service.go` + - `internal/app/http_api.go` — `handlePublicV1ChatCompletions` + - `docs/2026-06-04-KEY_SELF_SERVICE_API.md` + - `deploy/tksea-portal/index.html` +- **问题**:系统保存并展示 `expires_at`,但网关与自助接口都不执行过期校验。 +- **证据**: + 1. migration 已定义 `expires_at` 字段。 + 2. API 文档把它定义为 KeyRecord 字段,前端也展示“到期时间”。 + 3. 代码搜索中,`expires_at` 只出现在 repo 读写、页面展示和文档;`handlePublicV1ChatCompletions()` 未检查它。 +- **影响**: + - 过期只是 UI 文案,不是授权边界。 + - 运维、用户和文档会误以为 key 过期后自动失效,实际不会。 +- **建议**: + - 在公共网关路径显式拒绝已过期 key。 + - 为列表/详情接口补一个派生状态,避免前后端各自解释过期语义。 + +### High-4:CI 与仓库声明的质量门禁不一致,且 Docker 验证基本失效 + +- **文件**: + - `AGENTS.md` + - `scripts/test/verify_quality_gates.sh` + - `.github/workflows/ci.yml` + - `Dockerfile` +- **问题**:CI 没有执行仓库声明的完整质量门禁;Docker job 的“镜像测试”命令还指向错误路径并被 `|| true` 吞掉。 +- **证据**: + 1. 项目 `AGENTS.md` 明确要求:前端资产检查、frontend smoke、`go vet`、`go test -cover ./internal/...` 阈值、`go test ./tests/integration/...`、执行板同步等。 + 2. `verify_quality_gates.sh` 已把这些门禁收口成一条脚本。 + 3. `.github/workflows/ci.yml` 并未调用 `verify_quality_gates.sh`;只跑了: + - `go build` + - `go test -race ./internal/...` + - 全局 coverage 60% + - golangci-lint / gosec / govulncheck + 4. Docker job 中: + - `docker run --rm sub2api-cn-relay-manager:test /app/server --version || true` + - `docker run --rm sub2api-cn-relay-manager:test /app/cli --help || true` + 但镜像实际入口和二进制路径在 `Dockerfile` 中是 `/usr/local/bin/sub2api-cn-relay-manager`,并不存在 `/app/server`、`/app/cli`。 +- **影响**: + - CI 绿并不代表仓库门禁绿。 + - Docker job 即使完全失效也会继续通过,无法证明镜像可运行。 +- **建议**: + - CI 直接收口到 `bash ./scripts/test/verify_quality_gates.sh`。 + - Docker 验证改成真实入口探活,例如启动容器后访问 `/healthz`。 + - 删除 `|| true` 这种吞错写法。 + +### Medium-1:`pause` API 丢弃请求里的 `reason`,与文档承诺不一致 + +- **文件**: + - `internal/app/key_self_service.go` — `handlePauseUserKey` + - `internal/app/key_self_service_svc.go` — `pauseFn` + - `docs/2026-06-04-KEY_SELF_SERVICE_API.md` +- **问题**:文档声明 `POST /api/keys/:id/pause` 请求体可选 `reason`,且“暂停原因应对用户可见”;实际 handler 完全不解析请求体,直接把空字符串传给服务层。 +- **证据**: + 1. `handlePauseUserKey()` 直接调用 `pauseFn(..., "")`。 + 2. `pauseFn()` 虽然接收 `reason string`,也会写入审计事件,但现在永远拿不到请求值。 + 3. 文档明确写了“请求体可选 `reason`”“暂停原因应对用户可见”。 +- **影响**: + - 审计记录缺失关键上下文。 + - 文档、API 和实际行为不一致。 +- **建议**: + - 明确 pause request schema,解析并持久化 reason。 + - 若短期不支持,删除文档承诺和“对用户可见”的表述。 + +### Medium-2:`last_used_at` 只定义不更新,运营可观测字段失真 + +- **文件**: + - `internal/store/sqlite/user_keys_repo.go` — `TouchLastUsed` + - `internal/app/http_api.go` — `handlePublicV1ChatCompletions` + - `docs/2026-06-04-KEY_SELF_SERVICE_API.md` + - `deploy/tksea-portal/index.html` +- **问题**:仓库提供了 `TouchLastUsed()`,API/UI 也展示 `last_used_at`,但实际调用链没有地方更新它。 +- **证据**: + 1. `TouchLastUsed()` 已实现。 + 2. 搜索结果显示没有任何调用方。 + 3. 页面展示“最近使用”,文档也把它定义为标准字段。 +- **影响**: + - 门户与运营人员会看到长期为空或过期的数据。 + - 配额治理、冷 key 清理、审计回溯都会缺失基本事实源。 +- **建议**: + - 在成功代理调用后更新 `last_used_at`。 + - 如果担心同步写放大,可异步写或批量聚合,但不能一直只定义不落地。 + +### Medium-3:部署脚本默认值过于危险,容易误打生产环境 + +- **文件**:`scripts/deploy/deploy_tksea_portal.sh` +- **问题**:部署脚本内置了具体生产 IP、默认 SSH key 本地路径和生产端口。 +- **证据**: + - `KEY=/home/long/下载/zjsea.pem` + - `REMOTE=ubuntu@43.155.133.187` + - `REMOTE_CRM_PORT=18190` +- **影响**: + - 新环境复用困难。 + - 在错误上下文直接执行时,有误操作生产的风险。 +- **建议**: + - 把这些值移到显式 env / `.env.deploy.example`。 + - 缺省值应偏向安全失败,而不是默认命中生产。 + +## 三点五、第二轮异构方法补充发现 + +第二轮没有复用上一轮的主阅读路径,改用以下方法补查: + +- 异常模式扫描:`search` 检查 `localStorage` 持久化、忽略错误、固定凭证生成、鉴权回退等模式 +- 重点源码回读:对命中的 handler / portal 页面 / host adapter / repo 解码逻辑做定点复核 +- LSP 诊断尝试:当前会话未在项目根激活 Go LSP,因此未产出额外语言服务器诊断;结论以下述源码证据为准 + +## 四、2026-06-08 当前整改状态(追加) + +- 已本地修复并验证: + - Critical-2:公网 `/v1/chat/completions` 对上游失败已返回真实失败状态,不再包装成 `200/ok` + - High-1:同 `subject + logical_group` 的多条 key record 已改为独立 `managed_identity_selector`,`create/reset/pause/resume` 不再复用同一宿主 key + - High-2:`allowed_models` 已在公网 chat 入口强制执行 + - High-3:`expires_at` 已在公网 chat 入口强制执行 + - Medium-1:`pause` 已解析请求体 `reason` + - Medium-2:成功 chat 后会更新 `last_used_at` +- 本地验证: + - `gofmt -w` 目标文件通过 + - `go vet ./...` 通过 + - `go test ./internal/app ./internal/store/sqlite ./tests/integration/... -count=1` 通过 +- 尚未完成的最终闭环: + - remote43 当前 nginx `/portal-admin-api/` 仍未注入 trusted subject / proxy secret + - remote43 当前 `.env.crm` 仍缺 `SUB2API_CRM_TRUSTED_*` + - 因此新的线上 user-key 真验尚未完成;需先补生产 trusted-subject 链,随后再跑真实 `POST /api/keys` + `POST /v1/chat/completions = 200` 验收 + +### High-5:Portal 管理页把 Bearer token、probe key、provider keys 持久化到 `localStorage`,且与页面文案相矛盾 + +- **文件**: + - `deploy/tksea-portal/admin-common.js` — `readStoredConfig`, `writeStoredConfig` + - `deploy/tksea-portal/admin/accounts.html` — `writeConfig` + - `deploy/tksea-portal/admin/providers.html` — `saveConfig` + - `deploy/tksea-portal/admin/logical-groups.html` — `saveConfig` + - `deploy/tksea-portal/admin/route-health.html` — `saveConfig` + - `deploy/tksea-portal/admin-batch-import.html` — `saveConfig` + - `deploy/tksea-portal/admin/index.html` +- **问题**:多个管理页把高敏感凭证长期写入浏览器 `localStorage`,包括 `adminToken`、`probeAPIKey`、`accessAPIKey`、`providerKeys`、batch import `entries`;但首页文案明确写着 Bearer Token “不落盘,仅当前会话”。 +- **证据**: + 1. `admin-common.js` 的 `writeStoredConfig()` 直接执行 `global.localStorage.setItem(storageKey, JSON.stringify(payload))`。 + 2. `accounts.html` / `logical-groups.html` / `route-health.html` / `providers.html` / `admin-batch-import.html` 都把 `adminToken` 写入存储配置。 + 3. `providers.html` 还会持久化: + - `accessAPIKey` + - `providerKeys` + 4. `admin-batch-import.html` 还会持久化: + - `probeAPIKey` + - `entries` + 5. `admin/index.html` 第 273-275 行的 UI 提示仍写着:`Bearer Token(可选)` / `不落盘,仅当前会话`。 +- **影响**: + - 任何 XSS、浏览器扩展、共享机器、浏览器 profile 泄漏,都会直接暴露管理员 Bearer token 与第三方供应商 key 材料。 + - 这不是单纯 UX 漂移,而是前端凭证驻留策略错误。 +- **建议**: + - 默认禁止把任何 token / key / entries 写入 `localStorage`。 + - 如确有调试需求,改成显式“记住敏感信息”开关,默认关闭,并单独标红提示风险。 + - 首页与各页面文案必须与真实持久化行为保持一致。 + +### High-6:managed subscription 的宿主账号密码与 API key 完全由 `selector + groupID` 确定性推导,凭证可预测 + +- **文件**: + - `internal/host/sub2api/subscription_access.go` — `buildManagedSubscriptionIdentity`, `createManagedSubscriptionUser`, `loginAsManagedSubscriptionUser`, `ensureManagedSubscriptionAPIKey` +- **问题**:系统不是生成随机宿主侧托管用户密码/托管 API key,而是直接用 `selector|groupID` 的哈希构造固定邮箱、固定密码、固定 custom key。 +- **证据**: + 1. `buildManagedSubscriptionIdentity()` 中: + - `Email = fmt.Sprintf("%s-%s@sub2api.local", prefix, shortHash)` + - `Password = "RelayPwd!" + hash[:12]` + - `CustomKey = "sk-relay-" + keyHash` + 2. `createManagedSubscriptionUser()` 用这个固定密码创建宿主用户。 + 3. `loginAsManagedSubscriptionUser()` 随后用同一个固定密码去 `/api/v1/auth/login`。 + 4. `ensureManagedSubscriptionAPIKey()` 把 `identity.CustomKey` 作为 `custom_key` 提交给宿主。 +- **影响**: + - 只要 `selector` 和宿主 `groupID` 可推断,同一套宿主凭证就可被离线重建;这与“reset 后获得新 key”目标天然冲突。 + - 它也让托管用户/托管 key 的秘密性依赖于业务标识不可猜,而不是依赖随机熵。 +- **建议**: + - 宿主侧用户密码、custom key 必须改成高熵随机值,并由 CRM 服务端持久化管理。 + - `selector/groupID` 可以作为索引键,但不能直接当作凭证种子。 + - 若需要可重建映射,应只重建“查找键”,不要重建“登录秘密”。 + +### Medium-4:`user_keys.allowed_models` 的 JSON 解码错误被静默吞掉,数据损坏会被伪装成“空模型列表” + +- **文件**:`internal/store/sqlite/user_keys_repo.go` — `scanUserKeys`, `scanOneUserKey` +- **问题**:repo 在读取 `allowed_models` 时调用 `json.Unmarshal(...)`,但不检查返回错误。 +- **证据**: + 1. `scanUserKeys()`: + - `json.Unmarshal([]byte(modelsJSON.String), &k.AllowedModels)` + 2. `scanOneUserKey()`: + - `json.Unmarshal([]byte(modelsJSON.String), &k.AllowedModels)` + 3. 现有 repo 测试只覆盖正常 JSON,未覆盖损坏数据分支。 +- **影响**: + - 一旦库中 `allowed_models` 被历史脚本、手工修复、坏迁移写坏,API 不会报错,只会悄悄返回空列表。 + - 今天它首先表现为“事实源失真”;[INFERENCE] 如果后续补上模型授权强校验,这类静默降级会进一步变成授权行为不可预测。 +- **建议**: + - 解码失败时直接返回错误,而不是吞掉。 + - 为 `ListByOwner` / `GetByID` 增加 malformed JSON 测试用例。 + +### 第二轮补充结论 + +- 第一轮的主结论 **没有被推翻**:认证边界与网关错误语义仍是最需要优先修复的问题。 +- 第二轮额外确认:**前端管理面的凭证驻留策略** 和 **宿主托管身份的凭证生成策略** 也存在实质性安全问题,不能只当成文档偏差处理。 + +--- + +--- + +## 四、正向评价 + +以下设计/工程实践值得肯定: + +1. **SQLite repo 层组织清晰** + `internal/store/sqlite` 基本遵循 repo pattern,迁移、查询、边界测试较完整,便于后续维护。 + +2. **质量门禁脚本收口做得好** + `scripts/test/verify_quality_gates.sh` 已把前端资产、浏览器 smoke、SLO 门禁、gofmt、vet、coverage、integration test 串成一条可执行基线。 + +3. **导入/访问闭环核心覆盖率较高** + `internal/access`、`internal/provision`、`internal/store/sqlite`、`internal/reconcile` 的测试密度明显高于普通原型仓库。 + +4. **真实宿主 artifact 沉淀充分** + `artifacts/` 与执行板/真相文档联动,减少了“以为通过”和“真实通过”混淆。 + +5. **路由/治理/观测的概念边界基本清楚** + `route resolve`、`sticky`、`failover`、`governance`、`SLO` 等概念在命名和文档上已逐步收口,不是无结构堆叠。 + +6. **HTTP Server 基础超时配置合理** + `ReadTimeout`、`ReadHeaderTimeout`、`WriteTimeout`、`IdleTimeout`、`MaxHeaderBytes` 都已设置,优于默认裸奔。 + +7. **SQLite 单写连接限制是有意识设计** + `SetMaxOpenConns(1)` 针对 SQLite writer 约束有明确注释说明,避免了部分自锁型 `SQLITE_BUSY` 问题。 + +--- + +## 五、整改优先级建议 + +### P0(必须先改) + +1. 修复 `/api/keys*` 的 subject 信任模型,消除 header 伪造。 +2. 修复 `/v1/chat/completions` 的错误码透传与指标统计,禁止把上游失败记成 200/ok。 +3. 修复“同 subject + group 复用同一明文 key”导致的 key 级治理语义坍缩。 + +### P1(紧随其后) + +4. 落地 `allowed_models` 强制校验。 +5. 落地 `expires_at` 生效逻辑。 +6. 让 CI 与 `verify_quality_gates.sh` 对齐,并修复 Docker job 假验证。 + +### P2(补齐契约与运营真相) + +7. 实现 `pause reason` 请求/持久化/展示闭环。 +8. 在成功调用后更新 `last_used_at`。 +9. 收敛部署脚本默认值,避免隐式命中生产。 + +--- + +## 六、最终判断 + +如果评价标准是: + +- **代码能跑、测试能过、已有较强工程基础** —— 结论是 **是**。 +- **认证、key 治理、网关错误语义已经达到严格生产级** —— 结论是 **否**。 + +当前最值得警惕的不是普通 bug,而是两类“表面通过、语义失真”的问题: + +1. **认证边界靠客户端自报身份**。 +2. **上游失败被本地包装成成功**。 + +这两类问题都足以让线上行为与控制面/指标/审计出现系统性偏差,建议优先按 P0 处理后,再谈更高等级放行。 diff --git a/docs/EXECUTION_BOARD.md b/docs/EXECUTION_BOARD.md index 02e55c5b..c4214271 100644 --- a/docs/EXECUTION_BOARD.md +++ b/docs/EXECUTION_BOARD.md @@ -20,6 +20,33 @@ 2. `portal-admin-api` nginx 反代自动指向 18190(新 CRM) 3. `/metrics` Prometheus 端点已在公网通过 portal-admin-api 反代可访问 +## 2026-06-08 review remediation 当前真相 + +- 本地已完成并验证的整改: + - `/v1/chat/completions` 上游失败不再包装成 `200/ok` + - `allowed_models` 已在公网 chat 入口强制校验 + - `expires_at` 已在公网 chat 入口强制校验 + - 成功 chat 后会更新 `last_used_at` + - `pause` handler 已接入请求体 `reason` + - 同一 `subject + logical_group` 不再复用同一宿主 key;现改为每条 key record 持久化独立 `managed_identity_selector`,`create/reset/pause/resume` 走当前 selector + - 新增 migration:`internal/store/migrations/0016_user_keys_managed_identity_selector.sql` +- 本地验证(2026-06-08 当前运行): + - `gofmt -w` 目标文件通过 + - `go vet ./...` 通过 + - `go test ./internal/app ./internal/store/sqlite ./tests/integration/... -count=1` 通过 +- 当前线上阻塞: + - ✅ **已解决** (2025-06-09): vNext.4 Trusted-Subject 安全链实施完成 + - 新文件: `internal/app/portal_auth.go` - Portal user session 认证模块 + - 变更: `http_api.go`, `bootstrap.go`, `.env.example`, `nginx.sub.tksea.top.conf.example` + - 前端: `index.html` 添加 CRM session 登录/登出 + - 文档: `docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md` 完整部署指南 + - 本地验证: `go test ./internal/app -run TestPortal` 全部通过 + - **待 remote43 部署**: + - 需更新 nginx 配置(添加 cookie-to-header map) + - 需更新 `.env.crm`(配置 TRUSTED\_\* 环境变量) + - 需生成并同步 64 字符 hex secret + - 详见部署指南文档 + ## 2026-06-05 vNext.2 / V2-4 真实闭环 - 已完成 user-key self-service 第二轮实现并部署到 remote43 生产 CRM: diff --git a/docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md b/docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md new file mode 100644 index 00000000..208bd0d3 --- /dev/null +++ b/docs/TRUSTED_SUBJECT_DEPLOY_GUIDE.md @@ -0,0 +1,194 @@ +# vNext.4 Trusted-Subject 安全链部署指南 + +> 解决 2026-06-08 EXECUTION_BOARD.md 中记录的线上阻塞问题 + +## 问题描述 + +remote43 当前 nginx `/portal-admin-api/` 未注入 `X-CRM-Authenticated-Subject` / `X-CRM-Trusted-Proxy`,导致无法在现网安全完成新的 user-key 真验闭环。 + +## 解决方案 + +实施受信代理安全链: + +``` +用户浏览器 ← → Portal 前端 ← → nginx (cookie→header 转换) ← → CRM + ↑ ↓ + 设置 httpOnly cookie 验证并注入受信 header +``` + +## 所需变更 + +### 1. CRM 二进制更新 (已完成) + +新增文件: + +- `internal/app/portal_auth.go` - Portal user session 认证模块 +- `internal/app/portal_auth_test.go` - 测试用例 + +变更文件: + +- `internal/app/http_api.go` - 添加 `/api/portal/session/*` 路由 +- `internal/app/bootstrap.go` - 传递 trusted proxy secret +- `deploy/tksea-portal/index.html` - 添加 CRM session 登录/登出 +- `deploy/tksea-portal/nginx.sub.tksea.top.conf.example` - nginx 配置模板 +- `.env.example` - 环境变量模板 + +### 2. remote43 部署步骤 + +#### 步骤 1: 生成共享密钥 + +在 remote43 上执行: + +```bash +TRUSTED_PROXY_SECRET=$(openssl rand -hex 32) +echo "Generated secret: $TRUSTED_PROXY_SECRET" +# 保存此密钥,需要同时配置到 nginx 和 CRM +``` + +#### 步骤 2: 更新 CRM 配置 + +编辑 `/home/ubuntu/.env.crm`(或实际运行目录): + +```bash +# 在文件末尾添加: +# Trusted Subject Proxy Configuration +SUB2API_CRM_TRUSTED_SUBJECT_HEADER=X-CRM-Authenticated-Subject +SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER=X-CRM-Trusted-Proxy +SUB2API_CRM_TRUSTED_PROXY_SECRET=<步骤1生成的64字符密钥> +``` + +#### 步骤 3: 更新 nginx 配置 + +编辑 `/etc/nginx/sites-enabled/sub.tksea.top.conf`: + +在 `server` 块内添加: + +```nginx +# 从 httpOnly cookie 提取 portal subject(放在 server 块内) +map $http_cookie $portal_subject { + default ""; + ~*crm_session=([^;]+) $1; +} +``` + +修改 `/portal-admin-api/` location: + +```nginx +location /portal-admin-api/ { + proxy_pass http://127.0.0.1:18190/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # 关键:从验证过的 cookie 提取并注入 subject + proxy_set_header X-CRM-Authenticated-Subject $portal_subject; + # 受信代理密钥(必须与 CRM 配置一致) + proxy_set_header X-CRM-Trusted-Proxy "<步骤1生成的64字符密钥>"; + + proxy_http_version 1.1; +} +``` + +**注意**: + +- 删除原来的 `proxy_set_header X-Portal-Subject "";` 行 +- 确保密钥替换为实际生成的 64 字符 hex 字符串 + +#### 步骤 4: 重启服务 + +```bash +# 测试 nginx 配置 +sudo nginx -t + +# 重载 nginx +sudo systemctl reload nginx + +# 重启 CRM +sudo systemctl restart crm +# 或使用: +# pkill -f "crm" && cd /home/ubuntu && ./server & +``` + +#### 步骤 5: 验证 + +浏览器测试: + +1. 访问 `https://sub.tksea.top/portal/` +2. 登录(会同时设置 CRM session cookie) +3. 打开浏览器 DevTools → Application → Cookies +4. 确认看到 `crm_session` cookie(httpOnly)和 `crm_subject` cookie +5. 尝试创建/管理用户 Key,应该可以正常工作 + +API 测试: + +```bash +# 1. 登录获取 session cookie +curl -c cookies.txt -X POST https://sub.tksea.top/portal-admin-api/api/portal/session/login \ + -H "Content-Type: application/json" \ + -d '{"email":"test@example.com"}' + +# 2. 使用 cookie 访问 user-key API +curl -b cookies.txt https://sub.tksea.top/portal-admin-api/api/keys + +# 3. 创建新 key +curl -b cookies.txt -X POST https://sub.tksea.top/portal-admin-api/api/keys \ + -H "Content-Type: application/json" \ + -d '{"key_name":"test-key","logical_group_id":"gpt-shared"}' +``` + +### 3. 故障排除 + +#### CRM 返回 `unauthorized` / `trusted proxy authentication required` + +- 检查 `.env.crm` 中的 `SUB2API_CRM_TRUSTED_PROXY_SECRET` 是否正确设置 +- 检查 nginx 中的 `X-CRM-Trusted-Proxy` header 值是否一致 +- 检查两个密钥是否完全匹配(无多余空格) + +#### CRM 返回 `trusted subject header required` + +- 检查 nginx 是否正确添加了 `map $http_cookie $portal_subject` +- 检查浏览器是否有 `crm_session` cookie(登录后应该自动设置) +- 检查 cookie 是否被浏览器阻止(SameSite/Secure 设置) + +#### Portal 前端无法登录 CRM session + +- 检查浏览器 console 是否有 CORS 错误 +- 检查 `/portal-admin-api/` location 是否正确配置 +- 确认 CRM 服务正在监听 `127.0.0.1:18190` + +## 安全配置建议 + +1. **密钥管理** + - 使用 `openssl rand -hex 32` 生成强密钥 + - 不要在任何地方记录或提交密钥 + - 考虑使用 HashiCorp Vault 或 AWS Secrets Manager + +2. **HTTPS** + - 生产环境必须启用 HTTPS + - 设置 `Secure` flag 在 cookies 上 + +3. **Cookie 设置** + - `crm_session`: httpOnly, SameSite=Lax, Secure (HTTPS only) + - `crm_subject`: SameSite=Lax, Secure (HTTPS only) + +## 回滚计划 + +如果需要回滚: + +1. 还原 nginx 配置(删除 map 和 header 设置) +2. 还原 `.env.crm`(移除 TRUSTED\_\* 配置) +3. 重载 nginx / 重启 CRM + +portal 会回退到之前的 bearer token 认证模式。 + +## 验证清单 + +- [ ] 生成并记录了 64 字符 hex secret +- [ ] 更新了 `.env.crm` 配置 +- [ ] 更新了 nginx 配置 +- [ ] 重载了 nginx 配置 +- [ ] 重启了 CRM 服务 +- [ ] 浏览器测试通过(可以看到 crm_session cookie) +- [ ] API 测试通过(可以创建 user-key) +- [ ] 完整链路测试通过(create → chat → pause → resume → delete) diff --git a/internal/app/app_test.go b/internal/app/app_test.go index 4798cb16..038b7b81 100644 --- a/internal/app/app_test.go +++ b/internal/app/app_test.go @@ -141,7 +141,7 @@ func TestAPIAdminSessionLoginSetsCookieAndAuthorizesSubsequentRequest(t *testing ListPacks: func(context.Context) ([]PackInfo, error) { return []PackInfo{{PackID: "openai-cn-pack", Version: "1.1.6"}}, nil }, - }, "") + }, "", "") loginRequest := httptestRequest(t, http.MethodPost, "/api/admin/session/login", map[string]any{ "username": "admin", @@ -177,7 +177,7 @@ func TestAPIAdminSessionRejectsInvalidPassword(t *testing.T) { Token: "secret-token", Username: "admin", Password: "pass-123", - }, ActionSet{}, "") + }, ActionSet{}, "", "") request := httptestRequest(t, http.MethodPost, "/api/admin/session/login", map[string]any{ "username": "admin", "password": "wrong", @@ -192,7 +192,7 @@ func TestAPIAdminSessionLogoutClearsCookie(t *testing.T) { Token: "secret-token", Username: "admin", Password: "pass-123", - }, ActionSet{}, "") + }, ActionSet{}, "", "") request := httptestRequest(t, http.MethodPost, "/api/admin/session/logout", nil, "") response := httptestRecorder(handler, request) assertStatusCode(t, response, http.StatusNoContent) @@ -219,7 +219,7 @@ func TestAPIAdminSessionMeReportsAuthenticationState(t *testing.T) { Now: func() time.Time { return now }, - }, ActionSet{}, "") + }, ActionSet{}, "", "") request := httptestRequest(t, http.MethodGet, "/api/admin/session", nil, "") response := httptestRecorder(handler, request) diff --git a/internal/app/bootstrap.go b/internal/app/bootstrap.go index edb7fdc5..81f6e0b8 100644 --- a/internal/app/bootstrap.go +++ b/internal/app/bootstrap.go @@ -30,7 +30,7 @@ func Bootstrap(ctx context.Context) (*Server, error) { Username: adminSession.Username, Password: adminSession.Password, SessionTTL: adminSession.SessionTTL, - }, NewActionSetWithStickyRuntime(cfg.Database.SQLiteDSN, stickyRuntime), cfg.Database.SQLiteDSN) + }, NewActionSetWithStickyRuntime(cfg.Database.SQLiteDSN, stickyRuntime, cfg.UserKeyAuth), cfg.Database.SQLiteDSN, cfg.UserKeyAuth.TrustedProxySecret) return NewServer(cfg.Server.ListenAddr, handler, nil), nil } diff --git a/internal/app/http_api.go b/internal/app/http_api.go index d821e66b..b99b0b1a 100644 --- a/internal/app/http_api.go +++ b/internal/app/http_api.go @@ -336,14 +336,10 @@ func NewAPIHandler(adminToken string, actions ActionSet, dsn ...string) http.Han if len(dsn) > 0 { dsnVal = dsn[0] } - return NewAPIHandlerWithAuth(AdminAuthConfig{Token: adminToken}, actions, dsnVal) + return NewAPIHandlerWithAuth(AdminAuthConfig{Token: adminToken}, actions, dsnVal, "") } -func NewAPIHandlerWithAuth(adminAuth AdminAuthConfig, actions ActionSet, dsn ...string) http.Handler { - sqliteDSN := "" - if len(dsn) > 0 { - sqliteDSN = dsn[0] - } +func NewAPIHandlerWithAuth(adminAuth AdminAuthConfig, actions ActionSet, sqliteDSN string, portalSessionSecret string) http.Handler { mux := http.NewServeMux() mux.HandleFunc("GET /healthz", healthz) mux.HandleFunc("GET /version", handleVersion) @@ -366,6 +362,19 @@ func NewAPIHandlerWithAuth(adminAuth AdminAuthConfig, actions ActionSet, dsn ... mux.HandleFunc("GET /api/portal/logical-groups/{groupID}/models", func(w http.ResponseWriter, r *http.Request) { handleListPortalLogicalGroupModels(w, r, actions.ListPortalLogicalGroupModels) }) + // Portal user session endpoints + portalAuth := PortalAuthConfig{ + SessionSecret: portalSessionSecret, + } + mux.HandleFunc("GET /api/portal/session", func(w http.ResponseWriter, r *http.Request) { + handlePortalSessionState(w, r, portalAuth) + }) + mux.HandleFunc("POST /api/portal/session/login", func(w http.ResponseWriter, r *http.Request) { + handlePortalSessionLogin(w, r, portalAuth) + }) + mux.HandleFunc("POST /api/portal/session/logout", func(w http.ResponseWriter, r *http.Request) { + handlePortalSessionLogout(w, r) + }) mux.Handle("POST /api/batch-import/runs", requireAdminAccess(adminAuth, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handleCreateBatchImportRun(w, r, actions.CreateBatchImportRun) }))) @@ -1296,6 +1305,14 @@ func writeJSON(w http.ResponseWriter, statusCode int, body any) { _ = json.NewEncoder(w).Encode(body) } +func nonEmptyString(value, fallback string) string { + value = strings.TrimSpace(value) + if value != "" { + return value + } + return fallback +} + func classifyError(err error) *httpError { if err == nil { return nil @@ -1337,7 +1354,7 @@ func NewActionSet(sqliteDSN string) ActionSet { return NewActionSetWithStickyRuntime(sqliteDSN, defaultStickyStoreRuntime()) } -func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRuntime) ActionSet { +func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRuntime, authCfg ...config.UserKeyAuthConfig) ActionSet { routeLogWriter := newLazyRouteLogWriter(sqliteDSN) resolveRoute := buildResolveRouteAction(sqliteDSN, stickyRuntime, routeLogWriter) proxyRouteChatCompletions := buildProxyRouteChatCompletionsAction(sqliteDSN, resolveRoute, routeLogWriter) @@ -1383,7 +1400,7 @@ func NewActionSetWithStickyRuntime(sqliteDSN string, stickyRuntime stickyStoreRu GetRouteCooldown: buildGetRouteCooldownAction(stickyRuntime), ListProviderAccounts: buildListProviderAccountsAction(sqliteDSN), GetProviderAccountBindingCandidates: buildGetProviderAccountBindingCandidatesAction(sqliteDSN), - UserKeyHandler: buildUserKeyHandler(sqliteDSN), + UserKeyHandler: buildUserKeyHandler(sqliteDSN, authCfg...), UpdateProviderAccountBinding: buildUpdateProviderAccountBindingAction(sqliteDSN), EnableProviderAccount: buildUpdateProviderAccountStatusAction(sqliteDSN, sqlite.ProviderAccountStatusActive), DisableProviderAccount: buildUpdateProviderAccountStatusAction(sqliteDSN, sqlite.ProviderAccountStatusDisabled), @@ -2741,6 +2758,19 @@ func handlePublicV1ChatCompletions(w http.ResponseWriter, r *http.Request, dsn s writeHTTPError(w, &httpError{StatusCode: http.StatusForbidden, Code: "quota_exhausted", Message: "API key quota exhausted"}) return } + if key.ExpiresAt != "" { + expiresAt, parseErr := time.Parse(time.RFC3339, key.ExpiresAt) + if parseErr != nil { + metrics.RecordUserKeyChatRequest("key_metadata_error") + writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "key_metadata_error", Message: "invalid key expiry metadata"}) + return + } + if !expiresAt.After(time.Now().UTC()) { + metrics.RecordUserKeyChatRequest("key_expired") + writeHTTPError(w, &httpError{StatusCode: http.StatusForbidden, Code: "key_expired", Message: "API key has expired"}) + return + } + } // 4. Parse request body (OpenAI-compatible) body, err := io.ReadAll(io.LimitReader(r.Body, maxJSONBodyBytes)) @@ -2768,6 +2798,20 @@ func handlePublicV1ChatCompletions(w http.ResponseWriter, r *http.Request, dsn s writeHTTPError(w, &httpError{StatusCode: http.StatusBadRequest, Code: "bad_request", Message: "model is required"}) return } + if len(key.AllowedModels) > 0 { + modelAllowed := false + for _, allowedModel := range key.AllowedModels { + if strings.TrimSpace(allowedModel) == model { + modelAllowed = true + break + } + } + if !modelAllowed { + metrics.RecordUserKeyChatRequest("model_not_allowed") + writeHTTPError(w, &httpError{StatusCode: http.StatusForbidden, Code: "model_not_allowed", Message: "requested model is not allowed for this API key"}) + return + } + } // 5. Map to proxy request proxyReq := ProxyRouteChatCompletionsRequest{ @@ -2804,7 +2848,28 @@ func handlePublicV1ChatCompletions(w http.ResponseWriter, r *http.Request, dsn s } if upstreamResp == nil { - // Fallback: construct a minimal response from proxy info + upstreamResp = map[string]any{} + } + + if !result.Forward.OK { + statusCode := result.Forward.UpstreamStatus + if statusCode <= 0 { + statusCode = http.StatusBadGateway + } + upstreamResp["upstream_http_code"] = statusCode + if _, hasError := upstreamResp["error"]; !hasError { + upstreamResp["error"] = map[string]any{ + "code": nonEmptyString(result.Forward.ErrorClass, "upstream_error"), + "message": nonEmptyString(result.Forward.ErrorMessage, fmt.Sprintf("upstream request failed with status %d", statusCode)), + } + } + metrics.RecordUserKeyChatRequest(nonEmptyString(result.Forward.ErrorClass, "upstream_error")) + writeJSON(w, statusCode, upstreamResp) + return + } + + // Fallback: construct a minimal success response from proxy info + if len(upstreamResp) == 0 { upstreamResp = map[string]any{ "id": fmt.Sprintf("chatcmpl-%d", time.Now().UnixMilli()), "object": "chat.completion", @@ -2820,10 +2885,8 @@ func handlePublicV1ChatCompletions(w http.ResponseWriter, r *http.Request, dsn s }}, } } - - // Ensure upstream HTTP code is reflected - if !result.Forward.OK && result.Forward.UpstreamStatus > 0 { - upstreamResp["upstream_http_code"] = result.Forward.UpstreamStatus + if err := store.UserKeys().TouchLastUsed(r.Context(), key.KeyID); err != nil { + log.Printf("gateway: touch last_used_at for key %s failed: %v", key.KeyID, err) } // Wrap in OpenAI standard envelope if upstream didn't return one diff --git a/internal/app/key_self_service.go b/internal/app/key_self_service.go index 54ab09e0..1a03781e 100644 --- a/internal/app/key_self_service.go +++ b/internal/app/key_self_service.go @@ -19,13 +19,16 @@ func generatePlaintextKey() (string, string) { } type UserKeyHandler struct { - createFn func(ctx context.Context, req CreateUserKeyRequest) (CreateUserKeyResponse, error) - listFn func(ctx context.Context, subjectID string) ([]UserKeyMeta, error) - getFn func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) - resetFn func(ctx context.Context, keyID, subjectID string) (ResetUserKeyResponse, error) - pauseFn func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) - resumeFn func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) - deleteFn func(ctx context.Context, keyID, subjectID string) error + TrustedSubjectHeader string + TrustedProxySecretHeader string + TrustedProxySecret string + createFn func(ctx context.Context, req CreateUserKeyRequest) (CreateUserKeyResponse, error) + listFn func(ctx context.Context, subjectID string) ([]UserKeyMeta, error) + getFn func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) + resetFn func(ctx context.Context, keyID, subjectID string) (ResetUserKeyResponse, error) + pauseFn func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) + resumeFn func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) + deleteFn func(ctx context.Context, keyID, subjectID string) error } type CreateUserKeyRequest struct { @@ -60,22 +63,22 @@ type UserKeyMeta struct { } func (h *UserKeyHandler) extractSubjectID(r *http.Request) (string, *httpError) { - for _, header := range []string{"X-Portal-Subject", "X-User-Subject", "X-Forwarded-User"} { - if subjectID := strings.TrimSpace(r.Header.Get(header)); subjectID != "" { - return subjectID, nil - } + if h == nil { + return "", &httpError{StatusCode: http.StatusUnauthorized, Code: "unauthorized", Message: "user credentials required"} } - if hdr := r.Header.Get("Authorization"); strings.HasPrefix(hdr, "Bearer ") { - token := strings.TrimSpace(strings.TrimPrefix(hdr, "Bearer ")) - if token != "" { - n := 8 - if len(token) < n { - n = len(token) - } - return "skeleton_user_" + token[:n], nil - } + subjectHeader := strings.TrimSpace(h.TrustedSubjectHeader) + secretHeader := strings.TrimSpace(h.TrustedProxySecretHeader) + secret := strings.TrimSpace(h.TrustedProxySecret) + if subjectHeader == "" || secretHeader == "" || secret == "" { + return "", &httpError{StatusCode: http.StatusUnauthorized, Code: "unauthorized", Message: "trusted user identity proxy not configured"} } - return "", &httpError{StatusCode: http.StatusUnauthorized, Code: "unauthorized", Message: "user credentials required"} + if got := strings.TrimSpace(r.Header.Get(secretHeader)); got != secret { + return "", &httpError{StatusCode: http.StatusUnauthorized, Code: "unauthorized", Message: "trusted proxy authentication required"} + } + if subjectID := strings.TrimSpace(r.Header.Get(subjectHeader)); subjectID != "" { + return subjectID, nil + } + return "", &httpError{StatusCode: http.StatusUnauthorized, Code: "unauthorized", Message: "trusted subject header required"} } func writeSvcNotImplError(w http.ResponseWriter) { @@ -181,7 +184,16 @@ func handlePauseUserKey(w http.ResponseWriter, r *http.Request, h *UserKeyHandle return } keyID := r.PathValue("key_id") - key, svcErr := h.pauseFn(r.Context(), keyID, subjectID, "") + var req struct { + Reason string `json:"reason"` + } + if r.Body != nil && r.ContentLength != 0 { + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeHTTPError(w, &httpError{StatusCode: http.StatusBadRequest, Code: "invalid_json", Message: err.Error()}) + return + } + } + key, svcErr := h.pauseFn(r.Context(), keyID, subjectID, strings.TrimSpace(req.Reason)) if svcErr != nil { writeHTTPError(w, classifyError(svcErr)) return diff --git a/internal/app/key_self_service_handlers_test.go b/internal/app/key_self_service_handlers_test.go index 7d33e1f2..660b241c 100644 --- a/internal/app/key_self_service_handlers_test.go +++ b/internal/app/key_self_service_handlers_test.go @@ -4,12 +4,35 @@ import ( "context" "encoding/json" "fmt" + "io" "net/http" "net/http/httptest" "strings" "testing" ) +const ( + testTrustedSubjectHeader = "X-CRM-Authenticated-Subject" + testTrustedProxySecretHeader = "X-CRM-Trusted-Proxy" + testTrustedProxySecret = "shared-secret" +) + +func withTrustedProxyAuth(h *UserKeyHandler) *UserKeyHandler { + if h == nil { + return nil + } + clone := *h + clone.TrustedSubjectHeader = testTrustedSubjectHeader + clone.TrustedProxySecretHeader = testTrustedProxySecretHeader + clone.TrustedProxySecret = testTrustedProxySecret + return &clone +} + +func applyTrustedProxyHeaders(req *http.Request, subjectID string) { + req.Header.Set(testTrustedSubjectHeader, subjectID) + req.Header.Set(testTrustedProxySecretHeader, testTrustedProxySecret) +} + func TestGeneratePlaintextKeyAndExtractSubjectID(t *testing.T) { t.Parallel() plaintext, fingerprint := generatePlaintextKey() @@ -20,16 +43,49 @@ func TestGeneratePlaintextKeyAndExtractSubjectID(t *testing.T) { t.Fatalf("fingerprint = %q, want sha256 prefix", fingerprint) } - h := &UserKeyHandler{} - req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) - req.Header.Set("Authorization", "Bearer abcdefgh12345678") - subjectID, httpErr := h.extractSubjectID(req) - if httpErr != nil { - t.Fatalf("extractSubjectID() unexpected error: %+v", httpErr) - } - if subjectID != "skeleton_user_abcdefgh" { - t.Fatalf("subjectID = %q, want skeleton_user_abcdefgh", subjectID) - } + t.Run("rejects bearer fallback when trusted proxy auth is not configured", func(t *testing.T) { + h := &UserKeyHandler{} + req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) + req.Header.Set("Authorization", "Bearer abcdefgh12345678") + _, httpErr := h.extractSubjectID(req) + if httpErr == nil { + t.Fatal("expected unauthorized error when trusted proxy auth is not configured") + } + if httpErr.StatusCode != http.StatusUnauthorized { + t.Fatalf("status = %d, want 401", httpErr.StatusCode) + } + }) + + t.Run("rejects portal subject header when trusted proxy auth is not configured", func(t *testing.T) { + h := &UserKeyHandler{} + req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) + req.Header.Set("X-Portal-Subject", "portal-user:1") + _, httpErr := h.extractSubjectID(req) + if httpErr == nil { + t.Fatal("expected unauthorized error when trusted proxy auth is not configured") + } + if httpErr.StatusCode != http.StatusUnauthorized { + t.Fatalf("status = %d, want 401", httpErr.StatusCode) + } + }) + + t.Run("accepts trusted proxy subject when proxy secret matches", func(t *testing.T) { + h := &UserKeyHandler{ + TrustedSubjectHeader: "X-CRM-Authenticated-Subject", + TrustedProxySecretHeader: "X-CRM-Trusted-Proxy", + TrustedProxySecret: "shared-secret", + } + req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) + req.Header.Set("X-CRM-Authenticated-Subject", "portal-user:1") + req.Header.Set("X-CRM-Trusted-Proxy", "shared-secret") + subjectID, httpErr := h.extractSubjectID(req) + if httpErr != nil { + t.Fatalf("extractSubjectID() unexpected error: %+v", httpErr) + } + if subjectID != "portal-user:1" { + t.Fatalf("subjectID = %q, want portal-user:1", subjectID) + } + }) } func TestHandleUserKeyListNotImplemented(t *testing.T) { @@ -46,16 +102,16 @@ func TestHandleUserKeyListNotImplemented(t *testing.T) { func TestHandleUserKeyListSuccess(t *testing.T) { t.Parallel() - h := &UserKeyHandler{ + h := withTrustedProxyAuth(&UserKeyHandler{ listFn: func(ctx context.Context, subjectID string) ([]UserKeyMeta, error) { if subjectID != "portal-user:1" { t.Fatalf("subjectID = %q, want portal-user:1", subjectID) } return []UserKeyMeta{{KeyID: "key_1", AdminStatus: "active"}}, nil }, - } + }) req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) - req.Header.Set("X-Portal-Subject", "portal-user:1") + applyTrustedProxyHeaders(req, "portal-user:1") rr := httptest.NewRecorder() serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) { handleListUserKeys(w, r, h) @@ -70,12 +126,12 @@ func TestHandleUserKeyListSuccess(t *testing.T) { func TestHandleGetUserKeyMissingKeyID(t *testing.T) { t.Parallel() - h := &UserKeyHandler{getFn: func(context.Context, string, string) (UserKeyMeta, error) { + h := withTrustedProxyAuth(&UserKeyHandler{getFn: func(context.Context, string, string) (UserKeyMeta, error) { t.Fatal("getFn should not be called when key_id is missing") return UserKeyMeta{}, nil - }} + }}) req := httptest.NewRequest(http.MethodGet, "/api/keys/", nil) - req.Header.Set("X-Portal-Subject", "portal-user:1") + applyTrustedProxyHeaders(req, "portal-user:1") rr := httptest.NewRecorder() serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) { handleGetUserKey(w, r, h) @@ -131,7 +187,7 @@ func TestHandleUserKeyMutationHandlers(t *testing.T) { path: "/api/keys/key_1/pause", handlerFn: handlePauseUserKey, userHandler: &UserKeyHandler{pauseFn: func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) { - if keyID != "key_1" || subjectID != "portal-user:1" || reason != "" { + if keyID != "key_1" || subjectID != "portal-user:1" || reason != "user requested pause" { t.Fatalf("pauseFn args = (%q,%q,%q)", keyID, subjectID, reason) } paused := meta @@ -174,12 +230,18 @@ func TestHandleUserKeyMutationHandlers(t *testing.T) { for _, tc := range cases { tc := tc t.Run(tc.name, func(t *testing.T) { - req := httptest.NewRequest(tc.method, tc.path, nil) + req := httptest.NewRequest(tc.method, tc.path, func() io.Reader { + if tc.name == "pause-success" { + return strings.NewReader(`{"reason":"user requested pause"}`) + } + return nil + }()) req.Header.Set("X-Portal-Subject", "portal-user:1") req.SetPathValue("key_id", "key_1") rr := httptest.NewRecorder() + applyTrustedProxyHeaders(req, "portal-user:1") serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) { - tc.handlerFn(w, r, tc.userHandler) + tc.handlerFn(w, r, withTrustedProxyAuth(tc.userHandler)) }) if rr.Code != tc.wantStatus { t.Fatalf("status = %d, want %d body=%s", rr.Code, tc.wantStatus, rr.Body.String()) @@ -200,11 +262,11 @@ func serveWithMetrics(t *testing.T, req *http.Request, rr *httptest.ResponseReco func TestHandleListUserKeysResponseShape(t *testing.T) { t.Parallel() - h := &UserKeyHandler{listFn: func(context.Context, string) ([]UserKeyMeta, error) { + h := withTrustedProxyAuth(&UserKeyHandler{listFn: func(context.Context, string) ([]UserKeyMeta, error) { return []UserKeyMeta{{KeyID: "key_json", AdminStatus: "active"}}, nil - }} + }}) req := httptest.NewRequest(http.MethodGet, "/api/keys", nil) - req.Header.Set("X-Portal-Subject", "portal-user:json") + applyTrustedProxyHeaders(req, "portal-user:json") rr := httptest.NewRecorder() handleListUserKeys(rr, req, h) var payload struct { @@ -263,10 +325,10 @@ func TestHandleUserKeyMutationHandlersErrorPaths(t *testing.T) { tc := tc t.Run(tc.name, func(t *testing.T) { req := httptest.NewRequest(http.MethodPost, "/api/keys/key_1", nil) - req.Header.Set("X-Portal-Subject", "portal-user:1") + applyTrustedProxyHeaders(req, "portal-user:1") req.SetPathValue("key_id", "key_1") rr := httptest.NewRecorder() - tc.handlerFn(rr, req, tc.userHandler) + tc.handlerFn(rr, req, withTrustedProxyAuth(tc.userHandler)) if rr.Code != tc.wantStatus { t.Fatalf("status = %d, want %d body=%s", rr.Code, tc.wantStatus, rr.Body.String()) } diff --git a/internal/app/key_self_service_svc.go b/internal/app/key_self_service_svc.go index 06ddaec8..dbc2dcf1 100644 --- a/internal/app/key_self_service_svc.go +++ b/internal/app/key_self_service_svc.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "sub2api-cn-relay-manager/internal/config" "sub2api-cn-relay-manager/internal/host/sub2api" "sub2api-cn-relay-manager/internal/metrics" "sub2api-cn-relay-manager/internal/store/sqlite" @@ -104,13 +105,34 @@ func ensureSubjectHasAccess(ctx context.Context, client *sub2api.Client, subject return apiKey, nil } +func buildManagedIdentitySelector(subjectID, keyID string) string { + return strings.TrimSpace(subjectID) + "|key:" + strings.TrimSpace(keyID) + "|rot:" + generateKeyID() +} + +func managedIdentitySelectorForRecord(rec *sqlite.UserKeyRecord) string { + if rec == nil { + return "" + } + if selector := strings.TrimSpace(rec.ManagedIdentitySelector); selector != "" { + return selector + } + return strings.TrimSpace(rec.OwnerSubjectID) +} + func recordUserKeyFailure(operation, result string, err error) error { metrics.RecordUserKeyOperation(operation, result) return err } -func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { +func buildUserKeyHandler(sqliteDSN string, authCfg ...config.UserKeyAuthConfig) *UserKeyHandler { + var cfg config.UserKeyAuthConfig + if len(authCfg) > 0 { + cfg = authCfg[0] + } return &UserKeyHandler{ + TrustedSubjectHeader: strings.TrimSpace(cfg.TrustedSubjectHeader), + TrustedProxySecretHeader: strings.TrimSpace(cfg.TrustedProxySecretHeader), + TrustedProxySecret: strings.TrimSpace(cfg.TrustedProxySecret), createFn: func(ctx context.Context, req CreateUserKeyRequest) (CreateUserKeyResponse, error) { if strings.TrimSpace(req.SubjectID) == "" { metrics.RecordUserKeyOperation("create", "unauthorized") @@ -136,6 +158,9 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { return CreateUserKeyResponse{}, &httpError{StatusCode: 429, Code: "rate_limited", Message: "create key rate limit exceeded"} } + keyID := generateKeyID() + managedIdentitySelector := buildManagedIdentitySelector(req.SubjectID, keyID) + // Resolve logical group → host → group ID → ensure subscription access _, route, hostRow, client, err := resolveLogicalGroupHost(ctx, store, req.LogicalGroupID) if err != nil { @@ -145,26 +170,26 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { if err != nil { return CreateUserKeyResponse{}, recordUserKeyFailure("create", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err)) } - apiKey, err := ensureSubjectHasAccess(ctx, client, req.SubjectID, hostGroupID) + apiKey, err := ensureSubjectHasAccess(ctx, client, managedIdentitySelector, hostGroupID) if err != nil { return CreateUserKeyResponse{}, recordUserKeyFailure("create", "ensure_access_error", fmt.Errorf("ensure access for %q: %w", req.LogicalGroupID, err)) } fingerprint := "sha256:" + sha256Hex(apiKey) - keyID := generateKeyID() masked := "sk-****" + apiKey[len(apiKey)-4:] err = store.WithTx(ctx, func(q *sqlite.Queries) error { if _, err := q.UserKeys.Create(ctx, sqlite.UserKeyRecord{ - KeyID: keyID, - OwnerSubjectID: req.SubjectID, - KeyFingerprint: fingerprint, - MaskedPreview: masked, - DisplayName: strings.TrimSpace(req.DisplayName), - LogicalGroupID: strings.TrimSpace(req.LogicalGroupID), - AllowedModels: req.AllowedModels, - AdminStatus: "active", - QuotaStatus: "ok", + KeyID: keyID, + OwnerSubjectID: req.SubjectID, + ManagedIdentitySelector: managedIdentitySelector, + KeyFingerprint: fingerprint, + MaskedPreview: masked, + DisplayName: strings.TrimSpace(req.DisplayName), + LogicalGroupID: strings.TrimSpace(req.LogicalGroupID), + AllowedModels: req.AllowedModels, + AdminStatus: "active", + QuotaStatus: "ok", }); err != nil { return fmt.Errorf("create key: %w", err) } @@ -288,7 +313,8 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { if err != nil { return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err)) } - newPlaintext, err := ensureSubjectHasAccess(ctx, client, rec.OwnerSubjectID, hostGroupID) + managedIdentitySelector := buildManagedIdentitySelector(rec.OwnerSubjectID, keyID) + newPlaintext, err := ensureSubjectHasAccess(ctx, client, managedIdentitySelector, hostGroupID) if err != nil { return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "ensure_access_error", fmt.Errorf("ensure access on reset for %q: %w", rec.LogicalGroupID, err)) } @@ -297,7 +323,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { masked := "sk-****" + newPlaintext[len(newPlaintext)-4:] err = store.WithTx(ctx, func(q *sqlite.Queries) error { - if err := q.UserKeys.UpdateSecret(ctx, keyID, hostFingerprint, masked, "active"); err != nil { + if err := q.UserKeys.UpdateSecret(ctx, keyID, managedIdentitySelector, hostFingerprint, masked, "active"); err != nil { return fmt.Errorf("reset key: %w", err) } if _, err := q.UserKeyAuditEvents.Create(ctx, sqlite.UserKeyAuditEvent{ @@ -341,7 +367,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { if err != nil { return UserKeyMeta{}, recordUserKeyFailure("pause", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for pause %q: %w", route.ShadowGroupID, err)) } - if err := client.PauseManagedSubscriptionAccess(ctx, rec.OwnerSubjectID, hostGroupID); err != nil { + if err := client.PauseManagedSubscriptionAccess(ctx, managedIdentitySelectorForRecord(rec), hostGroupID); err != nil { return UserKeyMeta{}, recordUserKeyFailure("pause", "pause_access_error", fmt.Errorf("pause managed subscription access: %w", err)) } err = store.WithTx(ctx, func(q *sqlite.Queries) error { @@ -384,7 +410,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler { if err != nil { return UserKeyMeta{}, recordUserKeyFailure("resume", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for resume %q: %w", route.ShadowGroupID, err)) } - if err := client.ResumeManagedSubscriptionAccess(ctx, rec.OwnerSubjectID, hostGroupID); err != nil { + if err := client.ResumeManagedSubscriptionAccess(ctx, managedIdentitySelectorForRecord(rec), hostGroupID); err != nil { return UserKeyMeta{}, recordUserKeyFailure("resume", "resume_access_error", fmt.Errorf("resume managed subscription access: %w", err)) } err = store.WithTx(ctx, func(q *sqlite.Queries) error { diff --git a/internal/app/key_self_service_test.go b/internal/app/key_self_service_test.go index cf8a6356..26c2e6dd 100644 --- a/internal/app/key_self_service_test.go +++ b/internal/app/key_self_service_test.go @@ -11,10 +11,24 @@ import ( "strings" "testing" + "sub2api-cn-relay-manager/internal/config" "sub2api-cn-relay-manager/internal/metrics" "sub2api-cn-relay-manager/internal/store/sqlite" ) +func testUserKeyAuthConfig() config.UserKeyAuthConfig { + return config.UserKeyAuthConfig{ + TrustedSubjectHeader: testTrustedSubjectHeader, + TrustedProxySecretHeader: testTrustedProxySecretHeader, + TrustedProxySecret: testTrustedProxySecret, + } +} + +func applyTrustedProxyAuthHeaders(req *http.Request, subjectID string) { + req.Header.Set(testTrustedSubjectHeader, subjectID) + req.Header.Set(testTrustedProxySecretHeader, testTrustedProxySecret) +} + func makeCreateBody(groupID, displayName string, models []string) io.Reader { b, _ := json.Marshal(map[string]any{ "logical_group_id": groupID, @@ -60,11 +74,11 @@ func TestUserKeyAPIUsesPortalSubjectHeader(t *testing.T) { }) handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store), testUserKeyAuthConfig()), }) req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody("gpt-shared", "portal key", []string{"gpt-5.4"})) - req.Header.Set("X-Portal-Subject", "smoke-user") + applyTrustedProxyAuthHeaders(req, "smoke-user") resp := httptestRecorder(handler, req) // We expect 500 because test host is unreachable (port 1), but the important @@ -107,11 +121,11 @@ func TestUserKeyCreateRejectsMissingSubject(t *testing.T) { func TestUserKeyCreateRejectsMissingGroup(t *testing.T) { t.Parallel() handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, openAppTestStore(t))), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, openAppTestStore(t)), testUserKeyAuthConfig()), }) body := bytes.NewReader([]byte(`{"display_name":"portal key"}`)) req := makeCreateRequest(t, http.MethodPost, "/api/keys", body) - req.Header.Set("X-Portal-Subject", "smoke-user") + applyTrustedProxyAuthHeaders(req, "smoke-user") resp := httptestRecorder(handler, req) if resp.code != http.StatusBadRequest { t.Fatalf("status code = %d, want 400", resp.code) @@ -142,18 +156,18 @@ func TestUserKeyRateLimitNoDB(t *testing.T) { }) handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store), testUserKeyAuthConfig()), }) req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody("gpt-shared", "rate-test", nil)) - req.Header.Set("X-Portal-Subject", "rate-user") + applyTrustedProxyAuthHeaders(req, "rate-user") resp := httptestRecorder(handler, req) if resp.code == http.StatusUnauthorized || resp.code == http.StatusNotImplemented { t.Fatalf("status code = %d, expected to pass auth layer", resp.code) } } -func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testing.T) { +func TestUserKeyCreateUsesPerRecordManagedKeyAndConsistentMetadata(t *testing.T) { t.Parallel() store := openAppTestStore(t) @@ -163,6 +177,8 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin const hostGroupID = "999" const subjectID = "portal-user:13" + var loginEmail string + var customKey string server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch { case r.Method == http.MethodGet && strings.HasPrefix(r.URL.RequestURI(), "/api/v1/admin/users?"): @@ -180,9 +196,9 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin if err := json.NewDecoder(r.Body).Decode(&req); err != nil { t.Fatalf("decode login request: %v", err) } - expected := expectedManagedIdentity(subjectID, hostGroupID) - if got := fmt.Sprint(req["email"]); got != expected.Email { - t.Fatalf("login email = %q, want subject-scoped %q", got, expected.Email) + loginEmail = fmt.Sprint(req["email"]) + if !strings.Contains(loginEmail, "@sub2api.local") || strings.Contains(loginEmail, subjectID) { + t.Fatalf("login email = %q, want synthesized per-record managed identity", loginEmail) } w.Write([]byte(`{"data":{"access_token":"user-jwt"}}`)) case r.Method == http.MethodPost && r.URL.Path == "/api/v1/keys": @@ -190,9 +206,9 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin if err := json.NewDecoder(r.Body).Decode(&req); err != nil { t.Fatalf("decode managed key request: %v", err) } - expected := expectedManagedIdentity(subjectID, hostGroupID) - if got := fmt.Sprint(req["custom_key"]); got != expected.CustomKey { - t.Fatalf("custom_key = %q, want subject-scoped %q", got, expected.CustomKey) + customKey = fmt.Sprint(req["custom_key"]) + if !strings.HasPrefix(customKey, "sk-relay-") { + t.Fatalf("custom_key = %q, want sk-relay-*", customKey) } w.Write([]byte(`{"data":{"id":501,"key":"placeholder-from-host","name":"managed-key"}}`)) case r.Method == http.MethodPut && r.URL.Path == "/api/v1/admin/api-keys/501": @@ -226,11 +242,11 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin }) handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store), testUserKeyAuthConfig()), }) req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody(logicalGroupID, "portal key", []string{"gpt-5.4"})) - req.Header.Set("X-Portal-Subject", subjectID) + applyTrustedProxyAuthHeaders(req, subjectID) resp := httptestRecorder(handler, req) if resp.code != http.StatusCreated { t.Fatalf("status code = %d, want 201, body=%s", resp.code, resp.Body().String()) @@ -241,11 +257,10 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin t.Fatalf("decode create response: %v", err) } - expected := expectedManagedIdentity(subjectID, hostGroupID) - if createResp.PlaintextKey != expected.CustomKey { - t.Fatalf("plaintext_key = %q, want subject-scoped %q", createResp.PlaintextKey, expected.CustomKey) + if createResp.PlaintextKey != customKey { + t.Fatalf("plaintext_key = %q, want host custom_key %q", createResp.PlaintextKey, customKey) } - wantMasked := "sk-****" + expected.CustomKey[len(expected.CustomKey)-4:] + wantMasked := "sk-****" + customKey[len(customKey)-4:] if createResp.Key.MaskedPreview != wantMasked { t.Fatalf("masked_preview = %q, want %q", createResp.Key.MaskedPreview, wantMasked) } @@ -254,12 +269,18 @@ func TestUserKeyCreateUsesSubjectScopedManagedKeyAndConsistentMetadata(t *testin if err != nil { t.Fatalf("UserKeys().GetByID() error = %v", err) } - if record.KeyFingerprint != "sha256:"+sha256Hex(expected.CustomKey) { + if strings.TrimSpace(record.ManagedIdentitySelector) == "" || !strings.Contains(record.ManagedIdentitySelector, createResp.Key.KeyID) { + t.Fatalf("managed_identity_selector = %q, want non-empty selector tied to key id", record.ManagedIdentitySelector) + } + if record.KeyFingerprint != "sha256:"+sha256Hex(customKey) { t.Fatalf("key_fingerprint = %q, want sha256 of returned plaintext key", record.KeyFingerprint) } if record.MaskedPreview != wantMasked { t.Fatalf("stored masked_preview = %q, want %q", record.MaskedPreview, wantMasked) } + if loginEmail == "" { + t.Fatal("login email was not observed") + } } type managedIdentityExpectation struct { @@ -302,13 +323,145 @@ func expectedManagedPrefix(value string) string { return prefix } +func TestUserKeyCreateAndResetDoNotReuseSameManagedKeyWithinSubjectGroup(t *testing.T) { + t.Parallel() + + store := openAppTestStore(t) + defer closeAppTestStore(t, store) + + const logicalGroupID = "gpt-shared" + const hostGroupID = "999" + const subjectID = "portal-user:multi" + + type managedUser struct { + ID int64 + Email string + } + usersByEmail := map[string]managedUser{} + nextUserID := int64(100) + nextKeyID := int64(500) + createdCustomKeys := make([]string, 0, 4) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && strings.HasPrefix(r.URL.RequestURI(), "/api/v1/admin/users?"): + search := strings.TrimSpace(r.URL.Query().Get("search")) + items := make([]map[string]any, 0, 1) + if user, ok := usersByEmail[search]; ok { + items = append(items, map[string]any{"id": user.ID, "email": user.Email}) + } + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"items": items}}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v1/admin/users": + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode create user request: %v", err) + } + email := strings.TrimSpace(fmt.Sprint(req["email"])) + nextUserID++ + usersByEmail[email] = managedUser{ID: nextUserID, Email: email} + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": nextUserID, "email": email}}) + case r.Method == http.MethodPut && strings.HasPrefix(r.URL.Path, "/api/v1/admin/users/"): + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": 1}}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v1/admin/users/101/balance": + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": 101}}) + case r.Method == http.MethodPost && strings.HasPrefix(r.URL.Path, "/api/v1/admin/users/") && strings.HasSuffix(r.URL.Path, "/balance"): + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": 1}}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v1/admin/subscriptions/assign": + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": 401}}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v1/auth/login": + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"access_token": "user-jwt"}}) + case r.Method == http.MethodPost && r.URL.Path == "/api/v1/keys": + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode create key request: %v", err) + } + customKey := strings.TrimSpace(fmt.Sprint(req["custom_key"])) + createdCustomKeys = append(createdCustomKeys, customKey) + nextKeyID++ + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"id": nextKeyID, "key": customKey, "name": fmt.Sprint(req["name"])}}) + case r.Method == http.MethodPut && strings.HasPrefix(r.URL.Path, "/api/v1/admin/api-keys/"): + _ = json.NewEncoder(w).Encode(map[string]any{"data": map[string]any{"api_key": map[string]any{"id": 501}}}) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + _, _ = store.Hosts().Create(context.Background(), sqlite.Host{ + HostID: "test-host", + BaseURL: server.URL, + HostVersion: "0.0.1", + CapabilityProbeJSON: "{}", + AuthType: "apikey", + AuthToken: "test-token", + }) + _, _ = store.LogicalGroups().Create(context.Background(), sqlite.LogicalGroup{ + LogicalGroupID: logicalGroupID, + DisplayName: "GPT Shared", + Status: "active", + }) + _, _ = store.LogicalGroupRoutes().Create(context.Background(), sqlite.LogicalGroupRoute{ + RouteID: "test-route", + LogicalGroupID: logicalGroupID, + Name: "Test Route", + Status: "active", + ShadowHostID: "test-host", + ShadowGroupID: hostGroupID, + }) + + handler := buildUserKeyHandler(appTestDSN(t, store), testUserKeyAuthConfig()) + first, err := handler.createFn(context.Background(), CreateUserKeyRequest{SubjectID: subjectID, LogicalGroupID: logicalGroupID, DisplayName: "first", AllowedModels: []string{"gpt-5.4"}}) + if err != nil { + t.Fatalf("first createFn() error = %v", err) + } + second, err := handler.createFn(context.Background(), CreateUserKeyRequest{SubjectID: subjectID, LogicalGroupID: logicalGroupID, DisplayName: "second", AllowedModels: []string{"gpt-5.4"}}) + if err != nil { + t.Fatalf("second createFn() error = %v", err) + } + if first.PlaintextKey == second.PlaintextKey { + t.Fatalf("createFn() reused plaintext key across records: first=%q second=%q", first.PlaintextKey, second.PlaintextKey) + } + + reset, err := handler.resetFn(context.Background(), first.Key.KeyID, subjectID) + if err != nil { + t.Fatalf("resetFn() error = %v", err) + } + if reset.PlaintextKey == first.PlaintextKey { + t.Fatalf("resetFn() reused original plaintext key: before=%q after=%q", first.PlaintextKey, reset.PlaintextKey) + } + if reset.PlaintextKey == second.PlaintextKey { + t.Fatalf("resetFn() collided with sibling key: reset=%q sibling=%q", reset.PlaintextKey, second.PlaintextKey) + } + + firstRecord, err := store.UserKeys().GetByID(context.Background(), first.Key.KeyID) + if err != nil { + t.Fatalf("GetByID(first) error = %v", err) + } + secondRecord, err := store.UserKeys().GetByID(context.Background(), second.Key.KeyID) + if err != nil { + t.Fatalf("GetByID(second) error = %v", err) + } + if firstRecord.KeyFingerprint == secondRecord.KeyFingerprint { + t.Fatalf("distinct key records share fingerprint: %q", firstRecord.KeyFingerprint) + } + if firstRecord.KeyFingerprint != "sha256:"+sha256Hex(reset.PlaintextKey) { + t.Fatalf("first record fingerprint = %q, want reset plaintext fingerprint", firstRecord.KeyFingerprint) + } + if len(createdCustomKeys) < 3 { + t.Fatalf("createdCustomKeys len = %d, want at least 3", len(createdCustomKeys)) + } + if createdCustomKeys[0] == createdCustomKeys[1] || createdCustomKeys[0] == createdCustomKeys[2] { + t.Fatalf("host custom keys were reused unexpectedly: %#v", createdCustomKeys) + } +} + func TestUserKeyAPIMetricsMiddlewareAndCreateMetric(t *testing.T) { t.Parallel() handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, openAppTestStore(t))), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, openAppTestStore(t)), testUserKeyAuthConfig()), }) req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody("", "portal key", nil)) - req.Header.Set("X-Portal-Subject", "smoke-user") + applyTrustedProxyAuthHeaders(req, "smoke-user") _ = httptestRecorder(handler, req) metricsReq := httptest.NewRequest(http.MethodGet, "/metrics", nil) diff --git a/internal/app/portal_auth.go b/internal/app/portal_auth.go new file mode 100644 index 00000000..e2627df0 --- /dev/null +++ b/internal/app/portal_auth.go @@ -0,0 +1,311 @@ +package app + +import ( + "context" + "crypto/hmac" + "crypto/rand" + "crypto/sha256" + "crypto/subtle" + "encoding/base64" + "encoding/hex" + "net/http" + "strconv" + "strings" + "time" +) + +const ( + portalSubjectCookieName = "crm_subject" + portalSessionCookieName = "crm_session" + defaultPortalSessionTTL = 30 * 24 * time.Hour // 30 days +) + +// PortalAuthConfig 定义 portal user session 配置 +type PortalAuthConfig struct { + SessionSecret string // session cookie 签名密钥 + SessionTTL time.Duration // session 有效期 + Now func() time.Time +} + +// portalSessionInfo 存储 session 信息 +type portalSessionInfo struct { + SubjectID string + Email string + ExpiresAt time.Time +} + +// portalLoginRequest 登录请求 +type portalLoginRequest struct { + Email string `json:"email"` + Password string `json:"password"` // 仅用于验证,portal 采用"登录即注册"模式 +} + +// normalized 返回规范化配置 +func (c PortalAuthConfig) normalized() PortalAuthConfig { + if c.SessionTTL <= 0 { + c.SessionTTL = defaultPortalSessionTTL + } + if c.Now == nil { + c.Now = time.Now + } + return c +} + +// normalizedSubjectID 规范化 subject ID +func normalizedSubjectID(email string) string { + email = strings.TrimSpace(strings.ToLower(email)) + if email == "" { + return "" + } + return "portal-email:" + email +} + +// signSessionCookie 签名 session cookie 值 +func signSessionCookie(secret, subjectID string, expiresAt time.Time) string { + if secret == "" || subjectID == "" { + return "" + } + payload := subjectID + "|" + strconv.FormatInt(expiresAt.Unix(), 10) + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write([]byte(payload)) + sig := hex.EncodeToString(mac.Sum(nil)) + return base64.RawURLEncoding.EncodeToString([]byte(payload + "|" + sig)) +} + +// verifySessionCookie 验证并解析 session cookie +func verifySessionCookie(secret, raw string, now time.Time) (*portalSessionInfo, bool) { + if secret == "" || raw == "" { + return nil, false + } + b, err := base64.RawURLEncoding.DecodeString(raw) + if err != nil { + return nil, false + } + parts := strings.SplitN(string(b), "|", 3) + if len(parts) != 3 { + return nil, false + } + subjectID, tsStr, sigHex := parts[0], parts[1], parts[2] + + // 验证签名 + payload := subjectID + "|" + tsStr + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write([]byte(payload)) + expectedSig := hex.EncodeToString(mac.Sum(nil)) + if subtle.ConstantTimeCompare([]byte(sigHex), []byte(expectedSig)) != 1 { + return nil, false + } + + // 解析过期时间 + unixSec, err := strconv.ParseInt(tsStr, 10, 64) + if err != nil { + return nil, false + } + expiresAt := time.Unix(unixSec, 0) + if now.After(expiresAt) { + return nil, false + } + + return &portalSessionInfo{ + SubjectID: subjectID, + ExpiresAt: expiresAt, + }, true +} + +// generateSessionSecret 生成随机 session secret(32字节) +func generateSessionSecret() string { + b := make([]byte, 32) + rand.Read(b) + return hex.EncodeToString(b) +} + +// extractSubjectFromCookie 从请求 cookie 中提取 subject +func extractSubjectFromCookie(r *http.Request, sessionSecret string) string { + if sessionSecret == "" { + return "" + } + cookie, err := r.Cookie(portalSessionCookieName) + if err != nil || cookie == nil || cookie.Value == "" { + return "" + } + info, ok := verifySessionCookie(sessionSecret, cookie.Value, time.Now()) + if !ok { + return "" + } + return info.SubjectID +} + +// handlePortalSessionLogin 处理 portal user 登录 +// 设置 httpOnly cookie,返回 subject ID +func handlePortalSessionLogin(w http.ResponseWriter, r *http.Request, cfg PortalAuthConfig) { + cfg = cfg.normalized() + + if cfg.SessionSecret == "" { + writeHTTPError(w, &httpError{ + StatusCode: http.StatusServiceUnavailable, + Code: "portal_auth_not_configured", + Message: "Portal session authentication is not configured", + }) + return + } + + var req portalLoginRequest + if err := decodeJSON(r, &req); err != nil { + writeHTTPError(w, err) + return + } + + email := strings.TrimSpace(req.Email) + if email == "" || !strings.Contains(email, "@") { + writeHTTPError(w, &httpError{ + StatusCode: http.StatusBadRequest, + Code: "invalid_email", + Message: "Valid email is required", + }) + return + } + + subjectID := normalizedSubjectID(email) + expiresAt := cfg.Now().Add(cfg.SessionTTL) + + // 生成签名 cookie + sessionValue := signSessionCookie(cfg.SessionSecret, subjectID, expiresAt) + if sessionValue == "" { + writeHTTPError(w, &httpError{ + StatusCode: http.StatusInternalServerError, + Code: "session_sign_failed", + Message: "Failed to sign session", + }) + return + } + + // 设置 httpOnly cookie(SameSite=Lax,Secure 建议生产启用 HTTPS) + cookie := &http.Cookie{ + Name: portalSessionCookieName, + Value: sessionValue, + Path: "/", + Expires: expiresAt, + HttpOnly: true, + SameSite: http.SameSiteLaxMode, + Secure: r.TLS != nil || r.Header.Get("X-Forwarded-Proto") == "https", + } + http.SetCookie(w, cookie) + + // 同时设置非 httpOnly cookie 供前端 JS 读取 subject(用于显示) + subjectCookie := &http.Cookie{ + Name: portalSubjectCookieName, + Value: subjectID, + Path: "/", + Expires: expiresAt, + HttpOnly: false, // 允许前端读取 + SameSite: http.SameSiteLaxMode, + Secure: r.TLS != nil || r.Header.Get("X-Forwarded-Proto") == "https", + } + http.SetCookie(w, subjectCookie) + + writeJSON(w, http.StatusOK, map[string]any{ + "authenticated": true, + "subject_id": subjectID, + "email": email, + "expires_at": expiresAt.Format(time.RFC3339), + }) +} + +// handlePortalSessionLogout 处理 portal user 登出 +// 清除 session cookie +func handlePortalSessionLogout(w http.ResponseWriter, r *http.Request) { + // 清除 session cookie + sessionCookie := &http.Cookie{ + Name: portalSessionCookieName, + Value: "", + Path: "/", + Expires: time.Unix(0, 0), + MaxAge: -1, + HttpOnly: true, + } + http.SetCookie(w, sessionCookie) + + // 清除 subject cookie + subjectCookie := &http.Cookie{ + Name: portalSubjectCookieName, + Value: "", + Path: "/", + Expires: time.Unix(0, 0), + MaxAge: -1, + HttpOnly: false, + } + http.SetCookie(w, subjectCookie) + + writeJSON(w, http.StatusOK, map[string]any{ + "authenticated": false, + }) +} + +// handlePortalSessionState 处理 portal session 状态查询 +func handlePortalSessionState(w http.ResponseWriter, r *http.Request, cfg PortalAuthConfig) { + cfg = cfg.normalized() + + if cfg.SessionSecret == "" { + writeJSON(w, http.StatusOK, map[string]any{ + "authenticated": false, + "login_enabled": false, + }) + return + } + + subjectID := extractSubjectFromCookie(r, cfg.SessionSecret) + if subjectID == "" { + writeJSON(w, http.StatusOK, map[string]any{ + "authenticated": false, + "login_enabled": true, + }) + return + } + + writeJSON(w, http.StatusOK, map[string]any{ + "authenticated": true, + "login_enabled": true, + "subject_id": subjectID, + }) +} + +// requirePortalSubject 中间件:要求 portal session 认证 +// 与 trusted proxy header 的验证流程配合 +func requirePortalSubject(cfg PortalAuthConfig, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + cfg = cfg.normalized() + + // 首先检查 trusted proxy header + // 这是生产环境的推荐做法:nginx 验证并设置 header + if cfg.SessionSecret == "" { + writeHTTPError(w, &httpError{ + StatusCode: http.StatusUnauthorized, + Code: "unauthorized", + Message: "Portal authentication not configured", + }) + return + } + + subjectID := extractSubjectFromCookie(r, cfg.SessionSecret) + if subjectID == "" { + writeHTTPError(w, &httpError{ + StatusCode: http.StatusUnauthorized, + Code: "unauthorized", + Message: "Valid session required", + }) + return + } + + // 将 subject 放入 context 供后续使用 + ctx := context.WithValue(r.Context(), "portal_subject", subjectID) + next.ServeHTTP(w, r.WithContext(ctx)) + }) +} + +// getPortalSubjectFromContext 从 context 获取 subject +func getPortalSubjectFromContext(ctx context.Context) string { + if v, ok := ctx.Value("portal_subject").(string); ok { + return v + } + return "" +} diff --git a/internal/app/portal_auth_test.go b/internal/app/portal_auth_test.go new file mode 100644 index 00000000..d0ee52b7 --- /dev/null +++ b/internal/app/portal_auth_test.go @@ -0,0 +1,135 @@ +package app + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func TestPortalSessionLoginSetsCookieAndReturnsSubject(t *testing.T) { + cfg := PortalAuthConfig{ + SessionSecret: "test-secret-32-bytes-long-for-hmac", + Now: func() time.Time { return time.Unix(1_717_000_000, 0) }, + } + + req := httptest.NewRequest(http.MethodPost, "/api/portal/session/login", strings.NewReader(`{"email":"user@example.com"}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + handlePortalSessionLogin(rec, req, cfg) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK) + } + + // 检查响应体包含 subject + body := rec.Body.String() + if !strings.Contains(body, `"subject_id":"portal-email:user@example.com"`) { + t.Fatalf("response body missing subject_id: %s", body) + } + + // 检查设置了 cookie + cookies := rec.Result().Cookies() + if len(cookies) < 2 { + t.Fatalf("expected at least 2 cookies (session + subject), got %d", len(cookies)) + } + + // 检查 session cookie 是 httpOnly + var foundSessionCookie bool + for _, c := range cookies { + if c.Name == portalSessionCookieName { + foundSessionCookie = true + if !c.HttpOnly { + t.Fatal("session cookie should be HttpOnly") + } + } + } + if !foundSessionCookie { + t.Fatalf("session cookie %s not found", portalSessionCookieName) + } +} + +func TestPortalSessionLoginRejectsMissingEmail(t *testing.T) { + cfg := PortalAuthConfig{ + SessionSecret: "test-secret", + } + + req := httptest.NewRequest(http.MethodPost, "/api/portal/session/login", strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + handlePortalSessionLogin(rec, req, cfg) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusBadRequest) + } +} + +func TestPortalSessionLoginRequiresSecret(t *testing.T) { + cfg := PortalAuthConfig{ + SessionSecret: "", // 未配置 + } + + req := httptest.NewRequest(http.MethodPost, "/api/portal/session/login", strings.NewReader(`{"email":"user@example.com"}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + handlePortalSessionLogin(rec, req, cfg) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusServiceUnavailable) + } +} + +func TestPortalSessionLogoutClearsCookies(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/portal/session/logout", nil) + rec := httptest.NewRecorder() + + handlePortalSessionLogout(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK) + } + + // 检查清除了 cookie + cookies := rec.Result().Cookies() + var clearedSession bool + var clearedSubject bool + for _, c := range cookies { + if c.Name == portalSessionCookieName && c.MaxAge == -1 { + clearedSession = true + } + if c.Name == portalSubjectCookieName && c.MaxAge == -1 { + clearedSubject = true + } + } + if !clearedSession { + t.Fatal("session cookie should be cleared") + } + if !clearedSubject { + t.Fatal("subject cookie should be cleared") + } +} + +func TestPortalSessionStateUnauthenticatedWhenNoCookie(t *testing.T) { + cfg := PortalAuthConfig{ + SessionSecret: "test-secret", + Now: func() time.Time { return time.Unix(1_717_000_000, 0) }, + } + + req := httptest.NewRequest(http.MethodGet, "/api/portal/session", nil) + rec := httptest.NewRecorder() + + handlePortalSessionState(rec, req, cfg) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK) + } + + body := rec.Body.String() + if !strings.Contains(body, `"authenticated":false`) { + t.Fatalf("expected unauthenticated, got: %s", body) + } +} diff --git a/internal/app/public_chat_metrics_test.go b/internal/app/public_chat_metrics_test.go index d880d3e0..b486ca73 100644 --- a/internal/app/public_chat_metrics_test.go +++ b/internal/app/public_chat_metrics_test.go @@ -58,6 +58,190 @@ func TestPublicV1ChatCompletionsQuotaExhaustedRecordsMetric(t *testing.T) { } } +func TestPublicV1ChatCompletionsPropagatesUpstreamFailureStatusAndMetric(t *testing.T) { + t.Parallel() + + store := openAppTestStore(t) + defer closeAppTestStore(t, store) + + const plaintextKey = "sk-test-upstream-429" + if _, err := store.UserKeys().Create(context.Background(), sqlite.UserKeyRecord{ + KeyID: "key_upstream_429", + OwnerSubjectID: "portal-user", + KeyFingerprint: "sha256:" + sha256Hex(plaintextKey), + MaskedPreview: "sk-****-429", + DisplayName: "upstream 429 key", + LogicalGroupID: "gpt-shared", + AllowedModels: []string{"gpt-5.4"}, + AdminStatus: "active", + QuotaStatus: "ok", + }); err != nil { + t.Fatalf("UserKeys().Create() error = %v", err) + } + + handler := NewAPIHandler("t", ActionSet{ + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + ProxyRouteChatCompletions: func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error) { + return ProxyRouteChatCompletionsResult{ + Forward: RouteChatCompletionsForwardInfo{ + OK: false, + UpstreamStatus: http.StatusTooManyRequests, + ErrorClass: "gateway_rate_limited", + Response: map[string]any{ + "error": map[string]any{ + "code": "upstream_rate_limited", + "message": "upstream rejected request", + }, + }, + }, + }, nil + }, + }, appTestDSN(t, store)) + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"gpt-5.4","messages":[{"role":"user","content":"ping"}]}`)) + req.Header.Set("Authorization", "Bearer "+plaintextKey) + req.Header.Set("Content-Type", "application/json") + resp := httptestRecorder(handler, req) + if resp.code != http.StatusTooManyRequests { + t.Fatalf("status code = %d, want 429 body=%s", resp.code, resp.Body().String()) + } + assertJSONContains(t, resp.Body().Bytes(), "error.code", "upstream_rate_limited") + + metricsReq := httptest.NewRequest(http.MethodGet, "/metrics", nil) + metricsResp := httptest.NewRecorder() + metrics.Handler().ServeHTTP(metricsResp, metricsReq) + body := metricsResp.Body.String() + if !strings.Contains(body, `user_key_chat_requests_total{result="gateway_rate_limited"}`) { + t.Fatalf("metrics body missing gateway_rate_limited metric: %s", body) + } +} + +func TestPublicV1ChatCompletionsRejectsDisallowedModel(t *testing.T) { + t.Parallel() + + store := openAppTestStore(t) + defer closeAppTestStore(t, store) + + const plaintextKey = "sk-test-disallowed-model" + if _, err := store.UserKeys().Create(context.Background(), sqlite.UserKeyRecord{ + KeyID: "key_disallowed_model", + OwnerSubjectID: "portal-user", + KeyFingerprint: "sha256:" + sha256Hex(plaintextKey), + MaskedPreview: "sk-****odel", + DisplayName: "model restricted key", + LogicalGroupID: "gpt-shared", + AllowedModels: []string{"gpt-4.1"}, + AdminStatus: "active", + QuotaStatus: "ok", + }); err != nil { + t.Fatalf("UserKeys().Create() error = %v", err) + } + + handler := NewAPIHandler("t", ActionSet{ + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + ProxyRouteChatCompletions: func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error) { + t.Fatal("proxy should not be called for disallowed model") + return ProxyRouteChatCompletionsResult{}, nil + }, + }, appTestDSN(t, store)) + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"gpt-5.4","messages":[{"role":"user","content":"ping"}]}`)) + req.Header.Set("Authorization", "Bearer "+plaintextKey) + req.Header.Set("Content-Type", "application/json") + resp := httptestRecorder(handler, req) + if resp.code != http.StatusForbidden { + t.Fatalf("status code = %d, want 403 body=%s", resp.code, resp.Body().String()) + } + assertJSONContains(t, resp.Body().Bytes(), "error.code", "model_not_allowed") +} + +func TestPublicV1ChatCompletionsRejectsExpiredKey(t *testing.T) { + t.Parallel() + + store := openAppTestStore(t) + defer closeAppTestStore(t, store) + + const plaintextKey = "sk-test-expired-key" + if _, err := store.UserKeys().Create(context.Background(), sqlite.UserKeyRecord{ + KeyID: "key_expired", + OwnerSubjectID: "portal-user", + KeyFingerprint: "sha256:" + sha256Hex(plaintextKey), + MaskedPreview: "sk-****ired", + DisplayName: "expired key", + LogicalGroupID: "gpt-shared", + AllowedModels: []string{"gpt-5.4"}, + AdminStatus: "active", + QuotaStatus: "ok", + }); err != nil { + t.Fatalf("UserKeys().Create() error = %v", err) + } + if _, err := store.SQLDB().ExecContext(context.Background(), `UPDATE user_keys SET expires_at = ? WHERE key_id = ?`, "2020-01-01T00:00:00Z", "key_expired"); err != nil { + t.Fatalf("set expires_at error = %v", err) + } + + handler := NewAPIHandler("t", ActionSet{ + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + ProxyRouteChatCompletions: func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error) { + t.Fatal("proxy should not be called for expired key") + return ProxyRouteChatCompletionsResult{}, nil + }, + }, appTestDSN(t, store)) + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"gpt-5.4","messages":[{"role":"user","content":"ping"}]}`)) + req.Header.Set("Authorization", "Bearer "+plaintextKey) + req.Header.Set("Content-Type", "application/json") + resp := httptestRecorder(handler, req) + if resp.code != http.StatusForbidden { + t.Fatalf("status code = %d, want 403 body=%s", resp.code, resp.Body().String()) + } + assertJSONContains(t, resp.Body().Bytes(), "error.code", "key_expired") +} + +func TestPublicV1ChatCompletionsTouchesLastUsedAtOnSuccess(t *testing.T) { + t.Parallel() + + store := openAppTestStore(t) + defer closeAppTestStore(t, store) + + const plaintextKey = "sk-test-last-used" + if _, err := store.UserKeys().Create(context.Background(), sqlite.UserKeyRecord{ + KeyID: "key_last_used", + OwnerSubjectID: "portal-user", + KeyFingerprint: "sha256:" + sha256Hex(plaintextKey), + MaskedPreview: "sk-****used", + DisplayName: "active key", + LogicalGroupID: "gpt-shared", + AllowedModels: []string{"gpt-5.4"}, + AdminStatus: "active", + QuotaStatus: "ok", + }); err != nil { + t.Fatalf("UserKeys().Create() error = %v", err) + } + + handler := NewAPIHandler("t", ActionSet{ + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + ProxyRouteChatCompletions: func(context.Context, ProxyRouteChatCompletionsRequest) (ProxyRouteChatCompletionsResult, error) { + return ProxyRouteChatCompletionsResult{Forward: RouteChatCompletionsForwardInfo{OK: true, UpstreamStatus: http.StatusOK, Response: map[string]any{"id": "chatcmpl_ok", "object": "chat.completion", "model": "gpt-5.4", "choices": []map[string]any{{"index": 0, "message": map[string]any{"role": "assistant", "content": "pong"}, "finish_reason": "stop"}}}}}, nil + }, + }, appTestDSN(t, store)) + + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"gpt-5.4","messages":[{"role":"user","content":"ping"}]}`)) + req.Header.Set("Authorization", "Bearer "+plaintextKey) + req.Header.Set("Content-Type", "application/json") + resp := httptestRecorder(handler, req) + if resp.code != http.StatusOK { + t.Fatalf("status code = %d, want 200 body=%s", resp.code, resp.Body().String()) + } + + record, err := store.UserKeys().GetByID(context.Background(), "key_last_used") + if err != nil { + t.Fatalf("GetByID() error = %v", err) + } + if strings.TrimSpace(record.LastUsedAt) == "" { + t.Fatalf("LastUsedAt = %q, want non-empty after successful chat", record.LastUsedAt) + } +} + func TestMetricsMiddlewareUsesRoutePatternForKeyReset(t *testing.T) { t.Parallel() diff --git a/internal/app/user_key_operation_metrics_test.go b/internal/app/user_key_operation_metrics_test.go index 83ec6d96..e25287d8 100644 --- a/internal/app/user_key_operation_metrics_test.go +++ b/internal/app/user_key_operation_metrics_test.go @@ -16,11 +16,11 @@ func TestUserKeyCreateResolveHostErrorRecordsMetric(t *testing.T) { defer closeAppTestStore(t, store) handler := NewAPIHandler("t", ActionSet{ - UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)), + UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store), testUserKeyAuthConfig()), }) req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody("missing-group", "portal key", []string{"gpt-5.4"})) - req.Header.Set("X-Portal-Subject", "portal-user") + applyTrustedProxyAuthHeaders(req, "portal-user") resp := httptestRecorder(handler, req) if resp.code != http.StatusInternalServerError { t.Fatalf("status code = %d, want 500 body=%s", resp.code, resp.Body().String()) diff --git a/internal/config/config.go b/internal/config/config.go index 2b6a5b2f..cfb91889 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,26 +9,30 @@ import ( ) const ( - EnvListenAddr = "SUB2API_CRM_LISTEN_ADDR" - EnvSQLiteDSN = "SUB2API_CRM_SQLITE_DSN" - EnvAdminToken = "SUB2API_CRM_ADMIN_TOKEN" - EnvAdminUsername = "SUB2API_CRM_ADMIN_USERNAME" - EnvAdminPassword = "SUB2API_CRM_ADMIN_PASSWORD" - EnvAdminSessionTTL = "SUB2API_CRM_ADMIN_SESSION_TTL" - EnvRepoRoot = "SUB2API_CRM_REPO_ROOT" - EnvReconcileWorkerEnabled = "SUB2API_CRM_RECONCILE_WORKER_ENABLED" - EnvReconcilePollInterval = "SUB2API_CRM_RECONCILE_POLL_INTERVAL" - EnvRouteRuntimeBackend = "SUB2API_CRM_ROUTE_RUNTIME_BACKEND" - EnvRedisAddr = "SUB2API_CRM_REDIS_ADDR" - EnvRedisPassword = "SUB2API_CRM_REDIS_PASSWORD" - EnvRedisDB = "SUB2API_CRM_REDIS_DB" + EnvListenAddr = "SUB2API_CRM_LISTEN_ADDR" + EnvSQLiteDSN = "SUB2API_CRM_SQLITE_DSN" + EnvAdminToken = "SUB2API_CRM_ADMIN_TOKEN" + EnvAdminUsername = "SUB2API_CRM_ADMIN_USERNAME" + EnvAdminPassword = "SUB2API_CRM_ADMIN_PASSWORD" + EnvAdminSessionTTL = "SUB2API_CRM_ADMIN_SESSION_TTL" + EnvRepoRoot = "SUB2API_CRM_REPO_ROOT" + EnvReconcileWorkerEnabled = "SUB2API_CRM_RECONCILE_WORKER_ENABLED" + EnvReconcilePollInterval = "SUB2API_CRM_RECONCILE_POLL_INTERVAL" + EnvRouteRuntimeBackend = "SUB2API_CRM_ROUTE_RUNTIME_BACKEND" + EnvRedisAddr = "SUB2API_CRM_REDIS_ADDR" + EnvRedisPassword = "SUB2API_CRM_REDIS_PASSWORD" + EnvRedisDB = "SUB2API_CRM_REDIS_DB" + EnvTrustedSubjectHeader = "SUB2API_CRM_TRUSTED_SUBJECT_HEADER" + EnvTrustedProxySecretHeader = "SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER" + EnvTrustedProxySecret = "SUB2API_CRM_TRUSTED_PROXY_SECRET" - DefaultListenAddr = ":8080" - DefaultSQLiteDSN = "file:sub2api-cn-relay-manager.db?_foreign_keys=on&_busy_timeout=5000" - DefaultAdminUsername = "admin" - DefaultAdminSessionTTL = 12 * time.Hour - DefaultReconcilePollInterval = 10 * time.Minute - DefaultRouteRuntimeBackend = "memory" + DefaultListenAddr = ":8080" + DefaultSQLiteDSN = "file:sub2api-cn-relay-manager.db?_foreign_keys=on&_busy_timeout=5000" + DefaultAdminUsername = "admin" + DefaultAdminSessionTTL = 12 * time.Hour + DefaultReconcilePollInterval = 10 * time.Minute + DefaultRouteRuntimeBackend = "memory" + DefaultTrustedProxySecretHeader = "X-CRM-Trusted-Proxy" ) type ServerConfig struct { @@ -59,10 +63,17 @@ type RepositoryConfig struct { RepoRoot string } +type UserKeyAuthConfig struct { + TrustedSubjectHeader string + TrustedProxySecretHeader string + TrustedProxySecret string +} + type StartupConfig struct { Server ServerConfig Database DatabaseConfig Repository RepositoryConfig + UserKeyAuth UserKeyAuthConfig RouteRuntime RouteRuntimeConfig Reconcile ReconcileConfig } @@ -96,6 +107,11 @@ func loadStartupFromLookupEnv(lookup func(string) (string, bool)) (StartupConfig Repository: RepositoryConfig{ RepoRoot: readOptionalEnv(lookup, EnvRepoRoot, ""), }, + UserKeyAuth: UserKeyAuthConfig{ + TrustedSubjectHeader: readOptionalEnv(lookup, EnvTrustedSubjectHeader, ""), + TrustedProxySecretHeader: readOptionalEnv(lookup, EnvTrustedProxySecretHeader, DefaultTrustedProxySecretHeader), + TrustedProxySecret: readOptionalEnv(lookup, EnvTrustedProxySecret, ""), + }, RouteRuntime: RouteRuntimeConfig{ Backend: readOptionalEnv(lookup, EnvRouteRuntimeBackend, DefaultRouteRuntimeBackend), Redis: RedisRuntimeConfig{ diff --git a/internal/config/config_test.go b/internal/config/config_test.go index d98fbc99..bea8f0cd 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -77,6 +77,12 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { return " redis-pass ", true case EnvRedisDB: return "5", true + case EnvTrustedSubjectHeader: + return "X-CRM-Authenticated-Subject", true + case EnvTrustedProxySecretHeader: + return "X-CRM-Trusted-Proxy", true + case EnvTrustedProxySecret: + return "proxy-secret", true default: return "", false } @@ -112,6 +118,15 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { if cfg.RouteRuntime.Redis.DB != 5 { t.Fatalf("RouteRuntime.Redis.DB = %d, want 5", cfg.RouteRuntime.Redis.DB) } + if cfg.UserKeyAuth.TrustedSubjectHeader != "X-CRM-Authenticated-Subject" { + t.Fatalf("UserKeyAuth.TrustedSubjectHeader = %q, want X-CRM-Authenticated-Subject", cfg.UserKeyAuth.TrustedSubjectHeader) + } + if cfg.UserKeyAuth.TrustedProxySecretHeader != "X-CRM-Trusted-Proxy" { + t.Fatalf("UserKeyAuth.TrustedProxySecretHeader = %q, want X-CRM-Trusted-Proxy", cfg.UserKeyAuth.TrustedProxySecretHeader) + } + if cfg.UserKeyAuth.TrustedProxySecret != "proxy-secret" { + t.Fatalf("UserKeyAuth.TrustedProxySecret = %q, want proxy-secret", cfg.UserKeyAuth.TrustedProxySecret) + } }) t.Run("default values", func(t *testing.T) { lookup := func(k string) (string, bool) { @@ -142,6 +157,15 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { if cfg.RouteRuntime.Redis.Addr != "" || cfg.RouteRuntime.Redis.Password != "" || cfg.RouteRuntime.Redis.DB != 0 { t.Fatalf("RouteRuntime.Redis = %+v, want zero value", cfg.RouteRuntime.Redis) } + if cfg.UserKeyAuth.TrustedSubjectHeader != "" { + t.Fatalf("UserKeyAuth.TrustedSubjectHeader = %q, want empty by default", cfg.UserKeyAuth.TrustedSubjectHeader) + } + if cfg.UserKeyAuth.TrustedProxySecretHeader != DefaultTrustedProxySecretHeader { + t.Fatalf("UserKeyAuth.TrustedProxySecretHeader = %q, want %q", cfg.UserKeyAuth.TrustedProxySecretHeader, DefaultTrustedProxySecretHeader) + } + if cfg.UserKeyAuth.TrustedProxySecret != "" { + t.Fatalf("UserKeyAuth.TrustedProxySecret = %q, want empty by default", cfg.UserKeyAuth.TrustedProxySecret) + } }) t.Run("invalid reconcile interval", func(t *testing.T) { lookup := func(k string) (string, bool) { diff --git a/internal/store/migrations/0016_user_keys_managed_identity_selector.sql b/internal/store/migrations/0016_user_keys_managed_identity_selector.sql new file mode 100644 index 00000000..a33d15ef --- /dev/null +++ b/internal/store/migrations/0016_user_keys_managed_identity_selector.sql @@ -0,0 +1,2 @@ +ALTER TABLE user_keys ADD COLUMN managed_identity_selector TEXT NOT NULL DEFAULT ''; +CREATE INDEX IF NOT EXISTS idx_user_keys_managed_identity_selector ON user_keys(managed_identity_selector); diff --git a/internal/store/sqlite/user_key_control_plane_repo_test.go b/internal/store/sqlite/user_key_control_plane_repo_test.go index 8f57e418..501511d2 100644 --- a/internal/store/sqlite/user_key_control_plane_repo_test.go +++ b/internal/store/sqlite/user_key_control_plane_repo_test.go @@ -85,15 +85,15 @@ func TestUserKeysRepoUpdateSecret(t *testing.T) { t.Fatalf("Create() error = %v", err) } - if err := store.UserKeys().UpdateSecret(ctx, "key_rotate_001", "sha256:new", "sk-****new1", "active"); err != nil { + if err := store.UserKeys().UpdateSecret(ctx, "key_rotate_001", "subject|key:key_rotate_001|rot:key_nonce", "sha256:new", "sk-****new1", "active"); err != nil { t.Fatalf("UpdateSecret() error = %v", err) } key, err := store.UserKeys().GetByID(ctx, "key_rotate_001") if err != nil { t.Fatalf("GetByID() error = %v", err) } - if key.KeyFingerprint != "sha256:new" || key.MaskedPreview != "sk-****new1" || key.AdminStatus != "active" { - t.Fatalf("updated key = %+v, want new fingerprint/mask/status", key) + if key.ManagedIdentitySelector != "subject|key:key_rotate_001|rot:key_nonce" || key.KeyFingerprint != "sha256:new" || key.MaskedPreview != "sk-****new1" || key.AdminStatus != "active" { + t.Fatalf("updated key = %+v, want new selector/fingerprint/mask/status", key) } if strings.TrimSpace(key.UpdatedAt) == "" { t.Fatalf("UpdatedAt = %q, want non-empty", key.UpdatedAt) diff --git a/internal/store/sqlite/user_keys_repo.go b/internal/store/sqlite/user_keys_repo.go index 41c74864..ba097154 100644 --- a/internal/store/sqlite/user_keys_repo.go +++ b/internal/store/sqlite/user_keys_repo.go @@ -9,20 +9,21 @@ import ( ) type UserKeyRecord struct { - ID int64 `json:"-"` - KeyID string `json:"key_id"` - OwnerSubjectID string `json:"owner_subject_id"` - KeyFingerprint string `json:"key_fingerprint"` - MaskedPreview string `json:"masked_preview"` - DisplayName string `json:"display_name"` - LogicalGroupID string `json:"logical_group_id"` - AllowedModels []string `json:"allowed_models"` - AdminStatus string `json:"admin_status"` - QuotaStatus string `json:"quota_status"` - LastUsedAt string `json:"last_used_at,omitempty"` - CreatedAt string `json:"created_at"` - ExpiresAt string `json:"expires_at,omitempty"` - UpdatedAt string `json:"updated_at"` + ID int64 `json:"-"` + KeyID string `json:"key_id"` + OwnerSubjectID string `json:"owner_subject_id"` + ManagedIdentitySelector string `json:"-"` + KeyFingerprint string `json:"key_fingerprint"` + MaskedPreview string `json:"masked_preview"` + DisplayName string `json:"display_name"` + LogicalGroupID string `json:"logical_group_id"` + AllowedModels []string `json:"allowed_models"` + AdminStatus string `json:"admin_status"` + QuotaStatus string `json:"quota_status"` + LastUsedAt string `json:"last_used_at,omitempty"` + CreatedAt string `json:"created_at"` + ExpiresAt string `json:"expires_at,omitempty"` + UpdatedAt string `json:"updated_at"` } type UserKeysRepo struct { @@ -40,11 +41,11 @@ func (r *UserKeysRepo) Create(ctx context.Context, key UserKeyRecord) (int64, er } result, err := r.db.ExecContext(ctx, ` INSERT INTO user_keys ( - key_id, owner_subject_id, key_fingerprint, masked_preview, + key_id, owner_subject_id, managed_identity_selector, key_fingerprint, masked_preview, display_name, logical_group_id, allowed_models, admin_status, quota_status - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, - key.KeyID, key.OwnerSubjectID, key.KeyFingerprint, key.MaskedPreview, + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + key.KeyID, key.OwnerSubjectID, key.ManagedIdentitySelector, key.KeyFingerprint, key.MaskedPreview, key.DisplayName, key.LogicalGroupID, string(modelsJSON), key.AdminStatus, key.QuotaStatus, ) @@ -60,7 +61,7 @@ func scanUserKeys(rows *sql.Rows) ([]UserKeyRecord, error) { var k UserKeyRecord var modelsJSON, lastUsedAt, expiresAt sql.NullString err := rows.Scan( - &k.ID, &k.KeyID, &k.OwnerSubjectID, &k.KeyFingerprint, &k.MaskedPreview, + &k.ID, &k.KeyID, &k.OwnerSubjectID, &k.ManagedIdentitySelector, &k.KeyFingerprint, &k.MaskedPreview, &k.DisplayName, &k.LogicalGroupID, &modelsJSON, &k.AdminStatus, &k.QuotaStatus, &lastUsedAt, &k.CreatedAt, &expiresAt, &k.UpdatedAt, ) @@ -81,7 +82,7 @@ func scanOneUserKey(row *sql.Row) (*UserKeyRecord, error) { var k UserKeyRecord var modelsJSON, lastUsedAt, expiresAt sql.NullString err := row.Scan( - &k.ID, &k.KeyID, &k.OwnerSubjectID, &k.KeyFingerprint, &k.MaskedPreview, + &k.ID, &k.KeyID, &k.OwnerSubjectID, &k.ManagedIdentitySelector, &k.KeyFingerprint, &k.MaskedPreview, &k.DisplayName, &k.LogicalGroupID, &modelsJSON, &k.AdminStatus, &k.QuotaStatus, &lastUsedAt, &k.CreatedAt, &expiresAt, &k.UpdatedAt, ) @@ -98,7 +99,7 @@ func scanOneUserKey(row *sql.Row) (*UserKeyRecord, error) { func (r *UserKeysRepo) ListByOwner(ctx context.Context, subjectID string) ([]UserKeyRecord, error) { rows, err := r.db.QueryContext(ctx, ` - SELECT id, key_id, owner_subject_id, key_fingerprint, masked_preview, + SELECT id, key_id, owner_subject_id, managed_identity_selector, key_fingerprint, masked_preview, display_name, logical_group_id, allowed_models, admin_status, quota_status, last_used_at, created_at, expires_at, updated_at FROM user_keys WHERE owner_subject_id = ? ORDER BY created_at DESC`, subjectID) @@ -111,7 +112,7 @@ func (r *UserKeysRepo) ListByOwner(ctx context.Context, subjectID string) ([]Use func (r *UserKeysRepo) ListByFingerprint(ctx context.Context, fingerprint string) ([]UserKeyRecord, error) { rows, err := r.db.QueryContext(ctx, ` - SELECT id, key_id, owner_subject_id, key_fingerprint, masked_preview, + SELECT id, key_id, owner_subject_id, managed_identity_selector, key_fingerprint, masked_preview, display_name, logical_group_id, allowed_models, admin_status, quota_status, last_used_at, created_at, expires_at, updated_at FROM user_keys WHERE key_fingerprint = ? ORDER BY created_at DESC`, fingerprint) @@ -124,7 +125,7 @@ func (r *UserKeysRepo) ListByFingerprint(ctx context.Context, fingerprint string func (r *UserKeysRepo) GetByID(ctx context.Context, keyID string) (*UserKeyRecord, error) { row := r.db.QueryRowContext(ctx, ` - SELECT id, key_id, owner_subject_id, key_fingerprint, masked_preview, + SELECT id, key_id, owner_subject_id, managed_identity_selector, key_fingerprint, masked_preview, display_name, logical_group_id, allowed_models, admin_status, quota_status, last_used_at, created_at, expires_at, updated_at FROM user_keys WHERE key_id = ?`, keyID) @@ -154,8 +155,9 @@ func (r *UserKeysRepo) UpdateStatus(ctx context.Context, keyID string, adminStat return nil } -func (r *UserKeysRepo) UpdateSecret(ctx context.Context, keyID, fingerprint, maskedPreview, adminStatus string) error { +func (r *UserKeysRepo) UpdateSecret(ctx context.Context, keyID, managedIdentitySelector, fingerprint, maskedPreview, adminStatus string) error { keyID = strings.TrimSpace(keyID) + managedIdentitySelector = strings.TrimSpace(managedIdentitySelector) fingerprint = strings.TrimSpace(fingerprint) maskedPreview = strings.TrimSpace(maskedPreview) adminStatus = strings.ToLower(strings.TrimSpace(adminStatus)) @@ -174,9 +176,9 @@ func (r *UserKeysRepo) UpdateSecret(ctx context.Context, keyID, fingerprint, mas } result, err := r.db.ExecContext(ctx, `UPDATE user_keys - SET key_fingerprint = ?, masked_preview = ?, admin_status = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') + SET managed_identity_selector = ?, key_fingerprint = ?, masked_preview = ?, admin_status = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%SZ','now') WHERE key_id = ?`, - fingerprint, maskedPreview, adminStatus, keyID, + managedIdentitySelector, fingerprint, maskedPreview, adminStatus, keyID, ) if err != nil { return fmt.Errorf("update user_key secret: %w", err) diff --git a/scripts/acceptance/verify_user_key_self_service.sh b/scripts/acceptance/verify_user_key_self_service.sh index 3f13d89f..173eb3ad 100755 --- a/scripts/acceptance/verify_user_key_self_service.sh +++ b/scripts/acceptance/verify_user_key_self_service.sh @@ -7,8 +7,10 @@ ARTIFACT_DIR="${ARTIFACT_DIR:-$ROOT_DIR/artifacts/user-key-self-service/${TS}}" CRM_BASE="${CRM_BASE:-https://sub.tksea.top/portal-admin-api}" USER_CHAT_BASE="${USER_CHAT_BASE:-}" CHAT_MODEL="${CHAT_MODEL:-gpt-5.4}" -USER_SUBJECT_ID="${USER_SUBJECT_ID:-}" -USER_AUTH_TOKEN="${USER_AUTH_TOKEN:-}" +USER_TRUSTED_SUBJECT_ID="${USER_TRUSTED_SUBJECT_ID:-}" +USER_TRUSTED_SUBJECT_HEADER="${USER_TRUSTED_SUBJECT_HEADER:-X-CRM-Authenticated-Subject}" +USER_TRUSTED_PROXY_SECRET_HEADER="${USER_TRUSTED_PROXY_SECRET_HEADER:-X-CRM-Trusted-Proxy}" +USER_TRUSTED_PROXY_SECRET="${USER_TRUSTED_PROXY_SECRET:-}" mkdir -p "$ARTIFACT_DIR" @@ -31,9 +33,11 @@ Required env for --run: USER_CHAT_BASE 最终 user-key 调用入口 base, e.g. https://sub.tksea.top CHAT_MODEL chat 模型名,default: gpt-5.4 -Authentication for /api/keys endpoints (choose one): - USER_SUBJECT_ID 通过 X-Portal-Subject 头注入 subject(联合部署/受信入口) - USER_AUTH_TOKEN 通过 Authorization: Bearer 走用户链路 +Authentication for /api/keys endpoints: + USER_TRUSTED_SUBJECT_ID 受信代理注入的 subject 值 + USER_TRUSTED_SUBJECT_HEADER subject 头名,default: X-CRM-Authenticated-Subject + USER_TRUSTED_PROXY_SECRET_HEADER 代理密钥头名,default: X-CRM-Trusted-Proxy + USER_TRUSTED_PROXY_SECRET 代理与 CRM 共享的密钥 Artifacts: artifacts/user-key-self-service// @@ -48,12 +52,10 @@ EOF } build_auth_args() { - if [[ -n "$USER_AUTH_TOKEN" ]]; then - printf '%s\n' "-H" "Authorization: Bearer $USER_AUTH_TOKEN" - return 0 - fi - if [[ -n "$USER_SUBJECT_ID" ]]; then - printf '%s\n' "-H" "X-Portal-Subject: $USER_SUBJECT_ID" + if [[ -n "$USER_TRUSTED_SUBJECT_ID" && -n "$USER_TRUSTED_PROXY_SECRET" ]]; then + printf '%s\n' \ + "-H" "${USER_TRUSTED_SUBJECT_HEADER}: ${USER_TRUSTED_SUBJECT_ID}" \ + "-H" "${USER_TRUSTED_PROXY_SECRET_HEADER}: ${USER_TRUSTED_PROXY_SECRET}" return 0 fi return 1 @@ -133,8 +135,8 @@ cmd_env_check() { CRM_HEALTH="$crm_health" \ USER_CHAT_BASE_PY="$USER_CHAT_BASE" \ CHAT_HEALTH="$chat_health" \ - HAS_SUBJECT_ID="$USER_SUBJECT_ID" \ - HAS_AUTH_TOKEN="$USER_AUTH_TOKEN" \ + HAS_TRUSTED_SUBJECT_ID="$USER_TRUSTED_SUBJECT_ID" \ + HAS_TRUSTED_PROXY_SECRET="$USER_TRUSTED_PROXY_SECRET" \ python3 - <<'PY' import json, os out = { @@ -142,8 +144,8 @@ out = { "crm_health": os.environ["CRM_HEALTH"], "user_chat_base": os.environ["USER_CHAT_BASE_PY"], "user_chat_health": os.environ["CHAT_HEALTH"], - "has_user_subject_id": bool(os.environ["HAS_SUBJECT_ID"]), - "has_user_auth_token": bool(os.environ["HAS_AUTH_TOKEN"]), + "has_trusted_subject_id": bool(os.environ["HAS_TRUSTED_SUBJECT_ID"]), + "has_trusted_proxy_secret": bool(os.environ["HAS_TRUSTED_PROXY_SECRET"]), } with open(os.environ["OUT_PATH"], "w", encoding="utf-8") as fh: json.dump(out, fh, ensure_ascii=False, indent=2) @@ -157,7 +159,7 @@ cmd_run() { cmd_env_check [[ -n "$USER_CHAT_BASE" ]] || die "USER_CHAT_BASE is required for --run" if ! build_auth_args >/dev/null; then - die "set USER_SUBJECT_ID or USER_AUTH_TOKEN for /api/keys authentication" + die "set USER_TRUSTED_SUBJECT_ID and USER_TRUSTED_PROXY_SECRET for /api/keys authentication" fi local create_payload create_code key_id plaintext_key masked_preview create_body diff --git a/scripts/deploy/deploy_tksea_portal.sh b/scripts/deploy/deploy_tksea_portal.sh index f679cd23..4788376e 100755 --- a/scripts/deploy/deploy_tksea_portal.sh +++ b/scripts/deploy/deploy_tksea_portal.sh @@ -97,11 +97,19 @@ block = textwrap.dedent("""\ } location /portal-admin-api/ { + # 必须由受信登录/鉴权层把用户 subject 放进 \$portal_subject,不能信任浏览器自带 header。 + # 同时 CRM 需配置: + # SUB2API_CRM_TRUSTED_SUBJECT_HEADER=X-CRM-Authenticated-Subject + # SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER=X-CRM-Trusted-Proxy + # SUB2API_CRM_TRUSTED_PROXY_SECRET= proxy_pass http://127.0.0.1:${REMOTE_CRM_PORT}/; proxy_set_header Host \$host; proxy_set_header X-Real-IP \$remote_addr; proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto \$scheme; + proxy_set_header X-Portal-Subject ""; + proxy_set_header X-CRM-Authenticated-Subject \$portal_subject; + proxy_set_header X-CRM-Trusted-Proxy "REPLACE_WITH_SUB2API_CRM_TRUSTED_PROXY_SECRET"; proxy_http_version 1.1; } diff --git a/scripts/deploy/remote43_patched_stack_lib.sh b/scripts/deploy/remote43_patched_stack_lib.sh index 4333f464..2dece494 100755 --- a/scripts/deploy/remote43_patched_stack_lib.sh +++ b/scripts/deploy/remote43_patched_stack_lib.sh @@ -75,12 +75,19 @@ render_remote43_crm_env() { local repo_root="${4:-}" local admin_username="${5:-admin}" local admin_password="${6:-$admin_token}" + local trusted_subject_header="${SUB2API_CRM_TRUSTED_SUBJECT_HEADER:-}" + local trusted_proxy_secret_header="${SUB2API_CRM_TRUSTED_PROXY_SECRET_HEADER:-X-CRM-Trusted-Proxy}" + local trusted_proxy_secret="${SUB2API_CRM_TRUSTED_PROXY_SECRET:-}" local sqlite_dsn_q admin_token_q repo_root_q admin_username_q admin_password_q + local trusted_subject_header_q trusted_proxy_secret_header_q trusted_proxy_secret_q printf -v sqlite_dsn_q '%q' "$sqlite_dsn" printf -v admin_token_q '%q' "$admin_token" printf -v repo_root_q '%q' "$repo_root" printf -v admin_username_q '%q' "$admin_username" printf -v admin_password_q '%q' "$admin_password" + printf -v trusted_subject_header_q '%q' "$trusted_subject_header" + printf -v trusted_proxy_secret_header_q '%q' "$trusted_proxy_secret_header" + printf -v trusted_proxy_secret_q '%q' "$trusted_proxy_secret" cat <