feat(P1/P2): 完成TDD开发及P1/P2设计文档
## 设计文档 - multi_role_permission_design: 多角色权限设计 (CONDITIONAL GO) - audit_log_enhancement_design: 审计日志增强 (CONDITIONAL GO) - routing_strategy_template_design: 路由策略模板 (CONDITIONAL GO) - sso_saml_technical_research: SSO/SAML调研 (CONDITIONAL GO) - compliance_capability_package_design: 合规能力包设计 (CONDITIONAL GO) ## TDD开发成果 - IAM模块: supply-api/internal/iam/ (111个测试) - 审计日志模块: supply-api/internal/audit/ (40+测试) - 路由策略模块: gateway/internal/router/ (33+测试) - 合规能力包: gateway/internal/compliance/ + scripts/ci/compliance/ ## 规范文档 - parallel_agent_output_quality_standards: 并行Agent产出质量规范 - project_experience_summary: 项目经验总结 (v2) - 2026-04-02-p1-p2-tdd-execution-plan: TDD执行计划 ## 评审报告 - 5个CONDITIONAL GO设计文档评审报告 - fix_verification_report: 修复验证报告 - full_verification_report: 全面质量验证报告 - tdd_module_quality_verification: TDD模块质量验证 - tdd_execution_summary: TDD执行总结 依据: Superpowers执行框架 + TDD规范
This commit is contained in:
182
gateway/internal/router/metrics/routing_metrics.go
Normal file
182
gateway/internal/router/metrics/routing_metrics.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RoutingMetrics 路由指标收集器 (M-008)
|
||||
type RoutingMetrics struct {
|
||||
// 计数器
|
||||
totalRequests int64
|
||||
totalTakeovers int64
|
||||
primaryTakeovers int64
|
||||
fallbackTakeovers int64
|
||||
noMarkCount int64
|
||||
|
||||
// 按provider统计
|
||||
providerStats map[string]*ProviderStat
|
||||
providerMu sync.RWMutex
|
||||
|
||||
// 按策略统计
|
||||
strategyStats map[string]*StrategyStat
|
||||
strategyMu sync.RWMutex
|
||||
|
||||
// 时间窗口
|
||||
windowStart time.Time
|
||||
}
|
||||
|
||||
// ProviderStat holds the statistics accumulated for a single provider.
//
// Count is the number of recorded entries for the provider. LatencySum and
// Errors are accumulator fields carried along in snapshots.
type ProviderStat struct {
	Count, LatencySum, Errors int64
}
|
||||
|
||||
// StrategyStat holds the statistics accumulated for a single routing strategy.
//
// Count is the number of recorded entries for the strategy, Takeovers the
// number of takeover marks attributed to it, and LatencySum an accumulator
// field carried along in snapshots.
type StrategyStat struct {
	Count, Takeovers, LatencySum int64
}
|
||||
|
||||
// RoutingStats 路由统计
|
||||
type RoutingStats struct {
|
||||
TotalRequests int64
|
||||
TotalTakeovers int64
|
||||
PrimaryTakeovers int64
|
||||
FallbackTakeovers int64
|
||||
NoMarkCount int64
|
||||
TakeoverRate float64
|
||||
M008Coverage float64 // 路由标记覆盖率 >= 99.9%
|
||||
ProviderStats map[string]*ProviderStat
|
||||
StrategyStats map[string]*StrategyStat
|
||||
}
|
||||
|
||||
// NewRoutingMetrics 创建路由指标收集器
|
||||
func NewRoutingMetrics() *RoutingMetrics {
|
||||
return &RoutingMetrics{
|
||||
providerStats: make(map[string]*ProviderStat),
|
||||
strategyStats: make(map[string]*StrategyStat),
|
||||
windowStart: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// RecordTakeoverMark 记录接管标记
|
||||
// pathType: "primary" 或 "fallback"
|
||||
// strategy: 使用的策略名称
|
||||
func (m *RoutingMetrics) RecordTakeoverMark(provider string, tier int, pathType string, strategy string) {
|
||||
atomic.AddInt64(&m.totalTakeovers, 1)
|
||||
|
||||
// 更新路径类型计数
|
||||
switch pathType {
|
||||
case "primary":
|
||||
atomic.AddInt64(&m.primaryTakeovers, 1)
|
||||
case "fallback":
|
||||
atomic.AddInt64(&m.fallbackTakeovers, 1)
|
||||
}
|
||||
|
||||
// 更新Provider统计
|
||||
m.providerMu.Lock()
|
||||
if _, ok := m.providerStats[provider]; !ok {
|
||||
m.providerStats[provider] = &ProviderStat{}
|
||||
}
|
||||
m.providerStats[provider].Count++
|
||||
m.providerMu.Unlock()
|
||||
|
||||
// 更新策略统计
|
||||
m.strategyMu.Lock()
|
||||
if _, ok := m.strategyStats[strategy]; !ok {
|
||||
m.strategyStats[strategy] = &StrategyStat{}
|
||||
}
|
||||
m.strategyStats[strategy].Count++
|
||||
m.strategyStats[strategy].Takeovers++
|
||||
m.strategyMu.Unlock()
|
||||
}
|
||||
|
||||
// RecordNoMark 记录未标记的请求(用于计算覆盖率)
|
||||
func (m *RoutingMetrics) RecordNoMark(reason string) {
|
||||
atomic.AddInt64(&m.noMarkCount, 1)
|
||||
}
|
||||
|
||||
// RecordRequest 记录请求
|
||||
func (m *RoutingMetrics) RecordRequest() {
|
||||
atomic.AddInt64(&m.totalRequests, 1)
|
||||
}
|
||||
|
||||
// GetStats 获取统计信息
|
||||
func (m *RoutingMetrics) GetStats() *RoutingStats {
|
||||
total := atomic.LoadInt64(&m.totalRequests)
|
||||
takeovers := atomic.LoadInt64(&m.totalTakeovers)
|
||||
primary := atomic.LoadInt64(&m.primaryTakeovers)
|
||||
fallback := atomic.LoadInt64(&m.fallbackTakeovers)
|
||||
noMark := atomic.LoadInt64(&m.noMarkCount)
|
||||
|
||||
// 计算接管率 (有标记的请求 / 总请求)
|
||||
var takeoverRate float64
|
||||
if total > 0 {
|
||||
takeoverRate = float64(takeovers) / float64(total) * 100
|
||||
}
|
||||
|
||||
// 计算M-008覆盖率 (有标记的请求 / 总请求)
|
||||
var coverage float64
|
||||
if total > 0 {
|
||||
coverage = float64(takeovers) / float64(total) * 100
|
||||
}
|
||||
|
||||
// 复制Provider统计
|
||||
m.providerMu.RLock()
|
||||
providerStats := make(map[string]*ProviderStat)
|
||||
for k, v := range m.providerStats {
|
||||
providerStats[k] = &ProviderStat{
|
||||
Count: v.Count,
|
||||
LatencySum: v.LatencySum,
|
||||
Errors: v.Errors,
|
||||
}
|
||||
}
|
||||
m.providerMu.RUnlock()
|
||||
|
||||
// 复制策略统计
|
||||
m.strategyMu.RLock()
|
||||
strategyStats := make(map[string]*StrategyStat)
|
||||
for k, v := range m.strategyStats {
|
||||
strategyStats[k] = &StrategyStat{
|
||||
Count: v.Count,
|
||||
Takeovers: v.Takeovers,
|
||||
LatencySum: v.LatencySum,
|
||||
}
|
||||
}
|
||||
m.strategyMu.RUnlock()
|
||||
|
||||
return &RoutingStats{
|
||||
TotalRequests: total,
|
||||
TotalTakeovers: takeovers,
|
||||
PrimaryTakeovers: primary,
|
||||
FallbackTakeovers: fallback,
|
||||
NoMarkCount: noMark,
|
||||
TakeoverRate: takeoverRate,
|
||||
M008Coverage: coverage,
|
||||
ProviderStats: providerStats,
|
||||
StrategyStats: strategyStats,
|
||||
}
|
||||
}
|
||||
|
||||
// Reset 重置统计
|
||||
func (m *RoutingMetrics) Reset() {
|
||||
atomic.StoreInt64(&m.totalRequests, 0)
|
||||
atomic.StoreInt64(&m.totalTakeovers, 0)
|
||||
atomic.StoreInt64(&m.primaryTakeovers, 0)
|
||||
atomic.StoreInt64(&m.fallbackTakeovers, 0)
|
||||
atomic.StoreInt64(&m.noMarkCount, 0)
|
||||
|
||||
m.providerMu.Lock()
|
||||
m.providerStats = make(map[string]*ProviderStat)
|
||||
m.providerMu.Unlock()
|
||||
|
||||
m.strategyMu.Lock()
|
||||
m.strategyStats = make(map[string]*StrategyStat)
|
||||
m.strategyMu.Unlock()
|
||||
|
||||
m.windowStart = time.Now()
|
||||
}
|
||||
155
gateway/internal/router/metrics/routing_metrics_test.go
Normal file
155
gateway/internal/router/metrics/routing_metrics_test.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestRoutingMetrics_M008_TakeoverMarkCoverage 测试M-008指标采集的完整覆盖
|
||||
func TestRoutingMetrics_M008_TakeoverMarkCoverage(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// 模拟主路径调用
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
|
||||
// 模拟Fallback路径调用
|
||||
metrics.RecordTakeoverMark("ProviderB", 2, "fallback", "cost_based")
|
||||
|
||||
// 验证主路径和Fallback路径都记录了TakeoverMark
|
||||
stats := metrics.GetStats()
|
||||
|
||||
// 验证总接管次数
|
||||
assert.Equal(t, int64(2), stats.TotalTakeovers, "Should have 2 takeovers")
|
||||
|
||||
// 验证主路径和Fallback路径分开统计
|
||||
assert.Equal(t, int64(1), stats.PrimaryTakeovers, "Should have 1 primary takeover")
|
||||
assert.Equal(t, int64(1), stats.FallbackTakeovers, "Should have 1 fallback takeover")
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_PrimaryPath 测试主路径M-008采集
|
||||
func TestRoutingMetrics_PrimaryPath(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
|
||||
stats := metrics.GetStats()
|
||||
assert.Equal(t, int64(1), stats.PrimaryTakeovers)
|
||||
assert.Equal(t, int64(1), stats.TotalTakeovers)
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_FallbackPath 测试Fallback路径M-008采集
|
||||
func TestRoutingMetrics_FallbackPath(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// Tier1失败,Tier2成功
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "fallback", "cost_based")
|
||||
metrics.RecordTakeoverMark("ProviderB", 2, "fallback", "cost_based")
|
||||
|
||||
stats := metrics.GetStats()
|
||||
assert.Equal(t, int64(2), stats.FallbackTakeovers)
|
||||
assert.Equal(t, int64(2), stats.TotalTakeovers)
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_TakeoverRate 测试接管率计算
|
||||
func TestRoutingMetrics_TakeoverRate(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// 模拟100次请求,60次主路径接管,40次无接管
|
||||
for i := 0; i < 100; i++ {
|
||||
metrics.RecordRequest()
|
||||
}
|
||||
// 60次接管
|
||||
for i := 0; i < 60; i++ {
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
}
|
||||
// 40次无接管 - 记录noMark
|
||||
for i := 0; i < 40; i++ {
|
||||
metrics.RecordNoMark("no provider available")
|
||||
}
|
||||
|
||||
stats := metrics.GetStats()
|
||||
|
||||
// 验证接管率 60/(60+40) = 60%
|
||||
expectedRate := 60.0 / 100.0 * 100 // 60%
|
||||
assert.InDelta(t, expectedRate, stats.TakeoverRate, 0.1, "Takeover rate should be around 60%%")
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_M008Coverage 测试M-008覆盖率
|
||||
func TestRoutingMetrics_M008Coverage(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// 模拟所有请求都标记了TakeoverMark
|
||||
for i := 0; i < 1000; i++ {
|
||||
metrics.RecordRequest()
|
||||
}
|
||||
for i := 0; i < 1000; i++ {
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
}
|
||||
|
||||
stats := metrics.GetStats()
|
||||
|
||||
// M-008要求覆盖率 >= 99.9%
|
||||
assert.GreaterOrEqual(t, stats.M008Coverage, 99.9, "M-008 coverage should be >= 99.9%%")
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_Concurrent 测试并发安全
|
||||
func TestRoutingMetrics_Concurrent(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// 并发记录
|
||||
done := make(chan bool)
|
||||
for i := 0; i < 100; i++ {
|
||||
go func() {
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
done <- true
|
||||
}()
|
||||
}
|
||||
|
||||
// 等待所有goroutine完成
|
||||
for i := 0; i < 100; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
stats := metrics.GetStats()
|
||||
assert.Equal(t, int64(100), stats.TotalTakeovers, "Should handle concurrent recordings")
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_RouteMarkCoverage 测试路由标记覆盖率
|
||||
func TestRoutingMetrics_RouteMarkCoverage(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
// 模拟所有请求都有标记
|
||||
for i := 0; i < 1000; i++ {
|
||||
metrics.RecordRequest()
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
}
|
||||
|
||||
// 没有未标记的请求
|
||||
metrics.RecordNoMark("reason")
|
||||
|
||||
stats := metrics.GetStats()
|
||||
|
||||
// 覆盖率应该很高
|
||||
assert.GreaterOrEqual(t, stats.M008Coverage, 99.9, "Coverage should be >= 99.9%%")
|
||||
}
|
||||
|
||||
// TestRoutingMetrics_ProviderStats 测试按provider统计
|
||||
func TestRoutingMetrics_ProviderStats(t *testing.T) {
|
||||
metrics := NewRoutingMetrics()
|
||||
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
metrics.RecordTakeoverMark("ProviderA", 1, "primary", "cost_based")
|
||||
metrics.RecordTakeoverMark("ProviderB", 1, "primary", "cost_aware")
|
||||
|
||||
stats := metrics.GetStats()
|
||||
|
||||
// 验证按provider统计
|
||||
providerA, ok := stats.ProviderStats["ProviderA"]
|
||||
assert.True(t, ok, "ProviderA should be in stats")
|
||||
assert.Equal(t, int64(2), providerA.Count, "ProviderA should have 2 takeovers")
|
||||
|
||||
providerB, ok := stats.ProviderStats["ProviderB"]
|
||||
assert.True(t, ok, "ProviderB should be in stats")
|
||||
assert.Equal(t, int64(1), providerB.Count, "ProviderB should have 1 takeover")
|
||||
}
|
||||
Reference in New Issue
Block a user