feat(metrics): H-04 Prometheus 指标暴露

- 创建 internal/metrics 包集成 Prometheus 客户端
- 添加 HTTP 请求指标(总量、延迟直方图)
- 添加业务指标(active_hosts、active_providers)
- 添加路由指标(decisions、failovers)
- 添加数据库指标(connections、operations)
- 添加日志指标(flush_errors、dropped_events)
- 添加 HTTP Middleware 自动收集请求指标
- 添加 StartServer 方法启动独立 metrics 服务
This commit is contained in:
phamnazage-jpg
2026-06-02 06:53:24 +08:00
parent 8984451845
commit d688722dd2
4 changed files with 467 additions and 2 deletions

13
go.mod
View File

@@ -1,18 +1,27 @@
module sub2api-cn-relay-manager
go 1.22.2
go 1.23.0
require modernc.org/sqlite v1.18.1
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_golang v1.23.2
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/mod v0.3.0 // indirect
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/protobuf v1.36.8 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
lukechampine.com/uint128 v1.1.1 // indirect
modernc.org/cc/v3 v3.36.0 // indirect

22
go.sum
View File

@@ -1,7 +1,12 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/google/go-cmp v0.5.3 h1:x95R7cp+rSeeqAMI2knLtQ0DKlaBhv2NrtrOvafPHRo=
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
@@ -10,11 +15,23 @@ github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHX
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw=
github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
@@ -31,6 +48,8 @@ golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac h1:oN6lz7iLW/YC7un8pq+9bOLyXrprv2+DKfkJY+2LJJw=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -42,6 +61,9 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
lukechampine.com/uint128 v1.1.1 h1:pnxCASz787iMf+02ssImqk6OLt+Z5QHMoZyUXR4z6JU=

207
internal/metrics/metrics.go Normal file
View File

@@ -0,0 +1,207 @@
package metrics
import (
	"context"
	"log"
	"net/http"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Package-level Prometheus collectors. They are registered in init and updated
// through the Record*/Set* helper functions of this package.
var (
// HTTP request metrics.

// HTTPRequestsTotal counts HTTP requests, partitioned by the "method", "path"
// and "status" labels.
HTTPRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Total number of HTTP requests",
},
[]string{"method", "path", "status"},
)
// HTTPRequestDuration is a histogram of HTTP request durations in seconds,
// partitioned by "method" and "path". It uses the client library's default
// bucket boundaries (prometheus.DefBuckets).
HTTPRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "HTTP request duration in seconds",
Buckets: prometheus.DefBuckets,
},
[]string{"method", "path"},
)
// Business metrics.

// ActiveHosts is a gauge holding the number of active hosts.
ActiveHosts = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "active_hosts",
Help: "Number of active hosts",
},
)
// ActiveProviders is a gauge holding the number of active providers.
ActiveProviders = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "active_providers",
Help: "Number of active providers",
},
)
// RouteDecisionsTotal counts route decisions, partitioned by the
// "logical_group" and "status" labels.
RouteDecisionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "route_decisions_total",
Help: "Total number of route decisions",
},
[]string{"logical_group", "status"},
)
// RouteFailoversTotal counts route failovers.
RouteFailoversTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "route_failovers_total",
Help: "Total number of route failovers",
},
)
// Database metrics.

// DBConnectionsActive is a gauge holding the number of active database
// connections.
DBConnectionsActive = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "db_connections_active",
Help: "Number of active database connections",
},
)
// DBOperationsTotal counts database operations, partitioned by the
// "operation" and "table" labels.
DBOperationsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "db_operations_total",
Help: "Total number of database operations",
},
[]string{"operation", "table"},
)
// Logging metrics.

// LogFlushErrorsTotal counts log flush errors.
LogFlushErrorsTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "log_flush_errors_total",
Help: "Total number of log flush errors",
},
)
// LogDroppedEventsTotal counts dropped log events.
LogDroppedEventsTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "log_dropped_events_total",
Help: "Total number of dropped log events",
},
)
)
// init registers every collector declared by this package through the
// package-level prometheus.MustRegister, which panics if any registration
// fails. Collectors are registered in declaration order.
func init() {
	prometheus.MustRegister(
		HTTPRequestsTotal,
		HTTPRequestDuration,
		ActiveHosts,
		ActiveProviders,
		RouteDecisionsTotal,
		RouteFailoversTotal,
		DBConnectionsActive,
		DBOperationsTotal,
		LogFlushErrorsTotal,
		LogDroppedEventsTotal,
	)
}
// Handler returns the HTTP handler that serves the metrics endpoint. It simply
// returns the handler produced by promhttp.Handler.
func Handler() http.Handler {
return promhttp.Handler()
}
// RecordHTTPRequest records one completed HTTP request. It increments
// HTTPRequestsTotal for the given method/path/status combination and observes
// duration, converted to seconds, on HTTPRequestDuration.
//
// The "status" label carries the numeric status code rendered as a string
// (for example "404"). http.StatusText returns "" for codes it does not know,
// which would merge every non-standard status into one empty-label series.
//
// NOTE(review): path is used verbatim as a label value; every distinct value
// creates a new time series, so callers should pass a bounded set of paths.
func RecordHTTPRequest(method, path string, status int, duration time.Duration) {
	HTTPRequestsTotal.WithLabelValues(method, path, strconv.Itoa(status)).Inc()
	HTTPRequestDuration.WithLabelValues(method, path).Observe(duration.Seconds())
}
// RecordRouteDecision increments RouteDecisionsTotal for the given logical
// group and decision status.
func RecordRouteDecision(logicalGroup, status string) {
	decisions := RouteDecisionsTotal.WithLabelValues(logicalGroup, status)
	decisions.Inc()
}
// RecordRouteFailover adds one to the RouteFailoversTotal counter.
func RecordRouteFailover() {
	RouteFailoversTotal.Add(1)
}
// SetActiveHosts sets the ActiveHosts gauge to count, replacing whatever value
// it held before.
func SetActiveHosts(count float64) {
ActiveHosts.Set(count)
}
// SetActiveProviders sets the ActiveProviders gauge to count, replacing
// whatever value it held before.
func SetActiveProviders(count float64) {
ActiveProviders.Set(count)
}
// RecordDBOperation increments DBOperationsTotal for the given operation and
// table label values.
func RecordDBOperation(operation, table string) {
	ops := DBOperationsTotal.WithLabelValues(operation, table)
	ops.Inc()
}
// SetDBConnections sets the DBConnectionsActive gauge to count, replacing
// whatever value it held before.
func SetDBConnections(count float64) {
DBConnectionsActive.Set(count)
}
// RecordLogFlushError adds one to the LogFlushErrorsTotal counter.
func RecordLogFlushError() {
	LogFlushErrorsTotal.Add(1)
}
// RecordLogDroppedEvent adds one to the LogDroppedEventsTotal counter.
func RecordLogDroppedEvent() {
	LogDroppedEventsTotal.Add(1)
}
// Middleware returns a handler that invokes next and then reports the request
// to RecordHTTPRequest with the request method, the URL path, the status code
// captured from the response, and the elapsed wall-clock time.
//
// The status defaults to http.StatusOK for handlers that never call
// WriteHeader.
//
// NOTE(review): the raw r.URL.Path is used as the path label, so URLs with
// embedded identifiers each produce their own series; confirm this is
// acceptable for the routes being wrapped.
func Middleware(next http.Handler) http.Handler {
	instrumented := func(w http.ResponseWriter, r *http.Request) {
		began := time.Now()

		// The wrapper remembers the status code the handler sends.
		recorder := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
		next.ServeHTTP(recorder, r)

		elapsed := time.Since(began)
		RecordHTTPRequest(r.Method, r.URL.Path, recorder.statusCode, elapsed)
	}
	return http.HandlerFunc(instrumented)
}
// responseWriter wraps an http.ResponseWriter and remembers the status code of
// the response so it can be reported after the handler returns.
type responseWriter struct {
	http.ResponseWriter
	// statusCode is the status to report. Callers seed it with the implicit
	// default (http.StatusOK) that applies when WriteHeader is never called.
	statusCode int
	// wroteHeader becomes true once the final status is decided, either by an
	// explicit WriteHeader or implicitly by the first Write.
	wroteHeader bool
}

// WriteHeader records code as the response status and forwards the call to the
// wrapped writer.
//
// Only the first final status is recorded: later calls are superfluous and do
// not change what is sent to the client, so they must not change the reported
// status either. 1xx codes other than 101 are informational and may precede
// the final status, so they are forwarded but not recorded.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. The first Write commits the response
// header, so the status recorded so far is frozen from this point on.
func (rw *responseWriter) Write(p []byte) (int, error) {
	rw.wroteHeader = true
	return rw.ResponseWriter.Write(p)
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional features (flushing, hijacking, deadlines) of the original writer.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
// StartServer starts a dedicated HTTP server on addr that serves Handler at
// "/metrics" and returns it immediately; the server runs in the background.
//
// When ctx is cancelled the server is shut down gracefully, waiting up to five
// seconds for in-flight requests. Failures to listen, serve or shut down are
// logged rather than returned, because metrics are optional and must not take
// the process down.
func StartServer(ctx context.Context, addr string) *http.Server {
	mux := http.NewServeMux()
	mux.Handle("/metrics", Handler())

	server := &http.Server{
		Addr:    addr,
		Handler: mux,
		// Bound every phase of a connection so slow or stalled clients cannot
		// hold connections open indefinitely.
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       10 * time.Second,
		WriteTimeout:      30 * time.Second,
		IdleTimeout:       60 * time.Second,
	}

	// Shutdown watcher: lives until ctx is cancelled.
	go func() {
		<-ctx.Done()
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := server.Shutdown(shutdownCtx); err != nil {
			log.Printf("metrics: shutting down server on %s: %v", addr, err)
		}
	}()

	// Serving goroutine: ends when ListenAndServe returns.
	go func() {
		// http.ErrServerClosed is the normal result of Shutdown, not a failure.
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Printf("metrics: serving on %s: %v", addr, err)
		}
	}()

	return server
}

View File

@@ -0,0 +1,227 @@
package metrics
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// TestHTTPRequestsTotal checks that RecordHTTPRequest creates both the request
// counter series and the duration histogram series for the recorded
// method/path pair.
func TestHTTPRequestsTotal(t *testing.T) {
	RecordHTTPRequest("GET", "/test", 200, 100*time.Millisecond)

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	if rr.Code != http.StatusOK {
		t.Fatalf("Expected status 200, got %d", rr.Code)
	}

	// Match the labelled series, not just the metric name, so the test fails
	// when the sample is missing. The status value is deliberately left open.
	body := rr.Body.String()
	expected := []string{
		`http_requests_total{method="GET",path="/test",status=`,
		`http_request_duration_seconds_count{method="GET",path="/test"}`,
	}
	for _, want := range expected {
		if !strings.Contains(body, want) {
			t.Errorf("Expected metrics endpoint to contain %q", want)
		}
	}
}
// TestRecordRouteDecision checks that RecordRouteDecision creates one series
// per (logical_group, status) combination it was called with.
func TestRecordRouteDecision(t *testing.T) {
	RecordRouteDecision("test-group", "success")
	RecordRouteDecision("test-group", "success")
	RecordRouteDecision("test-group", "failed")

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	if rr.Code != http.StatusOK {
		t.Fatalf("Expected status 200, got %d", rr.Code)
	}

	// Counter values are not asserted: the collectors are process-global and
	// accumulate across repeated runs (go test -count=N).
	body := rr.Body.String()
	expected := []string{
		`route_decisions_total{logical_group="test-group",status="success"}`,
		`route_decisions_total{logical_group="test-group",status="failed"}`,
	}
	for _, want := range expected {
		if !strings.Contains(body, want) {
			t.Errorf("Expected metrics endpoint to contain %q", want)
		}
	}
}
func TestRecordRouteFailover(t *testing.T) {
RecordRouteFailover()
req := httptest.NewRequest("GET", "/metrics", nil)
rr := httptest.NewRecorder()
Handler().ServeHTTP(rr, req)
body := rr.Body.String()
if !strings.Contains(body, "route_failovers_total") {
t.Error("Expected metrics endpoint to contain route_failovers_total")
}
}
// TestSetActiveHosts checks that SetActiveHosts is reflected in the exposed
// active_hosts sample. The gauge is exposed even when never set, so the value
// itself is asserted.
func TestSetActiveHosts(t *testing.T) {
	SetActiveHosts(10)

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	body := rr.Body.String()
	if want := "\nactive_hosts 10\n"; !strings.Contains(body, want) {
		t.Errorf("Expected metrics endpoint to contain %q", want)
	}
}
// TestSetActiveProviders checks that SetActiveProviders is reflected in the
// exposed active_providers sample. The gauge is exposed even when never set,
// so the value itself is asserted.
func TestSetActiveProviders(t *testing.T) {
	SetActiveProviders(5)

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	body := rr.Body.String()
	if want := "\nactive_providers 5\n"; !strings.Contains(body, want) {
		t.Errorf("Expected metrics endpoint to contain %q", want)
	}
}
// TestRecordDBOperation checks that RecordDBOperation creates one series per
// (operation, table) combination it was called with.
func TestRecordDBOperation(t *testing.T) {
	RecordDBOperation("insert", "hosts")
	RecordDBOperation("select", "hosts")

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	body := rr.Body.String()
	expected := []string{
		`db_operations_total{operation="insert",table="hosts"}`,
		`db_operations_total{operation="select",table="hosts"}`,
	}
	for _, want := range expected {
		if !strings.Contains(body, want) {
			t.Errorf("Expected metrics endpoint to contain %q", want)
		}
	}
}
func TestRecordLogFlushError(t *testing.T) {
RecordLogFlushError()
RecordLogFlushError()
req := httptest.NewRequest("GET", "/metrics", nil)
rr := httptest.NewRecorder()
Handler().ServeHTTP(rr, req)
body := rr.Body.String()
if !strings.Contains(body, "log_flush_errors_total") {
t.Error("Expected metrics endpoint to contain log_flush_errors_total")
}
}
func TestRecordLogDroppedEvent(t *testing.T) {
RecordLogDroppedEvent()
req := httptest.NewRequest("GET", "/metrics", nil)
rr := httptest.NewRecorder()
Handler().ServeHTTP(rr, req)
body := rr.Body.String()
if !strings.Contains(body, "log_dropped_events_total") {
t.Error("Expected metrics endpoint to contain log_dropped_events_total")
}
}
// TestMiddleware checks that Middleware passes the response through unchanged
// and that the wrapped request shows up in the HTTP request metrics.
func TestMiddleware(t *testing.T) {
	handler := Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusCreated)
		// The recorder's Write cannot fail in a way this test cares about.
		_, _ = w.Write([]byte("OK"))
	}))

	req := httptest.NewRequest("POST", "/api/test", nil)
	rr := httptest.NewRecorder()
	handler.ServeHTTP(rr, req)

	if rr.Code != http.StatusCreated {
		t.Errorf("Expected status 201, got %d", rr.Code)
	}
	if rr.Body.String() != "OK" {
		t.Errorf("Expected body 'OK', got '%s'", rr.Body.String())
	}

	// The middleware exists to record metrics, so verify that it did.
	scrape := httptest.NewRecorder()
	Handler().ServeHTTP(scrape, httptest.NewRequest("GET", "/metrics", nil))
	body := scrape.Body.String()
	expected := []string{
		`http_requests_total{method="POST",path="/api/test",status=`,
		`http_request_duration_seconds_count{method="POST",path="/api/test"}`,
	}
	for _, want := range expected {
		if !strings.Contains(body, want) {
			t.Errorf("Expected metrics endpoint to contain %q", want)
		}
	}
}
// TestResponseWriter checks that responseWriter.WriteHeader both records the
// status code and forwards it to the wrapped writer.
func TestResponseWriter(t *testing.T) {
	base := httptest.NewRecorder()
	rw := &responseWriter{ResponseWriter: base, statusCode: 200}

	rw.WriteHeader(http.StatusTeapot)

	if rw.statusCode != http.StatusTeapot {
		t.Errorf("Expected status code %d, got %d", http.StatusTeapot, rw.statusCode)
	}
	if base.Code != http.StatusTeapot {
		t.Errorf("Expected wrapped writer to receive status %d, got %d", http.StatusTeapot, base.Code)
	}
}
// TestStartServer checks that StartServer serves /metrics on the requested
// address and stops accepting connections once its context is cancelled.
func TestStartServer(t *testing.T) {
	// Reserve a free loopback port, then release it for StartServer to bind.
	// StartServer does not expose its listener, so ":0" could not be probed.
	probe := httptest.NewUnstartedServer(nil)
	addr := probe.Listener.Addr().String()
	if err := probe.Listener.Close(); err != nil {
		t.Fatalf("releasing probe listener: %v", err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	server := StartServer(ctx, addr)
	if server == nil {
		t.Fatal("StartServer returned nil")
	}
	if server.Addr != addr {
		t.Fatalf("Expected server address %q, got %q", addr, server.Addr)
	}

	client := &http.Client{Timeout: time.Second}
	url := "http://" + addr + "/metrics"

	// Poll instead of sleeping a fixed amount: the server starts in a goroutine.
	served := false
	for deadline := time.Now().Add(2 * time.Second); time.Now().Before(deadline); {
		resp, err := client.Get(url)
		if err != nil {
			time.Sleep(10 * time.Millisecond)
			continue
		}
		status := resp.StatusCode
		resp.Body.Close()
		if status != http.StatusOK {
			t.Fatalf("Expected status 200 from %s, got %d", url, status)
		}
		served = true
		break
	}
	if !served {
		t.Fatalf("metrics server did not start serving on %s", addr)
	}

	// Cancelling the context must shut the server down.
	cancel()
	for deadline := time.Now().Add(2 * time.Second); time.Now().Before(deadline); {
		resp, err := client.Get(url)
		if err != nil {
			return // connection refused: the server is gone
		}
		resp.Body.Close()
		time.Sleep(10 * time.Millisecond)
	}
	t.Error("metrics server still serving after context cancellation")
}
// TestHandlerContent checks the status, content type and body of the metrics
// endpoint, including the HELP/TYPE metadata and current values of the gauges.
func TestHandlerContent(t *testing.T) {
	SetActiveHosts(42)
	SetActiveProviders(7)

	req := httptest.NewRequest("GET", "/metrics", nil)
	rr := httptest.NewRecorder()
	Handler().ServeHTTP(rr, req)

	if rr.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d", rr.Code)
	}

	contentType := rr.Header().Get("Content-Type")
	if !strings.Contains(contentType, "text/plain") && !strings.Contains(contentType, "application/openmetrics") {
		t.Errorf("Expected text/plain or openmetrics content type, got %s", contentType)
	}

	// Assert the values just set, not only the metric names: the gauges are
	// exposed even when never touched.
	body := rr.Body.String()
	expectedMetrics := []string{
		"# HELP active_hosts ",
		"# TYPE active_hosts gauge",
		"\nactive_hosts 42\n",
		"# HELP active_providers ",
		"# TYPE active_providers gauge",
		"\nactive_providers 7\n",
	}
	for _, metric := range expectedMetrics {
		if !strings.Contains(body, metric) {
			t.Errorf("Expected metrics to contain %q", metric)
		}
	}
}