Files
sub2api-cn-relay-manager/internal/metrics/metrics.go
phamnazage-jpg d688722dd2 feat(metrics): H-04 Prometheus 指标暴露
- 创建 internal/metrics 包集成 Prometheus 客户端
- 添加 HTTP 请求指标(总量、延迟直方图)
- 添加业务指标(active_hosts、active_providers)
- 添加路由指标(decisions、failovers)
- 添加数据库指标(connections、operations)
- 添加日志指标(flush_errors、dropped_events)
- 添加 HTTP Middleware 自动收集请求指标
- 添加 StartServer 方法启动独立 metrics 服务
2026-06-02 06:53:24 +08:00

208 lines
5.1 KiB
Go

package metrics
import (
	"context"
	"log"
	"net/http"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// HTTP request metrics.
var (
	// HTTPRequestsTotal counts HTTP requests, partitioned by the
	// "method", "path" and "status" labels.
	HTTPRequestsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "http_requests_total",
			Help: "Total number of HTTP requests",
		},
		[]string{"method", "path", "status"},
	)

	// HTTPRequestDuration is a histogram of HTTP request durations in
	// seconds, partitioned by "method" and "path". It uses the client
	// library's default bucket layout.
	HTTPRequestDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "http_request_duration_seconds",
			Help:    "HTTP request duration in seconds",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"method", "path"},
	)
)

// Business metrics.
var (
	// ActiveHosts is a gauge holding the number of active hosts.
	ActiveHosts = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "active_hosts",
		Help: "Number of active hosts",
	})

	// ActiveProviders is a gauge holding the number of active providers.
	ActiveProviders = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "active_providers",
		Help: "Number of active providers",
	})
)

// Routing metrics.
var (
	// RouteDecisionsTotal counts route decisions, partitioned by the
	// "logical_group" and "status" labels.
	RouteDecisionsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "route_decisions_total",
			Help: "Total number of route decisions",
		},
		[]string{"logical_group", "status"},
	)

	// RouteFailoversTotal counts route failovers.
	RouteFailoversTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "route_failovers_total",
		Help: "Total number of route failovers",
	})
)

// Database metrics.
var (
	// DBConnectionsActive is a gauge holding the number of active database
	// connections.
	DBConnectionsActive = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "db_connections_active",
		Help: "Number of active database connections",
	})

	// DBOperationsTotal counts database operations, partitioned by the
	// "operation" and "table" labels.
	DBOperationsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "db_operations_total",
			Help: "Total number of database operations",
		},
		[]string{"operation", "table"},
	)
)

// Logging metrics.
var (
	// LogFlushErrorsTotal counts log flush errors.
	LogFlushErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "log_flush_errors_total",
		Help: "Total number of log flush errors",
	})

	// LogDroppedEventsTotal counts dropped log events.
	LogDroppedEventsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "log_dropped_events_total",
		Help: "Total number of dropped log events",
	})
)
// init registers every collector declared in this package through
// prometheus.MustRegister, which panics if a registration fails.
// Collectors are registered in declaration order.
func init() {
	prometheus.MustRegister(
		HTTPRequestsTotal,
		HTTPRequestDuration,
		ActiveHosts,
		ActiveProviders,
		RouteDecisionsTotal,
		RouteFailoversTotal,
		DBConnectionsActive,
		DBOperationsTotal,
		LogFlushErrorsTotal,
		LogDroppedEventsTotal,
	)
}
// Handler returns the http.Handler that serves the metrics endpoint.
// It delegates directly to promhttp.Handler and adds no behavior of its own.
func Handler() http.Handler {
return promhttp.Handler()
}
// RecordHTTPRequest records one completed HTTP request. It increments
// HTTPRequestsTotal for the (method, path, status) combination and observes
// duration, converted to seconds, on HTTPRequestDuration for (method, path).
//
// The status label is the reason phrase returned by http.StatusText (for
// example "OK" or "Not Found"). http.StatusText returns an empty string for
// codes it does not know, which would fold every non-standard code into a
// single empty label value; for those codes the decimal status code is used
// instead so they stay distinguishable.
//
// NOTE(review): path is used verbatim as a label value. Callers that pass raw
// URL paths can create an unbounded number of time series; consider passing a
// route template instead.
func RecordHTTPRequest(method, path string, status int, duration time.Duration) {
	statusLabel := http.StatusText(status)
	if statusLabel == "" {
		statusLabel = strconv.Itoa(status)
	}

	HTTPRequestsTotal.WithLabelValues(method, path, statusLabel).Inc()
	HTTPRequestDuration.WithLabelValues(method, path).Observe(duration.Seconds())
}
// RecordRouteDecision increments RouteDecisionsTotal for the given logical
// group and decision status.
func RecordRouteDecision(logicalGroup, status string) {
	decisions := RouteDecisionsTotal.WithLabelValues(logicalGroup, status)
	decisions.Inc()
}
// RecordRouteFailover adds one to RouteFailoversTotal.
func RecordRouteFailover() {
	RouteFailoversTotal.Add(1)
}
// SetActiveHosts sets the ActiveHosts gauge to count.
func SetActiveHosts(count float64) {
ActiveHosts.Set(count)
}
// SetActiveProviders sets the ActiveProviders gauge to count.
func SetActiveProviders(count float64) {
ActiveProviders.Set(count)
}
// RecordDBOperation increments DBOperationsTotal for the given operation and
// table.
func RecordDBOperation(operation, table string) {
	ops := DBOperationsTotal.WithLabelValues(operation, table)
	ops.Inc()
}
// SetDBConnections sets the DBConnectionsActive gauge to count.
func SetDBConnections(count float64) {
DBConnectionsActive.Set(count)
}
// RecordLogFlushError adds one to LogFlushErrorsTotal.
func RecordLogFlushError() {
	LogFlushErrorsTotal.Add(1)
}
// RecordLogDroppedEvent adds one to LogDroppedEventsTotal.
func RecordLogDroppedEvent() {
	LogDroppedEventsTotal.Add(1)
}
// Middleware returns a handler that runs next and then reports the request to
// RecordHTTPRequest with the request method, the URL path, the status code
// captured by a responseWriter wrapper, and the elapsed wall-clock time.
func Middleware(next http.Handler) http.Handler {
	instrumented := func(w http.ResponseWriter, r *http.Request) {
		begin := time.Now()

		// The recorded status starts at 200 and is replaced when the
		// handler calls WriteHeader on the wrapper.
		recorder := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
		next.ServeHTTP(recorder, r)

		elapsed := time.Since(begin)
		RecordHTTPRequest(r.Method, r.URL.Path, recorder.statusCode, elapsed)
	}
	return http.HandlerFunc(instrumented)
}
// responseWriter wraps an http.ResponseWriter and remembers the status code
// of the response so it can be reported after the handler returns.
//
// statusCode holds the recorded status; callers initialise it with the value
// to report when the handler never writes a header. wroteHeader becomes true
// once a final (non-informational) header has been committed, after which the
// recorded status no longer changes.
type responseWriter struct {
	http.ResponseWriter
	statusCode  int
	wroteHeader bool
}

// WriteHeader records code as the response status and forwards the call to
// the wrapped writer.
//
// Only the first final status is recorded: net/http ignores superfluous
// WriteHeader calls, so a later call must not overwrite what was actually
// sent. Informational 1xx codes (other than 101 Switching Protocols) may be
// followed by the real status and are therefore forwarded but not recorded.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer. A Write before any WriteHeader call
// commits an implicit 200 OK, so that status is recorded here.
func (rw *responseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		rw.statusCode = http.StatusOK
		rw.wroteHeader = true
	}
	return rw.ResponseWriter.Write(b)
}

// Flush forwards to the wrapped writer when it implements http.Flusher, so
// streaming handlers keep working behind the wrapper. Flushing before any
// header was written commits an implicit 200 OK. When the wrapped writer is
// not a Flusher this is a no-op.
func (rw *responseWriter) Flush() {
	flusher, ok := rw.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	if !rw.wroteHeader {
		rw.statusCode = http.StatusOK
		rw.wroteHeader = true
	}
	flusher.Flush()
}

// Unwrap returns the wrapped writer so http.ResponseController can reach
// optional capabilities (hijacking, deadlines) of the underlying writer.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
// StartServer starts a dedicated HTTP server on addr that exposes Handler()
// at "/metrics", and returns it immediately; the server runs in a background
// goroutine.
//
// When ctx is cancelled the server is shut down gracefully, with up to five
// seconds for in-flight requests to finish. Metrics are treated as optional:
// a failure to listen or to shut down cleanly is logged and never crashes the
// process.
func StartServer(ctx context.Context, addr string) *http.Server {
	mux := http.NewServeMux()
	mux.Handle("/metrics", Handler())

	server := &http.Server{
		Addr:    addr,
		Handler: mux,
		// Bound every phase of a connection so slow or stalled clients
		// cannot hold connections open indefinitely.
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       10 * time.Second,
		WriteTimeout:      30 * time.Second,
		IdleTimeout:       60 * time.Second,
	}

	// stopped is closed once ListenAndServe returns. It lets the shutdown
	// watcher exit when the listener fails to start or the caller stops the
	// server directly, instead of waiting on ctx forever.
	stopped := make(chan struct{})

	go func() {
		defer close(stopped)
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Printf("metrics: server on %q stopped: %v", addr, err)
		}
	}()

	go func() {
		select {
		case <-stopped:
			// Nothing left to shut down.
			return
		case <-ctx.Done():
		}

		// ctx is already cancelled, so the shutdown deadline needs a fresh
		// context.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := server.Shutdown(shutdownCtx); err != nil {
			log.Printf("metrics: shutting down server on %q: %v", addr, err)
		}
	}()

	return server
}