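// Package metrics declares the Prometheus collectors used by the service,
// registers them, and provides helpers for recording values, instrumenting
// HTTP handlers and serving the /metrics endpoint.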
package metrics
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
)
|
|
|
|
var (
|
|
// HTTP 请求指标
|
|
HTTPRequestsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "http_requests_total",
|
|
Help: "Total number of HTTP requests",
|
|
},
|
|
[]string{"method", "path", "status"},
|
|
)
|
|
|
|
HTTPRequestDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "http_request_duration_seconds",
|
|
Help: "HTTP request duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
},
|
|
[]string{"method", "path"},
|
|
)
|
|
|
|
// 业务指标
|
|
ActiveHosts = prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "active_hosts",
|
|
Help: "Number of active hosts",
|
|
},
|
|
)
|
|
|
|
ActiveProviders = prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "active_providers",
|
|
Help: "Number of active providers",
|
|
},
|
|
)
|
|
|
|
RouteDecisionsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "route_decisions_total",
|
|
Help: "Total number of route decisions",
|
|
},
|
|
[]string{"logical_group", "status"},
|
|
)
|
|
|
|
RouteFailoversTotal = prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "route_failovers_total",
|
|
Help: "Total number of route failovers",
|
|
},
|
|
)
|
|
|
|
UserKeyOperationsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "user_key_operations_total",
|
|
Help: "Total number of user key self-service and governance operations",
|
|
},
|
|
[]string{"operation", "result"},
|
|
)
|
|
|
|
UserKeyChatRequestsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "user_key_chat_requests_total",
|
|
Help: "Total number of public user-key chat completion requests",
|
|
},
|
|
[]string{"result"},
|
|
)
|
|
|
|
// 数据库指标
|
|
DBConnectionsActive = prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "db_connections_active",
|
|
Help: "Number of active database connections",
|
|
},
|
|
)
|
|
|
|
DBOperationsTotal = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "db_operations_total",
|
|
Help: "Total number of database operations",
|
|
},
|
|
[]string{"operation", "table"},
|
|
)
|
|
|
|
// 日志指标
|
|
LogFlushErrorsTotal = prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "log_flush_errors_total",
|
|
Help: "Total number of log flush errors",
|
|
},
|
|
)
|
|
|
|
LogDroppedEventsTotal = prometheus.NewCounter(
|
|
prometheus.CounterOpts{
|
|
Name: "log_dropped_events_total",
|
|
Help: "Total number of dropped log events",
|
|
},
|
|
)
|
|
)
|
|
|
|
func init() {
|
|
// 注册所有指标
|
|
prometheus.MustRegister(HTTPRequestsTotal)
|
|
prometheus.MustRegister(HTTPRequestDuration)
|
|
prometheus.MustRegister(ActiveHosts)
|
|
prometheus.MustRegister(ActiveProviders)
|
|
prometheus.MustRegister(RouteDecisionsTotal)
|
|
prometheus.MustRegister(RouteFailoversTotal)
|
|
prometheus.MustRegister(UserKeyOperationsTotal)
|
|
prometheus.MustRegister(UserKeyChatRequestsTotal)
|
|
prometheus.MustRegister(DBConnectionsActive)
|
|
prometheus.MustRegister(DBOperationsTotal)
|
|
prometheus.MustRegister(LogFlushErrorsTotal)
|
|
prometheus.MustRegister(LogDroppedEventsTotal)
|
|
}
|
|
|
|
// Handler returns the HTTP handler for metrics endpoint
|
|
func Handler() http.Handler {
|
|
return promhttp.Handler()
|
|
}
|
|
|
|
// RecordHTTPRequest records metrics for an HTTP request
|
|
func RecordHTTPRequest(method, path string, status int, duration time.Duration) {
|
|
if path == "" {
|
|
path = "unknown"
|
|
}
|
|
statusLabel := strconv.Itoa(status)
|
|
HTTPRequestsTotal.WithLabelValues(method, path, statusLabel).Inc()
|
|
HTTPRequestDuration.WithLabelValues(method, path).Observe(duration.Seconds())
|
|
}
|
|
|
|
// RecordRouteDecision records a route decision
|
|
func RecordRouteDecision(logicalGroup, status string) {
|
|
RouteDecisionsTotal.WithLabelValues(logicalGroup, status).Inc()
|
|
}
|
|
|
|
// RecordRouteFailover records a route failover
|
|
func RecordRouteFailover() {
|
|
RouteFailoversTotal.Inc()
|
|
}
|
|
|
|
// RecordUserKeyOperation records a user key lifecycle/governance operation.
|
|
func RecordUserKeyOperation(operation, result string) {
|
|
UserKeyOperationsTotal.WithLabelValues(operation, result).Inc()
|
|
}
|
|
|
|
// RecordUserKeyChatRequest records a public user-key chat completion request outcome.
|
|
func RecordUserKeyChatRequest(result string) {
|
|
UserKeyChatRequestsTotal.WithLabelValues(result).Inc()
|
|
}
|
|
|
|
// SetActiveHosts sets the active hosts gauge
|
|
func SetActiveHosts(count float64) {
|
|
ActiveHosts.Set(count)
|
|
}
|
|
|
|
// SetActiveProviders sets the active providers gauge
|
|
func SetActiveProviders(count float64) {
|
|
ActiveProviders.Set(count)
|
|
}
|
|
|
|
// RecordDBOperation records a database operation
|
|
func RecordDBOperation(operation, table string) {
|
|
DBOperationsTotal.WithLabelValues(operation, table).Inc()
|
|
}
|
|
|
|
// SetDBConnections sets the active DB connections gauge
|
|
func SetDBConnections(count float64) {
|
|
DBConnectionsActive.Set(count)
|
|
}
|
|
|
|
// RecordLogFlushError records a log flush error
|
|
func RecordLogFlushError() {
|
|
LogFlushErrorsTotal.Inc()
|
|
}
|
|
|
|
// RecordLogDroppedEvent records a dropped log event
|
|
func RecordLogDroppedEvent() {
|
|
LogDroppedEventsTotal.Inc()
|
|
}
|
|
|
|
// Middleware wraps an HTTP handler with metrics collection
|
|
func Middleware(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
start := time.Now()
|
|
|
|
// Wrap response writer to capture status code
|
|
wrapped := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
|
|
|
|
next.ServeHTTP(wrapped, r)
|
|
|
|
duration := time.Since(start)
|
|
path := r.Pattern
|
|
if path == "" {
|
|
path = r.URL.Path
|
|
}
|
|
RecordHTTPRequest(r.Method, path, wrapped.statusCode, duration)
|
|
})
|
|
}
|
|
|
|
// responseWriter wraps an http.ResponseWriter and remembers the status code
// of the response so it can be reported after the handler returns.
type responseWriter struct {
	http.ResponseWriter
	// statusCode is the final status recorded so far.
	statusCode int
	// wroteHeader is true once a final (non-informational) status has been
	// sent, explicitly or implicitly; later codes are not recorded.
	wroteHeader bool
}

// WriteHeader records code as the response status and forwards the call.
//
// Only the first final status is recorded, because that is the one actually
// sent; a superfluous later call is still forwarded so the underlying writer
// can report it. Informational 1xx codes (other than 101) are forwarded
// without being recorded, since a final status follows them.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. A Write before any WriteHeader
// implies a 200 response, which is recorded as the status.
func (rw *responseWriter) Write(p []byte) (int, error) {
	rw.markImplicitOK()
	return rw.ResponseWriter.Write(p)
}

// Flush implements http.Flusher so that streaming handlers keep working when
// wrapped. It is a no-op when the wrapped writer does not support flushing.
func (rw *responseWriter) Flush() {
	flusher, ok := rw.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	// Flushing before WriteHeader sends an implicit 200.
	rw.markImplicitOK()
	flusher.Flush()
}

// Unwrap returns the wrapped writer so http.ResponseController can reach the
// optional interfaces it implements.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}

// markImplicitOK records a 200 status if no final status was recorded yet.
func (rw *responseWriter) markImplicitOK() {
	if rw.wroteHeader {
		return
	}
	rw.statusCode = http.StatusOK
	rw.wroteHeader = true
}
|
|
|
|
// StartServer starts a metrics server on the given address
|
|
func StartServer(ctx context.Context, addr string) *http.Server {
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/metrics", Handler())
|
|
|
|
server := &http.Server{
|
|
Addr: addr,
|
|
Handler: mux,
|
|
}
|
|
|
|
go func() {
|
|
<-ctx.Done()
|
|
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
_ = server.Shutdown(shutdownCtx)
|
|
}()
|
|
|
|
go func() {
|
|
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
// Log error but don't crash - metrics are optional
|
|
}
|
|
}()
|
|
|
|
return server
|
|
}
|