Files
tokens-reef/backend/internal/handler/admin/ops_prometheus_bridge_handler.go
Developer 349d783fd1 refactor: clean up project structure
- Remove old review reports (keep latest only)
- Move docs/ to deploy/docs-backup/
- Move performance-testing/ to deploy/
- Clean up test output files
- Organize root directory
2026-04-06 23:36:03 +08:00

205 lines
5.7 KiB
Go

package admin
import (
"crypto/subtle"
"log/slog"
"net/http"
"os"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/gin-gonic/gin"
)
// ============================================================
// Alertmanager Webhook Handler
//
// Bridges Prometheus Alertmanager alerts into the existing ops_alert_events
// table, reusing the existing infrastructure such as email notifications and
// silence checks.
//
// Configure a webhook receiver in Alertmanager that points at this endpoint:
//
//	receivers:
//	  - name: 'ops-bridge'
//	    webhook_configs:
//	      - url: 'http://localhost:8080/admin/ops/prometheus-alerts'
//	        send_resolved: true
//	        http_config:
//	          bearer_token: '<INTERNAL_TOKEN>'
//
// ============================================================
// alertmanagerPayload is the envelope decoded from the JSON body of an
// Alertmanager webhook request. The handler in this file only reads Alerts;
// the remaining fields are decoded but not consulted by the visible code.
type alertmanagerPayload struct {
	// Receiver is decoded from the "receiver" key.
	Receiver string `json:"receiver"`

	// Status is the envelope-level state: "firing" or "resolved". Each entry
	// in Alerts also carries its own status.
	Status string `json:"status"`

	// Alerts holds the individual alerts of this delivery; one event is
	// created per entry.
	Alerts []alertmanagerAlert `json:"alerts"`

	// GroupLabels is decoded from the "groupLabels" key.
	GroupLabels map[string]string `json:"groupLabels"`

	// CommonLabels is decoded from the "commonLabels" key.
	CommonLabels map[string]string `json:"commonLabels"`

	// CommonAnnotations is decoded from the "commonAnnotations" key.
	CommonAnnotations map[string]string `json:"commonAnnotations"`

	// ExternalURL is decoded from the "externalURL" key.
	ExternalURL string `json:"externalURL"`

	// Version is decoded from the "version" key.
	Version string `json:"version"`
}
// alertmanagerAlert is a single alert inside an Alertmanager webhook payload.
type alertmanagerAlert struct {
	// Status is "firing" or "resolved".
	Status string `json:"status"`

	// Labels are the alert's labels. "alertname" and "severity" are read
	// explicitly when the alert is converted into an event.
	Labels map[string]string `json:"labels"`

	// Annotations are the alert's annotations. The conversion consults the
	// "summary", "description", "message" and "runbook_url" keys.
	Annotations map[string]string `json:"annotations"`

	// StartsAt is decoded from the "startsAt" key.
	StartsAt time.Time `json:"startsAt"`

	// EndsAt is decoded from the "endsAt" key.
	EndsAt time.Time `json:"endsAt"`

	// GeneratorURL is decoded from the "generatorURL" key.
	GeneratorURL string `json:"generatorURL"`

	// Fingerprint is decoded from the "fingerprint" key; it is logged on
	// persistence failures and stored with the converted event.
	Fingerprint string `json:"fingerprint"`
}
// PromAlertsBridgeHandler accepts an Alertmanager webhook delivery and hands
// every alert it contains to opsService.CreateExternalAlertEvent.
//
// POST /admin/ops/prometheus-alerts
//
// Security: the route is documented as NOT being behind JWT, because the
// caller is Alertmanager. The handler authenticates the request itself: the
// Authorization header must be a "Bearer " header whose token equals the
// INTERNAL_WEBHOOK_TOKEN environment variable. The comparison is done with
// subtle.ConstantTimeCompare to prevent timing attacks.
//
// Outcomes, in the order they are checked:
//   - INTERNAL_WEBHOOK_TOKEN unset: response.Error with 503 (fails closed).
//   - header missing the "Bearer " prefix, or token mismatch: response.Error
//     with 401.
//   - body cannot be bound to the payload: response.Error with 400.
//   - payload without alerts: status 204, nothing is persisted.
//   - otherwise: 200 with "total", "success" and "failed" counters. A failure
//     to persist one alert is logged and counted; it does not abort the loop
//     or change the status code.
func (h *OpsHandler) PromAlertsBridgeHandler(c *gin.Context) {
	// The secret is looked up on every request; an empty value rejects all calls.
	secret := os.Getenv("INTERNAL_WEBHOOK_TOKEN")
	if secret == "" {
		slog.Error("INTERNAL_WEBHOOK_TOKEN not configured, rejecting prometheus webhook")
		response.Error(c, http.StatusServiceUnavailable, "webhook token not configured")
		return
	}

	// Split the scheme from the credential in one step.
	presented, isBearer := strings.CutPrefix(c.GetHeader("Authorization"), "Bearer ")
	if !isBearer {
		response.Error(c, http.StatusUnauthorized, "missing or invalid authorization header")
		return
	}

	// Constant-time comparison to prevent timing attacks.
	if subtle.ConstantTimeCompare([]byte(presented), []byte(secret)) != 1 {
		slog.Warn("invalid webhook token received", "remote_ip", c.ClientIP())
		response.Error(c, http.StatusUnauthorized, "invalid token")
		return
	}

	var body alertmanagerPayload
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		response.Error(c, http.StatusBadRequest, "invalid payload: "+bindErr.Error())
		return
	}

	total := len(body.Alerts)
	if total == 0 {
		c.Status(http.StatusNoContent)
		return
	}

	// Persist alerts one by one so that a single failure does not drop the rest.
	stored := 0
	for i := range body.Alerts {
		current := body.Alerts[i]
		event := convertAlertToEvent(current)
		persistErr := h.opsService.CreateExternalAlertEvent(c.Request.Context(), event)
		if persistErr == nil {
			stored++
			continue
		}
		slog.Warn("failed to persist prometheus alert event",
			"fingerprint", current.Fingerprint,
			"alertname", current.Labels["alertname"],
			"err", persistErr,
		)
	}
	// Every alert is either stored or failed, so the failure count is the remainder.
	failed := total - stored

	slog.Info("prometheus alerts bridged",
		"total", total,
		"success", stored,
		"failed", failed,
	)
	c.JSON(http.StatusOK, gin.H{
		"total":   total,
		"success": stored,
		"failed":  failed,
	})
}
// convertAlertToEvent translates one Alertmanager alert into the
// service.OpsAlertEvent that the webhook handler persists.
//
// Field mapping:
//   - RuleID: always 0, a synthetic value that distinguishes Prometheus-sourced
//     events from native evaluator-sourced events.
//   - Severity: the "severity" label passed through mapSeverity.
//   - Status: resolved when the alert status is "resolved", firing otherwise.
//   - Title: the "summary" annotation, falling back to the "alertname" label.
//   - Description: the "description" annotation, falling back to "message".
//   - FiredAt: StartsAt, or the current time when StartsAt is zero.
//   - ResolvedAt: EndsAt, set only for resolved alerts with a non-zero EndsAt.
//   - Dimensions: every label except "alertname" and "severity", plus the
//     keys "source" (always "prometheus"), "fingerprint" and, when the
//     annotation is non-empty, "runbook_url". These keys are written after
//     the labels, so they replace a label of the same name.
func convertAlertToEvent(a alertmanagerAlert) *service.OpsAlertEvent {
	isResolved := a.Status == "resolved"

	state := service.OpsAlertStatusFiring
	if isResolved {
		state = service.OpsAlertStatusResolved
	}

	headline := a.Annotations["summary"]
	if len(headline) == 0 {
		headline = a.Labels["alertname"]
	}

	details := a.Annotations["description"]
	if len(details) == 0 {
		details = a.Annotations["message"]
	}

	startedAt := a.StartsAt
	if startedAt.IsZero() {
		startedAt = time.Now()
	}

	// Only a resolved alert with a real end time gets a resolution timestamp.
	var endedAt *time.Time
	if isResolved && !a.EndsAt.IsZero() {
		ends := a.EndsAt
		endedAt = &ends
	}

	// Labels that already map to dedicated fields are left out of Dimensions.
	dims := make(map[string]any)
	for name, value := range a.Labels {
		switch name {
		case "alertname", "severity":
			// Represented by Title and Severity.
		default:
			dims[name] = value
		}
	}

	// Prometheus-specific metadata is written last so it wins over labels.
	dims["source"] = "prometheus"
	dims["fingerprint"] = a.Fingerprint
	if link := a.Annotations["runbook_url"]; len(link) > 0 {
		dims["runbook_url"] = link
	}

	return &service.OpsAlertEvent{
		RuleID:      0, // externally-sourced alert (Prometheus)
		Severity:    mapSeverity(a.Labels["severity"]),
		Status:      state,
		Title:       headline,
		Description: details,
		Dimensions:  dims,
		FiredAt:     startedAt,
		ResolvedAt:  endedAt,
	}
}
// mapSeverity translates a Prometheus severity label into an internal
// priority level. Matching ignores letter case but is otherwise exact:
//
//	"critical"            -> "P0"
//	"high"                -> "P1"
//	"warning" or "medium" -> "P2"
//	anything else         -> "P3" (including the empty string)
func mapSeverity(s string) string {
	switch level := strings.ToLower(s); {
	case level == "critical":
		return "P0"
	case level == "high":
		return "P1"
	case level == "warning" || level == "medium":
		return "P2"
	}
	return "P3"
}