# =============================================================================
# Alertmanager 配置文件
# =============================================================================

global:
  # Time after which an alert is treated as resolved when it has not been
  # updated again.
  resolve_timeout: 5m

  # Outgoing e-mail (SMTP) defaults shared by every email_configs entry.
  # Username and password are intentionally left empty here.
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@sub2api.org'
  smtp_auth_username: ''
  smtp_auth_password: ''

  # Default Slack webhook URL shared by every slack_configs entry.
  # NOTE(review): Alertmanager does not expand ${VAR} placeholders on its own;
  # presumably this file is rendered (e.g. with envsubst) before startup -
  # confirm.
  slack_api_url: '${SLACK_WEBHOOK_URL}'

# Notification template files loaded by Alertmanager (glob pattern).
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# Routing tree.
#
# Child routes are evaluated top to bottom. Every matcher-based child sets
# `continue: true`, so an alert keeps being tested against the following
# siblings and may be delivered to several receivers.
#
# Label selection uses `matchers` (Alertmanager >= 0.22) instead of the
# deprecated `match` / `match_re` keys; the selected alerts are the same.
route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  # NOTE(review): the last child route below has no matchers and therefore
  # matches every alert. The root receiver is only used when no child route
  # matches, so 'default' looks unreachable with this tree - confirm that
  # this is intended.
  receiver: 'default'

  routes:
    # Critical alerts - notify immediately (no group wait, 5m repeat).
    - matchers:
        - 'severity = "critical"'
      receiver: 'critical'
      group_wait: 0s
      repeat_interval: 5m
      continue: true

    # High alerts - notify quickly.
    - matchers:
        - 'severity = "high"'
      receiver: 'high'
      group_wait: 30s
      repeat_interval: 30m
      continue: true

    # SLO-related alerts.
    - matchers:
        - 'slo = "api-availability"'
      receiver: 'slo-team'
      continue: true

    # Infrastructure alerts, selected by alert name (regex is fully anchored).
    - matchers:
        - 'alertname =~ "Database.*|Redis.*|HighCPU.*|HighMemory.*"'
      receiver: 'infra-team'
      continue: true

    # Catch-all: bridge every alert back into the built-in ops_alert_events.
    # Must stay last; `continue` is left at its default (false).
    - receiver: 'ops-bridge'

# Receiver definitions. Each name below is referenced by a `receiver:` key
# in the routing tree.
receivers:
  # E-mail to the on-call mailbox with a custom Subject header.
  - name: 'default'
    email_configs:
      - to: 'oncall@sub2api.org'
        send_resolved: true
        headers:
          Subject: '[Alert] {{ .GroupLabels.alertname }}'

  # Critical alerts: e-mail, Slack and a webhook, all with resolved
  # notifications enabled.
  - name: 'critical'
    email_configs:
      - to: 'sre-lead@sub2api.org'
        send_resolved: true
    slack_configs:
      - channel: '#alerts-critical'
        send_resolved: true
        title: '🔴 CRITICAL: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Runbook:* {{ .Annotations.runbook_url }}
          {{ end }}
    # NOTE(review): webhook_configs sends Alertmanager's generic webhook
    # payload. If this URL is a PagerDuty Events endpoint, pagerduty_configs
    # may be the intended notifier - confirm.
    # NOTE(review): the ${...} placeholder is presumably substituted before
    # Alertmanager loads this file - confirm.
    webhook_configs:
      - url: '${PAGERDUTY_WEBHOOK_URL}'
        send_resolved: true

  # High alerts: e-mail to on-call plus a Slack message listing the summary
  # and description of each alert in the group.
  - name: 'high'
    email_configs:
      - to: 'oncall@sub2api.org'
        send_resolved: true
    slack_configs:
      - channel: '#alerts-high'
        send_resolved: true
        title: '🟠 HIGH: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ end }}

  # SLO alerts: e-mail only.
  - name: 'slo-team'
    email_configs:
      - to: 'slo-team@sub2api.org'
        send_resolved: true

  # Infrastructure alerts: Slack only; no custom `text` is set.
  - name: 'infra-team'
    slack_configs:
      - channel: '#infra-alerts'
        send_resolved: true
        title: '🔧 INFRA: {{ .GroupLabels.alertname }}'

  # ops-bridge: writes Prometheus alerts back into the built-in
  # ops_alert_events table, so operators can view all alerts together in the
  # existing Ops Dashboard.
  # The bearer token has to be injected through an environment variable:
  #   ALERTMANAGER_INTERNAL_TOKEN=<same value as app INTERNAL_WEBHOOK_TOKEN>
  # NOTE(review): Alertmanager does not expand ${VAR} placeholders itself;
  # presumably the file is rendered before startup - confirm.
  - name: 'ops-bridge'
    webhook_configs:
      - url: 'http://host.docker.internal:8080/admin/ops/prometheus-alerts'
        send_resolved: true
        max_alerts: 50
        http_config:
          bearer_token: '${ALERTMANAGER_INTERNAL_TOKEN}'

# Inhibition rules.
#
# A target alert is muted while a source alert is firing and both carry the
# same values for every label listed in `equal`.
#
# Uses `source_matchers` / `target_matchers` (Alertmanager >= 0.22) instead of
# the deprecated `source_match` / `target_match` / `target_match_re` keys; the
# selected alerts are the same.
inhibit_rules:
  # A critical alert mutes the high-severity alert with the same
  # alertname, cluster and service.
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "high"'
    equal: ['alertname', 'cluster', 'service']

  # A high alert mutes the medium-severity alert with the same
  # alertname, cluster and service.
  - source_matchers:
      - 'severity = "high"'
    target_matchers:
      - 'severity = "medium"'
    equal: ['alertname', 'cluster', 'service']

  # While the application is down, mute application-level alerts for the
  # same job to avoid an alert storm.
  - source_matchers:
      - 'alertname = "Sub2APIDown"'
    target_matchers:
      - 'alertname =~ "HighErrorRate|HighLatency.*|SLOErrorBudget.*"'
    equal: ['job']