Files
tokens-reef/deploy/monitoring/prometheus/prometheus-single.yml

98 lines
2.8 KiB
YAML
Raw Permalink Normal View History

# Sub2API 单机版 Prometheus 配置
# 优化目标: 内存 < 128MB, 存储 < 2GB, 保留 15天
# Defaults applied to every scrape job and rule group unless overridden.
global:
  scrape_interval: 30s  # relaxed scrape interval for the single-node setup
  evaluation_interval: 30s
  # Labels attached to data leaving this server (e.g. alerts sent out).
  external_labels:
    cluster: 'sub2api-single'
    replica: 'single'
# Alerting rule files to load.
rule_files:
- "rules/sub2api-alerts-light.yml"
# Scrape configuration: one list entry per job below.
scrape_configs:
# Sub2API application metrics.
- job_name: 'sub2api-app'
  static_configs:
  # NOTE(review): host.docker.internal presumably needs an extra_hosts
  # host-gateway mapping on a Linux Docker host - confirm in the compose file.
  - targets: ['host.docker.internal:8080']
    labels:
      service: 'sub2api'
      tier: 'backend'
  metrics_path: '/metrics'
  scrape_interval: 30s
  scrape_timeout: 10s
  # Request only the key metric groups to reduce data volume; sent as
  # repeated collect[] query parameters on each scrape.
  # NOTE(review): only effective if the app's /metrics handler honors
  # collect[] - confirm.
  params:
    collect[]:
    - 'http'
    - 'runtime'
    - 'database'
# Node Exporter - host/system metrics.
- job_name: 'node-exporter'
  static_configs:
  - targets: ['node-exporter:9100']
    labels:
      # Fixed instance label instead of the default target address.
      instance: 'sub2api-server'
  scrape_interval: 30s
  scrape_timeout: 10s
# Prometheus' own metrics.
- job_name: 'prometheus'
  static_configs:
  - targets: ['localhost:9090']
    labels:
      instance: 'prometheus'
  scrape_interval: 30s
# Blackbox Exporter - TLS certificate checks & endpoint availability probing.
# Requires a blackbox-exporter container in docker-compose.single.yml;
# see the blackbox-exporter service in
# deploy/monitoring/docker-compose.single.yml.
- job_name: 'blackbox-https'
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
  - targets:
    # TODO: replace with the real domain
    - 'https://sub2api.example.com/health'
  relabel_configs:
  # Pass the listed URL to the exporter as the ?target= query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed URL as the instance label.
  - source_labels: [__param_target]
    target_label: instance
  # Send the actual scrape request to the blackbox exporter.
  - target_label: __address__
    replacement: 'blackbox-exporter:9115'
# Dedicated TLS certificate expiry check (TCP mode, certificate only).
# NOTE(review): the tcp_tls module must be defined in the blackbox
# exporter's own configuration - confirm.
- job_name: 'blackbox-tls-cert'
  metrics_path: /probe
  params:
    module: [tcp_tls]
  static_configs:
  - targets:
    # TODO: replace with the real domain:port
    - 'sub2api.example.com:443'
  relabel_configs:
  # Pass the listed host:port to the exporter as the ?target= parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed host:port as the instance label.
  - source_labels: [__param_target]
    target_label: instance
  # Send the actual scrape request to the blackbox exporter.
  - target_label: __address__
    replacement: 'blackbox-exporter:9115'
  scrape_interval: 300s  # check every 5 minutes to save resources
# Alertmanager integration (fill in the target list to enable).
# Phase 1: keep using the existing ops alerting system, so the target
#          list stays empty.
# Phase 2: enable Alertmanager, and also write alerts back to
#          ops_alert_events through the ops-bridge webhook.
alerting:
  alertmanagers:
  - static_configs:
    - targets: []  # Phase 2: replace with ['alertmanager:9093']
    # TLS is recommended for production:
    # tls_config:
    #   ca_file: /etc/prometheus/certs/ca.crt