# =============================================================================
# Sub2API monitoring stack - Prometheus + Grafana + Loki + Jaeger
# =============================================================================
# Usage:
#   1. Create the data directories:
#      mkdir -p monitoring/{prometheus-data,grafana-data,loki-data,alertmanager-data}
#   2. Start the stack: docker-compose -f docker-compose.monitoring.yml up -d
#   3. Open Grafana: http://localhost:3000 (admin/admin)
# =============================================================================

# Compose file format version, quoted so it stays a string rather than a float.
# NOTE(review): newer Docker Compose releases treat this key as obsolete and
# only warn about it; confirm which CLI is in use before removing it.
version: '3.8'

# Every container of the monitoring stack is declared under this key.
services:
# ===========================================================================
# Prometheus - time-series database
# ===========================================================================
  prometheus:
    image: prom/prometheus:v2.50.0
    container_name: sub2api-prometheus
    restart: unless-stopped
    command:
      # Main configuration; bind-mounted read-only in `volumes` below.
      - '--config.file=/etc/prometheus/prometheus.yml'
      # TSDB location; matches the ./prometheus-data bind mount.
      - '--storage.tsdb.path=/prometheus'
      # Retention is capped both by age and by on-disk size.
      - '--storage.tsdb.retention.time=30d'
      - '--storage.tsdb.retention.size=50GB'
      # NOTE(review): this turns on the HTTP lifecycle endpoints while port
      # 9090 is published on the host - confirm that exposure is acceptable.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    ports:
      - '9090:9090'
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/rules:/etc/prometheus/rules:ro
      - ./prometheus-data:/prometheus
    networks:
      - monitoring-network
    healthcheck:
      # Probes the /-/healthy endpoint from inside the container.
      test: ['CMD', 'wget', '-q', '-O', '-', 'http://localhost:9090/-/healthy']
      interval: 30s
      timeout: 10s
      retries: 3

# ===========================================================================
# Grafana - visualization platform
# ===========================================================================
  grafana:
    image: grafana/grafana:10.3.1
    container_name: sub2api-grafana
    restart: unless-stopped
    # Short-form depends_on: these services are started before Grafana.
    depends_on:
      - prometheus
      - loki
      - jaeger
    environment:
      # Admin credentials are read from the shell / .env file and both fall
      # back to "admin" when unset.
      # NOTE(review): override GRAFANA_ADMIN_PASSWORD anywhere port 3000 is
      # reachable by others.
      GF_SECURITY_ADMIN_USER: '${GRAFANA_ADMIN_USER:-admin}'
      GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD:-admin}'
      # Quoted so Compose receives the string "false", not a YAML boolean.
      GF_USERS_ALLOW_SIGN_UP: 'false'
      GF_SERVER_ROOT_URL: '${GRAFANA_ROOT_URL:-http://localhost:3000}'
      GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
    ports:
      - '3000:3000'
    volumes:
      # Persistent Grafana state.
      - ./grafana-data:/var/lib/grafana
      # Provisioning files and dashboards are mounted read-only.
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    networks:
      - monitoring-network
    healthcheck:
      # Probes Grafana's /api/health endpoint from inside the container.
      test: ['CMD', 'wget', '-q', '-O', '-', 'http://localhost:3000/api/health']
      interval: 30s
      timeout: 10s
      retries: 3

# ===========================================================================
# Loki - log aggregation
# ===========================================================================
  loki:
    image: grafana/loki:2.9.4
    container_name: sub2api-loki
    restart: unless-stopped
    # Single argument, written as a list so the leading "-" cannot be
    # mistaken for YAML sequence syntax.
    command:
      - '-config.file=/etc/loki/local-config.yaml'
    ports:
      - '3100:3100'
    volumes:
      # The local loki-config.yaml is mounted over the path named in `command`.
      - ./loki/loki-config.yaml:/etc/loki/local-config.yaml:ro
      - ./loki-data:/loki
    networks:
      - monitoring-network
    healthcheck:
      # Probes the /ready endpoint from inside the container.
      test: ['CMD', 'wget', '-q', '-O', '-', 'http://localhost:3100/ready']
      interval: 30s
      timeout: 10s
      retries: 3

# ===========================================================================
# Promtail - log collector
# ===========================================================================
  promtail:
    image: grafana/promtail:2.9.4
    container_name: sub2api-promtail
    restart: unless-stopped
    # Started after the loki service (short-form depends_on).
    depends_on:
      - loki
    command:
      - '-config.file=/etc/promtail/config.yml'
    volumes:
      - ./promtail/promtail-config.yml:/etc/promtail/config.yml:ro
      # Host log directories, mounted read-only. Which files are actually
      # scraped is decided by promtail-config.yml (not part of this file).
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    networks:
      - monitoring-network

# ===========================================================================
# Jaeger - distributed tracing
# ===========================================================================
  jaeger:
    image: jaegertracing/all-in-one:1.54
    container_name: sub2api-jaeger
    restart: unless-stopped
    environment:
      # Quoted so the container receives the string "true".
      COLLECTOR_OTLP_ENABLED: 'true'
    ports:
      - '16686:16686'  # UI
      - '4317:4317'    # OTLP gRPC
      - '4318:4318'    # OTLP HTTP
      - '14268:14268'  # Jaeger Thrift
    networks:
      - monitoring-network
    healthcheck:
      # Fetches the UI root page on port 16686 from inside the container.
      test: ['CMD', 'wget', '-q', '-O', '-', 'http://localhost:16686']
      interval: 30s
      timeout: 10s
      retries: 3

# ===========================================================================
# Node Exporter - host metrics
# ===========================================================================
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: sub2api-node-exporter
    restart: unless-stopped
    command:
      # Point the exporter at the host filesystems mounted in `volumes` below.
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # "$$" is Compose's escape for a literal "$", so the regex the exporter
      # receives ends in "($|/)".
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - '9100:9100'
    volumes:
      # Read-only views of the host's /proc, /sys and root filesystem.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring-network

# ===========================================================================
# cAdvisor - container metrics
# ===========================================================================
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: sub2api-cadvisor
    restart: unless-stopped
    # Runs privileged and is handed the host's kernel message device.
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    ports:
      # Host port 8081 forwards to port 8080 inside the container.
      - '8081:8080'
    volumes:
      # Host paths mounted read-only for cAdvisor to inspect.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    networks:
      - monitoring-network

# ===========================================================================
# Alertmanager - alert management
# ===========================================================================
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: sub2api-alertmanager
    restart: unless-stopped
    command:
      # Configuration file; bind-mounted read-only in `volumes` below.
      - '--config.file=/etc/alertmanager/config.yml'
      # State directory; matches the ./alertmanager-data bind mount.
      - '--storage.path=/alertmanager'
      # NOTE(review): the external URL is hard-coded to localhost - confirm
      # this is intended outside local use.
      - '--web.external-url=http://localhost:9093'
    ports:
      - '9093:9093'
    volumes:
      - ./alertmanager/config.yml:/etc/alertmanager/config.yml:ro
      - ./alertmanager-data:/alertmanager
    networks:
      - monitoring-network
    healthcheck:
      # Probes the /-/healthy endpoint from inside the container.
      test: ['CMD', 'wget', '-q', '-O', '-', 'http://localhost:9093/-/healthy']
      interval: 30s
      timeout: 10s
      retries: 3

# Bridge network that the monitoring services attach to via their `networks` lists.
networks:
  monitoring-network:
    driver: bridge