Files
tokens-reef/deploy/monitoring/docker-compose.single.yml

136 lines
4.1 KiB
YAML
Raw Permalink Normal View History

# Sub2API single-host monitoring stack (tuned for a 2-core / 4 GB machine)
# Memory limits: Prometheus 128MB, Grafana 128MB, Node Exporter 32MB, Blackbox Exporter 32MB
# Total memory ceiling: ~320MB
# Compose file format version declared for this stack.
# NOTE(review): every service in this file sets `mem_limit` and `cpus`
# directly on the service; confirm that the Compose implementation used for
# deployment accepts those keys together with this version value.
version: '3.8'
# Service definitions for the monitoring stack. All four containers join the
# `monitoring` network, restart unless explicitly stopped, and carry an
# HTTP healthcheck run with wget every 30s (10s timeout, 3 retries).
services:
  # Prometheus: metrics storage and querying.
  prometheus:
    image: prom/prometheus:v2.50.0
    container_name: sub2api-prometheus
    restart: unless-stopped
    mem_limit: 128m
    cpus: '0.2'
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Retention is bounded both by age (15d) and by size (2GB).
      - '--storage.tsdb.retention.time=15d'
      - '--storage.tsdb.retention.size=2GB'
      - '--storage.tsdb.wal-compression'
      # Minimum and maximum block duration are pinned to the same value (2h).
      - '--storage.tsdb.min-block-duration=2h'
      - '--storage.tsdb.max-block-duration=2h'
      - '--query.max-samples=50000000'
      - '--query.timeout=2m'
      # NOTE(review): the lifecycle flag is enabled while port 9090 is
      # published on the host (see `ports` below) - confirm that exposure is
      # intended or restrict access at the host/firewall level.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      # Scrape configuration and rule files are mounted read-only.
      - ./prometheus/prometheus-single.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/rules:/etc/prometheus/rules:ro
      # Named volume mounted at /prometheus.
      - prometheus-data:/prometheus
    ports:
      - "9090:9090"
    networks:
      - monitoring
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Grafana: dashboards, listed as depending on the prometheus service.
  grafana:
    image: grafana/grafana:10.3.0
    container_name: sub2api-grafana
    restart: unless-stopped
    mem_limit: 128m
    cpus: '0.1'
    environment:
      # Admin credentials come from the environment and fall back to
      # admin/admin when the variables are unset.
      # NOTE(review): port 3000 is published on the host; set
      # GRAFANA_ADMIN_PASSWORD explicitly for any reachable deployment.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_INSTALL_PLUGINS=grafana-clock-panel
      # Analytics reporting, update checks, metrics and tracing are switched
      # off; logging is limited to warnings on the console.
      - GF_ANALYTICS_REPORTING_ENABLED=false
      - GF_ANALYTICS_CHECK_FOR_UPDATES=false
      - GF_LOG_LEVEL=warn
      - GF_LOG_MODE=console
      # SQLite database file stored inside the grafana-data volume.
      - GF_DATABASE_TYPE=sqlite3
      - GF_DATABASE_PATH=/var/lib/grafana/grafana.db
      - GF_SESSION_PROVIDER=memory
      - GF_METRICS_ENABLED=false
      - GF_TRACING_ENABLED=false
    volumes:
      # Configuration, provisioning and dashboards are mounted read-only.
      - ./grafana/grafana-single.ini:/etc/grafana/grafana.ini:ro
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
      # Named volume mounted at /var/lib/grafana.
      - grafana-data:/var/lib/grafana
    ports:
      - "3000:3000"
    networks:
      - monitoring
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Node Exporter: host metrics. No host port is published; the service is
  # attached only to the `monitoring` network.
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: sub2api-node-exporter
    restart: unless-stopped
    mem_limit: 32m
    cpus: '0.05'
    command:
      # Point the exporter at the host paths mounted under `volumes` below.
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
      - '--collector.cpu.info'
      - '--collector.meminfo'
      - '--no-collector.wifi'
      - '--no-collector.hwmon'
      - '--no-collector.btrfs'
      # Expose the textfile_collector directory so backup scripts can write
      # heartbeat metrics into it.
      - '--collector.textfile.directory=/var/lib/node_exporter/textfile_collector'
    volumes:
      # Host filesystems are mounted read-only.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      # Also read-only inside the container, so the heartbeat files have to
      # be written on the host side.
      - /var/lib/node_exporter:/var/lib/node_exporter:ro
    networks:
      - monitoring
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9100/metrics"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Blackbox Exporter - HTTP/TLS probing and certificate-expiry checks.
  # Memory footprint is ~16MB, so it is safe to include in the single-host
  # deployment. No host port is published.
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.24.0
    container_name: sub2api-blackbox
    restart: unless-stopped
    mem_limit: 32m
    cpus: '0.05'
    command:
      - '--config.file=/etc/blackbox_exporter/config.yml'
    volumes:
      - ./blackbox/config.yml:/etc/blackbox_exporter/config.yml:ro
    networks:
      - monitoring
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9115/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3
# Named volumes backing the stack, both using the `local` driver.
# prometheus-data is mounted at /prometheus and grafana-data at
# /var/lib/grafana by the corresponding services.
volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
# Bridge network that every service in this file is attached to.
networks:
  monitoring:
    driver: bridge