1074 lines
23 KiB
Markdown
1074 lines
23 KiB
Markdown
|
|
# 部署和运维指南
|
|||
|
|
|
|||
|
|
## 概述
|
|||
|
|
|
|||
|
|
本文档描述用户管理系统的部署方案和运维规范,包括容器化部署、集群部署、监控告警、日志管理等。
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 1. 部署方案
|
|||
|
|
|
|||
|
|
### 1.1 系统架构
|
|||
|
|
|
|||
|
|
```
|
|||
|
|
┌─────────────────────────────────────────────────────────┐
|
|||
|
|
│ 负载均衡 (Nginx) │
|
|||
|
|
└────────────────────┬────────────────────────────────────┘
|
|||
|
|
│
|
|||
|
|
┌────────────┴────────────┐
|
|||
|
|
│ │
|
|||
|
|
┌───────▼────────┐ ┌────────▼────────┐
|
|||
|
|
│ 应用实例 1 │ │ 应用实例 N │
|
|||
|
|
│ (Port 8080) │ │ (Port 8080) │
|
|||
|
|
└───────┬────────┘ └────────┬────────┘
|
|||
|
|
│ │
|
|||
|
|
└───────────┬───────────┘
|
|||
|
|
│
|
|||
|
|
┌───────────┴───────────┐
|
|||
|
|
│ │
|
|||
|
|
┌───────▼────────┐ ┌────────▼────────┐
|
|||
|
|
│ MySQL │ │ Redis │
|
|||
|
|
│ (主从复制) │ │ (哨兵模式) │
|
|||
|
|
└────────────────┘ └─────────────────┘
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 1.2 Docker 部署
|
|||
|
|
|
|||
|
|
#### 单机 Docker 部署
|
|||
|
|
|
|||
|
|
**docker-compose.yml(单机版)**
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
version: '3.8'
|
|||
|
|
|
|||
|
|
services:
|
|||
|
|
user-management:
|
|||
|
|
image: user-management-system:1.0.0
|
|||
|
|
container_name: user-ms
|
|||
|
|
ports:
|
|||
|
|
- "8080:8080"
|
|||
|
|
volumes:
|
|||
|
|
- ./data:/app/data
|
|||
|
|
- ./config:/app/config
|
|||
|
|
- ./logs:/app/logs
|
|||
|
|
environment:
|
|||
|
|
- SPRING_PROFILES_ACTIVE=docker
|
|||
|
|
- DATABASE_TYPE=sqlite
|
|||
|
|
- DATABASE_PATH=/app/data/user_management.db
|
|||
|
|
restart: unless-stopped
|
|||
|
|
healthcheck:
|
|||
|
|
test: ["CMD", "curl", "-f", "http://localhost:8080/health/ready"]
|
|||
|
|
interval: 30s
|
|||
|
|
timeout: 10s
|
|||
|
|
retries: 3
|
|||
|
|
start_period: 40s
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
**启动命令**
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 启动
|
|||
|
|
docker-compose up -d
|
|||
|
|
|
|||
|
|
# 查看日志
|
|||
|
|
docker-compose logs -f
|
|||
|
|
|
|||
|
|
# 停止
|
|||
|
|
docker-compose down
|
|||
|
|
|
|||
|
|
# 停止并删除数据卷(注意:-v 只删除命名卷/匿名卷,./data、./config、./logs 等绑定挂载目录需手动清理)
|
|||
|
|
docker-compose down -v
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 集群 Docker 部署
|
|||
|
|
|
|||
|
|
#### 目录结构
|
|||
|
|
|
|||
|
|
```
|
|||
|
|
deployment/
|
|||
|
|
├── docker/
|
|||
|
|
│ ├── auth-service/
|
|||
|
|
│ │ └── Dockerfile
|
|||
|
|
│ ├── user-service/
|
|||
|
|
│ │ └── Dockerfile
|
|||
|
|
│ ├── permission-service/
|
|||
|
|
│ │ └── Dockerfile
|
|||
|
|
│ └── gateway/
|
|||
|
|
│ └── Dockerfile
|
|||
|
|
├── docker-compose.yml
|
|||
|
|
├── docker-compose.prod.yml
|
|||
|
|
└── init/
|
|||
|
|
└── init.sql
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### Dockerfile 示例(Go)
|
|||
|
|
|
|||
|
|
```dockerfile
|
|||
|
|
# 构建阶段
|
|||
|
|
FROM golang:1.21-alpine AS builder
|
|||
|
|
|
|||
|
|
WORKDIR /app
|
|||
|
|
|
|||
|
|
# 复制依赖文件
|
|||
|
|
COPY go.mod go.sum ./
|
|||
|
|
RUN go mod download
|
|||
|
|
|
|||
|
|
# 复制源代码
|
|||
|
|
COPY . .
|
|||
|
|
|
|||
|
|
# 编译
|
|||
|
|
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
|
|||
|
|
-ldflags="-w -s" \
|
|||
|
|
-o user-service \
|
|||
|
|
./cmd/user-service
|
|||
|
|
|
|||
|
|
# 运行阶段
|
|||
|
|
FROM alpine:latest
|
|||
|
|
|
|||
|
|
RUN apk --no-cache add ca-certificates tzdata
|
|||
|
|
|
|||
|
|
WORKDIR /app
|
|||
|
|
|
|||
|
|
COPY --from=builder /app/user-service .
|
|||
|
|
|
|||
|
|
EXPOSE 8080
|
|||
|
|
|
|||
|
|
CMD ["./user-service"]
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### docker-compose.yml
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
version: '3.8'
|
|||
|
|
|
|||
|
|
services:
|
|||
|
|
mysql:
|
|||
|
|
image: mysql:8.0
|
|||
|
|
container_name: user-ms-mysql
|
|||
|
|
environment:
|
|||
|
|
MYSQL_ROOT_PASSWORD: root_password
|
|||
|
|
MYSQL_DATABASE: user_management
|
|||
|
|
MYSQL_USER: app_user
|
|||
|
|
MYSQL_PASSWORD: app_password
|
|||
|
|
ports:
|
|||
|
|
- "3306:3306"
|
|||
|
|
volumes:
|
|||
|
|
- mysql-data:/var/lib/mysql
|
|||
|
|
- ./init/init.sql:/docker-entrypoint-initdb.d/init.sql
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
redis:
|
|||
|
|
image: redis:7-alpine
|
|||
|
|
container_name: user-ms-redis
|
|||
|
|
ports:
|
|||
|
|
- "6379:6379"
|
|||
|
|
volumes:
|
|||
|
|
- redis-data:/data
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
auth-service:
|
|||
|
|
build:
|
|||
|
|
context: ./docker/auth-service
|
|||
|
|
container_name: user-ms-auth-service
|
|||
|
|
environment:
|
|||
|
|
- SPRING_PROFILES_ACTIVE=prod
|
|||
|
|
- DB_HOST=mysql
|
|||
|
|
- DB_PORT=3306
|
|||
|
|
- DB_NAME=user_management
|
|||
|
|
- DB_USER=app_user
|
|||
|
|
- DB_PASSWORD=app_password
|
|||
|
|
- REDIS_HOST=redis
|
|||
|
|
- REDIS_PORT=6379
|
|||
|
|
ports:
|
|||
|
|
- "8081:8080"
|
|||
|
|
depends_on:
|
|||
|
|
- mysql
|
|||
|
|
- redis
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
user-service:
|
|||
|
|
build:
|
|||
|
|
context: ./docker/user-service
|
|||
|
|
container_name: user-ms-user-service
|
|||
|
|
environment:
|
|||
|
|
- SPRING_PROFILES_ACTIVE=prod
|
|||
|
|
- DB_HOST=mysql
|
|||
|
|
- DB_PORT=3306
|
|||
|
|
- DB_NAME=user_management
|
|||
|
|
- DB_USER=app_user
|
|||
|
|
- DB_PASSWORD=app_password
|
|||
|
|
- REDIS_HOST=redis
|
|||
|
|
- REDIS_PORT=6379
|
|||
|
|
ports:
|
|||
|
|
- "8082:8080"
|
|||
|
|
depends_on:
|
|||
|
|
- mysql
|
|||
|
|
- redis
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
permission-service:
|
|||
|
|
build:
|
|||
|
|
context: ./docker/permission-service
|
|||
|
|
container_name: user-ms-permission-service
|
|||
|
|
environment:
|
|||
|
|
- SPRING_PROFILES_ACTIVE=prod
|
|||
|
|
- DB_HOST=mysql
|
|||
|
|
- DB_PORT=3306
|
|||
|
|
- DB_NAME=user_management
|
|||
|
|
- DB_USER=app_user
|
|||
|
|
- DB_PASSWORD=app_password
|
|||
|
|
- REDIS_HOST=redis
|
|||
|
|
- REDIS_PORT=6379
|
|||
|
|
ports:
|
|||
|
|
- "8083:8080"
|
|||
|
|
depends_on:
|
|||
|
|
- mysql
|
|||
|
|
- redis
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
gateway:
|
|||
|
|
build:
|
|||
|
|
context: ./docker/gateway
|
|||
|
|
container_name: user-ms-gateway
|
|||
|
|
environment:
|
|||
|
|
- AUTH_SERVICE_URL=http://auth-service:8080
|
|||
|
|
- USER_SERVICE_URL=http://user-service:8080
|
|||
|
|
- PERMISSION_SERVICE_URL=http://permission-service:8080
|
|||
|
|
ports:
|
|||
|
|
- "8080:8080"
|
|||
|
|
depends_on:
|
|||
|
|
- auth-service
|
|||
|
|
- user-service
|
|||
|
|
- permission-service
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
prometheus:
|
|||
|
|
image: prom/prometheus:latest
|
|||
|
|
container_name: user-ms-prometheus
|
|||
|
|
ports:
|
|||
|
|
- "9090:9090"
|
|||
|
|
volumes:
|
|||
|
|
- ./deployment/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
|||
|
|
- prometheus-data:/prometheus
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
grafana:
|
|||
|
|
image: grafana/grafana:latest
|
|||
|
|
container_name: user-ms-grafana
|
|||
|
|
ports:
|
|||
|
|
- "3000:3000"
|
|||
|
|
environment:
|
|||
|
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
|||
|
|
volumes:
|
|||
|
|
- grafana-data:/var/lib/grafana
|
|||
|
|
- ./deployment/grafana/provisioning:/etc/grafana/provisioning
|
|||
|
|
networks:
|
|||
|
|
- user-ms-network
|
|||
|
|
|
|||
|
|
volumes:
|
|||
|
|
mysql-data:
|
|||
|
|
redis-data:
|
|||
|
|
prometheus-data:
|
|||
|
|
grafana-data:
|
|||
|
|
|
|||
|
|
networks:
|
|||
|
|
user-ms-network:
|
|||
|
|
driver: bridge
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 启动命令
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 开发环境
|
|||
|
|
docker-compose up -d
|
|||
|
|
|
|||
|
|
# 生产环境
|
|||
|
|
docker-compose -f docker-compose.prod.yml up -d
|
|||
|
|
|
|||
|
|
# 查看日志
|
|||
|
|
docker-compose logs -f
|
|||
|
|
|
|||
|
|
# 停止服务
|
|||
|
|
docker-compose down
|
|||
|
|
|
|||
|
|
# 清理数据
|
|||
|
|
docker-compose down -v
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 1.3 Kubernetes 部署
|
|||
|
|
|
|||
|
|
#### Helm Charts 结构
|
|||
|
|
|
|||
|
|
```
|
|||
|
|
deployment/kubernetes/helm/user-management-system/
|
|||
|
|
├── Chart.yaml
|
|||
|
|
├── values.yaml
|
|||
|
|
├── values-prod.yaml
|
|||
|
|
└── templates/
|
|||
|
|
├── _helpers.tpl
|
|||
|
|
├── deployment.yaml
|
|||
|
|
├── service.yaml
|
|||
|
|
├── ingress.yaml
|
|||
|
|
├── configmap.yaml
|
|||
|
|
├── secret.yaml
|
|||
|
|
├── hpa.yaml
|
|||
|
|
└── pdb.yaml
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### values.yaml
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
# 默认配置
|
|||
|
|
replicaCount: 2
|
|||
|
|
|
|||
|
|
image:
|
|||
|
|
repository: example.com/user-management-system
|
|||
|
|
pullPolicy: IfNotPresent
|
|||
|
|
tag: "1.0.0"
|
|||
|
|
|
|||
|
|
imagePullSecrets: []
|
|||
|
|
nameOverride: ""
|
|||
|
|
fullnameOverride: ""
|
|||
|
|
|
|||
|
|
serviceAccount:
|
|||
|
|
create: true
|
|||
|
|
annotations: {}
|
|||
|
|
name: ""
|
|||
|
|
|
|||
|
|
podAnnotations: {}
|
|||
|
|
|
|||
|
|
podSecurityContext: {}
|
|||
|
|
# fsGroup: 2000
|
|||
|
|
|
|||
|
|
securityContext: {}
|
|||
|
|
# capabilities:
|
|||
|
|
# drop:
|
|||
|
|
# - ALL
|
|||
|
|
# readOnlyRootFilesystem: true
|
|||
|
|
# runAsNonRoot: true
|
|||
|
|
# runAsUser: 1000
|
|||
|
|
|
|||
|
|
service:
|
|||
|
|
type: ClusterIP
|
|||
|
|
port: 8080
|
|||
|
|
|
|||
|
|
ingress:
|
|||
|
|
enabled: true
|
|||
|
|
className: "nginx"
|
|||
|
|
annotations: {}
|
|||
|
|
# kubernetes.io/ingress.class: nginx
|
|||
|
|
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
|||
|
|
hosts:
|
|||
|
|
- host: api.example.com
|
|||
|
|
paths:
|
|||
|
|
- path: /
|
|||
|
|
pathType: Prefix
|
|||
|
|
tls: []
|
|||
|
|
# - secretName: user-ms-tls
|
|||
|
|
# hosts:
|
|||
|
|
# - api.example.com
|
|||
|
|
|
|||
|
|
resources:
|
|||
|
|
limits:
|
|||
|
|
cpu: 1000m
|
|||
|
|
memory: 512Mi
|
|||
|
|
requests:
|
|||
|
|
cpu: 500m
|
|||
|
|
memory: 256Mi
|
|||
|
|
|
|||
|
|
autoscaling:
|
|||
|
|
enabled: true
|
|||
|
|
minReplicas: 2
|
|||
|
|
maxReplicas: 10
|
|||
|
|
targetCPUUtilizationPercentage: 70
|
|||
|
|
targetMemoryUtilizationPercentage: 80
|
|||
|
|
|
|||
|
|
nodeSelector: {}
|
|||
|
|
|
|||
|
|
tolerations: []
|
|||
|
|
|
|||
|
|
affinity: {}
|
|||
|
|
|
|||
|
|
# 数据库配置
|
|||
|
|
database:
|
|||
|
|
host: mysql-service
|
|||
|
|
port: 3306
|
|||
|
|
name: user_management
|
|||
|
|
username: app_user
|
|||
|
|
password: app_password
|
|||
|
|
|
|||
|
|
# Redis 配置
|
|||
|
|
redis:
|
|||
|
|
host: redis-service
|
|||
|
|
port: 6379
|
|||
|
|
password: ""
|
|||
|
|
database: 0
|
|||
|
|
|
|||
|
|
# 环境变量
|
|||
|
|
env:
|
|||
|
|
- name: SPRING_PROFILES_ACTIVE
|
|||
|
|
value: "prod"
|
|||
|
|
- name: LOG_LEVEL
|
|||
|
|
value: "info"
|
|||
|
|
|
|||
|
|
# 健康检查
|
|||
|
|
livenessProbe:
|
|||
|
|
httpGet:
|
|||
|
|
path: /health/live
|
|||
|
|
port: 8080
|
|||
|
|
initialDelaySeconds: 30
|
|||
|
|
periodSeconds: 10
|
|||
|
|
timeoutSeconds: 5
|
|||
|
|
failureThreshold: 3
|
|||
|
|
|
|||
|
|
readinessProbe:
|
|||
|
|
httpGet:
|
|||
|
|
path: /health/ready
|
|||
|
|
port: 8080
|
|||
|
|
initialDelaySeconds: 10
|
|||
|
|
periodSeconds: 5
|
|||
|
|
timeoutSeconds: 3
|
|||
|
|
failureThreshold: 3
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 部署命令
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 安装 Helm Chart
|
|||
|
|
helm install user-ms ./user-management-system \
|
|||
|
|
-f values-prod.yaml \
|
|||
|
|
--namespace production \
|
|||
|
|
--create-namespace
|
|||
|
|
|
|||
|
|
# 更新部署
|
|||
|
|
helm upgrade user-ms ./user-management-system \
|
|||
|
|
-f values-prod.yaml \
|
|||
|
|
--namespace production
|
|||
|
|
|
|||
|
|
# 回滚
|
|||
|
|
helm rollback user-ms 1 --namespace production
|
|||
|
|
|
|||
|
|
# 卸载
|
|||
|
|
helm uninstall user-ms --namespace production
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 1.4 传统安装包部署
|
|||
|
|
|
|||
|
|
#### 目录结构
|
|||
|
|
|
|||
|
|
```
|
|||
|
|
user-management-system-1.0.0/
|
|||
|
|
├── bin/
|
|||
|
|
│ ├── auth-service
|
|||
|
|
│ ├── user-service
|
|||
|
|
│ ├── permission-service
|
|||
|
|
│ └── gateway
|
|||
|
|
├── config/
|
|||
|
|
│ ├── application.yml
|
|||
|
|
│ └── application-prod.yml
|
|||
|
|
├── lib/
|
|||
|
|
│ ├── *.jar
|
|||
|
|
│ └── *.so
|
|||
|
|
├── scripts/
|
|||
|
|
│ ├── install.sh
|
|||
|
|
│ ├── start.sh
|
|||
|
|
│ ├── stop.sh
|
|||
|
|
│ └── restart.sh
|
|||
|
|
└── README.md
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 安装脚本(install.sh)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
#!/bin/bash
|
|||
|
|
|
|||
|
|
set -e
|
|||
|
|
|
|||
|
|
echo "开始安装用户管理系统..."
|
|||
|
|
|
|||
|
|
# 检查 Java 环境
|
|||
|
|
if ! command -v java &> /dev/null; then
|
|||
|
|
echo "错误: 未检测到 Java 环境"
|
|||
|
|
exit 1
|
|||
|
|
fi
|
|||
|
|
|
|||
|
|
# 检查 MySQL
|
|||
|
|
if ! command -v mysql &> /dev/null; then
|
|||
|
|
echo "错误: 未检测到 MySQL"
|
|||
|
|
exit 1
|
|||
|
|
fi
|
|||
|
|
|
|||
|
|
# 创建用户
|
|||
|
|
if ! id -u userms &> /dev/null; then
|
|||
|
|
echo "创建系统用户 userms..."
|
|||
|
|
useradd -r -s /bin/false userms
|
|||
|
|
fi
|
|||
|
|
|
|||
|
|
# 创建目录
|
|||
|
|
INSTALL_DIR="/opt/user-management-system"
|
|||
|
|
echo "安装目录: $INSTALL_DIR"
|
|||
|
|
mkdir -p $INSTALL_DIR/{bin,config,lib,logs}
|
|||
|
|
|
|||
|
|
# 复制文件
|
|||
|
|
echo "复制文件..."
|
|||
|
|
cp -r bin/* $INSTALL_DIR/bin/
|
|||
|
|
cp -r config/* $INSTALL_DIR/config/
|
|||
|
|
cp -r lib/* $INSTALL_DIR/lib/
|
|||
|
|
|
|||
|
|
# 设置权限
|
|||
|
|
chown -R userms:userms $INSTALL_DIR
|
|||
|
|
chmod +x $INSTALL_DIR/bin/*
|
|||
|
|
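mkdir -p $INSTALL_DIR/scripts
cp -r scripts/* $INSTALL_DIR/scripts/
chown -R userms:userms $INSTALL_DIR/scripts
chmod +x $INSTALL_DIR/scripts/*.sh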
|
|||
|
|
|
|||
|
|
# 创建服务文件
|
|||
|
|
cat > /etc/systemd/system/user-ms.service <<EOF
|
|||
|
|
[Unit]
|
|||
|
|
Description=User Management System
|
|||
|
|
After=network.target mysql.service
|
|||
|
|
|
|||
|
|
[Service]
|
|||
|
|
Type=forking
|
|||
|
|
User=userms
|
|||
|
|
WorkingDirectory=$INSTALL_DIR
|
|||
|
|
ExecStart=$INSTALL_DIR/scripts/start.sh
|
|||
|
|
ExecStop=$INSTALL_DIR/scripts/stop.sh
|
|||
|
|
Restart=on-failure
|
|||
|
|
RestartSec=10
|
|||
|
|
|
|||
|
|
[Install]
|
|||
|
|
WantedBy=multi-user.target
|
|||
|
|
EOF
|
|||
|
|
|
|||
|
|
# 重载 systemd
|
|||
|
|
systemctl daemon-reload
|
|||
|
|
|
|||
|
|
echo "安装完成!"
|
|||
|
|
echo "请修改配置文件 $INSTALL_DIR/config/application-prod.yml"
|
|||
|
|
echo "启动服务: systemctl start user-ms"
|
|||
|
|
echo "设置开机启动: systemctl enable user-ms"
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 启动脚本(start.sh)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
#!/bin/bash
|
|||
|
|
|
|||
|
|
INSTALL_DIR="/opt/user-management-system"
|
|||
|
|
LOG_DIR="$INSTALL_DIR/logs"
|
|||
|
|
|
|||
|
|
cd $INSTALL_DIR
|
|||
|
|
|
|||
|
|
echo "启动用户管理系统..."
|
|||
|
|
|
|||
|
|
# 启动认证服务
|
|||
|
|
nohup $INSTALL_DIR/bin/auth-service \
|
|||
|
|
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
|||
|
|
> $LOG_DIR/auth-service.log 2>&1 &
|
|||
|
|
AUTH_PID=$!
|
|||
|
|
echo "认证服务启动 (PID: $AUTH_PID)"
|
|||
|
|
|
|||
|
|
# 启动用户服务
|
|||
|
|
nohup $INSTALL_DIR/bin/user-service \
|
|||
|
|
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
|||
|
|
> $LOG_DIR/user-service.log 2>&1 &
|
|||
|
|
USER_PID=$!
|
|||
|
|
echo "用户服务启动 (PID: $USER_PID)"
|
|||
|
|
|
|||
|
|
# 启动权限服务
|
|||
|
|
nohup $INSTALL_DIR/bin/permission-service \
|
|||
|
|
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
|||
|
|
> $LOG_DIR/permission-service.log 2>&1 &
|
|||
|
|
PERM_PID=$!
|
|||
|
|
echo "权限服务启动 (PID: $PERM_PID)"
|
|||
|
|
|
|||
|
|
# 启动网关
|
|||
|
|
nohup $INSTALL_DIR/bin/gateway \
|
|||
|
|
--spring.config.location=$INSTALL_DIR/config/application-prod.yml \
|
|||
|
|
> $LOG_DIR/gateway.log 2>&1 &
|
|||
|
|
GATEWAY_PID=$!
|
|||
|
|
echo "网关启动 (PID: $GATEWAY_PID)"
|
|||
|
|
|
|||
|
|
# 保存 PID
|
|||
|
|
echo $AUTH_PID > $LOG_DIR/auth-service.pid
|
|||
|
|
echo $USER_PID > $LOG_DIR/user-service.pid
|
|||
|
|
echo $PERM_PID > $LOG_DIR/permission-service.pid
|
|||
|
|
echo $GATEWAY_PID > $LOG_DIR/gateway.pid
|
|||
|
|
|
|||
|
|
echo "启动完成!"
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 2. 监控与告警
|
|||
|
|
|
|||
|
|
### 2.1 Prometheus 配置
|
|||
|
|
|
|||
|
|
#### prometheus.yml
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
global:
|
|||
|
|
scrape_interval: 15s
|
|||
|
|
evaluation_interval: 15s
|
|||
|
|
|
|||
|
|
alerting:
|
|||
|
|
alertmanagers:
|
|||
|
|
- static_configs:
|
|||
|
|
- targets: ['alertmanager:9093']
|
|||
|
|
|
|||
|
|
rule_files:
|
|||
|
|
- "alerts/*.yml"
|
|||
|
|
|
|||
|
|
scrape_configs:
|
|||
|
|
- job_name: 'user-ms-auth'
|
|||
|
|
static_configs:
|
|||
|
|
- targets: ['auth-service:8080']
|
|||
|
|
metrics_path: '/metrics'
|
|||
|
|
|
|||
|
|
- job_name: 'user-ms-user'
|
|||
|
|
static_configs:
|
|||
|
|
- targets: ['user-service:8080']
|
|||
|
|
metrics_path: '/metrics'
|
|||
|
|
|
|||
|
|
- job_name: 'user-ms-permission'
|
|||
|
|
static_configs:
|
|||
|
|
- targets: ['permission-service:8080']
|
|||
|
|
metrics_path: '/metrics'
|
|||
|
|
|
|||
|
|
- job_name: 'mysql'
|
|||
|
|
static_configs:
|
|||
|
|
- targets: ['mysql-exporter:9104']
|
|||
|
|
|
|||
|
|
- job_name: 'redis'
|
|||
|
|
static_configs:
|
|||
|
|
- targets: ['redis-exporter:9121']
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 告警规则(alerts.yml)
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
groups:
|
|||
|
|
- name: user-ms-alerts
|
|||
|
|
interval: 30s
|
|||
|
|
rules:
|
|||
|
|
# 高错误率告警
|
|||
|
|
- alert: HighErrorRate
|
|||
|
|
expr: sum by (instance) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (instance) (rate(http_requests_total[5m])) > 0.05
|
|||
|
|
for: 5m
|
|||
|
|
labels:
|
|||
|
|
severity: critical
|
|||
|
|
annotations:
|
|||
|
|
summary: "高错误率告警"
|
|||
|
|
description: "{{ $labels.instance }} 的错误率超过 5%"
|
|||
|
|
|
|||
|
|
# 高响应时间告警
|
|||
|
|
- alert: HighResponseTime
|
|||
|
|
expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 0.5
|
|||
|
|
for: 5m
|
|||
|
|
labels:
|
|||
|
|
severity: warning
|
|||
|
|
annotations:
|
|||
|
|
summary: "高响应时间告警"
|
|||
|
|
description: "{{ $labels.instance }} 的 P99 响应时间超过 500ms"
|
|||
|
|
|
|||
|
|
# CPU 使用率告警
|
|||
|
|
- alert: HighCPUUsage
|
|||
|
|
expr: rate(process_cpu_seconds_total[5m]) > 0.7
|
|||
|
|
for: 5m
|
|||
|
|
labels:
|
|||
|
|
severity: warning
|
|||
|
|
annotations:
|
|||
|
|
summary: "高 CPU 使用率"
|
|||
|
|
description: "{{ $labels.instance }} 的 CPU 使用率超过 70%"
|
|||
|
|
|
|||
|
|
# 内存使用率告警
|
|||
|
|
- alert: HighMemoryUsage
|
|||
|
|
expr: (jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"}) > 0.8
|
|||
|
|
for: 5m
|
|||
|
|
labels:
|
|||
|
|
severity: warning
|
|||
|
|
annotations:
|
|||
|
|
summary: "高内存使用率"
|
|||
|
|
description: "{{ $labels.instance }} 的堆内存使用率超过 80%"
|
|||
|
|
|
|||
|
|
# 数据库连接告警
|
|||
|
|
- alert: DatabaseConnectionPoolExhausted
|
|||
|
|
expr: hikaricp_connections_active / hikaricp_connections_max > 0.9
|
|||
|
|
for: 5m
|
|||
|
|
labels:
|
|||
|
|
severity: critical
|
|||
|
|
annotations:
|
|||
|
|
summary: "数据库连接池耗尽"
|
|||
|
|
description: "{{ $labels.instance }} 的数据库连接池使用率超过 90%"
|
|||
|
|
|
|||
|
|
# 在线用户数异常
|
|||
|
|
- alert: LowOnlineUsers
|
|||
|
|
expr: system_online_users < 10
|
|||
|
|
for: 10m
|
|||
|
|
labels:
|
|||
|
|
severity: warning
|
|||
|
|
annotations:
|
|||
|
|
summary: "在线用户数异常"
|
|||
|
|
description: "在线用户数低于 10,可能存在服务异常"
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 2.2 Grafana 仪表盘
|
|||
|
|
|
|||
|
|
#### 核心指标面板
|
|||
|
|
|
|||
|
|
| 面板名称 | 指标 | 说明 |
|
|||
|
|
|----------|------|------|
|
|||
|
|
| 总用户数 | `system_total_users` | 系统总用户数 |
|
|||
|
|
| 在线用户数 | `system_online_users` | 当前在线用户数 |
|
|||
|
|
| 今日注册数 | `increase(user_register_total[1d])` | 今日注册用户数 |
|
|||
|
|
| 今日登录数 | `increase(user_login_total[1d])` | 今日登录次数 |
|
|||
|
|
| QPS | `rate(http_requests_total[1m])` | 每秒请求数 |
|
|||
|
|
| 响应时间 (P99) | `histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))` | P99 响应时间 |
|
|||
|
|
| 错误率 | `rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m])` | 错误率 |
|
|||
|
|
| CPU 使用率 | `rate(process_cpu_seconds_total[5m])` | CPU 使用率 |
|
|||
|
|
| 内存使用率 | `jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"}` | 内存使用率 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 2.3 日志管理
|
|||
|
|
|
|||
|
|
#### 日志配置(Logback)
|
|||
|
|
|
|||
|
|
```xml
|
|||
|
|
<configuration>
|
|||
|
|
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
|
|||
|
|
<encoder>
|
|||
|
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
|
|||
|
|
</encoder>
|
|||
|
|
</appender>
|
|||
|
|
|
|||
|
|
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
|||
|
|
<file>logs/application.log</file>
|
|||
|
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
|||
|
|
<fileNamePattern>logs/application.%d{yyyy-MM-dd}.log</fileNamePattern>
|
|||
|
|
<maxHistory>30</maxHistory>
|
|||
|
|
<totalSizeCap>10GB</totalSizeCap>
|
|||
|
|
</rollingPolicy>
|
|||
|
|
<encoder>
|
|||
|
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
|
|||
|
|
</encoder>
|
|||
|
|
</appender>
|
|||
|
|
|
|||
|
|
<root level="INFO">
|
|||
|
|
<appender-ref ref="CONSOLE" />
|
|||
|
|
<appender-ref ref="FILE" />
|
|||
|
|
</root>
|
|||
|
|
</configuration>
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### ELK 集成
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
# Filebeat 配置
|
|||
|
|
filebeat.inputs:
|
|||
|
|
- type: log
|
|||
|
|
enabled: true
|
|||
|
|
paths:
|
|||
|
|
- /opt/user-management-system/logs/*.log
|
|||
|
|
fields:
|
|||
|
|
app: user-management-system
|
|||
|
|
env: production
|
|||
|
|
|
|||
|
|
output.elasticsearch:
|
|||
|
|
hosts: ["elasticsearch:9200"]
|
|||
|
|
indices:
|
|||
|
|
- index: "user-ms-%{+yyyy.MM.dd}"
|
|||
|
|
when.contains:
|
|||
|
|
fields.app: "user-management-system"
|
|||
|
|
|
|||
|
|
setup.template.name: "user-ms"
|
|||
|
|
setup.template.pattern: "user-ms-*"
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 3. 运维操作
|
|||
|
|
|
|||
|
|
### 3.1 日常巡检
|
|||
|
|
|
|||
|
|
#### 巡检清单
|
|||
|
|
|
|||
|
|
| 检查项 | 检查方法 | 正常值 | 异常处理 |
|
|||
|
|
|--------|----------|--------|----------|
|
|||
|
|
| 服务状态 | systemctl status | Active | 重启服务 |
|
|||
|
|
| 磁盘空间 | df -h | 使用率 < 80% | 清理日志 |
|
|||
|
|
| 内存使用 | free -h | 使用率 < 80% | 扩容或优化 |
|
|||
|
|
| CPU 使用 | top | 使用率 < 70% | 扩容或优化 |
|
|||
|
|
| 数据库连接 | SHOW PROCESSLIST | 连接数 < 100 | 优化连接池 |
|
|||
|
|
| Redis 连接 | redis-cli info clients | 连接数正常 | 扩容 Redis |
|
|||
|
|
| API 响应时间 | curl -w @curl-format.txt | < 500ms | 优化代码 |
|
|||
|
|
| 错误日志 | tail -f error.log | 无新错误 | 排查问题 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 3.2 备份与恢复
|
|||
|
|
|
|||
|
|
#### 数据库备份
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
#!/bin/bash
|
|||
|
|
# backup-mysql.sh
|
|||
|
|
|
|||
|
|
BACKUP_DIR="/backup/mysql"
|
|||
|
|
DATE=$(date +%Y%m%d_%H%M%S)
|
|||
|
|
DB_NAME="user_management"
|
|||
|
|
DB_USER="root"
|
|||
|
|
DB_PASSWORD="your_password"
|
|||
|
|
|
|||
|
|
mkdir -p $BACKUP_DIR
|
|||
|
|
|
|||
|
|
# 全量备份
|
|||
|
|
mysqldump -u$DB_USER -p$DB_PASSWORD $DB_NAME | gzip > $BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz

# 删除 7 天前的备份
find $BACKUP_DIR -name "*.sql.gz" -mtime +7 -delete

echo "备份完成: $BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz"
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 数据恢复
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 解压备份文件
|
|||
|
|
gunzip user_management_20260310_120000.sql.gz
|
|||
|
|
|
|||
|
|
# 恢复数据库
|
|||
|
|
mysql -u root -p user_management < user_management_20260310_120000.sql
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 3.3 版本升级
|
|||
|
|
|
|||
|
|
#### 升级流程
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 1. 备份数据库
|
|||
|
|
./scripts/backup-mysql.sh
|
|||
|
|
|
|||
|
|
# 2. 停止服务
|
|||
|
|
./scripts/stop.sh
|
|||
|
|
|
|||
|
|
# 3. 备份旧版本
|
|||
|
|
cp -r /opt/user-management-system /opt/user-management-system.bak
|
|||
|
|
|
|||
|
|
# 4. 部署新版本
|
|||
|
|
unzip user-management-system-1.1.0.zip -d /opt/
|
|||
|
|
|
|||
|
|
# 5. 执行数据库迁移
|
|||
|
|
mysql -u root -p user_management < migration/1.1.0.sql
|
|||
|
|
|
|||
|
|
# 6. 启动服务
|
|||
|
|
./scripts/start.sh
|
|||
|
|
|
|||
|
|
# 7. 验证服务
|
|||
|
|
curl http://localhost:8080/health
|
|||
|
|
curl http://localhost:8080/health/live
|
|||
|
|
curl http://localhost:8080/health/ready
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 回滚流程
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 1. 停止服务
|
|||
|
|
./scripts/stop.sh
|
|||
|
|
|
|||
|
|
# 2. 删除新版本
|
|||
|
|
rm -rf /opt/user-management-system
|
|||
|
|
|
|||
|
|
# 3. 恢复旧版本
|
|||
|
|
mv /opt/user-management-system.bak /opt/user-management-system
|
|||
|
|
|
|||
|
|
# 4. 恢复数据库
|
|||
|
|
gunzip -c /backup/mysql/user_management_20260310_120000.sql.gz | mysql -u root -p user_management
|
|||
|
|
|
|||
|
|
# 5. 启动服务
|
|||
|
|
./scripts/start.sh
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 3.4 故障排查
|
|||
|
|
|
|||
|
|
#### 常见问题
|
|||
|
|
|
|||
|
|
| 问题 | 可能原因 | 排查方法 | 解决方案 |
|
|||
|
|
|------|----------|----------|----------|
|
|||
|
|
| 服务启动失败 | 端口被占用 | netstat -tunlp | 修改端口或停止占用进程 |
|
|||
|
|
| 数据库连接失败 | 网络问题 | ping、telnet | 检查网络和防火墙 |
|
|||
|
|
| 响应慢 | 数据库查询慢 | 慢查询日志 | 优化 SQL、加索引 |
|
|||
|
|
| 内存溢出 | 内存泄漏 | jmap -heap | 优化代码、扩容 |
|
|||
|
|
| 登录失败 | 验证码过期 | 检查 Redis | 调整验证码有效期 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 4. 性能优化
|
|||
|
|
|
|||
|
|
### 4.1 数据库优化
|
|||
|
|
|
|||
|
|
#### 索引优化
|
|||
|
|
|
|||
|
|
```sql
|
|||
|
|
-- 查看慢查询
|
|||
|
|
SHOW VARIABLES LIKE 'slow_query%';
|
|||
|
|
SHOW VARIABLES LIKE 'long_query_time';
|
|||
|
|
|
|||
|
|
-- 分析慢查询
|
|||
|
|
EXPLAIN SELECT * FROM users WHERE username = 'john_doe';
|
|||
|
|
|
|||
|
|
-- 添加索引
|
|||
|
|
CREATE INDEX idx_username ON users(username);
|
|||
|
|
CREATE INDEX idx_email ON users(email);
|
|||
|
|
CREATE INDEX idx_phone ON users(phone);
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 查询优化
|
|||
|
|
|
|||
|
|
```sql
|
|||
|
|
-- 使用覆盖索引
|
|||
|
|
SELECT id, username, email FROM users WHERE status = 1;
|
|||
|
|
|
|||
|
|
-- 避免 SELECT *
|
|||
|
|
SELECT id, username FROM users WHERE id = ?;
|
|||
|
|
|
|||
|
|
-- 使用 LIMIT 分页
|
|||
|
|
SELECT * FROM users ORDER BY id LIMIT 20 OFFSET 0;
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 4.2 Redis 优化
|
|||
|
|
|
|||
|
|
#### 缓存策略
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
cache:
|
|||
|
|
# 用户信息缓存
|
|||
|
|
user_info:
|
|||
|
|
ttl: 3600 # 1 小时
|
|||
|
|
max_size: 10000
|
|||
|
|
|
|||
|
|
# 权限信息缓存
|
|||
|
|
user_permissions:
|
|||
|
|
ttl: 1800 # 30 分钟
|
|||
|
|
max_size: 5000
|
|||
|
|
|
|||
|
|
# Token 黑名单
|
|||
|
|
token_blacklist:
|
|||
|
|
ttl: 2592000 # 30 天
|
|||
|
|
max_size: 50000
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### Redis 配置
|
|||
|
|
|
|||
|
|
```ini
|
|||
|
|
# redis.conf
|
|||
|
|
maxmemory 2gb
|
|||
|
|
maxmemory-policy allkeys-lru
|
|||
|
|
save 900 1
|
|||
|
|
save 300 10
|
|||
|
|
save 60 10000
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 4.3 应用优化
|
|||
|
|
|
|||
|
|
#### JVM 参数优化
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
java \
-Xms512m \
-Xmx2g \
-XX:+UseG1GC \
-XX:MaxGCPauseMillis=200 \
-XX:+HeapDumpOnOutOfMemoryError \
-XX:HeapDumpPath=/opt/logs/heap_dump.hprof \
-jar app.jar
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 连接池优化
|
|||
|
|
|
|||
|
|
```yaml
|
|||
|
|
datasource:
|
|||
|
|
hikari:
|
|||
|
|
maximum-pool-size: 50
|
|||
|
|
minimum-idle: 10
|
|||
|
|
connection-timeout: 30000
|
|||
|
|
idle-timeout: 600000
|
|||
|
|
max-lifetime: 1800000
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 5. 安全加固
|
|||
|
|
|
|||
|
|
### 5.1 防火墙配置
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 只开放必要端口
|
|||
|
|
firewall-cmd --permanent --add-port=80/tcp
|
|||
|
|
firewall-cmd --permanent --add-port=443/tcp
|
|||
|
|
firewall-cmd --permanent --add-port=22/tcp
|
|||
|
|
firewall-cmd --reload
|
|||
|
|
|
|||
|
|
# 限制数据库访问
|
|||
|
|
firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="10.0.0.0/8" port port="3306" protocol="tcp" accept'
|
|||
|
|
firewall-cmd --reload
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
### 5.2 SSL/TLS 配置
|
|||
|
|
|
|||
|
|
```nginx
|
|||
|
|
server {
|
|||
|
|
listen 443 ssl http2;
|
|||
|
|
server_name api.example.com;
|
|||
|
|
|
|||
|
|
ssl_certificate /path/to/cert.pem;
|
|||
|
|
ssl_certificate_key /path/to/key.pem;
|
|||
|
|
|
|||
|
|
ssl_protocols TLSv1.2 TLSv1.3;
|
|||
|
|
ssl_ciphers HIGH:!aNULL:!MD5;
|
|||
|
|
ssl_prefer_server_ciphers on;
|
|||
|
|
|
|||
|
|
ssl_session_cache shared:SSL:10m;
|
|||
|
|
ssl_session_timeout 10m;
|
|||
|
|
|
|||
|
|
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
|
|||
|
|
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 6. 监控告警联系人
|
|||
|
|
|
|||
|
|
| 级别 | 联系人 | 通知方式 |
|
|||
|
|
|------|--------|----------|
|
|||
|
|
| Critical | 运维团队 | 电话 + 短信 + 邮件 |
|
|||
|
|
| Warning | 开发团队 | 邮件 + 钉钉/企业微信 |
|
|||
|
|
| Info | 项目经理 | 邮件 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
*本文档持续更新中,如有疑问请联系运维团队。*
|