docs(review): add remediation plans and readiness artifacts

Add design, review, and production-readiness documents for the April remediation cycle.
Include supporting SQL and supply-api operational design notes so review conclusions and implementation guidance stay versioned together.
This commit is contained in:
Your Name
2026-04-13 18:54:45 +08:00
parent a2f042f1c2
commit aecba5ff27
24 changed files with 7880 additions and 0 deletions

View File

@@ -0,0 +1,102 @@
-- ============================================================================
-- 审计事件表 Schema 迁移 v1 -> v2
-- 问题: 数据库中的 audit_events 表是旧版 schema，与代码中的 model 不匹配
--
-- 旧版 schema (当前生产表):
-- domain_code, action_code, severity, client_ip, before_data, after_data
--
-- 新版 schema (代码期望):
-- event_id, event_name, event_category, timestamp, timestamp_ms,
-- action, source_ip, operator_id, operator_type 等
-- ============================================================================
-- 1-2. Back up the legacy table, then drop it together with its dependent
--      objects (CASCADE).
--
-- Both steps run inside one DO block so they succeed or fail together: if the
-- backup copy cannot be written, the DROP never executes and no audit data is
-- lost. The guards make the step safe to re-run:
--   * audit_events is missing          -> nothing to back up or drop
--   * audit_events already has the v2
--     column event_id                  -> already migrated, left untouched
--   * audit_events_backup exists       -> abort instead of mixing a second
--                                         copy into an existing backup
DO $$
BEGIN
    IF to_regclass('audit_events') IS NULL THEN
        RAISE NOTICE 'audit_events does not exist; skipping backup and drop';
        RETURN;
    END IF;

    -- event_id exists only in the v2 schema, so its presence means the table
    -- was already recreated and must not be dropped again.
    IF EXISTS (
        SELECT 1
        FROM pg_attribute
        WHERE attrelid = to_regclass('audit_events')
          AND attname = 'event_id'
          AND attnum > 0
          AND NOT attisdropped
    ) THEN
        RAISE NOTICE 'audit_events already uses the v2 schema; skipping backup and drop';
        RETURN;
    END IF;

    IF to_regclass('audit_events_backup') IS NOT NULL THEN
        RAISE EXCEPTION 'audit_events_backup already exists; rename or drop it before re-running this migration';
    END IF;

    -- SELECT * is intentional: the backup must be a full copy of the legacy rows.
    CREATE TABLE audit_events_backup AS
    SELECT * FROM audit_events;

    DROP TABLE audit_events CASCADE;
END $$;
-- 3. Create the new v2 table. Per the original note, this definition is taken
--    from partition_strategy_v1.sql.
-- NOTE(review): the migration header lists operator_id and operator_type among
-- the columns the code expects, but neither is defined below — confirm against
-- the model before applying.
CREATE TABLE IF NOT EXISTS audit_events (
id BIGSERIAL,
event_id VARCHAR(100) NOT NULL,
event_name VARCHAR(100) NOT NULL,
event_category VARCHAR(50),
event_sub_category VARCHAR(50),
timestamp TIMESTAMPTZ NOT NULL,
timestamp_ms BIGINT NOT NULL,
request_id VARCHAR(100),
idempotency_key VARCHAR(128),
tenant_id BIGINT,
object_type VARCHAR(100),
object_id VARCHAR(100),
action VARCHAR(100) NOT NULL,
result_code VARCHAR(50),
source_ip VARCHAR(50),
created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- The range-partition key (timestamp) is included in the primary key;
-- PostgreSQL requires a partitioned table's primary key to cover the
-- partition key columns.
PRIMARY KEY (id, timestamp)
) PARTITION BY RANGE (timestamp);
-- 4. Create the secondary indexes.
-- IF NOT EXISTS matches the CREATE TABLE IF NOT EXISTS above: when the table
-- (and therefore its indexes) already exists, this step is a no-op instead of
-- an error.
CREATE INDEX IF NOT EXISTS idx_audit_events_tenant_id
    ON audit_events (tenant_id);
CREATE INDEX IF NOT EXISTS idx_audit_events_request_id
    ON audit_events (request_id);
CREATE INDEX IF NOT EXISTS idx_audit_events_created_at
    ON audit_events (created_at);
CREATE INDEX IF NOT EXISTS idx_audit_events_object
    ON audit_events (object_type, object_id);
-- 5. Create the initial monthly partitions: the 12 previous months, the
--    current month, and the next 3 months (month offsets -12 .. +3).
DO $$
DECLARE
    -- First day of the current month; every offset is applied to this anchor
    -- so each partition starts exactly on a month boundary.
    current_month  CONSTANT DATE := date_trunc('month', CURRENT_DATE)::DATE;
    month_offset   INT;
    partition_name TEXT;
    start_date     DATE;
    end_date       DATE;
BEGIN
    FOR month_offset IN -12..3 LOOP
        start_date := (current_month + month_offset * INTERVAL '1 month')::DATE;
        -- Upper bound is exclusive: the first day of the following month.
        end_date := (start_date + INTERVAL '1 month')::DATE;
        partition_name := 'audit_events_' || to_char(start_date, 'YYYY_MM');

        -- Look only in the schema where the unqualified CREATE TABLE below
        -- will land; a same-named relation in another schema must not cause
        -- this month's partition to be skipped.
        IF NOT EXISTS (
            SELECT 1
            FROM pg_class AS c
            INNER JOIN pg_namespace AS n
                ON n.oid = c.relnamespace
            WHERE c.relname = partition_name
              AND n.nspname = current_schema()
        ) THEN
            EXECUTE format(
                'CREATE TABLE %I PARTITION OF audit_events FOR VALUES FROM (%L) TO (%L)',
                partition_name, start_date, end_date
            );
            RAISE NOTICE 'Created partition: %', partition_name;
        END IF;
    END LOOP;
END $$;
-- 6. Verification: count the partitions attached to audit_events.
-- pg_inherits lists exactly the children of the partitioned parent, so the
-- backup table and any unrelated audit_events_* tables are excluded without
-- name matching. (The previous query read a non-existent table_name column
-- from pg_tables, whose column is tablename.)
SELECT
    count(*) AS partition_count
FROM pg_inherits
WHERE inhparent = 'audit_events'::regclass;
COMMENT ON TABLE audit_events IS '审计事件表 - 按月分区保留12个月';

View File

@@ -0,0 +1,347 @@
# Supply API 数据字典 v1.0
> **文档版本**: v1.0
> **创建日期**: 2026-04-07
> **基于**: supply_schema_v1.sql
---
## 1. supply_accounts (供应商账户表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 账户唯一标识 |
| user_id | BIGINT | NOT NULL | - | 所属用户ID |
| platform | VARCHAR(50) | NOT NULL | - | 平台标识 (openai/anthropic/azure等) |
| account_type | VARCHAR(20) | NOT NULL | - | 账户类型: api_key/oauth |
| account_name | VARCHAR(100) | - | - | 账户显示名称 |
| encrypted_credentials | TEXT | NOT NULL | - | 加密存储的凭证 (AES-256-GCM) |
| key_id | VARCHAR(100) | - | - | 凭证密钥标识 |
| status | VARCHAR(20) | NOT NULL | 'pending' | 状态: pending/active/suspended/disabled |
| risk_level | VARCHAR(20) | NOT NULL | 'normal' | 风险等级: low/normal/high |
| total_quota | NUMERIC(20,6) | - | - | 账户总配额 |
| available_quota | NUMERIC(20,6) | - | - | 可用配额 |
| frozen_quota | NUMERIC(20,6) | NOT NULL | 0 | 冻结配额 |
| is_verified | BOOLEAN | - | FALSE | 是否已验证 |
| verified_at | TIMESTAMPTZ | - | - | 验证时间 |
| last_check_at | TIMESTAMPTZ | - | - | 最后检查时间 |
| tos_compliant | BOOLEAN | - | TRUE | TOS合规状态 |
| tos_check_result | TEXT | - | - | TOS检查结果 |
| total_requests | BIGINT | - | 0 | 累计请求数 |
| total_tokens | BIGINT | - | 0 | 累计使用Token数 |
| total_cost | NUMERIC(20,6) | - | 0 | 累计消费金额 |
| success_rate | NUMERIC(5,2) | - | 0 | 请求成功率(%) |
| risk_score | INT | - | 0 | 风险评分 (0-100) |
| risk_reason | TEXT | - | - | 风险原因 |
| is_frozen | BOOLEAN | - | FALSE | 是否被冻结 |
| frozen_reason | TEXT | - | - | 冻结原因 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
| created_by | BIGINT | - | - | 创建人 |
| updated_by | BIGINT | - | - | 更新人 |
**索引**:
- idx_supply_accounts_user_id (user_id)
- idx_supply_accounts_platform (platform)
- idx_supply_accounts_status (status)
- idx_supply_accounts_risk_level (risk_level)
---
## 2. supply_packages (供应套餐表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 套餐唯一标识 |
| supply_account_id | BIGINT | NOT NULL, FK | - | 关联供应商账户 |
| user_id | BIGINT | NOT NULL | - | 创建人用户ID |
| platform | VARCHAR(50) | NOT NULL | - | 平台标识 |
| model | VARCHAR(100) | NOT NULL | - | 模型标识 |
| total_quota | NUMERIC(20,6) | NOT NULL | - | 套餐总配额 |
| available_quota | NUMERIC(20,6) | NOT NULL | - | 可用配额 |
| sold_quota | NUMERIC(20,6) | - | 0 | 已售配额 |
| reserved_quota | NUMERIC(20,6) | - | 0 | 预留配额 |
| price_per_1m_input | NUMERIC(20,6) | - | - | 每百万输入Token价格 |
| price_per_1m_output | NUMERIC(20,6) | - | - | 每百万输出Token价格 |
| min_purchase | NUMERIC(20,6) | - | - | 最小购买量 |
| start_at | TIMESTAMPTZ | - | - | 生效时间 |
| end_at | TIMESTAMPTZ | - | - | 失效时间 |
| valid_days | INT | - | - | 有效天数 |
| status | VARCHAR(20) | NOT NULL | 'draft' | 状态: draft/active/paused/sold_out/expired |
| max_concurrent | INT | - | 10 | 最大并发数 |
| rate_limit_rpm | INT | - | 60 | 每分钟限流 |
| total_orders | INT | - | 0 | 累计订单数 |
| total_revenue | NUMERIC(20,6) | - | 0 | 累计收入 |
| rating | NUMERIC(3,2) | - | 0 | 平均评分 |
| rating_count | INT | - | 0 | 评分次数 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
**索引**:
- idx_supply_packages_supply_account_id (supply_account_id)
- idx_supply_packages_user_id (user_id)
- idx_supply_packages_platform_model (platform, model)
- idx_supply_packages_status (status)
---
## 3. supply_orders (供应订单表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 订单唯一标识 |
| order_no | VARCHAR(64) | NOT NULL, UNIQUE | - | 订单编号 |
| buyer_user_id | BIGINT | NOT NULL | - | 买家用户ID |
| buyer_team_id | BIGINT | - | - | 买家团队ID |
| supply_account_id | BIGINT | NOT NULL, FK | - | 供应商账户ID |
| supplier_user_id | BIGINT | NOT NULL | - | 供应商用户ID |
| supply_package_id | BIGINT | NOT NULL, FK | - | 套餐ID |
| platform | VARCHAR(50) | NOT NULL | - | 平台标识 |
| model | VARCHAR(100) | NOT NULL | - | 模型标识 |
| quota_amount | NUMERIC(20,6) | NOT NULL | - | 购买配额量 |
| quota_tokens | BIGINT | - | - | 购买Token配额 |
| unit_price | NUMERIC(20,6) | NOT NULL | - | 单价 |
| total_amount | NUMERIC(20,6) | NOT NULL | - | 总金额 |
| platform_fee | NUMERIC(20,6) | NOT NULL | - | 平台手续费 |
| supplier_earnings | NUMERIC(20,6) | NOT NULL | - | 供应商实收 |
| status | VARCHAR(20) | NOT NULL | 'pending' | 状态: pending/paid/using/expired/refunded |
| used_quota | NUMERIC(20,6) | - | 0 | 已使用配额 |
| remaining_quota | NUMERIC(20,6) | - | - | 剩余配额 |
| expired_at | TIMESTAMPTZ | - | - | 过期时间 |
| payment_method | VARCHAR(20) | - | - | 支付方式 |
| paid_at | TIMESTAMPTZ | - | - | 支付时间 |
| payment_transaction_id | VARCHAR(100) | - | - | 支付流水号 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
**索引**:
- idx_supply_orders_buyer_user_id (buyer_user_id)
- idx_supply_orders_supplier_user_id (supplier_user_id)
- idx_supply_orders_supply_package_id (supply_package_id)
- idx_supply_orders_status (status)
---
## 4. supply_usage_records (使用记录表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 记录唯一标识 |
| order_id | BIGINT | NOT NULL, FK | - | 关联订单ID |
| buyer_user_id | BIGINT | NOT NULL | - | 买家用户ID |
| supply_account_id | BIGINT | NOT NULL, FK | - | 供应商账户ID |
| supplier_user_id | BIGINT | NOT NULL | - | 供应商用户ID |
| request_id | VARCHAR(64) | NOT NULL | - | 请求唯一ID |
| upstream_request_id | VARCHAR(128) | - | - | 上游请求ID |
| api_key_id | BIGINT | - | - | API Key ID |
| platform | VARCHAR(50) | NOT NULL | - | 平台标识 |
| model | VARCHAR(100) | NOT NULL | - | 模型标识 |
| endpoint | VARCHAR(100) | NOT NULL | - | API端点 |
| request_tokens | BIGINT | - | - | 请求Token数 |
| response_tokens | BIGINT | - | - | 响应Token数 |
| total_tokens | BIGINT | STORED GENERATED | - | 总Token数 (计算字段) |
| input_cost | NUMERIC(20,6) | - | - | 输入费用 |
| output_cost | NUMERIC(20,6) | - | - | 输出费用 |
| total_cost | NUMERIC(20,6) | NOT NULL | - | 总费用 |
| unit_price | NUMERIC(20,6) | NOT NULL | - | 单价 |
| response_status | INT | - | - | 响应状态码 |
| latency_ms | INT | - | - | 延迟(毫秒) |
| error_message | TEXT | - | - | 错误信息 |
| success | BOOLEAN | - | TRUE | 是否成功 |
| started_at | TIMESTAMPTZ | NOT NULL | - | 开始时间 |
| completed_at | TIMESTAMPTZ | - | - | 完成时间 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
**索引**:
- idx_supply_usage_records_request_id (request_id)
- idx_supply_usage_records_order_id (order_id)
- idx_supply_usage_records_supply_account_id (supply_account_id)
- idx_supply_usage_records_platform_model (platform, model)
- idx_supply_usage_records_started_at (started_at)
---
## 5. supply_earnings (收益表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 收益记录ID |
| user_id | BIGINT | NOT NULL | - | 用户ID |
| supply_account_id | BIGINT | FK | - | 供应商账户ID |
| order_id | BIGINT | FK | - | 关联订单ID |
| usage_record_id | BIGINT | FK | - | 使用记录ID |
| earnings_type | VARCHAR(20) | NOT NULL | - | 类型: usage/bonus/refund |
| amount | NUMERIC(20,6) | NOT NULL | - | 收益金额 |
| currency | VARCHAR(10) | - | 'CNY' | 币种 |
| status | VARCHAR(20) | NOT NULL | 'pending' | 状态: pending/available/withdrawn/frozen |
| available_amount | NUMERIC(20,6) | - | 0 | 可用金额 |
| frozen_amount | NUMERIC(20,6) | - | 0 | 冻结金额 |
| withdrawn_amount | NUMERIC(20,6) | - | 0 | 已提现金额 |
| description | TEXT | - | - | 描述 |
| earned_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 收益时间 |
| available_at | TIMESTAMPTZ | - | - | 可用时间 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
**索引**:
- idx_supply_earnings_user_id (user_id)
- idx_supply_earnings_status (status)
- idx_supply_earnings_earned_at (earned_at)
---
## 6. supply_settlements (结算表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 结算单ID |
| settlement_no | VARCHAR(64) | NOT NULL, UNIQUE | - | 结算单号 |
| user_id | BIGINT | NOT NULL | - | 用户ID (供应商) |
| total_amount | NUMERIC(20,6) | NOT NULL | - | 总金额 |
| fee_amount | NUMERIC(20,6) | - | 0 | 手续费 |
| net_amount | NUMERIC(20,6) | NOT NULL | - | 净金额 |
| status | VARCHAR(20) | NOT NULL | 'pending' | 状态: pending/processing/completed/failed |
| payment_method | VARCHAR(20) | - | - | 支付方式 |
| payment_account | VARCHAR(100) | - | - | 支付账户 |
| payment_transaction_id | VARCHAR(100) | - | - | 支付流水号 |
| paid_at | TIMESTAMPTZ | - | - | 支付时间 |
| period_start | DATE | NOT NULL | - | 结算周期开始 |
| period_end | DATE | NOT NULL | - | 结算周期结束 |
| total_orders | INT | - | 0 | 关联订单数 |
| total_usage_records | INT | - | 0 | 关联使用记录数 |
| version | INT | - | 0 | 乐观锁版本号 |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
**索引**:
- idx_supply_settlements_user_id (user_id)
- idx_supply_settlements_status (status)
- idx_supply_settlements_period (period_start, period_end)
---
## 7. supply_idempotency_records (幂等记录表)
| 字段名 | 类型 | 约束 | 默认值 | 说明 |
|--------|------|------|--------|------|
| id | BIGINT | PRIMARY KEY | 自增 | 记录ID |
| tenant_id | BIGINT | NOT NULL | - | 租户ID |
| operator_id | BIGINT | NOT NULL | - | 操作人ID |
| api_path | VARCHAR(200) | NOT NULL | - | API路径 |
| idempotency_key | VARCHAR(128) | NOT NULL | - | 幂等键 |
| request_id | VARCHAR(64) | NOT NULL | - | 请求ID |
| payload_hash | CHAR(64) | NOT NULL | - | 请求体SHA256摘要 |
| response_code | INT | - | - | 响应码 |
| response_body | JSONB | - | - | 响应体 |
| status | VARCHAR(20) | NOT NULL | 'processing' | 状态: processing/succeeded/failed |
| expires_at | TIMESTAMPTZ | NOT NULL | - | 过期时间 (默认24h,提现72h) |
| created_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 创建时间 |
| updated_at | TIMESTAMPTZ | - | CURRENT_TIMESTAMP | 更新时间 |
**索引**:
- UNIQUE (tenant_id, operator_id, api_path, idempotency_key)
- idx_idempotency_tenant_operator_path_key (tenant_id, operator_id, api_path, idempotency_key) WHERE expires_at > CURRENT_TIMESTAMP
- idx_idempotency_request_id (request_id)
- idx_idempotency_expires_at (expires_at) WHERE status != 'processing'
- idx_idempotency_status_expires (status, expires_at)
---
## 8. 枚举类型定义
### account_type (账户类型)
| 值 | 说明 |
|----|------|
| api_key | API Key认证 |
| oauth | OAuth认证 |
### account_status (账户状态)
| 值 | 说明 |
|----|------|
| pending | 待验证 |
| active | 激活 |
| suspended | 暂停 |
| disabled | 禁用 |
### risk_level (风险等级)
| 值 | 说明 |
|----|------|
| low | 低风险 |
| normal | 正常 |
| high | 高风险 |
### package_status (套餐状态)
| 值 | 说明 |
|----|------|
| draft | 草稿 |
| active | 生效中 |
| paused | 已暂停 |
| sold_out | 售罄 |
| expired | 已过期 |
### order_status (订单状态)
| 值 | 说明 |
|----|------|
| pending | 待支付 |
| paid | 已支付 |
| using | 使用中 |
| expired | 已过期 |
| refunded | 已退款 |
### settlement_status (结算状态)
| 值 | 说明 |
|----|------|
| pending | 待处理 |
| processing | 处理中 |
| completed | 已完成 |
| failed | 失败 |
### earnings_type (收益类型)
| 值 | 说明 |
|----|------|
| usage | 使用收益 |
| bonus | 奖励 |
| refund | 退款 |
### earnings_status (收益状态)
| 值 | 说明 |
|----|------|
| pending | 待确认 |
| available | 可提现 |
| withdrawn | 已提现 |
| frozen | 冻结中 |
---
## 9. 数据类型说明
| 类型 | 说明 |
|------|------|
| BIGINT | 64位整数，主键和外键使用 |
| VARCHAR(n) | 变长字符串，最大n字符 |
| TEXT | 无长度限制的文本 |
| NUMERIC(p,s) | 精确数值，p为总位数，s为小数位 |
| BOOLEAN | 布尔值 |
| TIMESTAMPTZ | 带时区的时间戳 |
| DATE | 日期 |
| JSONB | JSON二进制格式，支持索引 |
| CHAR(n) | 定长字符串 |
---
## 10. 字段命名规范
| 前缀/后缀 | 说明 | 示例 |
|-----------|------|------|
| _id | ID字段 | user_id, order_id |
| _at | 时间字段 | created_at, paid_at |
| _amount | 金额字段 | total_amount, net_amount |
| is_ | 布尔字段 | is_verified, is_frozen |
| total_ | 累计字段 | total_requests, total_cost |
| available_ | 可用字段 | available_quota, available_amount |
| encrypted_ | 加密字段 | encrypted_credentials |
---
> **维护记录**:
> - v1.0 (2026-04-07): 初始版本，基于 supply_schema_v1.sql

View File

@@ -0,0 +1,315 @@
# 数据库索引维护策略 v1.0
> **文档版本**: v1.0
> **创建日期**: 2026-04-07
> **问题**: P1-009 高频写入表的索引维护策略未定义
---
## 1. 概述
本文档定义高频写入表的索引维护策略，包括 `REINDEX` 和 `VACUUM` 自动化方案，确保数据库性能稳定。
### 1.1 高频写入表清单
| 表名 | 写入频率 | 日均增量 | 备注 |
|------|----------|----------|------|
| supply_usage_records | 极高 | ~1000万条 | 核心业务表 |
| supply_idempotency_records | 高 | ~100万条 | 幂等检查 |
| audit_events | 高 | ~500万条 | 审计日志 |
| billing_ledger_entries | 中 | ~10万条 | 账务明细 |
---
## 2. VACUUM 维护策略
### 2.1 自动 VACUUM 配置
PostgreSQL 默认启用 autovacuum，但需要针对高频表进行调优：
```sql
-- Per-table autovacuum storage parameters for supply_usage_records.
-- These override the server-wide autovacuum defaults for this table only.
ALTER TABLE supply_usage_records SET (
autovacuum_vacuum_threshold = 50,
autovacuum_analyze_threshold = 50,
autovacuum_vacuum_scale_factor = 0.01,
autovacuum_analyze_scale_factor = 0.01,
autovacuum_vacuum_cost_delay = 2,
autovacuum_vacuum_cost_limit = 200
);
-- Per-table autovacuum storage parameters for supply_idempotency_records.
-- No cost_delay / cost_limit override is set here, unlike the table above.
ALTER TABLE supply_idempotency_records SET (
autovacuum_vacuum_threshold = 100,
autovacuum_analyze_threshold = 100,
autovacuum_vacuum_scale_factor = 0.05,
autovacuum_analyze_scale_factor = 0.02
);
```
### 2.2 VACUUM 策略矩阵
| 表名 | autovacuum_enabled | vacuum_threshold | vacuum_scale_factor | 分析频率 |
|------|-------------------|------------------|---------------------|----------|
| supply_usage_records | true | 50 | 0.01 (1%) | 每1%变化 |
| supply_idempotency_records | true | 100 | 0.05 (5%) | 每2%变化 |
| supply_orders | true | 500 | 0.05 (5%) | 每周 |
| supply_packages | true | 1000 | 0.1 (10%) | 每月 |
### 2.3 手动 VACUUM 计划
**日常维护** (低峰期 02:00-04:00):
```bash
# VACUUM ANALYZE a single high-write table
vacuumdb -h localhost -U postgres -d supply_db \
    --table 'supply_usage_records' \
    --analyze \
    --verbose
# VACUUM ANALYZE several tables in one run. --table may be repeated.
# (--all means "every database" and is rejected together with -d, so it
# cannot be used to mean "several tables of supply_db".)
vacuumdb -h localhost -U postgres -d supply_db \
    --table 'supply_usage_records' \
    --table 'supply_idempotency_records' \
    --table 'supply_orders' \
    --table 'supply_earnings' \
    --analyze \
    --verbose
```
**周维护** (周日 03:00-05:00):
```bash
# Full VACUUM + ANALYZE of every table in supply_db.
# -d already selects the database; --all ("every database") is rejected
# together with -d, so it is not passed.
# WARNING: --full rewrites each table under an ACCESS EXCLUSIVE lock, blocking
# reads and writes on that table for the duration. Run only inside the
# maintenance window.
vacuumdb -h localhost -U postgres -d supply_db \
    --analyze \
    --full \
    --verbose
```
---
## 3. REINDEX 维护策略
### 3.1 REINDEX 触发条件
| 触发条件 | 说明 | 影响 |
|----------|------|------|
| 索引膨胀率 > 20% | B-tree 索引膨胀 | 性能下降 |
| 大量删除后 | DELETE > 30% 总行数 | 索引包含大量空页 |
| 长时间运行后 | 运行 > 30天 | 索引统计信息陈旧 |
| 硬件故障后 | 系统重启 | 确保索引一致性 |
### 3.2 索引膨胀检测
```sql
-- Report every index larger than 1MB with its size relative to its table.
-- pg_stat_user_indexes exposes relname / indexrelname / relid (not
-- tablename / indexname / indrelid); the aliases keep the original output
-- column names.
-- NOTE: index_ratio is index size divided by table size, a coarse proxy only;
-- it does not measure B-tree bloat directly.
SELECT
    schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch,
    ROUND(
        -- NULLIF avoids division by zero for an empty table
        pg_relation_size(indexrelid)::numeric
        / NULLIF(pg_relation_size(relid), 0) * 100,
        2
    ) AS index_ratio
FROM
    pg_stat_user_indexes
WHERE
    pg_relation_size(indexrelid) > 1024 * 1024 -- > 1MB
ORDER BY
    pg_relation_size(indexrelid) DESC;
```
### 3.3 REINDEX 执行计划
**月维护** (每月第一个周日 04:00-06:00):
```bash
# Rebuild a single bloated index
reindexdb -h localhost -U postgres -d supply_db \
    --index 'idx_supply_usage_records_request_id' \
    --verbose
# Rebuild every index on one table
reindexdb -h localhost -U postgres -d supply_db \
    --table 'supply_usage_records' \
    --verbose
# Rebuild every index in supply_db (use with care: locks the tables).
# With no --index/--table, reindexdb reindexes the whole database given by -d;
# --all ("every database") is rejected together with -d, so it is not passed.
reindexdb -h localhost -U postgres -d supply_db \
    --verbose
```
### 3.4 联机型 REINDEX 方案
对于不可停机的关键表,使用 `REINDEX CONCURRENTLY`:
```bash
# Online index rebuild via --concurrently (intended to avoid locking the table)
reindexdb -h localhost -U postgres -d supply_db \
--index 'idx_supply_usage_records_request_id' \
--concurrently \
--verbose
```
---
## 4. 自动化脚本
### 4.1 每日维护脚本 (daily_vacuum.sh)
```bash
#!/bin/bash
# daily_vacuum.sh - daily VACUUM ANALYZE of the high-write tables
# Schedule: every day at 02:00
#
# pipefail is required: each vacuumdb call is piped into tee, and without it
# the pipeline's exit status is tee's, so `set -e` would never notice a failed
# vacuumdb. -u additionally turns a mistyped variable name into an error.
set -euo pipefail
DB_HOST="localhost"
DB_PORT="5432"
DB_NAME="supply_db"
DB_USER="postgres"
LOG_FILE="/var/log/postgresql/daily_vacuum_$(date +%Y%m%d).log"
echo "=== 开始每日 VACUUM 维护: $(date) ===" | tee -a "$LOG_FILE"
# Tables to vacuum, highest write volume first
TABLES=(
    "supply_usage_records"
    "supply_idempotency_records"
    "supply_orders"
    "supply_earnings"
)
for TABLE in "${TABLES[@]}"; do
    echo "VACUUM $TABLE ..." | tee -a "$LOG_FILE"
    # stderr is merged into stdout because --verbose reports on stderr
    vacuumdb -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
        --table "$TABLE" \
        --analyze \
        --verbose 2>&1 | tee -a "$LOG_FILE"
done
echo "=== VACUUM 维护完成: $(date) ===" | tee -a "$LOG_FILE"
```
### 4.2 每周维护脚本 (weekly_reindex.sh)
```bash
#!/bin/bash
# weekly_reindex.sh - weekly REINDEX maintenance
# Schedule: every Sunday at 03:00
#
# pipefail is required: each reindexdb call is piped into tee, and without it
# the pipeline's exit status is tee's, so `set -e` would never notice a failed
# reindexdb.
set -euo pipefail
DB_HOST="localhost"
DB_PORT="5432"
DB_NAME="supply_db"
DB_USER="postgres"
LOG_FILE="/var/log/postgresql/weekly_reindex_$(date +%Y%m%d).log"
echo "=== 开始每周 REINDEX 维护: $(date) ===" | tee -a "$LOG_FILE"
# Collect the candidate indexes: larger than 10MB in schema public.
# - The variable name must be ASCII; bash does not accept non-ASCII identifiers
#   on the left of an assignment.
# - pg_stat_user_indexes names the index column indexrelname (not indexname).
# - -A -t makes psql print bare names, one per line, with no padding or header.
# NOTE(review): idx_scan = 0 selects indexes that were never scanned, which is
# an "unused index" test rather than a bloat test — confirm this is the
# intended selection rule.
BLOATED_INDEXES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -A -t -c "
    SELECT indexrelname FROM pg_stat_user_indexes
    WHERE pg_relation_size(indexrelid) > 10 * 1024 * 1024
      AND idx_scan = 0
      AND schemaname = 'public';
")
# Unquoted on purpose: word splitting yields one index name per iteration.
for INDEX in $BLOATED_INDEXES; do
    echo "REINDEX INDEX $INDEX ..." | tee -a "$LOG_FILE"
    reindexdb -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
        --index "$INDEX" \
        --concurrently \
        --verbose 2>&1 | tee -a "$LOG_FILE"
done
echo "=== REINDEX 维护完成: $(date) ===" | tee -a "$LOG_FILE"
```
### 4.3 Cron 任务配置
```bash
# /etc/cron.d/postgresql_maintenance
# Run the vacuum script every day at 02:00 as user postgres
0 2 * * * postgres /home/postgres/scripts/daily_vacuum.sh
# Run the reindex script every Sunday at 03:00 as user postgres
0 3 * * 0 postgres /home/postgres/scripts/weekly_reindex.sh
```
---
## 5. 监控指标
### 5.1 关键监控指标
| 指标 | 告警阈值 | 说明 |
|------|----------|------|
| index膨胀率 | > 20% | 触发 REINDEX |
| dead_tuples | > 10000 | 触发 VACUUM |
| last_autovacuum | > 24h | 可能 autovacuum 异常 |
| idx_scan | = 0 | 索引未使用,考虑删除 |
### 5.2 监控查询
```sql
-- Tables that need maintenance: more than 1000 dead tuples, worst first.
SELECT
    schemaname,
    relname AS table_name,
    n_dead_tup,
    n_live_tup,
    last_autovacuum,
    last_autoanalyze
FROM
    pg_stat_user_tables
WHERE
    n_dead_tup > 1000
ORDER BY
    n_dead_tup DESC;
-- Indexes that have never been scanned (primary-key indexes excluded),
-- largest first.
-- pg_stat_user_indexes exposes relname / indexrelname (not tablename /
-- indexname); the aliases keep the original output column names.
SELECT
    schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    idx_scan
FROM
    pg_stat_user_indexes
WHERE
    idx_scan = 0
    AND NOT indexrelname LIKE '%_pkey'
ORDER BY
    pg_relation_size(indexrelid) DESC;
```
---
## 6. 最佳实践
1. **避免在高峰期维护**: 维护操作安排在低峰期 (02:00-06:00)
2. **优先自动 vacuum**: 配置合理的 autovacuum 参数,减少手动干预
3. **监控索引膨胀**: 定期检测膨胀率,及时重建
4. **使用 CONCURRENTLY**: 关键表使用 `REINDEX CONCURRENTLY` 避免锁表
5. **保留维护日志**: 记录每次维护执行情况,便于分析问题
---
## 7. 恢复时间预估
| 操作 | 表大小 | 预计耗时 | 锁类型 |
|------|--------|----------|--------|
| VACUUM ANALYZE | 10GB | 5-10min | 轻量锁 |
| REINDEX | 1GB | 1-2min | 表锁* |
| REINDEX CONCURRENTLY | 1GB | 3-5min | 不阻塞读写 (SHARE UPDATE EXCLUSIVE) |
| VACUUM FULL | 10GB | 15-30min | 表锁 |
*使用 `REINDEX CONCURRENTLY` 可避免锁表
---
> **维护记录**:
> - v1.0 (2026-04-07): 初始版本