Files
llm-intelligence/db/migrations/002_sprint1_complete_schema.sql
phamnazage-jpg 77e6610fd2
Some checks failed
CI / test (push) Has been cancelled
chore: prepare repository for publishing
2026-05-13 14:42:45 +08:00

363 lines
16 KiB
PL/PgSQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Sprint 1: 数据层补全
-- 将数据库从 3 张表升级到 8 张完整表 + audit_log
-- 日期: 2026-05-10
-- 负责人: 宰相
-- ============================================================
-- 一、新表创建
-- ============================================================
-- 1.1 model_provider: 模型厂商表
CREATE TABLE IF NOT EXISTS model_provider (
id BIGSERIAL PRIMARY KEY,
name TEXT NOT NULL UNIQUE, -- "OpenAI", "百度", "DeepSeek"
name_cn TEXT, -- 中文名
country TEXT NOT NULL DEFAULT 'unknown', -- "US" / "CN" / "EU"
website TEXT,
founded_year INTEGER,
description TEXT,
logo_url TEXT,
status TEXT NOT NULL DEFAULT 'active', -- active / deprecated
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
created_by TEXT DEFAULT 'system',
updated_by TEXT DEFAULT 'system',
deleted_at TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_provider_country ON model_provider(country);
CREATE INDEX IF NOT EXISTS idx_provider_status ON model_provider(status);
CREATE INDEX IF NOT EXISTS idx_provider_deleted ON model_provider(deleted_at) WHERE deleted_at IS NOT NULL;
COMMENT ON TABLE model_provider IS '模型厂商/开发商信息';
-- 1.2 operator: 运营商/云平台表
CREATE TABLE IF NOT EXISTS operator (
id BIGSERIAL PRIMARY KEY,
name TEXT NOT NULL UNIQUE, -- "阿里云", "AWS", "OpenRouter"
name_cn TEXT,
country TEXT NOT NULL DEFAULT 'unknown',
website TEXT,
description TEXT,
status TEXT NOT NULL DEFAULT 'active',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
created_by TEXT DEFAULT 'system',
updated_by TEXT DEFAULT 'system',
deleted_at TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_operator_country ON operator(country);
CREATE INDEX IF NOT EXISTS idx_operator_status ON operator(status);
COMMENT ON TABLE operator IS '模型运营平台/云服务商';
-- 1.3 region_pricing: 区域定价表(替代 model_prices
CREATE TABLE IF NOT EXISTS region_pricing (
id BIGSERIAL PRIMARY KEY,
model_id BIGINT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
operator_id BIGINT REFERENCES operator(id) ON DELETE SET NULL,
region TEXT NOT NULL DEFAULT 'global', -- global / cn / us / eu
currency TEXT NOT NULL DEFAULT 'USD',
input_price_per_mtok REAL NOT NULL DEFAULT 0,
output_price_per_mtok REAL NOT NULL DEFAULT 0,
request_price REAL, -- 按请求计费
effective_date DATE NOT NULL DEFAULT CURRENT_DATE,
is_free BOOLEAN NOT NULL DEFAULT FALSE,
source_url TEXT,
notes TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
created_by TEXT DEFAULT 'system',
updated_by TEXT DEFAULT 'system',
UNIQUE(model_id, operator_id, region, currency, effective_date),
-- CHECK 约束
CONSTRAINT chk_price_non_negative CHECK (input_price_per_mtok >= 0 AND output_price_per_mtok >= 0),
CONSTRAINT chk_currency_valid CHECK (currency IN ('CNY', 'USD', 'EUR'))
);
CREATE INDEX IF NOT EXISTS idx_region_pricing_model_id ON region_pricing(model_id);
CREATE INDEX IF NOT EXISTS idx_region_pricing_region ON region_pricing(region);
CREATE INDEX IF NOT EXISTS idx_region_pricing_currency ON region_pricing(currency);
CREATE INDEX IF NOT EXISTS idx_region_pricing_is_free ON region_pricing(is_free);
CREATE INDEX IF NOT EXISTS idx_region_pricing_effective ON region_pricing(effective_date);
COMMENT ON TABLE region_pricing IS '模型区域定价信息含CNY/USD/EUR';
-- 1.4 pricing_history: 价格变动历史
CREATE TABLE IF NOT EXISTS pricing_history (
id BIGSERIAL PRIMARY KEY,
model_id BIGINT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
region TEXT NOT NULL DEFAULT 'global',
currency TEXT NOT NULL DEFAULT 'USD',
old_input_price REAL,
new_input_price REAL,
old_output_price REAL,
new_output_price REAL,
change_percent REAL, -- 变动百分比
changed_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
reason TEXT, -- 变动原因(如官方公告)
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_pricing_history_model ON pricing_history(model_id);
CREATE INDEX IF NOT EXISTS idx_pricing_history_changed ON pricing_history(changed_at);
COMMENT ON TABLE pricing_history IS '模型价格变动历史追踪';
-- 1.5 free_tier: 免费政策库
CREATE TABLE IF NOT EXISTS free_tier (
id BIGSERIAL PRIMARY KEY,
model_id BIGINT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
operator_id BIGINT REFERENCES operator(id) ON DELETE SET NULL,
free_type TEXT NOT NULL DEFAULT 'limited', -- unlimited / limited / trial
max_requests INTEGER, -- 免费请求次数上限
max_tokens BIGINT, -- 免费Token上限
expiration_date DATE, -- 免费政策到期日
description TEXT,
source_url TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE(model_id, operator_id)
);
CREATE INDEX IF NOT EXISTS idx_free_tier_model ON free_tier(model_id);
CREATE INDEX IF NOT EXISTS idx_free_tier_type ON free_tier(free_type);
COMMENT ON TABLE free_tier IS '模型免费政策/额度信息';
-- 1.6 daily_report: 日报存储(替代 report_runs
CREATE TABLE IF NOT EXISTS daily_report (
id BIGSERIAL PRIMARY KEY,
report_date DATE NOT NULL UNIQUE,
status TEXT NOT NULL DEFAULT 'pending', -- pending / generated / failed
model_count INTEGER, -- 当日模型总数
new_models INTEGER DEFAULT 0, -- 新上线模型数
price_changes INTEGER DEFAULT 0, -- 价格变动数
free_models INTEGER DEFAULT 0, -- 免费模型数
summary_md TEXT, -- Markdown摘要
output_path TEXT,
error_message TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_daily_report_date ON daily_report(report_date);
CREATE INDEX IF NOT EXISTS idx_daily_report_status ON daily_report(status);
COMMENT ON TABLE daily_report IS '每日自动报告运行记录';
-- 1.7 user_subscription: 用户订阅Phase 2 基础)
CREATE TABLE IF NOT EXISTS user_subscription (
id BIGSERIAL PRIMARY KEY,
user_id TEXT NOT NULL,
subscribe_type TEXT NOT NULL DEFAULT 'daily', -- daily / price_alert / new_model
model_id BIGINT REFERENCES models(id) ON DELETE SET NULL,
provider_id BIGINT REFERENCES model_provider(id) ON DELETE SET NULL,
threshold REAL, -- 价格变动阈值(%)
is_active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE(user_id, subscribe_type, model_id, provider_id)
);
CREATE INDEX IF NOT EXISTS idx_subscription_user ON user_subscription(user_id);
CREATE INDEX IF NOT EXISTS idx_subscription_active ON user_subscription(is_active);
COMMENT ON TABLE user_subscription IS '用户订阅配置Phase 2';
-- 1.8 audit_log: 审计日志【新增】
CREATE TABLE IF NOT EXISTS audit_log (
id BIGSERIAL PRIMARY KEY,
table_name TEXT NOT NULL,
record_id BIGINT NOT NULL,
field_name TEXT,
old_value TEXT,
new_value TEXT,
operation TEXT NOT NULL, -- INSERT / UPDATE / DELETE
operator TEXT DEFAULT 'system', -- 操作人/采集器标识
batch_id TEXT, -- 采集批次号
source_url TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_audit_table ON audit_log(table_name);
CREATE INDEX IF NOT EXISTS idx_audit_record ON audit_log(record_id);
CREATE INDEX IF NOT EXISTS idx_audit_batch ON audit_log(batch_id);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_log(created_at);
COMMENT ON TABLE audit_log IS '数据变更审计日志(血缘追踪)';
-- ============================================================
-- 二、现有表字段扩充
-- ============================================================
-- 2.1 修改 models 表,添加 TECHNICAL_DESIGN 中定义的字段
DO $$
BEGIN
-- 添加 provider_id 外键字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='provider_id') THEN
ALTER TABLE models ADD COLUMN provider_id BIGINT REFERENCES model_provider(id) ON DELETE SET NULL;
END IF;
-- 添加 version 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='version') THEN
ALTER TABLE models ADD COLUMN version TEXT;
END IF;
-- 添加 modality 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='modality') THEN
ALTER TABLE models ADD COLUMN modality TEXT NOT NULL DEFAULT 'text';
END IF;
-- 添加 release_date 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='release_date') THEN
ALTER TABLE models ADD COLUMN release_date DATE;
END IF;
-- 添加 elo_score 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='elo_score') THEN
ALTER TABLE models ADD COLUMN elo_score REAL;
END IF;
-- 添加 benchmark_scores 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='benchmark_scores') THEN
ALTER TABLE models ADD COLUMN benchmark_scores JSONB;
END IF;
-- 添加 data_confidence 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='data_confidence') THEN
ALTER TABLE models ADD COLUMN data_confidence TEXT DEFAULT 'official';
END IF;
-- 添加 retrieved_at 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='retrieved_at') THEN
ALTER TABLE models ADD COLUMN retrieved_at TIMESTAMP;
END IF;
-- 添加 batch_id 字段(血缘追踪)
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='batch_id') THEN
ALTER TABLE models ADD COLUMN batch_id TEXT;
END IF;
-- 添加 collector_version 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='collector_version') THEN
ALTER TABLE models ADD COLUMN collector_version TEXT DEFAULT 'v1.0';
END IF;
-- 添加 source_url 字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='source_url') THEN
ALTER TABLE models ADD COLUMN source_url TEXT;
END IF;
-- 添加审计字段
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='created_by') THEN
ALTER TABLE models ADD COLUMN created_by TEXT DEFAULT 'system';
END IF;
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='updated_by') THEN
ALTER TABLE models ADD COLUMN updated_by TEXT DEFAULT 'system';
END IF;
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='models' AND column_name='deleted_at') THEN
ALTER TABLE models ADD COLUMN deleted_at TIMESTAMP;
END IF;
END $$;
-- 2.2 为 models 表添加 CHECK 约束
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname='chk_models_context_length') THEN
ALTER TABLE models ADD CONSTRAINT chk_models_context_length CHECK (context_length IS NULL OR context_length <= 10000000);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname='chk_models_modality') THEN
ALTER TABLE models ADD CONSTRAINT chk_models_modality CHECK (modality IN ('text', 'vision', 'audio', 'video', 'code', 'multimodal'));
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname='chk_models_data_confidence') THEN
ALTER TABLE models ADD CONSTRAINT chk_models_data_confidence CHECK (data_confidence IN ('official', 'inferred', 'unverified', 'stale'));
END IF;
END $$;
-- 2.3 创建 models 表的新索引
CREATE INDEX IF NOT EXISTS idx_models_provider_id ON models(provider_id);
CREATE INDEX IF NOT EXISTS idx_models_modality ON models(modality);
CREATE INDEX IF NOT EXISTS idx_models_data_confidence ON models(data_confidence);
CREATE INDEX IF NOT EXISTS idx_models_retrieved_at ON models(retrieved_at);
CREATE INDEX IF NOT EXISTS idx_models_batch_id ON models(batch_id);
-- ============================================================
-- 三、审计触发器(自动更新 updated_at
-- ============================================================
-- 3.1 创建触发器函数
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
-- 3.2 为所有业务表创建触发器
DO $$
DECLARE
tbl TEXT;
tables TEXT[] := ARRAY['models', 'model_provider', 'operator', 'region_pricing',
'pricing_history', 'free_tier', 'daily_report',
'user_subscription'];
BEGIN
FOREACH tbl IN ARRAY tables
LOOP
IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = tbl || '_updated_at') THEN
EXECUTE format('CREATE TRIGGER %I_updated_at BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_updated_at_column()', tbl, tbl);
END IF;
END LOOP;
END $$;
-- ============================================================
-- 四、数据迁移model_prices → region_pricing
-- ============================================================
-- 4.1 先创建默认 operatorOpenRouter
INSERT INTO operator (name, name_cn, country, description)
VALUES ('OpenRouter', 'OpenRouter', 'US', 'OpenRouter API聚合平台')
ON CONFLICT (name) DO NOTHING;
-- 4.2 迁移 model_prices 数据到 region_pricing
INSERT INTO region_pricing (
model_id, operator_id, region, currency,
input_price_per_mtok, output_price_per_mtok,
is_free, effective_date, source_url, created_at
)
SELECT
mp.model_id,
(SELECT id FROM operator WHERE name = 'OpenRouter' LIMIT 1),
'global',
mp.currency,
COALESCE(mp.input_price_per_mtok, 0),
COALESCE(mp.output_price_per_mtok, 0),
COALESCE(m.is_free, FALSE),
COALESCE(mp.effective_date, CURRENT_DATE),
mp.source_url,
mp.created_at
FROM model_prices mp
JOIN models m ON mp.model_id = m.id
ON CONFLICT (model_id, operator_id, region, currency, effective_date) DO NOTHING;
-- ============================================================
-- 五、迁移 report_runs → daily_report
-- ============================================================
INSERT INTO daily_report (report_date, status, output_path, error_message, created_at)
SELECT report_date, status, output_path, error_message, created_at
FROM report_runs
ON CONFLICT (report_date) DO NOTHING;
-- ============================================================
-- 六、完成标记
-- ============================================================
SELECT 'Sprint 1 Schema Migration Complete' AS status;