chore: initial import

This commit is contained in:
phamnazage-jpg
2026-05-12 17:47:32 +08:00
commit fc54ba84b2
104 changed files with 11575 additions and 0 deletions

311
scripts/aiops-single-node.sh Executable file
View File

@@ -0,0 +1,311 @@
#!/usr/bin/env bash
# Single-node lifecycle helper for the ai-ops stack: renders runtime config,
# builds the binary, drives docker/podman compose, and runs smoke, backup and
# restore operations. See usage() for the list of sub-commands.
#
# Strict mode: -E lets ERR traps propagate into functions, -e aborts on
# unhandled failures, -u rejects unset variables, pipefail makes a pipeline
# fail when any stage fails.
set -Eeuo pipefail
# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Generated artefacts (env file, rendered config, built binary) live here.
RUNTIME_DIR="$ROOT_DIR/.runtime"
# Destination directory for database dumps written by cmd_backup.
BACKUP_DIR="$ROOT_DIR/backups"
COMPOSE_FILE="$ROOT_DIR/docker-compose.single.yml"
ENV_FILE="$RUNTIME_DIR/single-node.env"
CONFIG_FILE="$RUNTIME_DIR/config.single.yaml"
BINARY_FILE="$RUNTIME_DIR/ai-ops"
# Defaults below can be overridden through the matching AI_OPS_* environment
# variable; load_env re-resolves them after sourcing $ENV_FILE.
PROJECT_NAME="${AI_OPS_PROJECT:-ai-ops-single}"
APP_PORT="${AI_OPS_APP_PORT:-18080}"
DB_PORT="${AI_OPS_DB_PORT:-15432}"
REDIS_PORT="${AI_OPS_REDIS_PORT:-16379}"
DB_USER="${AI_OPS_DB_USER:-aiops}"
DB_NAME="${AI_OPS_DB_NAME:-ai_ops}"
# NOTE(review): the fallback database password is a fixed, well-known literal;
# consider requiring AI_OPS_DB_PASSWORD or generating one like the JWT secret.
DB_PASSWORD="${AI_OPS_DB_PASSWORD:-aiops123}"
# Print an informational message on stdout, prefixed with "[ai-ops] ".
# Arguments: $@ - message words, joined with single spaces.
log() {
  local message="$*"
  printf '%s %s\n' '[ai-ops]' "$message"
}
# Report a fatal error on stderr and terminate the current shell with status 1.
# Arguments: $@ - message words, joined with single spaces.
fail() {
  local reason="$*"
  printf '[ai-ops][ERROR] %s\n' "$reason" 1>&2
  exit 1
}
# Abort via fail() unless the given command can be resolved by the shell.
# Arguments: $1 - command name to look up.
need_cmd() {
  local tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    fail "missing command: $tool"
  fi
}
# Print the container engine to use: "docker" when the docker CLI exists and
# `docker info` succeeds, otherwise "podman" when the podman CLI exists.
# Aborts via fail() when neither is usable.
engine() {
  if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
    echo docker
    return 0
  fi

  if command -v podman >/dev/null 2>&1; then
    echo podman
    return 0
  fi

  fail "docker or podman is required"
}
# Print the compose front-end matching the given engine.
# Arguments: $1 - engine name; "docker" selects the docker tools, any other
#                 value selects podman-compose.
# Outputs:   "docker compose", "docker-compose" or "podman-compose".
# Aborts via fail() when no suitable compose tool is available.
compose_cmd() {
  local runtime="$1"

  if [[ "$runtime" != docker ]]; then
    if ! command -v podman-compose >/dev/null 2>&1; then
      fail "podman-compose is required for podman mode"
    fi
    echo "podman-compose"
    return 0
  fi

  # Prefer the compose plugin over the standalone docker-compose binary.
  if docker compose version >/dev/null 2>&1; then
    echo "docker compose"
    return 0
  fi

  if ! command -v docker-compose >/dev/null 2>&1; then
    fail "docker compose plugin or docker-compose is required"
  fi
  echo "docker-compose"
}
# Print random bytes as lowercase hex.
# Arguments: $1 - number of random bytes (the output has twice as many digits).
# Uses `openssl rand` when openssl is installed, otherwise /dev/urandom + od.
rand_hex() {
  local nbytes="$1"

  if ! command -v openssl >/dev/null 2>&1; then
    head -c "$nbytes" /dev/urandom | od -An -tx1 | tr -d ' \n'
    return
  fi

  openssl rand -hex "$nbytes"
}
# Load $ENV_FILE (when present) into the environment and refresh the derived
# globals, letting values already present in the caller's environment win.
#
# Globals read:    ENV_FILE and the current AI_OPS_* variables.
# Globals written: exports every variable assigned in $ENV_FILE; updates
#                  PROJECT_NAME, APP_PORT, DB_PORT, REDIS_PORT, DB_USER,
#                  DB_NAME and DB_PASSWORD.
#
# Precedence per key: non-empty environment value > value in $ENV_FILE >
# the existing global default.
load_env() {
  # Every key written by write_env_if_missing must be listed here; a key that
  # is missing (AI_OPS_BIND_ADDR used to be) has its environment override
  # silently replaced by the file value once the file exists.
  local keys=(
    AI_OPS_PROJECT AI_OPS_APP_PORT AI_OPS_DB_PORT AI_OPS_REDIS_PORT
    AI_OPS_BIND_ADDR
    AI_OPS_DB_USER AI_OPS_DB_PASSWORD AI_OPS_DB_NAME
    AI_OPS_JWT_SECRET AI_OPS_METRICS_AUTH
    AI_OPS_POSTGRES_IMAGE AI_OPS_REDIS_IMAGE AI_OPS_RUNTIME_IMAGE
  )
  local saved_key saved_val
  local -A saved=()

  # Remember non-empty values that were set before the file is sourced.
  for saved_key in "${keys[@]}"; do
    saved_val="${!saved_key-}"
    if [[ -n "$saved_val" ]]; then
      saved["$saved_key"]="$saved_val"
    fi
  done

  if [[ -f "$ENV_FILE" ]]; then
    # set -a exports everything the file assigns so compose can see it.
    set -a
    # shellcheck disable=SC1090
    source "$ENV_FILE"
    set +a
  fi

  # Re-apply the remembered overrides. Iterating the fixed key list (rather
  # than the possibly empty map) stays safe under `set -u` on older bash.
  for saved_key in "${keys[@]}"; do
    saved_val="${saved[$saved_key]:-}"
    if [[ -n "$saved_val" ]]; then
      export "$saved_key=$saved_val"
    fi
  done

  PROJECT_NAME="${AI_OPS_PROJECT:-$PROJECT_NAME}"
  APP_PORT="${AI_OPS_APP_PORT:-$APP_PORT}"
  DB_PORT="${AI_OPS_DB_PORT:-$DB_PORT}"
  REDIS_PORT="${AI_OPS_REDIS_PORT:-$REDIS_PORT}"
  DB_USER="${AI_OPS_DB_USER:-$DB_USER}"
  DB_NAME="${AI_OPS_DB_NAME:-$DB_NAME}"
  DB_PASSWORD="${AI_OPS_DB_PASSWORD:-$DB_PASSWORD}"
}
# Create the runtime and backup directories and, only when $ENV_FILE does not
# exist yet, write it with the current settings plus freshly generated secrets.
#
# Globals read: RUNTIME_DIR, BACKUP_DIR, ENV_FILE, PROJECT_NAME, APP_PORT,
#               DB_PORT, REDIS_PORT, DB_USER, DB_PASSWORD, DB_NAME and the
#               optional AI_OPS_BIND_ADDR (default 127.0.0.1).
# An existing env file is never modified.
write_env_if_missing() {
  mkdir -p "$RUNTIME_DIR" "$BACKUP_DIR"
  if [[ -f "$ENV_FILE" ]]; then
    return 0
  fi

  # Generate the secrets before touching the file so a failing generator
  # aborts here instead of leaving an env file with empty secrets behind.
  local jwt_secret metrics_auth
  jwt_secret="$(rand_hex 32)" || fail "could not generate AI_OPS_JWT_SECRET"
  metrics_auth="$(rand_hex 24)" || fail "could not generate AI_OPS_METRICS_AUTH"
  if [[ -z "$jwt_secret" || -z "$metrics_auth" ]]; then
    fail "secret generation returned an empty value"
  fi

  # The restrictive umask is confined to a subshell: the env file is still
  # created owner-only, but the process-wide umask is left untouched so files
  # created later get the same mode on the first run as on every later run.
  (
    umask 077
    cat >"$ENV_FILE" <<EOF_ENV
AI_OPS_PROJECT=$PROJECT_NAME
AI_OPS_APP_PORT=$APP_PORT
AI_OPS_DB_PORT=$DB_PORT
AI_OPS_REDIS_PORT=$REDIS_PORT
AI_OPS_BIND_ADDR=${AI_OPS_BIND_ADDR:-127.0.0.1}
AI_OPS_DB_USER=$DB_USER
AI_OPS_DB_PASSWORD=$DB_PASSWORD
AI_OPS_DB_NAME=$DB_NAME
AI_OPS_JWT_SECRET=$jwt_secret
AI_OPS_METRICS_AUTH=$metrics_auth
AI_OPS_POSTGRES_IMAGE=docker.io/library/postgres:16-alpine
AI_OPS_REDIS_IMAGE=docker.io/library/redis:8-alpine
AI_OPS_RUNTIME_IMAGE=docker.io/library/alpine:3.19
EOF_ENV
  )
  log "created $ENV_FILE with generated secrets"
}
# Render $CONFIG_FILE from the current environment.
#
# Calls load_env first, then aborts (via the ":?" expansions) when
# AI_OPS_JWT_SECRET or AI_OPS_METRICS_AUTH is unset or empty. The file is
# overwritten on every call. Service hosts and in-container ports are fixed
# literals (postgres:5432, redis:6379, server port 8080); only the secrets and
# the database user/password/name are substituted.
#
# NOTE(review): in this view the YAML body has no indentation, so keys such as
# "port", "host" and "password" repeat at the same level. Confirm the intended
# nesting against the real file and the application's config loader.
# NOTE(review): the rendered file contains the JWT secret and DB password and is
# created with whatever umask is in effect when this runs - confirm the
# resulting permissions are acceptable.
write_config() {
load_env
: "${AI_OPS_JWT_SECRET:?missing AI_OPS_JWT_SECRET}"
: "${AI_OPS_METRICS_AUTH:?missing AI_OPS_METRICS_AUTH}"
# Unquoted heredoc delimiter: the ${...} references below are expanded by the shell.
cat >"$CONFIG_FILE" <<EOF_CFG
server:
port: 8080
mode: production
jwt_secret: "${AI_OPS_JWT_SECRET}"
metrics_auth: "${AI_OPS_METRICS_AUTH}"
database:
host: postgres
port: 5432
user: "${AI_OPS_DB_USER:-aiops}"
password: "${AI_OPS_DB_PASSWORD:-aiops123}"
dbname: "${AI_OPS_DB_NAME:-ai_ops}"
sslmode: disable
pool_size: 10
redis:
host: redis
port: 6379
password: ""
db: 0
metrics:
prometheus_url: "http://localhost:9090"
retention_days: 7
EOF_CFG
}
# Compile ./cmd/ai-ops from the repository root into $BINARY_FILE with cgo
# disabled. Requires the `go` toolchain; creates $RUNTIME_DIR when missing.
build_binary() {
  local -a build_args

  need_cmd go
  mkdir -p "$RUNTIME_DIR"
  log "building static binary"

  build_args=(build -buildvcs=false -o "$BINARY_FILE" ./cmd/ai-ops)
  # Subshell keeps the caller's working directory untouched.
  (
    cd "$ROOT_DIR" && CGO_ENABLED=0 go "${build_args[@]}"
  )
}
# Run a compose sub-command against $COMPOSE_FILE from the repository root.
# Arguments: $@ - forwarded verbatim to the compose tool (e.g. up -d, ps).
#
# The tool is chosen through engine/compose_cmd. The docker front-ends are
# given -p "$PROJECT_NAME"; podman-compose is invoked without a project flag.
# load_env runs first and the AI_OPS_* settings are exported for the tool.
compose() {
  local runtime tool
  local -a invocation

  runtime="$(engine)"
  tool="$(compose_cmd "$runtime")"
  load_env
  export AI_OPS_PROJECT PROJECT_NAME \
    AI_OPS_APP_PORT AI_OPS_DB_PORT AI_OPS_REDIS_PORT \
    AI_OPS_DB_USER AI_OPS_DB_PASSWORD AI_OPS_DB_NAME \
    AI_OPS_JWT_SECRET AI_OPS_METRICS_AUTH \
    AI_OPS_POSTGRES_IMAGE AI_OPS_REDIS_IMAGE AI_OPS_RUNTIME_IMAGE

  case "$tool" in
    "docker compose") invocation=(docker compose -p "$PROJECT_NAME") ;;
    "docker-compose") invocation=(docker-compose -p "$PROJECT_NAME") ;;
    *) invocation=(podman-compose) ;;
  esac

  # Subshell so the directory change does not leak to the caller.
  (cd "$ROOT_DIR" && "${invocation[@]}" -f "$COMPOSE_FILE" "$@")
}
# Run `<engine> exec` with the given arguments, where <engine> is whatever
# engine() reports.
# Arguments: $@ - exec options, container name and command, passed through.
container_exec() {
  local runtime
  runtime="$(engine)"
  "$runtime" exec "$@"
}
# GET a path from the app on 127.0.0.1:$APP_PORT and print the response body.
# Arguments: $1 - request path including the leading slash.
# curl runs with -f (fail on HTTP errors), -sS and a 5 second time limit.
curl_json() {
  local endpoint="$1"
  local url="http://127.0.0.1:${APP_PORT}${endpoint}"
  curl -fsS --max-time 5 "$url"
}
# Poll the readiness endpoint once per second until it answers successfully.
# Arguments: $1 - optional maximum number of attempts (default 60).
# Returns:   0 as soon as the endpoint is ready. After the last failed attempt
#            the recent ai-ops logs are printed (best effort) and the script
#            aborts via fail().
wait_ready() {
  local attempts="${1:-60}"
  local i

  [[ "$attempts" =~ ^[0-9]+$ ]] || fail "wait_ready: attempts must be a non-negative integer"

  # Arithmetic loop instead of `seq`: no external command is needed, and a
  # missing or broken seq can no longer turn the wait into a zero-iteration loop.
  for ((i = 1; i <= attempts; i++)); do
    if curl_json /actuator/health/ready >/dev/null 2>&1; then
      log "ready: http://127.0.0.1:${APP_PORT}"
      return 0
    fi
    sleep 1
  done

  # Log retrieval is diagnostic only; do not mask the real failure below.
  compose logs --tail=120 ai-ops || true
  fail "service did not become ready"
}
# `init` sub-command: make sure the env file exists, then (re)render the
# runtime config and report where the runtime files live.
cmd_init() {
  local runtime_root="$RUNTIME_DIR"

  write_env_if_missing
  write_config
  log "runtime initialized under ${runtime_root}"
}
# `start` sub-command: initialise the runtime files, build the binary, bring
# the whole stack up detached, wait for readiness and run the smoke checks.
cmd_start() {
  local prepare_step

  for prepare_step in cmd_init build_binary; do
    "$prepare_step"
  done

  compose up -d
  wait_ready
  cmd_smoke
}
# `stop` sub-command: run `compose down` for the stack.
cmd_stop() {
  compose down
}
# `restart` sub-command: restart only the ai-ops service, then block until the
# readiness endpoint answers.
cmd_restart() {
  local service="ai-ops"

  compose restart "$service"
  wait_ready
}
# `status` sub-command: list the compose services, then print the readiness
# response. A failing readiness request is tolerated so the listing is still
# useful while the app is down.
cmd_status() {
  compose ps
  if ! curl_json /actuator/health/ready; then
    return 0
  fi
}
# `logs` sub-command: show recent logs for one service.
# Environment: TAIL    - number of lines to show (default 200)
#              SERVICE - compose service name (default ai-ops)
cmd_logs() {
  local line_count="${TAIL:-200}"
  local service="${SERVICE:-ai-ops}"

  compose logs --tail="$line_count" "$service"
}
# Escape a string for embedding inside a double-quoted JSON string literal
# (backslash and double quote only). Prints the escaped text without newline.
_smoke_json_escape() {
  local text="$1"
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

# `smoke` sub-command: end-to-end check of the running app on
# 127.0.0.1:$APP_PORT - health and readiness endpoints, login, three
# authenticated API listings, the dashboard page and the OpenAPI document.
#
# Environment:
#   AI_OPS_SMOKE_USER      login user     (default: admin)
#   AI_OPS_SMOKE_PASSWORD  login password (default: admin)
# The credentials used to be fixed to admin/admin, which made every smoke run
# (and therefore start/restore/recover) fail once the password was changed.
#
# Any failing request aborts the script; a login response without a token
# aborts via fail().
cmd_smoke() {
  load_env
  need_cmd curl
  need_cmd python3

  local base_url="http://127.0.0.1:${APP_PORT}"
  local api_url="${base_url}/api/v1/ai-ops"
  local user password payload token endpoint

  user="$(_smoke_json_escape "${AI_OPS_SMOKE_USER:-admin}")"
  password="$(_smoke_json_escape "${AI_OPS_SMOKE_PASSWORD:-admin}")"
  payload="{\"username\":\"${user}\",\"password\":\"${password}\"}"

  log "health"
  curl_json /health >/dev/null
  curl_json /actuator/health/ready >/dev/null

  log "login"
  # The token is read from either the top level or a "data" wrapper object.
  token="$(curl -fsS --max-time 5 -X POST "${api_url}/login" -H 'Content-Type: application/json' -d "$payload" | python3 -c 'import sys,json; d=json.load(sys.stdin); print((d.get("data") or d).get("token", ""))')"
  [[ -n "$token" ]] || fail "login did not return token"

  log "authenticated APIs"
  for endpoint in 'alerts?page=1&page_size=5' rules channels; do
    curl -fsS --max-time 5 -H "Authorization: Bearer $token" "${api_url}/${endpoint}" >/dev/null
  done

  # Public pages are fetched without the bearer token.
  for endpoint in /ops/dashboard /openapi.json; do
    curl -fsS --max-time 5 "${base_url}${endpoint}" >/dev/null
  done

  log "SMOKE_OK"
}
# `backup` sub-command: dump the database with pg_dump (run inside the
# "${PROJECT_NAME}-postgres" container) into
# $BACKUP_DIR/ai_ops_<timestamp>.sql.gz.
#
# The dump is written to "<target>.partial" and renamed only after pg_dump
# succeeded and the archive decompresses to a non-empty stream. Previously a
# failed pg_dump left a truncated file under the final name, and the `test -s`
# check could never detect an empty dump because gzip output is never empty.
#
# Aborts via fail() when the dump fails or is empty; no *.sql.gz file is left
# behind in that case.
cmd_backup() {
  load_env
  mkdir -p "$BACKUP_DIR"

  local ts out partial bytes
  ts="$(date +%Y%m%d-%H%M%S)"
  out="$BACKUP_DIR/ai_ops_${ts}.sql.gz"
  partial="${out}.partial"
  log "creating database backup: $out"

  # pipefail is enabled inside the subshell so a pg_dump failure is seen here
  # regardless of the caller's shell options.
  if ! (
    set -o pipefail
    container_exec "${PROJECT_NAME}-postgres" pg_dump -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}" | gzip >"$partial"
  ); then
    rm -f -- "$partial"
    fail "pg_dump failed, no backup written: $out"
  fi

  # Decompressing doubles as an integrity check of the archive.
  bytes="$(gzip -dc <"$partial" | wc -c)" || bytes=0
  if ((bytes == 0)); then
    rm -f -- "$partial"
    fail "empty backup: $out"
  fi

  mv -f -- "$partial" "$out"
  log "BACKUP_OK $out"
}
# `restore` sub-command: replace the database contents with a gzipped SQL dump.
# Arguments: $1 - path to an existing *.sql.gz file.
#
# Steps: validate the archive, stop the app, drop and recreate the public
# schema, replay the dump through psql (ON_ERROR_STOP), start the app, wait
# for readiness and run the smoke checks.
#
# The archive is verified with `gzip -t` BEFORE anything destructive happens;
# previously a corrupt file was only noticed after the schema had been dropped.
# Decompression uses `gzip -dc` on stdin rather than `zcat`, which on some
# systems only accepts .Z files.
cmd_restore() {
  local file="${1:-}"
  [[ -n "$file" && -f "$file" ]] || fail "usage: $0 restore backups/file.sql.gz"
  gzip -t <"$file" 2>/dev/null || fail "not a valid gzip archive: $file"

  load_env
  log "restoring from $file"
  compose stop ai-ops

  container_exec "${PROJECT_NAME}-postgres" psql -v ON_ERROR_STOP=1 -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}" -c 'DROP SCHEMA public CASCADE; CREATE SCHEMA public;'
  # -i keeps stdin attached so psql reads the dump from the pipe.
  gzip -dc <"$file" | container_exec -i "${PROJECT_NAME}-postgres" psql -v ON_ERROR_STOP=1 -U "${AI_OPS_DB_USER:-aiops}" "${AI_OPS_DB_NAME:-ai_ops}"

  compose start ai-ops
  wait_ready
  cmd_smoke
  log "RESTORE_OK"
}
# `recover` sub-command: bring the data services up first, then the app, wait
# for readiness and run the smoke checks.
cmd_recover() {
  local -a data_services=(postgres redis)

  log "recovering single-node stack"
  compose up -d "${data_services[@]}"
  compose up -d ai-ops
  wait_ready
  cmd_smoke
  log "RECOVER_OK"
}
# `doctor` sub-command: verify local prerequisites (go, curl, python3, a
# container engine and its compose tool), list listeners on the configured
# ports, and render the runtime config through cmd_init.
#
# load_env runs before the port check so the ports from an existing env file
# are inspected; previously only the built-in defaults / caller environment
# were checked, even when the env file configured different ports.
cmd_doctor() {
  local eng

  log "doctor: commands"
  need_cmd go
  command -v curl >/dev/null 2>&1 || fail "missing curl"
  command -v python3 >/dev/null 2>&1 || fail "missing python3"
  # Resolve the engine once; a failure aborts here under `set -e`.
  eng="$(engine)"
  compose_cmd "$eng" >/dev/null

  log "doctor: ports"
  load_env
  # Informational only: `ss` may be missing and "no listener" is not an error.
  ss -ltn 2>/dev/null | grep -E ":(${APP_PORT}|${DB_PORT}|${REDIS_PORT}) " || true

  log "doctor: config"
  cmd_init
  log "DOCTOR_OK"
}
# Print the command overview on stdout, one line per entry.
usage() {
  local -a help_lines=(
    'Usage: scripts/aiops-single-node.sh <command>'
    'Commands:'
    'init Generate .runtime/single-node.env and config.single.yaml'
    'start Build binary, start DB/Redis/App, wait ready, run smoke'
    'stop Stop and remove containers, keep volumes'
    'restart Restart app container and wait ready'
    'status Show compose status and readiness JSON'
    'logs Show app logs; override SERVICE=postgres|redis|ai-ops TAIL=300'
    'smoke Run health/login/API/dashboard/openapi smoke checks'
    'backup Create backups/ai_ops_<timestamp>.sql.gz via pg_dump'
    'restore Restore a gzipped SQL backup, restart app, run smoke'
    'recover Recreate stopped containers from existing volumes and smoke test'
    'doctor Check local prerequisites and render runtime config'
  )
  printf '%s\n' "${help_lines[@]}"
}
# Dispatch the first argument to the matching cmd_* handler.
# Arguments: $1 - sub-command name; remaining arguments are forwarded only to
#                 backup and restore.
# An unknown or missing sub-command prints the usage text and exits with 2.
main() {
  local subcommand="${1:-}"
  if (($# > 0)); then
    shift
  fi

  case "$subcommand" in
    init) cmd_init ;;
    start) cmd_start ;;
    stop) cmd_stop ;;
    restart) cmd_restart ;;
    status) cmd_status ;;
    logs) cmd_logs ;;
    smoke) cmd_smoke ;;
    backup) cmd_backup "$@" ;;
    restore) cmd_restore "$@" ;;
    recover) cmd_recover ;;
    doctor) cmd_doctor ;;
    *)
      usage
      exit 2
      ;;
  esac
}
# Script entry point: hand all command-line arguments to main unchanged.
main "$@"