#!/usr/bin/env bash
#
# collector_stats window audit.
#
# Reads the newest rows of the collector_stats table (or the first rows of a
# TSV file with the same four columns), counts successes and failures,
# classifies every failure by its error message and prints a one-line summary
# followed by one detail line per row. With --assert-success-rate the script
# exits non-zero when the success rate is below the given percentage.
#
# Environment:
#   DATABASE_URL  connection string used when --db is not given
#   LLM_NOW       optional override for the current time
#                 ("YYYY-MM-DD HH:MM"), used when ageing precondition failures
set -euo pipefail

# Number of rows in the audited window (overridden by --limit).
LIMIT=7
# Connection string handed to psql (overridden by --db).
DB_URL="${DATABASE_URL:-}"
# TSV file to read instead of the database (set by --input).
INPUT_PATH=""
# Minimum success rate in percent; empty means "report only, do not assert".
THRESHOLD=""
# ASCII unit separator (0x1f): field separator for psql output and for
# splitting rows, so that empty fields survive `read`.
FIELD_SEP=$'\x1f'
# Raw "now" override forwarded to minutes_since_created; empty = wall clock.
NOW_RAW="${LLM_NOW:-}"
# Number of precondition failures older than AGED_PRECONDITION_MINUTES.
AGED_PRECONDITION_COUNT=0
# Age in minutes (1440 = 24 h) beyond which a precondition failure is
# reported as aged_precondition_missing.
AGED_PRECONDITION_MINUTES=1440

#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   the help text on stdout (callers redirect to stderr on error)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, no expansion.
  cat <<'EOF'
用法:
bash scripts/collector_stats_window_audit.sh --db <DATABASE_URL> [--limit N] [--assert-success-rate PCT]
bash scripts/collector_stats_window_audit.sh --input <tsv-file> [--limit N] [--assert-success-rate PCT]

输入 TSV 列顺序:
source<TAB>success<TAB>error_message<TAB>created_at
EOF
}

#######################################
# Map a collector error message to a failure category.
# Arguments: $1 - error message (may be empty or missing)
# Outputs:   one of precondition_missing, external_provider_failure,
#            collector_runtime_failure on stdout
# Returns:   0
#######################################
classify_failure() {
  local message normalized
  message="${1:-}"
  # Matching is case-insensitive: compare against a lower-cased copy.
  normalized="$(printf '%s' "$message" | tr '[:upper:]' '[:lower:]')"

  # An empty or spaces-only message carries no hint; treat it as a runtime
  # failure of the collector itself.
  if [[ -z "${normalized// }" ]]; then
    printf '%s\n' "collector_runtime_failure"
    return
  fi

  case "$normalized" in
    # Missing configuration / credentials / schema.
    *"api key"* | *"openrouter_api_key"* | *"database_url"* | \
    *"strict real mode"* | *"password authentication failed"* | \
    *"permission denied"* | *"role does not exist"* | \
    *"relation does not exist"* | *"must provide"* | *"未设置"*)
      printf '%s\n' "precondition_missing"
      ;;
    # Throttling, network trouble, upstream HTTP errors and upstream page
    # layout changes.
    *"429"* | *"rate limit"* | *"too many requests"* | *"timeout"* | \
    *"temporarily unavailable"* | *"transport closed"* | \
    *"connection reset"* | *"connection refused"* | *"eof"* | \
    *"tls handshake timeout"* | *"no such host"* | *"i/o timeout"* | \
    *"unexpected status 403"* | *"unexpected status 502"* | \
    *"unexpected status 503"* | *"unexpected status 504"* | \
    *"signature drift"* | *"no pricing cards found"* | \
    *"no model rows parsed"* | *"no model overview cards parsed"* | \
    *"unexpected "*" pricing content"*)
      # The last pattern keeps its inner * outside the quotes: a quoted *
      # is literal and would never match "unexpected <provider> pricing
      # content".
      printf '%s\n' "external_provider_failure"
      ;;
    *)
      printf '%s\n' "collector_runtime_failure"
      ;;
  esac
}

#######################################
# Print how many whole minutes lie between a row's creation time and "now".
# Globals:   NOW_RAW (read) - optional "now" override; empty = wall clock
# Arguments: $1 - creation timestamp, "YYYY-MM-DD HH:MM:SS"
#                 (seconds are optional)
# Outputs:   integer minute count on stdout (floored; negative when the
#            timestamp lies in the future)
# Returns:   non-zero when a timestamp cannot be parsed
#######################################
minutes_since_created() {
  local created_at="$1"
  python3 - "$created_at" "${NOW_RAW:-}" <<'PY'
import sys
from datetime import datetime

# Both values accept the same two layouts, so a created_at value can be
# reused verbatim as the "now" override.
FORMATS = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M')


def parse(raw):
    for fmt in FORMATS:
        try:
            return datetime.strptime(raw, fmt)
        except ValueError:
            continue
    # One-line diagnostic on stderr, exit status 1.
    sys.exit('unparseable timestamp: %r' % raw)


created = parse(sys.argv[1].strip())
raw_now = sys.argv[2].strip()
now = parse(raw_now) if raw_now else datetime.now()
print(int((now - created).total_seconds() // 60))
PY
}

#######################################
# Fetch the newest LIMIT rows of collector_stats through psql.
# Globals:   DB_URL (read), LIMIT (read), FIELD_SEP (read)
# Arguments: none
# Outputs:   one row per line on stdout, fields separated by FIELD_SEP:
#            source, success ('t'/'f'), error_message, created_at
# Returns:   1 when DB_URL is empty or LIMIT is not a non-negative integer;
#            otherwise psql's exit status
#######################################
fetch_rows_from_db() {
  if [[ -z "${DB_URL:-}" ]]; then
    echo "missing --db / DATABASE_URL" >&2
    return 1
  fi

  # LIMIT is spliced into the SQL text below (psql -c does not interpolate
  # psql variables), so refuse anything that is not a plain integer.
  if [[ ! "${LIMIT:-}" =~ ^[0-9]+$ ]]; then
    echo "invalid --limit: ${LIMIT:-} (expected a non-negative integer)" >&2
    return 1
  fi

  local sql
  sql="
SELECT
COALESCE(source, ''),
CASE WHEN success THEN 't' ELSE 'f' END,
COALESCE(error_message, ''),
TO_CHAR(created_at, 'YYYY-MM-DD HH24:MI:SS')
FROM collector_stats
ORDER BY created_at DESC
LIMIT ${LIMIT};
"

  # -A unaligned, -t tuples only, -q quiet, -F custom field separator.
  psql "$DB_URL" -F "$FIELD_SEP" -Atqc "$sql"
}

#######################################
# Read the first LIMIT lines of the TSV file given with --input.
# Globals:   INPUT_PATH (read), LIMIT (read)
# Arguments: none
# Outputs:   the selected lines, unmodified, on stdout
# Returns:   1 when INPUT_PATH is empty or LIMIT is not a non-negative
#            integer; otherwise head's exit status
#######################################
fetch_rows_from_file() {
  if [[ -z "${INPUT_PATH:-}" ]]; then
    echo "missing --input" >&2
    return 1
  fi

  # A negative count changes head's meaning on GNU ("all but the last N"),
  # so only accept a plain integer.
  if [[ ! "${LIMIT:-}" =~ ^[0-9]+$ ]]; then
    echo "invalid --limit: ${LIMIT:-} (expected a non-negative integer)" >&2
    return 1
  fi

  # "--" keeps a path that starts with "-" from being parsed as an option.
  head -n "$LIMIT" -- "$INPUT_PATH"
}

#######################################
# Parse the command line into the configuration globals.
# Globals:   DB_URL, INPUT_PATH, LIMIT, THRESHOLD (written)
# Arguments: the script's command-line arguments
# Outputs:   diagnostics and the help text on stderr for invalid input;
#            the help text on stdout for --help / -h
# Returns:   exits 0 after --help; exits 1 on an unknown flag, a flag
#            without its value, or a malformed --limit /
#            --assert-success-rate value
#######################################
parse_args() {
  local integer_re='^[0-9]+$'
  local number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'

  while [[ $# -gt 0 ]]; do
    # Flags that take a value: fail with a clear message instead of an
    # "unbound variable" error from set -u when the value is missing.
    case "$1" in
      --db | --input | --limit | --assert-success-rate)
        if [[ $# -lt 2 ]]; then
          echo "missing value for $1" >&2
          usage >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --db)
        DB_URL="$2"
        shift 2
        ;;
      --input)
        INPUT_PATH="$2"
        shift 2
        ;;
      --limit)
        if [[ ! "$2" =~ $integer_re ]]; then
          echo "invalid --limit: $2 (expected a non-negative integer)" >&2
          exit 1
        fi
        LIMIT="$2"
        shift 2
        ;;
      --assert-success-rate)
        # A non-numeric threshold would make the final awk comparison a
        # string comparison.
        if [[ ! "$2" =~ $number_re ]]; then
          echo "invalid --assert-success-rate: $2 (expected a number)" >&2
          exit 1
        fi
        THRESHOLD="$2"
        shift 2
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        echo "unknown arg: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# ---------------------------------------------------------------------------
# Main flow: load the window, classify every row, print the summary and the
# per-row details, then optionally assert the success rate.
# ---------------------------------------------------------------------------

# --input takes precedence over the database.
if [[ -n "$INPUT_PATH" ]]; then
  ROWS="$(fetch_rows_from_file)"
else
  ROWS="$(fetch_rows_from_db)"
fi

SUCCESS_COUNT=0
FAILURE_COUNT=0
PRECONDITION_COUNT=0
EXTERNAL_COUNT=0
RUNTIME_COUNT=0
UNKNOWN_COUNT=0
ROW_COUNT=0
DETAIL_LINES=""

while IFS= read -r raw_line; do
  # Drop a trailing carriage return so CRLF files do not leave "\r" on the
  # last field (created_at), which would break timestamp parsing.
  raw_line=${raw_line%$'\r'}
  if [[ -z "$raw_line" ]]; then
    continue
  fi

  # Tabs (file input) are rewritten to FIELD_SEP: tab is IFS whitespace and
  # would collapse empty fields, a non-whitespace separator does not.
  normalized_line="${raw_line//$'\t'/$FIELD_SEP}"
  IFS="$FIELD_SEP" read -r source success error_message created_at <<< "$normalized_line"
  ROW_COUNT=$((ROW_COUNT + 1))

  if [[ "$success" == "t" || "$success" == "true" ]]; then
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    category="success"
    outcome="success"
    rendered_error="-"
  else
    FAILURE_COUNT=$((FAILURE_COUNT + 1))
    category="$(classify_failure "$error_message")"
    outcome="failure"
    rendered_error="${error_message:-unknown}"

    # Precondition failures older than the ageing limit get their own
    # category and are later removed from the success-rate denominator.
    if [[ "$category" == "precondition_missing" ]]; then
      age_minutes="$(minutes_since_created "${created_at:-1970-01-01 00:00:00}")"
      if [[ "$age_minutes" -gt "$AGED_PRECONDITION_MINUTES" ]]; then
        category="aged_precondition_missing"
        AGED_PRECONDITION_COUNT=$((AGED_PRECONDITION_COUNT + 1))
      fi
    fi

    case "$category" in
      precondition_missing)
        PRECONDITION_COUNT=$((PRECONDITION_COUNT + 1))
        ;;
      aged_precondition_missing)
        # Already counted in AGED_PRECONDITION_COUNT above.
        ;;
      external_provider_failure)
        EXTERNAL_COUNT=$((EXTERNAL_COUNT + 1))
        ;;
      collector_runtime_failure)
        RUNTIME_COUNT=$((RUNTIME_COUNT + 1))
        ;;
      *)
        UNKNOWN_COUNT=$((UNKNOWN_COUNT + 1))
        ;;
    esac
  fi

  DETAIL_LINES+="sample_${ROW_COUNT} created_at=${created_at:-unknown} source=${source:-unknown} outcome=${outcome} category=${category} error=${rendered_error}"$'\n'
done <<< "$ROWS"

# Empty window: print a zeroed summary; an assertion cannot be satisfied
# without data, so it fails.
if [[ "$ROW_COUNT" -eq 0 ]]; then
  echo "window_size=0 success_count=0 failure_count=0 success_rate=0.00 threshold=${THRESHOLD:-n/a} precondition_missing=0 aged_precondition_missing=0 external_provider_failure=0 collector_runtime_failure=0 unknown_failure=0"

  echo "sample_window=empty"
  if [[ -n "$THRESHOLD" ]]; then
    exit 1
  fi
  exit 0
fi

# Success rate in percent over the rows that are not aged precondition
# failures; 0.00 when no such rows remain.
SUCCESS_RATE="$(awk -v success="$SUCCESS_COUNT" -v aged="$AGED_PRECONDITION_COUNT" -v total="$ROW_COUNT" 'BEGIN { effective_total = total - aged; if (effective_total <= 0) { printf "0.00" } else { printf "%.2f", (success * 100) / effective_total } }')"

echo "window_size=${ROW_COUNT} success_count=${SUCCESS_COUNT} failure_count=${FAILURE_COUNT} success_rate=${SUCCESS_RATE} threshold=${THRESHOLD:-n/a} precondition_missing=${PRECONDITION_COUNT} aged_precondition_missing=${AGED_PRECONDITION_COUNT} external_provider_failure=${EXTERNAL_COUNT} collector_runtime_failure=${RUNTIME_COUNT} unknown_failure=${UNKNOWN_COUNT}"
printf '%s' "$DETAIL_LINES"

# Optional gate: exit 0 when the rate reaches the threshold, 1 otherwise.
if [[ -n "$THRESHOLD" ]]; then
  if awk -v actual="$SUCCESS_RATE" -v threshold="$THRESHOLD" 'BEGIN { exit !(actual >= threshold) }'; then
    exit 0
  fi
  exit 1
fi