Files
user-system/scripts/ops/validate-alerting-package.ps1

223 lines
8.5 KiB
PowerShell
Raw Permalink Normal View History

# Script parameters:
#   -EvidenceDate        date folder name used under docs\evidence\ops (defaults to today, yyyy-MM-dd)
#   -BaselineReportPath  baseline report to read; when blank, one is located later in the script
#   -AlertmanagerPath    Alertmanager config to read; when blank, the repo copy is used
param(
    [string]$EvidenceDate = (Get-Date -Format 'yyyy-MM-dd'),
    [string]$BaselineReportPath = '',
    [string]$AlertmanagerPath = ''
)

# Turn non-terminating errors into terminating ones for the rest of the script.
$ErrorActionPreference = 'Stop'

# The project root is two directories above the folder that holds this script.
$projectRoot = Join-Path $PSScriptRoot '..\..' | Resolve-Path | Select-Object -ExpandProperty Path
$alertsPath = Join-Path $projectRoot 'deployment\alertmanager\alerts.yml'

# PowerShell variable names are case-insensitive, so $alertmanagerPath (used by the
# rest of the script) and the -AlertmanagerPath parameter are one and the same
# variable. Only a blank value needs replacing with the repo default.
if ([string]::IsNullOrWhiteSpace($AlertmanagerPath)) {
    $AlertmanagerPath = Join-Path $projectRoot 'deployment\alertmanager\alertmanager.yml'
}

# Each run writes a new timestamped report into the evidence folder for the date.
$evidenceRoot = Join-Path $projectRoot "docs\evidence\ops\$EvidenceDate\alerting"
$timestamp = Get-Date -Format 'yyyyMMdd-HHmmss'
$reportPath = Join-Path $evidenceRoot "ALERTING_PACKAGE_$timestamp.md"

# -Force creates missing parent folders and does not fail if the folder already exists.
$null = New-Item -ItemType Directory -Force -Path $evidenceRoot
function Get-LatestBaselineReportPath {
    <#
    .SYNOPSIS
    Returns the full path of the most recently written LOCAL_BASELINE_*.md report.

    .DESCRIPTION
    Looks first in docs\evidence\ops\<EvidenceDate>\observability under the project
    root. If nothing matches there, searches docs\evidence\ops recursively. In both
    cases the file with the newest LastWriteTime wins.

    .PARAMETER ProjectRoot
    Directory that contains the docs\evidence\ops tree.

    .PARAMETER EvidenceDate
    Name of the date folder to prefer.

    .OUTPUTS
    System.String. Throws when neither location contains a matching file.
    #>
    param(
        [Parameter(Mandatory = $true)][string]$ProjectRoot,
        [Parameter(Mandatory = $true)][string]$EvidenceDate
    )

    $datedDirectory = Join-Path $ProjectRoot "docs\evidence\ops\$EvidenceDate\observability"
    $opsDirectory = Join-Path $ProjectRoot 'docs\evidence\ops'

    # Ordered from most to least specific; the first search that yields a file wins.
    $searches = @(
        @{ Path = $datedDirectory; Filter = 'LOCAL_BASELINE_*.md' },
        @{ Path = $opsDirectory; Filter = 'LOCAL_BASELINE_*.md'; Recurse = $true }
    )

    foreach ($search in $searches) {
        # A missing directory is not an error here; it simply produces no candidates.
        $newest = Get-ChildItem @search -ErrorAction SilentlyContinue |
            Sort-Object LastWriteTime -Descending |
            Select-Object -First 1
        if ($newest) {
            return $newest.FullName
        }
    }

    throw "baseline report not found under $datedDirectory or $opsDirectory"
}
function Parse-AlertRules {
    <#
    .SYNOPSIS
    Extracts a summary of every "- alert:" entry from the raw text of a rules file.

    .DESCRIPTION
    This is a regex scan, not a YAML parser. Each rule spans from its "- alert:" line
    up to the next "- alert:" line or the end of the text. Within that span the first
    "severity:", "for:" and "expr:" entries are captured. The expression runs until
    the next "for:" or "labels:" line, or the end of the rule.

    Name, Severity and For have one pair of matching surrounding quotes removed, so
    that `- alert: "X"` and `severity: 'warning'` compare equal to their unquoted
    forms. Expr is returned as written (trimmed).

    .PARAMETER Content
    Full text of the rules file.

    .OUTPUTS
    One PSCustomObject per rule with Name, Severity, For and Expr string properties.
    A field that is absent from a rule is returned as an empty string.
    #>
    param(
        [Parameter(Mandatory = $true)][string]$Content
    )

    # Named $ruleMatches on purpose: assigning to $matches would overwrite the
    # automatic $Matches variable that the -match operator maintains.
    $ruleMatches = [regex]::Matches($Content, '(?ms)^\s*-\s*alert:\s*(?<name>[^\r\n]+)(?<body>.*?)(?=^\s*-\s*alert:|\z)')

    # Matches a value wrapped in one pair of identical quotes; group 2 is the inner text.
    $surroundingQuotes = '^([''"])(.*)\1$'

    # Collecting loop output avoids re-allocating the array on every "+=".
    $rules = @(
        foreach ($ruleMatch in $ruleMatches) {
            $body = $ruleMatch.Groups['body'].Value
            $severityMatch = [regex]::Match($body, '(?m)^\s*severity:\s*(?<severity>[^\r\n]+)')
            $forMatch = [regex]::Match($body, '(?m)^\s*for:\s*(?<duration>[^\r\n]+)')
            # The optional "|" lets a block-scalar expression be captured too.
            $exprMatch = [regex]::Match($body, '(?ms)^\s*expr:\s*\|?\s*(?<expr>.*?)(?=^\s*for:|^\s*labels:|\z)')

            [pscustomobject]@{
                Name     = ($ruleMatch.Groups['name'].Value.Trim() -replace $surroundingQuotes, '$2')
                Severity = ($severityMatch.Groups['severity'].Value.Trim() -replace $surroundingQuotes, '$2')
                For      = ($forMatch.Groups['duration'].Value.Trim() -replace $surroundingQuotes, '$2')
                Expr     = $exprMatch.Groups['expr'].Value.Trim()
            }
        }
    )
    return $rules
}
function Parse-AlertmanagerRoutes {
    <#
    .SYNOPSIS
    Extracts the root receiver, the "- match:" routes and the declared receiver
    names from the raw text of an Alertmanager configuration.

    .DESCRIPTION
    This is a regex scan, not a YAML parser.
      * RootReceiver is the value of the first "receiver:" line in the text.
      * Each route spans from a "- match:" line up to the next "- match:" line, the
        "receivers:" line, or the end of the text. Its first "severity:" and
        "receiver:" values are captured.
      * Receivers are the values of every "- name:" list entry.

    Values may be single-quoted, double-quoted or bare. For bare values, anything
    from a "#" onward is dropped. Matching only single-quoted receivers would let an
    unquoted root receiver be skipped, so a child route's quoted receiver would be
    reported as the root.

    .PARAMETER Content
    Full text of the Alertmanager configuration.

    .OUTPUTS
    PSCustomObject with RootReceiver (string), Routes (array of objects with
    Severity and Receiver) and Receivers (array of strings). Missing values are
    empty strings.
    #>
    param(
        [Parameter(Mandatory = $true)][string]$Content
    )

    # Scalar that follows "key:". All three alternatives capture into the same
    # "value" group, which .NET allows. Bare and double-quoted values must sit on
    # the same line as the key.
    $scalar = '(?:\s*''(?<value>[^'']+)''|[ \t]*"(?<value>[^"]+)"|[ \t]*(?<value>[^\s''"#][^\r\n#]*))'
    $receiverPattern = '(?m)^\s*receiver:' + $scalar
    $severityPattern = '(?m)^\s*severity:' + $scalar
    $receiverNamePattern = '(?m)^\s*-\s*name:' + $scalar

    $rootReceiverMatch = [regex]::Match($Content, $receiverPattern)
    $routeMatches = [regex]::Matches($Content, '(?ms)^\s*-\s*match:\s*(?<body>.*?)(?=^\s*-\s*match:|^\s*receivers:|\z)')

    $routes = @(
        foreach ($routeMatch in $routeMatches) {
            $body = $routeMatch.Groups['body'].Value
            [pscustomobject]@{
                Severity = [regex]::Match($body, $severityPattern).Groups['value'].Value.Trim()
                Receiver = [regex]::Match($body, $receiverPattern).Groups['value'].Value.Trim()
            }
        }
    )

    $receivers = @(
        [regex]::Matches($Content, $receiverNamePattern) |
            ForEach-Object { $_.Groups['value'].Value.Trim() }
    )

    return [pscustomobject]@{
        RootReceiver = $rootReceiverMatch.Groups['value'].Value.Trim()
        Routes       = $routes
        Receivers    = $receivers
    }
}
function Get-PlaceholderFindings {
    <#
    .SYNOPSIS
    Reports which known placeholder patterns occur in the given text.

    .DESCRIPTION
    Tests the text against a fixed list of regular expressions: unresolved
    ${ALERTMANAGER_*} variables, example.com addresses and hosts, and a literal
    'password' value for auth_password. The result is the pattern text itself,
    not the matched substring, in list order.

    .PARAMETER Content
    Text to scan.

    .OUTPUTS
    The regex pattern strings that matched; nothing when none match.
    #>
    param(
        [Parameter(Mandatory = $true)][string]$Content
    )

    $placeholderPatterns = @(
        '\$\{ALERTMANAGER_[A-Z0-9_]+\}',
        'admin@example\.com',
        'ops-team@example\.com',
        'dev-team@example\.com',
        'alertmanager@example\.com',
        'smtp\.example\.com',
        'auth_password:\s*''password'''
    )

    # -match is case-insensitive, so e.g. SMTP.EXAMPLE.COM is reported as well.
    return $placeholderPatterns | Where-Object { $Content -match $_ }
}
function Get-BaselineTimings {
    <#
    .SYNOPSIS
    Reads the login timing values out of a baseline report.

    .DESCRIPTION
    For each of login-initial, login-desktop, login-tablet and login-mobile, finds
    the first occurrence of "<name>: <digits>ms" (whitespace after the colon is
    optional, matching is case-sensitive) and stores the digits as an integer.

    .PARAMETER Content
    Text of the baseline report.

    .OUTPUTS
    Hashtable mapping each timing name that was found to its value. Names without
    a match are left out.
    #>
    param(
        [Parameter(Mandatory = $true)][string]$Content
    )

    $timingNames = 'login-initial', 'login-desktop', 'login-tablet', 'login-mobile'
    $found = @{}

    foreach ($timingName in $timingNames) {
        $timingPattern = [regex]::Escape($timingName) + ':\s*([0-9]+)ms'
        # -cmatch keeps the comparison case-sensitive and fills $Matches on success.
        if (-not ($Content -cmatch $timingPattern)) {
            continue
        }
        $found[$timingName] = [int]$Matches[1]
    }

    return $found
}
# When no baseline report was supplied, discover the most recent one.
if ([string]::IsNullOrWhiteSpace($BaselineReportPath)) {
    $baselineLookup = @{
        ProjectRoot  = $projectRoot
        EvidenceDate = $EvidenceDate
    }
    $BaselineReportPath = Get-LatestBaselineReportPath @baselineLookup
}

# Read each input as one UTF-8 string (-Raw), because the parsers below run
# multi-line regexes over the whole text. All reads happen before any parsing.
$readAsText = @{ Raw = $true; Encoding = 'UTF8' }
$alertsContent = Get-Content -Path $alertsPath @readAsText
$alertmanagerContent = Get-Content -Path $alertmanagerPath @readAsText
$baselineContent = Get-Content -Path $BaselineReportPath @readAsText

# Turn the raw text into the structures the checks work on.
$rules = Parse-AlertRules -Content $alertsContent
$routeConfig = Parse-AlertmanagerRoutes -Content $alertmanagerContent
$placeholderFindings = Get-PlaceholderFindings -Content $alertmanagerContent
$baselineTimings = Get-BaselineTimings -Content $baselineContent
# Rules that must be present in the alerts file.
$requiredRules = @(
    'HighErrorRate',
    'HighResponseTime',
    'DatabaseConnectionPoolExhausted',
    'HighLoginFailureRate'
)
$missingRules = @($requiredRules | Where-Object { $_ -notin $rules.Name })

# First route declared for each severity that needs its own receiver.
$criticalRoute = $routeConfig.Routes | Where-Object Severity -eq 'critical' | Select-Object -First 1
$warningRoute = $routeConfig.Routes | Where-Object Severity -eq 'warning' | Select-Object -First 1

# Receivers that must be declared in the Alertmanager config.
$requiredReceivers = @('default', 'critical-alerts', 'warning-alerts')
$missingReceivers = @($requiredReceivers | Where-Object { $_ -notin $routeConfig.Receivers })

# Take the number after the first ">" that is followed by digits in the
# HighResponseTime expression; it stays $null when the rule or number is absent.
$highResponseRule = $rules | Where-Object Name -eq 'HighResponseTime' | Select-Object -First 1
$highResponseThresholdSeconds = $null
if ($highResponseRule) {
    if ($highResponseRule.Expr -match '>\s*(?<threshold>[0-9.]+)') {
        $highResponseThresholdSeconds = [double]$Matches['threshold']
    }
}

# Largest parsed baseline timing, or 0 when none were parsed.
$maxBaselineMs = if ($baselineTimings.Count -gt 0) {
    ($baselineTimings.Values | Measure-Object -Maximum).Maximum
} else {
    0
}

# "critical=N, warning=N, info=N" summary of rule severities.
$ruleInventory = @(
    foreach ($inventorySeverity in 'critical', 'warning', 'info') {
        $severityCount = @($rules | Where-Object { $_.Severity -eq $inventorySeverity }).Count
        "$inventorySeverity=$severityCount"
    }
) -join ', '

# Structurally ready means: no required rule or receiver is missing, a root
# receiver was found, and both a critical and a warning route exist.
$hasRootReceiver = -not [string]::IsNullOrWhiteSpace($routeConfig.RootReceiver)
$structuralReady = ($missingRules.Count -eq 0) -and
    ($missingReceivers.Count -eq 0) -and
    $hasRootReceiver -and
    $criticalRoute -and
    $warningRoute

# External delivery is considered closed only when no placeholder pattern matched.
$externalDeliveryClosed = $placeholderFindings.Count -eq 0
# Render the findings as a Markdown report, save it to $reportPath and echo it.

# Format the threshold with the invariant culture so the decimal separator in the
# saved evidence does not depend on the locale of the machine running the script.
$highResponseThresholdText = if ($null -ne $highResponseThresholdSeconds) {
    $highResponseThresholdSeconds.ToString([System.Globalization.CultureInfo]::InvariantCulture) + 's'
} else {
    'unparsed'
}

# $maxBaselineMs is 0 when no timings were parsed; printing "0ms" would read as a
# real measurement, so report it the same way as the timings line below does.
$maxBaselineText = if ($baselineTimings.Count -gt 0) {
    '{0}ms' -f $maxBaselineMs
} else {
    'unavailable'
}

$reportLines = @(
    '# Alerting Package Validation',
    '',
    "- Generated at: $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss zzz')",
    "- Alerts file: $alertsPath",
    "- Alertmanager file: $alertmanagerPath",
    "- Baseline report: $BaselineReportPath",
    '',
    '## Structural Validation',
    '',
    "- Rule inventory: $ruleInventory",
    "- Missing required rules: $(if ($missingRules.Count -gt 0) { $missingRules -join ', ' } else { 'none' })",
    "- Root receiver: $($routeConfig.RootReceiver)",
    "- Critical route receiver: $(if ($criticalRoute) { $criticalRoute.Receiver } else { 'missing' })",
    "- Warning route receiver: $(if ($warningRoute) { $warningRoute.Receiver } else { 'missing' })",
    "- Missing required receivers: $(if ($missingReceivers.Count -gt 0) { $missingReceivers -join ', ' } else { 'none' })",
    "- Structural ready: $structuralReady",
    '',
    '## Threshold Alignment',
    '',
    "- HighResponseTime threshold: $highResponseThresholdText",
    "- Latest browser max baseline: $maxBaselineText",
    "- Latest browser timings: $(if ($baselineTimings.Count -gt 0) { ($baselineTimings.GetEnumerator() | Sort-Object Name | ForEach-Object { '{0}={1}ms' -f $_.Name, $_.Value }) -join ', ' } else { 'unavailable' })",
    '',
    '## External Delivery Readiness',
    '',
    "- Placeholder findings: $(if ($placeholderFindings.Count -gt 0) { $placeholderFindings -join ', ' } else { 'none' })",
    "- External delivery closed: $externalDeliveryClosed",
    '- Interpretation: rules and route topology can be reviewed locally, but unresolved template variables or example SMTP/accounts mean real notification delivery evidence is still open until environment-specific contacts and secrets are injected.',
    '',
    '## Conclusion',
    '',
    "- Repo-level alerting package structurally ready: $structuralReady",
    "- Repo-level oncall/delivery package fully closed: $externalDeliveryClosed",
    ''
)

# Write the report as a single string joined with the platform newline, then print
# the saved file so the caller sees exactly what was written.
Set-Content -Path $reportPath -Value ($reportLines -join [Environment]::NewLine) -Encoding UTF8
Get-Content $reportPath