fix: harden review and verifier governance

This commit is contained in:
phamnazage-jpg
2026-05-29 18:48:48 +08:00
parent 88833fac8b
commit e999d31b25
133 changed files with 2538 additions and 159 deletions

View File

@@ -43,10 +43,12 @@ type TaskResult struct {
StderrSummary string
Error string
Reason string
FailureClass string
EvidenceGrade string
TaskType string
}
func main() {
dryRun := flag.Bool("dry-run", false, "print commands without executing")
taskFilter := flag.String("task", "", "filter by task ID (e.g. T-Q2-1.1)")
@@ -119,6 +121,9 @@ func main() {
if r.StdoutSummary != "" && (!r.Verified || r.Reason != "" || r.Error != "") {
fmt.Printf(" stdout: %s\n", r.StdoutSummary)
}
if r.FailureClass != "" {
fmt.Printf(" class: %s\n", r.FailureClass)
}
if r.Error != "" {
fmt.Printf(" ERROR: %s\n", r.Error)
} else if r.ExitCode != 0 && r.Stdout != "" {
@@ -126,14 +131,14 @@ func main() {
} else if r.Reason != "" {
fmt.Printf(" reason: %s\n", r.Reason)
}
}
fmt.Printf("\n=== Summary: %d passed, %d failed ===\n", passed, failed)
if failed > 0 {
os.Exit(1)
}
os.Exit(determineProcessExitCode(results))
}
func resolveTasksPath(flagValue string) string {
envValue := os.Getenv("TASKS_PATH")
wd := ""
@@ -305,6 +310,7 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
if !t.HasVerification {
r.Reason = "no verification block"
r.FailureClass = "missing_verification"
r.Verified = true // No verification = trivially pass
return r
}
@@ -318,11 +324,17 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
if validationErr := validateVerification(t.Verification); validationErr != "" {
r.Verified = false
r.Reason = validationErr
r.FailureClass = "verification_config_failure"
return r
}
if t.Verification.Command == "" {
if t.Verification.Mode == "artifact_present" {
r.Verified = true
return r
}
r.Reason = "verification.command is empty"
r.FailureClass = "verification_config_failure"
r.Verified = false
return r
}
@@ -351,6 +363,7 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
r.ExitCode = 0
if err != nil {
r.ExitCode = -1
r.FailureClass = "tool_execution_failure"
if ctx.Err() == context.DeadlineExceeded {
r.Error = fmt.Sprintf("timeout after %ds", t.Verification.TimeoutSeconds)
} else {
@@ -394,17 +407,50 @@ func verifyTask(t taskEntry, dryRun bool) TaskResult {
r.Verified = matched
if !matched {
r.Reason = fmt.Sprintf("expected_evidence '%s' not found in output", evidence)
r.FailureClass = "business_assertion_failure"
}
} else if r.ExitCode == 0 {
r.Verified = true
} else {
r.Verified = false
r.Reason = fmt.Sprintf("exit code %d", r.ExitCode)
r.FailureClass = "tool_execution_failure"
}
return r
}
// classifyFailureTier maps a single task result to a numeric failure tier.
//
// It returns:
//   - 0 when r.Verified is true (the EvidenceGrade is not consulted);
//   - 2 when the result is unverified and r.EvidenceGrade is exactly
//     "runtime-verified" (the comparison is case- and whitespace-sensitive);
//   - 3 for every other unverified result.
func classifyFailureTier(r TaskResult) int {
	switch {
	case r.Verified:
		return 0
	case r.EvidenceGrade == "runtime-verified":
		return 2
	default:
		return 3
	}
}
// determineProcessExitCode folds the failure tiers of all results into one
// process exit code.
//
// Every result is classified with classifyFailureTier. If any result is
// tier 2 the exit code is 2; otherwise, if any result is tier 3 the exit
// code is 3; otherwise (including an empty or nil slice) it is 0. Tier 2
// takes precedence over tier 3 regardless of the order of the results.
func determineProcessExitCode(results []TaskResult) int {
	var sawTier2, sawTier3 bool
	for i := range results {
		switch classifyFailureTier(results[i]) {
		case 2:
			sawTier2 = true
		case 3:
			sawTier3 = true
		}
	}

	switch {
	case sawTier2:
		return 2
	case sawTier3:
		return 3
	}
	return 0
}
func normalizeEvidenceGrade(mode, explicit string) string {
if explicit = strings.TrimSpace(explicit); explicit != "" {
return explicit
@@ -532,6 +578,14 @@ func validateVerification(v Verification) string {
if (v.TaskType == "code" || v.TaskType == "automation") && v.Mode == "semantic" {
return fmt.Sprintf("semantic-only verification is not allowed for %s tasks", v.TaskType)
}
if v.Mode == "artifact_present" {
if strings.TrimSpace(v.Command) != "" || strings.TrimSpace(v.ExpectedEvidence) != "" {
return "artifact_present does not allow command or expected_evidence; use test_pass for executable verification"
}
if v.TaskType == "code" || v.TaskType == "automation" || v.TaskType == "data" || v.TaskType == "analysis" {
return fmt.Sprintf("artifact_present is not allowed for %s tasks", v.TaskType)
}
}
return ""
}