// verification_executor.go // Reads TASKS.md, runs each task's verification.command, // matches expected_evidence, outputs pass/fail report. // // Usage: go run scripts/verification_executor.go [--dry-run] [--task T-Q2-1.1] //go:build llm_script package main import ( "bufio" "bytes" "context" "flag" "fmt" "os" "os/exec" "path/filepath" "regexp" "runtime" "strings" "time" ) type Verification struct { Mode string Command string ExpectedEvidence string TimeoutSeconds int EvidenceGrade string TaskType string } type TaskResult struct { TaskID string TaskName string Verified bool Command string ExitCode int Stdout string Stderr string StdoutSummary string StderrSummary string Error string Reason string EvidenceGrade string TaskType string } func main() { dryRun := flag.Bool("dry-run", false, "print commands without executing") taskFilter := flag.String("task", "", "filter by task ID (e.g. T-Q2-1.1)") tasksPathFlag := flag.String("tasks", "", "path to TASKS.md") statusFilter := flag.String("status", "all", "filter by normalized status: all|completed|in_progress|planned|paused|unknown") completedOnly := flag.Bool("completed-only", false, "shortcut for --status completed") flag.Parse() tasksPath := resolveTasksPath(*tasksPathFlag) f, err := os.Open(tasksPath) if err != nil { fmt.Fprintf(os.Stderr, "open TASKS.md: %v\n", err) os.Exit(1) } defer f.Close() tasks := parseTasks(f) if *taskFilter != "" { var filtered []taskEntry for _, t := range tasks { if t.ID == *taskFilter { filtered = append(filtered, t) } } tasks = filtered } effectiveStatus := *statusFilter if *completedOnly { effectiveStatus = "completed" } tasks, err = filterTasksByStatus(tasks, effectiveStatus) if err != nil { fmt.Fprintf(os.Stderr, "filter tasks: %v\n", err) os.Exit(1) } fmt.Printf("=== Verification Report (%s) ===\n", time.Now().Format("2006-01-02 15:04")) fmt.Printf("Tasks checked: %d | Dry-run: %v | Status: %s | TASKS: %s\n\n", len(tasks), *dryRun, effectiveStatus, tasksPath) var passed, failed int var results []TaskResult for _, t := range tasks { r := verifyTask(t, *dryRun) results = append(results, r) if r.Verified { passed++ } else { failed++ } } for _, r := range results { icon := "✅" if !r.Verified { icon = "❌" } fmt.Printf("%s [%s] %s\n", icon, r.TaskID, r.TaskName) if r.Command != "" { fmt.Printf(" cmd: %s\n", r.Command) } if r.EvidenceGrade != "" || r.TaskType != "" { fmt.Printf(" grade: %s | type: %s\n", r.EvidenceGrade, r.TaskType) } if r.StderrSummary != "" { fmt.Printf(" stderr: %s\n", r.StderrSummary) } if r.StdoutSummary != "" && (!r.Verified || r.Reason != "" || r.Error != "") { fmt.Printf(" stdout: %s\n", r.StdoutSummary) } if r.Error != "" { fmt.Printf(" ERROR: %s\n", r.Error) } else if r.ExitCode != 0 && r.Stdout != "" { fmt.Printf(" output: %s\n", strings.TrimSpace(r.Stdout)) } else if r.Reason != "" { fmt.Printf(" reason: %s\n", r.Reason) } } fmt.Printf("\n=== Summary: %d passed, %d failed ===\n", passed, failed) if failed > 0 { os.Exit(1) } } func resolveTasksPath(flagValue string) string { envValue := os.Getenv("TASKS_PATH") wd := "" if currentWD, err := os.Getwd(); err == nil { wd = currentWD } sourceDir := "" if _, sourcePath, _, ok := runtime.Caller(0); ok { sourceDir = filepath.Dir(sourcePath) } return resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, "/home/long/.openclaw/workspace/TASKS.md") } func resolveTasksPathWithContext(flagValue, envValue, wd, sourceDir, globalTasksPath string) string { candidates := []string{} if flagValue != "" { candidates = append(candidates, flagValue) } if envValue != "" { candidates = append(candidates, envValue) } if wd != "" { candidates = append(candidates, filepath.Join(wd, "TASKS.md"), filepath.Join(wd, "..", "TASKS.md"), ) } defaultProjectTasks := "" if sourceDir != "" { defaultProjectTasks = filepath.Join(sourceDir, "..", "TASKS.md") candidates = append(candidates, defaultProjectTasks) } seen := map[string]struct{}{} for _, candidate := range candidates { if candidate == "" { continue } cleaned := filepath.Clean(candidate) if _, ok := seen[cleaned]; ok { continue } seen[cleaned] = struct{}{} if _, err := os.Stat(cleaned); err == nil { return cleaned } } if flagValue != "" { return filepath.Clean(flagValue) } if envValue != "" { return filepath.Clean(envValue) } if defaultProjectTasks != "" { return filepath.Clean(defaultProjectTasks) } if wd != "" { return filepath.Clean(filepath.Join(wd, "TASKS.md")) } if globalTasksPath != "" { return filepath.Clean(globalTasksPath) } return "TASKS.md" } type taskEntry struct { ID string Name string Status string Verification Verification HasVerification bool } func parseTasks(f *os.File) []taskEntry { var tasks []taskEntry var currentTask *taskEntry inVerification := false scanner := bufio.NewScanner(f) for scanner.Scan() { line := scanner.Text() // Match task header: ### T-1.1 🔶 Phase 1 范围冻结 taskRe := regexp.MustCompile(`^### (T-[A-Za-z0-9.-]+)\s+[^\s]+\s+(.+)`) if m := taskRe.FindStringSubmatch(line); m != nil { if currentTask != nil { tasks = append(tasks, *currentTask) } currentTask = &taskEntry{ID: m[1], Name: m[2], Status: normalizeStatusFromText(line)} inVerification = false continue } if currentTask == nil { continue } // Check for verification block if strings.Contains(line, "**verification**") || strings.Contains(line, "**verification**:") { inVerification = true currentTask.HasVerification = true continue } if !inVerification { statusRe := regexp.MustCompile(`^\s*-\s+\*\*状态\*\*:(.+)$`) if m := statusRe.FindStringSubmatch(line); m != nil { currentTask.Status = normalizeStatusFromText(m[1]) } continue } // Parse verification fields (indented under **verification**) // - mode: `artifact_present` modeRe := regexp.MustCompile(`^\s+- mode:\s+` + "`" + `([^` + "`" + `]+)` + "`") if m := modeRe.FindStringSubmatch(line); m != nil { currentTask.Verification.Mode = m[1] continue } cmdRe := regexp.MustCompile(`^\s+- command:\s+` + "`" + `([^` + "`" + `]+)` + "`") if m := cmdRe.FindStringSubmatch(line); m != nil { currentTask.Verification.Command = m[1] continue } expRe := regexp.MustCompile(`^\s+- expected_evidence:\s+` + "`" + `([^` + "`" + `]+)` + "`") if m := expRe.FindStringSubmatch(line); m != nil { currentTask.Verification.ExpectedEvidence = m[1] continue } evidenceGradeRe := regexp.MustCompile(`^\s+- evidence_grade:\s+` + "`" + `([^` + "`" + `]+)` + "`") if m := evidenceGradeRe.FindStringSubmatch(line); m != nil { currentTask.Verification.EvidenceGrade = m[1] continue } taskTypeRe := regexp.MustCompile(`^\s+- task_type:\s+` + "`" + `([^` + "`" + `]+)` + "`") if m := taskTypeRe.FindStringSubmatch(line); m != nil { currentTask.Verification.TaskType = m[1] continue } timeoutRe := regexp.MustCompile(`^\s+- timeout_seconds:\s+(\d+)`) if m := timeoutRe.FindStringSubmatch(line); m != nil { fmt.Sscanf(m[1], "%d", ¤tTask.Verification.TimeoutSeconds) continue } // Blank line or new top-level field ends verification block if strings.TrimSpace(line) == "" || (strings.HasPrefix(strings.TrimSpace(line), "**") && !strings.Contains(line, "verification")) { inVerification = false } } if currentTask != nil { tasks = append(tasks, *currentTask) } return tasks } func verifyTask(t taskEntry, dryRun bool) TaskResult { r := TaskResult{TaskID: t.ID, TaskName: t.Name} if !t.HasVerification { r.Reason = "no verification block" r.Verified = true // No verification = trivially pass return r } t.Verification.Mode = strings.TrimSpace(t.Verification.Mode) t.Verification.TaskType = normalizeTaskType(t.Verification.TaskType) t.Verification.EvidenceGrade = normalizeEvidenceGrade(t.Verification.Mode, t.Verification.EvidenceGrade) r.TaskType = t.Verification.TaskType r.EvidenceGrade = t.Verification.EvidenceGrade if validationErr := validateVerification(t.Verification); validationErr != "" { r.Verified = false r.Reason = validationErr return r } if t.Verification.Command == "" { r.Reason = "verification.command is empty" r.Verified = false return r } r.Command = t.Verification.Command if t.Verification.TimeoutSeconds == 0 { t.Verification.TimeoutSeconds = 30 } if dryRun { r.Stdout = "(dry-run, command not executed)" r.Verified = true return r } ctx, cancel := context.WithTimeout(context.Background(), time.Duration(t.Verification.TimeoutSeconds)*time.Second) defer cancel() cmd := exec.CommandContext(ctx, "sh", "-c", t.Verification.Command) var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr err := cmd.Run() r.ExitCode = 0 if err != nil { r.ExitCode = -1 if ctx.Err() == context.DeadlineExceeded { r.Error = fmt.Sprintf("timeout after %ds", t.Verification.TimeoutSeconds) } else { r.Error = err.Error() } } r.Stdout = stdout.String() r.Stderr = stderr.String() r.StdoutSummary = summarizeOutput(r.Stdout) r.StderrSummary = summarizeOutput(r.Stderr) if r.ExitCode != 0 && t.Verification.Mode == "test_pass" { r.Verified = false return r } // Match expected_evidence if t.Verification.ExpectedEvidence != "" { evidence := t.Verification.ExpectedEvidence matched := false if strings.HasPrefix(evidence, "[") && strings.HasSuffix(evidence, "]") { // Regex range like [4-9] re := regexp.MustCompile(`\[(\d+)-(\d+)\]`) if m := re.FindStringSubmatch(evidence); m != nil { var lo, hi int fmt.Sscanf(m[1], "%d", &lo) fmt.Sscanf(m[2], "%d", &hi) reOut := regexp.MustCompile(fmt.Sprintf(`^\s*(\d+)\s*$`)) if numMatch := reOut.FindStringSubmatch(strings.TrimSpace(r.Stdout)); numMatch != nil { var n int fmt.Sscanf(numMatch[1], "%d", &n) matched = n >= lo && n <= hi } } } else if strings.Contains(r.Stdout, evidence) { matched = true } r.Verified = matched if !matched { r.Reason = fmt.Sprintf("expected_evidence '%s' not found in output", evidence) } } else if r.ExitCode == 0 { r.Verified = true } else { r.Verified = false r.Reason = fmt.Sprintf("exit code %d", r.ExitCode) } return r } func normalizeEvidenceGrade(mode, explicit string) string { if explicit = strings.TrimSpace(explicit); explicit != "" { return explicit } switch strings.TrimSpace(mode) { case "test_pass": return "runtime-verified" case "artifact_present": return "artifact-present" case "semantic": return "doc-claimed" default: return "" } } func normalizeTaskType(raw string) string { raw = strings.TrimSpace(raw) if raw == "" { return "unspecified" } return raw } func normalizeStatusFromText(raw string) string { lower := strings.ToLower(strings.TrimSpace(raw)) switch { case strings.Contains(raw, "✅") || strings.Contains(raw, "完成"): return "completed" case strings.Contains(raw, "🟡") || strings.Contains(raw, "进行中"): return "in_progress" case strings.Contains(raw, "🔶") || strings.Contains(raw, "🔴") || strings.Contains(raw, "待启动") || strings.Contains(raw, "未开始"): return "planned" case strings.Contains(raw, "⏸️") || strings.Contains(raw, "待规划") || strings.Contains(raw, "暂停"): return "paused" case lower == "": return "unknown" default: return "unknown" } } func filterTasksByStatus(tasks []taskEntry, filter string) ([]taskEntry, error) { filter = strings.TrimSpace(filter) if filter == "" { filter = "all" } valid := map[string]struct{}{ "all": {}, "completed": {}, "in_progress": {}, "planned": {}, "paused": {}, "unknown": {}, } if _, ok := valid[filter]; !ok { return nil, fmt.Errorf("unsupported status filter: %s", filter) } if filter == "all" { return tasks, nil } filtered := make([]taskEntry, 0, len(tasks)) for _, t := range tasks { status := t.Status if status == "" { status = "unknown" } if status == filter { filtered = append(filtered, t) } } return filtered, nil } func summarizeOutput(raw string) string { cleaned := strings.TrimSpace(raw) if cleaned == "" { return "" } cleaned = strings.Join(strings.Fields(cleaned), " ") const limit = 220 if len(cleaned) <= limit { return cleaned } return cleaned[:limit] + "..." } func validateVerification(v Verification) string { validModes := map[string]struct{}{ "test_pass": {}, "artifact_present": {}, "semantic": {}, } if _, ok := validModes[v.Mode]; !ok { return fmt.Sprintf("unsupported verification mode: %s", v.Mode) } validGrades := map[string]struct{}{ "runtime-verified": {}, "artifact-present": {}, "doc-claimed": {}, } if v.EvidenceGrade != "" { if _, ok := validGrades[v.EvidenceGrade]; !ok { return fmt.Sprintf("unsupported evidence grade: %s", v.EvidenceGrade) } } validTaskTypes := map[string]struct{}{ "unspecified": {}, "code": {}, "automation": {}, "documentation": {}, "configuration": {}, "data": {}, "analysis": {}, } if _, ok := validTaskTypes[v.TaskType]; !ok { return fmt.Sprintf("unsupported task type: %s", v.TaskType) } if (v.TaskType == "code" || v.TaskType == "automation") && v.Mode == "semantic" { return fmt.Sprintf("semantic-only verification is not allowed for %s tasks", v.TaskType) } return "" }