Files
llm-intelligence/scripts/verification_executor_test.go
2026-05-29 18:48:48 +08:00

395 lines
11 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//go:build llm_script
package main
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestParseTasksParsesEvidenceFields writes a one-task markdown fixture to a
// temporary file, runs parseTasks over it, and checks every field of the
// resulting Verification value against the bullet it was written in.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
	// Raw string literals cannot contain backticks, so each inline-code value
	// is spliced in as an interpreted string.
	md := `# Tasks
### T-1 ✅ Example
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
- evidence_grade: ` + "`runtime-verified`" + `
- task_type: ` + "`code`" + `
- timeout_seconds: 15
`
	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind to offset 0 so parseTasks reads the fixture from its first byte.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// This one stays fatal: indexing tasks[0] below requires exactly one task.
	if len(tasks) != 1 {
		t.Fatalf("expected 1 task, got %d", len(tasks))
	}

	// The field checks are independent of each other, so they use Errorf: a
	// single run then reports every field that was parsed incorrectly instead
	// of stopping at the first one.
	got := tasks[0].Verification
	if got.Mode != "test_pass" {
		t.Errorf("expected mode test_pass, got %q", got.Mode)
	}
	if got.Command != "echo ok" {
		t.Errorf("expected command echo ok, got %q", got.Command)
	}
	if got.ExpectedEvidence != "ok" {
		t.Errorf("expected evidence ok, got %q", got.ExpectedEvidence)
	}
	if got.EvidenceGrade != "runtime-verified" {
		t.Errorf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
	}
	if got.TaskType != "code" {
		t.Errorf("expected task type code, got %q", got.TaskType)
	}
	if got.TimeoutSeconds != 15 {
		t.Errorf("expected timeout 15, got %d", got.TimeoutSeconds)
	}
}
func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
task := taskEntry{
ID: "T-1",
Name: "semantic code task",
Verification: Verification{
Mode: "semantic",
Command: "echo ok",
TaskType: "code",
EvidenceGrade: "doc-claimed",
},
HasVerification: true,
}
result := verifyTask(task, true)
if result.Verified {
t.Fatalf("expected semantic-only code task to fail")
}
if !strings.Contains(result.Reason, "semantic-only") {
t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
}
}
// TestVerifyTaskDefaultsEvidenceGradeFromMode checks that a task which leaves
// EvidenceGrade empty and uses mode "artifact_present" comes back verified
// with the evidence grade "artifact-present".
func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
	// EvidenceGrade is deliberately left at its zero value.
	var entry taskEntry
	entry.ID = "T-2"
	entry.Name = "artifact task"
	entry.HasVerification = true
	entry.Verification.Mode = "artifact_present"
	entry.Verification.TaskType = "documentation"

	// The failure message below describes this call (second argument true) as
	// a dry run.
	outcome := verifyTask(entry, true)

	switch {
	case !outcome.Verified:
		t.Fatalf("expected dry-run artifact task to pass, got reason %q", outcome.Reason)
	case outcome.EvidenceGrade != "artifact-present":
		t.Fatalf("expected default evidence grade artifact-present, got %q", outcome.EvidenceGrade)
	}
}
// TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal lays out a project tree
// and a separate workspace tree, each with its own TASKS.md, and checks that
// resolveTasksPathWithContext returns the project file when its first two
// arguments are empty, even though the workspace file exists and is passed as
// the last argument.
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
	base := t.TempDir()
	projectRoot := filepath.Join(base, "project")
	workspaceRoot := filepath.Join(base, "workspace")
	scriptsRoot := filepath.Join(projectRoot, "scripts")

	// ensureDir creates path (and any parents) or aborts the test using the
	// supplied failure format.
	ensureDir := func(path, failFormat string) {
		if err := os.MkdirAll(path, 0o755); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	ensureDir(projectRoot, "mkdir project dir: %v")
	ensureDir(workspaceRoot, "mkdir global dir: %v")
	ensureDir(scriptsRoot, "mkdir script dir: %v")

	// putFile writes content to path or aborts the test using the supplied
	// failure format.
	putFile := func(path, content, failFormat string) {
		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	projectFile := filepath.Join(projectRoot, "TASKS.md")
	globalFile := filepath.Join(workspaceRoot, "TASKS.md")
	putFile(projectFile, "# project", "write project tasks: %v")
	putFile(globalFile, "# global", "write global tasks: %v")

	// NOTE(review): the meaning of each positional argument is not visible in
	// this file; the third is a directory that is never created.
	outsideDir := filepath.Join(base, "outside")
	resolved := resolveTasksPathWithContext("", "", outsideDir, scriptsRoot, globalFile)
	if resolved != projectFile {
		t.Fatalf("expected project tasks path, got %q", resolved)
	}
}
// TestResolveTasksPathAllowsExplicitGlobalPath lays out a project tree and a
// separate workspace tree, each with its own TASKS.md, and checks that
// resolveTasksPathWithContext returns the workspace file when that path is
// passed as the first argument.
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
	base := t.TempDir()
	projectRoot := filepath.Join(base, "project")
	workspaceRoot := filepath.Join(base, "workspace")
	scriptsRoot := filepath.Join(projectRoot, "scripts")

	// ensureDir creates path (and any parents) or aborts the test using the
	// supplied failure format.
	ensureDir := func(path, failFormat string) {
		if err := os.MkdirAll(path, 0o755); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	ensureDir(projectRoot, "mkdir project dir: %v")
	ensureDir(workspaceRoot, "mkdir global dir: %v")
	ensureDir(scriptsRoot, "mkdir script dir: %v")

	// putFile writes content to path or aborts the test using the supplied
	// failure format.
	putFile := func(path, content, failFormat string) {
		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
			t.Fatalf(failFormat, err)
		}
	}
	projectFile := filepath.Join(projectRoot, "TASKS.md")
	globalFile := filepath.Join(workspaceRoot, "TASKS.md")
	putFile(projectFile, "# project", "write project tasks: %v")
	putFile(globalFile, "# global", "write global tasks: %v")

	// NOTE(review): the meaning of each positional argument is not visible in
	// this file; here the workspace path is passed both first and last.
	outsideDir := filepath.Join(base, "outside")
	resolved := resolveTasksPathWithContext(globalFile, "", outsideDir, scriptsRoot, globalFile)
	if resolved != globalFile {
		t.Fatalf("expected explicit global tasks path, got %q", resolved)
	}
}
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
task := taskEntry{
ID: "T-3",
Name: "failing task",
Verification: Verification{
Mode: "test_pass",
Command: "echo standard-output && echo standard-error 1>&2 && exit 1",
ExpectedEvidence: "unused",
TaskType: "automation",
},
HasVerification: true,
}
result := verifyTask(task, false)
if result.Verified {
t.Fatalf("expected failing task to fail")
}
if !strings.Contains(result.StdoutSummary, "standard-output") {
t.Fatalf("expected stdout summary to contain command output, got %q", result.StdoutSummary)
}
if !strings.Contains(result.StderrSummary, "standard-error") {
t.Fatalf("expected stderr summary to contain command error, got %q", result.StderrSummary)
}
}
// TestParseTasksParsesNormalizedStatus writes a three-task markdown fixture to
// a temporary file, runs parseTasks over it, and checks that the tasks come
// back with the statuses "completed", "planned" and "paused", in that order.
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
	// Each task carries a status bullet with an emoji marker followed by
	// Chinese text. Raw string literals cannot contain backticks, so each
	// inline-code value is spliced in as an interpreted string.
	md := `# Tasks
### T-1 ✅ Done task
- **状态**:✅ 完成2026-05-11
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-2 🔶 Planned task
- **状态**:🔶 待启动
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
### T-3 ⏸️ Paused task
- **状态**:⏸️ 待规划
- **verification**:
- mode: ` + "`test_pass`" + `
- command: ` + "`echo ok`" + `
- expected_evidence: ` + "`ok`" + `
`
	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind to offset 0 so parseTasks reads the fixture from its first byte.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// This one stays fatal: the indexed checks below require three tasks.
	if len(tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(tasks))
	}

	// Each task's status is checked independently, so Errorf is used: a single
	// run then reports every status that was normalized incorrectly.
	if tasks[0].Status != "completed" {
		t.Errorf("expected first task status completed, got %q", tasks[0].Status)
	}
	if tasks[1].Status != "planned" {
		t.Errorf("expected second task status planned, got %q", tasks[1].Status)
	}
	if tasks[2].Status != "paused" {
		t.Errorf("expected third task status paused, got %q", tasks[2].Status)
	}
}
// TestFilterTasksByStatus checks filterTasksByStatus against a three-task
// fixture: the filter "completed" must return only T-1, and the filter "all"
// must return all three tasks. Neither call may return an error.
func TestFilterTasksByStatus(t *testing.T) {
	fixture := []taskEntry{
		{ID: "T-1", Status: "completed"},
		{ID: "T-2", Status: "planned"},
		{ID: "T-3", Status: "in_progress"},
	}

	onlyCompleted, completedErr := filterTasksByStatus(fixture, "completed")
	if completedErr != nil {
		t.Fatalf("unexpected error: %v", completedErr)
	}
	// The length test comes first so the index is only evaluated when safe.
	if !(len(onlyCompleted) == 1 && onlyCompleted[0].ID == "T-1") {
		t.Fatalf("expected only completed task, got %#v", onlyCompleted)
	}

	everything, allErr := filterTasksByStatus(fixture, "all")
	if allErr != nil {
		t.Fatalf("unexpected error for all: %v", allErr)
	}
	if count := len(everything); count != 3 {
		t.Fatalf("expected all 3 tasks, got %d", count)
	}
}
// TestDetermineProcessExitCode runs determineProcessExitCode over a table of
// result sets, one subtest per row, and compares the returned exit code with
// the expected one: 0 when everything is verified, 2 for a failed
// runtime-verified automation result, 3 for a failed artifact-present
// documentation result, and 2 when both kinds of failure are present.
func TestDetermineProcessExitCode(t *testing.T) {
	type scenario struct {
		name    string
		results []TaskResult
		want    int
	}

	// Shared building blocks for the table rows.
	passed := TaskResult{Verified: true}
	runtimeFailure := TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}
	artifactFailure := TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}

	scenarios := []scenario{
		{name: "all pass", results: []TaskResult{passed, passed}, want: 0},
		{name: "runtime failure", results: []TaskResult{runtimeFailure}, want: 2},
		{name: "artifact only failure", results: []TaskResult{artifactFailure}, want: 3},
		{name: "mixed defaults to runtime", results: []TaskResult{artifactFailure, runtimeFailure}, want: 2},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			got := determineProcessExitCode(sc.results)
			if got != sc.want {
				t.Fatalf("exit code = %d, want %d", got, sc.want)
			}
		})
	}
}
// TestClassifyFailureTier pins the tier classifyFailureTier returns for three
// representative results: 2 for a failed runtime-verified automation result,
// 3 for a failed artifact-present documentation result, and 0 for a verified
// result.
func TestClassifyFailureTier(t *testing.T) {
	// The three checks do not depend on each other, so they use Errorf rather
	// than Fatalf: one run then reports every tier that is wrong instead of
	// stopping at the first.
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 2 {
		t.Errorf("runtime failure tier = %d, want 2", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}); got != 3 {
		t.Errorf("artifact failure tier = %d, want 3", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: true, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 0 {
		t.Errorf("verified tier = %d, want 0", got)
	}
}
func TestVerifyTaskClassifiesToolExecutionFailure(t *testing.T) {
task := taskEntry{
ID: "T-4",
Name: "tool failure task",
Verification: Verification{
Mode: "test_pass",
Command: "echo tool-out && echo tool-err 1>&2 && exit 1",
ExpectedEvidence: "tool-out",
TaskType: "automation",
},
HasVerification: true,
}
result := verifyTask(task, false)
if result.Verified {
t.Fatalf("expected tool failure task to fail")
}
if result.FailureClass != "tool_execution_failure" {
t.Fatalf("failure class = %q, want tool_execution_failure", result.FailureClass)
}
}
// TestVerifyTaskArtifactPresentMisuseBecomesConfigFailure runs verifyTask on
// an "artifact_present" task that also supplies a command and expected
// evidence, and checks that the result is unverified with FailureClass
// "verification_config_failure".
func TestVerifyTaskArtifactPresentMisuseBecomesConfigFailure(t *testing.T) {
	var entry taskEntry
	entry.ID = "T-5"
	entry.Name = "artifact misuse"
	entry.HasVerification = true
	entry.Verification = Verification{
		Mode:             "artifact_present",
		Command:          "echo actual-output",
		ExpectedEvidence: "expected-output",
		TaskType:         "documentation",
	}

	outcome := verifyTask(entry, false)

	// Cases are evaluated top to bottom and the first match aborts the test.
	switch {
	case outcome.Verified:
		t.Fatalf("expected artifact misuse to fail")
	case outcome.FailureClass != "verification_config_failure":
		t.Fatalf("failure class = %q, want verification_config_failure", outcome.FailureClass)
	}
}
func TestValidateVerificationRejectsArtifactPresentWithCommand(t *testing.T) {
got := validateVerification(Verification{
Mode: "artifact_present",
Command: "echo exists",
ExpectedEvidence: "exists",
TaskType: "documentation",
EvidenceGrade: "artifact-present",
})
if got == "" {
t.Fatalf("expected artifact_present with command to be rejected")
}
if !strings.Contains(got, "artifact_present") {
t.Fatalf("unexpected validation error: %q", got)
}
}
func TestValidateVerificationRejectsArtifactPresentForAutomation(t *testing.T) {
got := validateVerification(Verification{
Mode: "artifact_present",
TaskType: "automation",
EvidenceGrade: "artifact-present",
})
if got == "" {
t.Fatalf("expected artifact_present automation task to be rejected")
}
if !strings.Contains(got, "artifact_present") {
t.Fatalf("unexpected validation error: %q", got)
}
}