// llm-intelligence/scripts/verification_executor_test.go

//go:build llm_script

package main

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestParseTasksParsesEvidenceFields writes a one-task markdown document to a
// temporary file, hands the file to parseTasks, and checks that the values in
// the task's verification list (mode, command, expected_evidence,
// evidence_grade, task_type, timeout_seconds) end up in the corresponding
// fields of the parsed task's Verification.
func TestParseTasksParsesEvidenceFields(t *testing.T) {
	md := `# Tasks

### T-1 ✅ Example
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `
  - evidence_grade: ` + "`runtime-verified`" + `
  - task_type: ` + "`code`" + `
  - timeout_seconds: 15
`

	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind to the beginning so parseTasks reads what was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// tasks[0] is indexed below, so a wrong count must stop the test.
	if len(tasks) != 1 {
		t.Fatalf("expected 1 task, got %d", len(tasks))
	}

	// The field checks do not depend on each other; Errorf reports every
	// mismatch in a single run instead of stopping at the first one.
	got := tasks[0].Verification
	if got.Mode != "test_pass" {
		t.Errorf("expected mode test_pass, got %q", got.Mode)
	}
	if got.Command != "echo ok" {
		t.Errorf("expected command echo ok, got %q", got.Command)
	}
	if got.ExpectedEvidence != "ok" {
		t.Errorf("expected evidence ok, got %q", got.ExpectedEvidence)
	}
	if got.EvidenceGrade != "runtime-verified" {
		t.Errorf("expected evidence grade runtime-verified, got %q", got.EvidenceGrade)
	}
	if got.TaskType != "code" {
		t.Errorf("expected task type code, got %q", got.TaskType)
	}
	if got.TimeoutSeconds != 15 {
		t.Errorf("expected timeout 15, got %d", got.TimeoutSeconds)
	}
}

func TestVerifyTaskRejectsSemanticOnlyForCodeTask(t *testing.T) {
	task := taskEntry{
		ID:   "T-1",
		Name: "semantic code task",
		Verification: Verification{
			Mode:          "semantic",
			Command:       "echo ok",
			TaskType:      "code",
			EvidenceGrade: "doc-claimed",
		},
		HasVerification: true,
	}

	result := verifyTask(task, true)
	if result.Verified {
		t.Fatalf("expected semantic-only code task to fail")
	}
	if !strings.Contains(result.Reason, "semantic-only") {
		t.Fatalf("expected semantic-only rejection reason, got %q", result.Reason)
	}
}

// TestVerifyTaskDefaultsEvidenceGradeFromMode verifies a documentation task
// that uses mode "artifact_present" and leaves EvidenceGrade unset. With the
// second argument of verifyTask set to true (a dry run, judging by the failure
// message below) the task is expected to pass and the result's EvidenceGrade
// is expected to be "artifact-present".
func TestVerifyTaskDefaultsEvidenceGradeFromMode(t *testing.T) {
	task := taskEntry{
		ID:   "T-2",
		Name: "artifact task",
		Verification: Verification{
			Mode:     "artifact_present",
			TaskType: "documentation",
		},
		HasVerification: true,
	}

	result := verifyTask(task, true)
	if !result.Verified {
		t.Fatalf("expected dry-run artifact task to pass, got reason %q", result.Reason)
	}
	if result.EvidenceGrade != "artifact-present" {
		t.Fatalf("expected default evidence grade artifact-present, got %q", result.EvidenceGrade)
	}
}

// TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal lays out a temporary
// tree with project/TASKS.md, project/scripts/ and workspace/TASKS.md, then
// calls resolveTasksPathWithContext with two empty strings, a directory under
// the root that is never created ("outside"), the scripts directory and the
// workspace TASKS.md path. The project TASKS.md path is the expected result.
// The meaning of each argument is defined by resolveTasksPathWithContext,
// which is not part of this file section.
func TestResolveTasksPathDoesNotImplicitlyFallbackToGlobal(t *testing.T) {
	base := t.TempDir()
	projectDir := filepath.Join(base, "project")
	globalDir := filepath.Join(base, "workspace")
	scriptDir := filepath.Join(projectDir, "scripts")

	// Create the directories in the same order as the fixture is described.
	for _, d := range []struct{ label, path string }{
		{"project", projectDir},
		{"global", globalDir},
		{"script", scriptDir},
	} {
		if err := os.MkdirAll(d.path, 0o755); err != nil {
			t.Fatalf("mkdir %s dir: %v", d.label, err)
		}
	}

	projectTasks := filepath.Join(projectDir, "TASKS.md")
	globalTasks := filepath.Join(globalDir, "TASKS.md")
	for _, f := range []struct{ label, path, content string }{
		{"project", projectTasks, "# project"},
		{"global", globalTasks, "# global"},
	} {
		if err := os.WriteFile(f.path, []byte(f.content), 0o644); err != nil {
			t.Fatalf("write %s tasks: %v", f.label, err)
		}
	}

	outsideDir := filepath.Join(base, "outside")
	resolved := resolveTasksPathWithContext("", "", outsideDir, scriptDir, globalTasks)
	if resolved != projectTasks {
		t.Fatalf("expected project tasks path, got %q", resolved)
	}
}

// TestResolveTasksPathAllowsExplicitGlobalPath builds the same temporary
// layout (project/TASKS.md, project/scripts/, workspace/TASKS.md) and passes
// the workspace TASKS.md path as the first argument of
// resolveTasksPathWithContext. The call is expected to return that same
// workspace path.
func TestResolveTasksPathAllowsExplicitGlobalPath(t *testing.T) {
	base := t.TempDir()
	var (
		projectDir = filepath.Join(base, "project")
		globalDir  = filepath.Join(base, "workspace")
		scriptDir  = filepath.Join(projectDir, "scripts")
	)

	// mkdir creates dir (and parents) or aborts the test.
	mkdir := func(label, dir string) {
		if err := os.MkdirAll(dir, 0o755); err != nil {
			t.Fatalf("mkdir %s dir: %v", label, err)
		}
	}
	mkdir("project", projectDir)
	mkdir("global", globalDir)
	mkdir("script", scriptDir)

	// writeTasks writes dir/TASKS.md with the given content and returns its path.
	writeTasks := func(label, dir, content string) string {
		path := filepath.Join(dir, "TASKS.md")
		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
			t.Fatalf("write %s tasks: %v", label, err)
		}
		return path
	}
	writeTasks("project", projectDir, "# project")
	globalTasks := writeTasks("global", globalDir, "# global")

	resolved := resolveTasksPathWithContext(globalTasks, "", filepath.Join(base, "outside"), scriptDir, globalTasks)
	if resolved != globalTasks {
		t.Fatalf("expected explicit global tasks path, got %q", resolved)
	}
}

// TestVerifyTaskCapturesFailureSummaries runs verifyTask (second argument
// false) on a test_pass task whose command prints one line to stdout, one
// line to stderr and then exits with status 1. The result is expected to be
// unverified, with StdoutSummary and StderrSummary each containing the text
// written to the matching stream.
//
// NOTE(review): the command relies on shell syntax ("&&", "1>&2"); presumably
// verifyTask runs it through a POSIX-style shell — confirm.
func TestVerifyTaskCapturesFailureSummaries(t *testing.T) {
	task := taskEntry{
		ID:   "T-3",
		Name: "failing task",
		Verification: Verification{
			Mode:             "test_pass",
			Command:          "echo standard-output && echo standard-error 1>&2 && exit 1",
			ExpectedEvidence: "unused",
			TaskType:         "automation",
		},
		HasVerification: true,
	}

	result := verifyTask(task, false)
	if result.Verified {
		t.Fatalf("expected failing task to fail")
	}
	// The two streams are captured independently; Errorf lets a problem with
	// one summary surface even when the other is also wrong.
	if !strings.Contains(result.StdoutSummary, "standard-output") {
		t.Errorf("expected stdout summary to contain command output, got %q", result.StdoutSummary)
	}
	if !strings.Contains(result.StderrSummary, "standard-error") {
		t.Errorf("expected stderr summary to contain command error, got %q", result.StderrSummary)
	}
}

// TestParseTasksParsesNormalizedStatus parses a markdown document with three
// tasks whose status lines are written with emoji and Chinese labels, and
// expects parseTasks to expose them as the Status values "completed",
// "planned" and "paused", in document order.
func TestParseTasksParsesNormalizedStatus(t *testing.T) {
	md := `# Tasks

### T-1 ✅ Done task
- **状态**：✅ 完成（2026-05-11）
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `

### T-2 🔶 Planned task
- **状态**：🔶 待启动
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `

### T-3 ⏸️ Paused task
- **状态**：⏸️ 待规划
- **verification**:
  - mode: ` + "`test_pass`" + `
  - command: ` + "`echo ok`" + `
  - expected_evidence: ` + "`ok`" + `
`

	tmpFile, err := os.CreateTemp(t.TempDir(), "tasks-status-*.md")
	if err != nil {
		t.Fatalf("create temp file: %v", err)
	}
	defer tmpFile.Close()

	if _, err := tmpFile.WriteString(md); err != nil {
		t.Fatalf("write temp file: %v", err)
	}
	// Rewind to the beginning so parseTasks reads what was just written.
	if _, err := tmpFile.Seek(0, 0); err != nil {
		t.Fatalf("seek temp file: %v", err)
	}

	tasks := parseTasks(tmpFile)
	// The indexed checks below need all three entries, so this stays fatal.
	if len(tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(tasks))
	}

	// Each task's status is independent of the others; Errorf reports every
	// wrong mapping in one run.
	if tasks[0].Status != "completed" {
		t.Errorf("expected first task status completed, got %q", tasks[0].Status)
	}
	if tasks[1].Status != "planned" {
		t.Errorf("expected second task status planned, got %q", tasks[1].Status)
	}
	if tasks[2].Status != "paused" {
		t.Errorf("expected third task status paused, got %q", tasks[2].Status)
	}
}

// TestFilterTasksByStatus filters three tasks (completed, planned,
// in_progress) twice: with "completed", which must yield only T-1, and with
// "all", which must yield all three. Neither call may return an error.
func TestFilterTasksByStatus(t *testing.T) {
	input := []taskEntry{
		{ID: "T-1", Status: "completed"},
		{ID: "T-2", Status: "planned"},
		{ID: "T-3", Status: "in_progress"},
	}

	onlyCompleted, err := filterTasksByStatus(input, "completed")
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case len(onlyCompleted) != 1 || onlyCompleted[0].ID != "T-1":
		t.Fatalf("expected only completed task, got %#v", onlyCompleted)
	}

	everything, err := filterTasksByStatus(input, "all")
	switch {
	case err != nil:
		t.Fatalf("unexpected error for all: %v", err)
	case len(everything) != 3:
		t.Fatalf("expected all 3 tasks, got %d", len(everything))
	}
}

// TestDetermineProcessExitCode checks the exit code that
// determineProcessExitCode derives from a slice of task results: 0 when every
// result is verified, 2 for a failed runtime-verified automation result, 3
// for a failed artifact-present documentation result, and 2 when both kinds
// of failure are present.
func TestDetermineProcessExitCode(t *testing.T) {
	passed := TaskResult{Verified: true}
	runtimeFail := TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}
	artifactFail := TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}

	type scenario struct {
		name    string
		results []TaskResult
		want    int
	}
	scenarios := []scenario{
		{name: "all pass", results: []TaskResult{passed, passed}, want: 0},
		{name: "runtime failure", results: []TaskResult{runtimeFail}, want: 2},
		{name: "artifact only failure", results: []TaskResult{artifactFail}, want: 3},
		{name: "mixed defaults to runtime", results: []TaskResult{artifactFail, runtimeFail}, want: 2},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			got := determineProcessExitCode(sc.results)
			if got != sc.want {
				t.Fatalf("exit code = %d, want %d", got, sc.want)
			}
		})
	}
}

func TestClassifyFailureTier(t *testing.T) {
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 2 {
		t.Fatalf("runtime failure tier = %d, want 2", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: false, EvidenceGrade: "artifact-present", TaskType: "documentation"}); got != 3 {
		t.Fatalf("artifact failure tier = %d, want 3", got)
	}
	if got := classifyFailureTier(TaskResult{Verified: true, EvidenceGrade: "runtime-verified", TaskType: "automation"}); got != 0 {
		t.Fatalf("verified tier = %d, want 0", got)
	}
}

func TestVerifyTaskClassifiesToolExecutionFailure(t *testing.T) {
	task := taskEntry{
		ID:   "T-4",
		Name: "tool failure task",
		Verification: Verification{
			Mode:             "test_pass",
			Command:          "echo tool-out && echo tool-err 1>&2 && exit 1",
			ExpectedEvidence: "tool-out",
			TaskType:         "automation",
		},
		HasVerification: true,
	}

	result := verifyTask(task, false)
	if result.Verified {
		t.Fatalf("expected tool failure task to fail")
	}
	if result.FailureClass != "tool_execution_failure" {
		t.Fatalf("failure class = %q, want tool_execution_failure", result.FailureClass)
	}
}

func TestVerifyTaskArtifactPresentMisuseBecomesConfigFailure(t *testing.T) {
	task := taskEntry{
		ID:   "T-5",
		Name: "artifact misuse",
		Verification: Verification{
			Mode:             "artifact_present",
			Command:          "echo actual-output",
			ExpectedEvidence: "expected-output",
			TaskType:         "documentation",
		},
		HasVerification: true,
	}

	result := verifyTask(task, false)
	if result.Verified {
		t.Fatalf("expected artifact misuse to fail")
	}
	if result.FailureClass != "verification_config_failure" {
		t.Fatalf("failure class = %q, want verification_config_failure", result.FailureClass)
	}
}

func TestValidateVerificationRejectsArtifactPresentWithCommand(t *testing.T) {
	got := validateVerification(Verification{
		Mode:             "artifact_present",
		Command:          "echo exists",
		ExpectedEvidence: "exists",
		TaskType:         "documentation",
		EvidenceGrade:    "artifact-present",
	})
	if got == "" {
		t.Fatalf("expected artifact_present with command to be rejected")
	}
	if !strings.Contains(got, "artifact_present") {
		t.Fatalf("unexpected validation error: %q", got)
	}
}

func TestValidateVerificationRejectsArtifactPresentForAutomation(t *testing.T) {
	got := validateVerification(Verification{
		Mode:          "artifact_present",
		TaskType:      "automation",
		EvidenceGrade: "artifact-present",
	})
	if got == "" {
		t.Fatalf("expected artifact_present automation task to be rejected")
	}
	if !strings.Contains(got, "artifact_present") {
		t.Fatalf("unexpected validation error: %q", got)
	}
}