TradingAgents/.claude/lib/test_validator.py

389 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Test Validator - Execute tests and validate TDD workflow.
Runs pytest, parses results, enforces TDD red phase validation, detects syntax
errors, and validates coverage thresholds. Critical for quality gates before
code review and commit.
Key Features:
1. Execute pytest with minimal verbosity (--tb=line -q, Issue #90)
2. Parse pytest output for pass/fail/error counts
3. Enforce TDD red phase (tests must fail before implementation)
4. Detect syntax errors vs runtime errors
5. Validate coverage thresholds
6. Validation gate for blocking commits
Usage:
from test_validator import (
run_tests,
validate_red_phase,
run_validation_gate
)
# Run tests
result = run_tests(Path("tests"))
# TDD red phase validation (before implementation)
validate_red_phase(result) # Raises if tests pass prematurely
# Validation gate (after implementation)
gate_result = run_validation_gate(Path("tests"))
if not gate_result["gate_passed"]:
# Block commit
Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""
import re
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
def run_tests(
    test_path: Path,
    timeout: int = 300,
    pytest_args: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Execute pytest on *test_path* and return parsed results.

    Runs pytest with minimal verbosity (--tb=line -q) to prevent subprocess
    pipe deadlock (Issue #90). Reduces output from ~2,300 lines to ~50 lines.

    Args:
        test_path: Path to test directory or file.
        timeout: Timeout in seconds (default 5 minutes).
        pytest_args: Optional custom pytest arguments; defaults to
            ["--tb=line", "-q"] when None.

    Returns:
        Dict with test results:
            {
                "success": bool,
                "passed": int,
                "failed": int,
                "errors": int,
                "skipped": int,
                "total": int,
                "stdout": str,
                "stderr": str,
                "no_tests_collected": bool
            }

    Raises:
        TimeoutError: If tests exceed timeout.
        RuntimeError: If pytest not installed.

    Example:
        >>> result = run_tests(Path("tests"))
        >>> result["passed"]
        42
    """
    # Default pytest args (minimal verbosity). Assigned per-call, so the
    # default list is never shared between invocations.
    if pytest_args is None:
        pytest_args = ["--tb=line", "-q"]

    cmd = ["pytest", str(test_path)] + pytest_args

    # Only the subprocess call can raise the exceptions we translate below,
    # so keep the try body minimal.
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False  # Handle return codes manually
        )
    except FileNotFoundError:
        raise RuntimeError(
            "pytest not installed. Install with: pip install pytest"
        )
    except subprocess.TimeoutExpired:
        raise TimeoutError(
            f"Tests exceeded timeout of {timeout} seconds ({timeout // 60} minutes)"
        )

    # Parse counts from stdout and attach the raw streams for callers.
    parsed = parse_pytest_output(result.stdout)
    parsed["stdout"] = result.stdout
    parsed["stderr"] = result.stderr

    # pytest returns 5 when no tests were collected; treat that as failure.
    if result.returncode == 5:
        parsed["no_tests_collected"] = True
        parsed["success"] = False
    else:
        parsed["no_tests_collected"] = False
        # Success only on a fully clean run (returncode 0).
        parsed["success"] = result.returncode == 0
    return parsed
def parse_pytest_output(output: str) -> Dict[str, int]:
    """Parse pytest output for test counts.

    Extracts counts from pytest summary line:
        "10 passed, 2 failed, 1 error in 1.23s"

    Args:
        output: pytest stdout.

    Returns:
        Dict with counts: {passed, failed, errors, skipped, total}

    Example:
        >>> output = "10 passed, 2 failed, 1 error in 1.23s"
        >>> parse_pytest_output(output)
        {'passed': 10, 'failed': 2, 'errors': 1, 'skipped': 0, 'total': 13}
    """
    result = {
        "passed": 0,
        "failed": 0,
        "errors": 0,
        "skipped": 0,
        "total": 0
    }

    # Find summary counts anywhere in the output.
    # Pattern: "N passed, M failed, K error in X.XXs"
    summary_pattern = r'(\d+)\s+passed|(\d+)\s+failed|(\d+)\s+error|(\d+)\s+skipped'
    matches = re.findall(summary_pattern, output, re.IGNORECASE)

    for match in matches:
        if match[0]:  # passed
            result["passed"] = int(match[0])
        elif match[1]:  # failed
            result["failed"] = int(match[1])
        elif match[2]:  # error
            result["errors"] = int(match[2])
        elif match[3]:  # skipped
            result["skipped"] = int(match[3])

    # Prefer pytest's own "collected N items" count when present.
    collected_pattern = r'collected\s+(\d+)\s+items?'
    collected_match = re.search(collected_pattern, output, re.IGNORECASE)
    if collected_match:
        result["total"] = int(collected_match.group(1))
    else:
        # Fallback: sum all outcome counts. Skipped tests were still
        # collected, so they belong in the total (previously omitted).
        result["total"] = (
            result["passed"] + result["failed"]
            + result["errors"] + result["skipped"]
        )
    return result
def validate_red_phase(test_result: Dict[str, Any]) -> None:
    """Validate TDD red phase - tests should fail before implementation.

    Ensures tests fail initially (no implementation exists yet). Blocks
    workflow if all tests pass prematurely.

    Args:
        test_result: Test result from run_tests().

    Raises:
        ValueError: If tests pass prematurely (TDD red phase violation).
        ValueError: If no tests found.

    Example:
        >>> result = {"success": True, "passed": 10, "failed": 0, "errors": 0}
        >>> validate_red_phase(result)
        ValueError: TDD red phase violation: tests should fail before implementation
    """
    # Counts default to 0 because test_result may omit fields entirely.
    passed = test_result.get("passed", 0)
    failed = test_result.get("failed", 0)
    errors = test_result.get("errors", 0)

    # Premature pass: a clean run with at least one passing test and no
    # failures or errors means the implementation already satisfies the
    # tests - a red-phase violation.
    premature_pass = (
        test_result.get("success", False)
        and passed > 0
        and failed == 0
        and errors == 0
    )
    if premature_pass:
        raise ValueError(
            "TDD red phase violation: All tests pass, but implementation doesn't exist yet. "
            "Tests should fail initially (import errors, assertion failures) before implementation."
        )

    # No outcomes at all means no tests were written.
    if not (passed or failed or errors):
        raise ValueError(
            "No tests found. TDD requires tests to be written first."
        )

    # Valid red phase: some failures or errors exist.
    # (Import errors are expected when modules don't exist yet.)
def detect_syntax_errors(pytest_output: str) -> Tuple[bool, List[str]]:
    """Detect syntax errors in test files.

    Distinguishes syntax/import errors from runtime errors (assertions,
    exceptions).

    Args:
        pytest_output: pytest stdout/stderr.

    Returns:
        Tuple of (has_syntax_errors, error_details).

    Example:
        >>> output = "SyntaxError: invalid syntax on line 10"
        >>> has_errors, details = detect_syntax_errors(output)
        >>> has_errors
        True
    """
    # Collection-time failure markers, scanned in this fixed order so the
    # returned details are grouped by error type.
    error_markers = (
        r'SyntaxError:',
        r'ImportError:',
        r'ModuleNotFoundError:',
        r'IndentationError:',
        r'TabError:',
    )

    details: List[str] = []
    for marker in error_markers:
        # Capture the marker plus the remainder of its line.
        details.extend(re.findall(f'({marker}.*)', pytest_output, re.MULTILINE))

    return bool(details), details
def validate_test_syntax(test_result: Dict[str, Any]) -> None:
    """Validate test files for syntax errors.

    Blocks workflow if syntax errors detected (not runtime errors).

    Args:
        test_result: Test result from run_tests().

    Raises:
        SyntaxError: If test files contain syntax errors.

    Example:
        >>> result = {"stderr": "SyntaxError: invalid syntax"}
        >>> validate_test_syntax(result)
        SyntaxError: Test files contain syntax errors
    """
    # Scan both streams; missing keys are treated as empty output.
    combined = test_result.get("stdout", "") + test_result.get("stderr", "")
    has_errors, details = detect_syntax_errors(combined)
    if not has_errors:
        return
    # Cap at the first five findings to keep the message readable.
    raise SyntaxError(
        "Test files contain syntax errors:\n" + "\n".join(details[:5])
    )
def run_validation_gate(test_path: Path, timeout: int = 300) -> Dict[str, Any]:
    """Run validation gate before code review.

    Executes all tests and determines if commit should proceed. Blocks on:
    - Test failures
    - Syntax errors
    - No tests found

    Args:
        test_path: Path to test directory.
        timeout: Test timeout in seconds.

    Returns:
        Dict with validation results:
            {
                "gate_passed": bool,
                "all_tests_passed": bool,
                "block_commit": bool,
                "passed": int,
                "failed": int,
                "errors": int,
                "message": str
            }

    Example:
        >>> result = run_validation_gate(Path("tests"))
        >>> if not result["gate_passed"]:
        ...     print("Blocking commit")
    """
    # Execute the test suite; any execution failure blocks the commit.
    try:
        test_result = run_tests(test_path, timeout)
    except Exception as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "passed": 0,
            "failed": 0,
            "errors": 0,
            "message": f"Test execution failed: {e}"
        }

    # Counts shared by every remaining outcome.
    counts = {
        "passed": test_result.get("passed", 0),
        "failed": test_result.get("failed", 0),
        "errors": test_result.get("errors", 0),
    }

    # Syntax errors in test files block the commit regardless of counts.
    try:
        validate_test_syntax(test_result)
    except SyntaxError as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "message": str(e),
            **counts,
        }

    # Gate passes only when the whole run succeeded.
    all_passed = test_result.get("success", False)
    message = (
        "All tests passed"
        if all_passed
        else f"{test_result.get('failed', 0)} tests failed"
    )
    return {
        "gate_passed": all_passed,
        "all_tests_passed": all_passed,
        "block_commit": not all_passed,
        "message": message,
        **counts,
    }
def validate_coverage(coverage_output: str, threshold: float = 80.0) -> None:
    """Validate test coverage meets threshold.

    Parses pytest-cov output and blocks if coverage below threshold.

    Args:
        coverage_output: pytest --cov output.
        threshold: Minimum coverage percentage (default 80%).

    Raises:
        ValueError: If coverage below threshold.

    Example:
        >>> output = "TOTAL 100 15 85%"
        >>> validate_coverage(output, threshold=80)
        # Passes (85% >= 80%)
    """
    # pytest-cov summary row: "TOTAL <stmts> <miss> <percent>%"
    match = re.search(r'TOTAL\s+\d+\s+\d+\s+(\d+)%', coverage_output)
    if match is None:
        # Coverage summary not present; nothing to validate.
        return

    coverage = int(match.group(1))
    if coverage >= threshold:
        return

    raise ValueError(
        f"Coverage below {threshold}%: {coverage}%. "
        f"Add more tests to reach {threshold}% coverage."
    )