#!/usr/bin/env python3
"""Test Validator - Execute tests and validate TDD workflow.

Runs pytest, parses results, enforces TDD red phase validation, detects
syntax errors, and validates coverage thresholds. Critical for quality
gates before code review and commit.

Key Features:
1. Execute pytest with minimal verbosity (--tb=line -q, Issue #90)
2. Parse pytest output for pass/fail/error counts
3. Enforce TDD red phase (tests must fail before implementation)
4. Detect syntax errors vs runtime errors
5. Validate coverage thresholds
6. Validation gate for blocking commits

Usage:
    from test_validator import (
        run_tests, validate_red_phase, run_validation_gate
    )

    # Run tests
    result = run_tests(Path("tests"))

    # TDD red phase validation (before implementation)
    validate_red_phase(result)  # Raises if tests pass prematurely

    # Validation gate (after implementation)
    gate_result = run_validation_gate(Path("tests"))
    if not gate_result["gate_passed"]:
        ...  # Block commit

Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""

import re
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


def run_tests(
    test_path: Path,
    timeout: int = 300,
    pytest_args: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Execute pytest and return results.

    Runs pytest with minimal verbosity (--tb=line -q) to prevent
    subprocess pipe deadlock (Issue #90). Reduces output from ~2,300
    lines to ~50 lines.

    Args:
        test_path: Path to test directory or file
        timeout: Timeout in seconds (default 5 minutes)
        pytest_args: Optional custom pytest arguments

    Returns:
        Dict with test results:
        {
            "success": bool,
            "passed": int,
            "failed": int,
            "errors": int,
            "skipped": int,
            "total": int,
            "stdout": str,
            "stderr": str,
            "no_tests_collected": bool
        }

    Raises:
        TimeoutError: If tests exceed timeout
        RuntimeError: If pytest not installed

    Example:
        >>> result = run_tests(Path("tests"))
        >>> result["passed"]
        42
    """
    # Default pytest args (minimal verbosity)
    if pytest_args is None:
        pytest_args = ["--tb=line", "-q"]

    # Build command
    cmd = ["pytest", str(test_path)] + pytest_args

    try:
        # Execute pytest
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False  # Handle return codes manually
        )

        # Parse output
        parsed = parse_pytest_output(result.stdout)
        parsed["stdout"] = result.stdout
        parsed["stderr"] = result.stderr

        # Check for no tests collected (pytest returns 5)
        if result.returncode == 5:
            parsed["no_tests_collected"] = True
            parsed["success"] = False
        else:
            parsed["no_tests_collected"] = False
            # Success if returncode is 0
            parsed["success"] = result.returncode == 0

        return parsed

    except FileNotFoundError:
        raise RuntimeError(
            "pytest not installed. Install with: pip install pytest"
        )
    except subprocess.TimeoutExpired:
        raise TimeoutError(
            f"Tests exceeded timeout of {timeout} seconds ({timeout // 60} minutes)"
        )


def parse_pytest_output(output: str) -> Dict[str, int]:
    """Parse pytest output for test counts.

    Extracts counts from pytest summary line:
    "10 passed, 2 failed, 1 error in 1.23s"

    Args:
        output: pytest stdout

    Returns:
        Dict with counts: {passed, failed, errors, skipped, total}

    Example:
        >>> output = "10 passed, 2 failed, 1 error in 1.23s"
        >>> parse_pytest_output(output)
        {"passed": 10, "failed": 2, "errors": 1, "skipped": 0, "total": 13}
    """
    result = {
        "passed": 0,
        "failed": 0,
        "errors": 0,
        "skipped": 0,
        "total": 0
    }

    # Try to find summary line (last line with counts)
    # Pattern: "N passed, M failed, K error in X.XXs"
    summary_pattern = r'(\d+)\s+passed|(\d+)\s+failed|(\d+)\s+error|(\d+)\s+skipped'
    matches = re.findall(summary_pattern, output, re.IGNORECASE)

    for match in matches:
        if match[0]:  # passed
            result["passed"] = int(match[0])
        elif match[1]:  # failed
            result["failed"] = int(match[1])
        elif match[2]:  # error
            result["errors"] = int(match[2])
        elif match[3]:  # skipped
            result["skipped"] = int(match[3])

    # Try to find "collected N items"
    collected_pattern = r'collected\s+(\d+)\s+items?'
    collected_match = re.search(collected_pattern, output, re.IGNORECASE)
    if collected_match:
        result["total"] = int(collected_match.group(1))
    else:
        # Fallback: sum counts. Include skipped so the fallback agrees
        # with the "collected N items" path, which counts skipped tests.
        result["total"] = (
            result["passed"] + result["failed"]
            + result["errors"] + result["skipped"]
        )

    return result


def validate_red_phase(test_result: Dict[str, Any]) -> None:
    """Validate TDD red phase - tests should fail before implementation.

    Ensures tests fail initially (no implementation exists yet).
    Blocks workflow if all tests pass prematurely.

    Args:
        test_result: Test result from run_tests()

    Raises:
        ValueError: If tests pass prematurely (TDD red phase violation)
        ValueError: If no tests found

    Example:
        >>> result = {"success": True, "passed": 10, "failed": 0, "errors": 0}
        >>> validate_red_phase(result)
        ValueError: TDD red phase violation: tests should fail before implementation
    """
    # Check for premature pass (all tests pass)
    # Note: Don't check total==0 here because test_result may not have
    # a "total" field
    passed = test_result.get("passed", 0)
    failed = test_result.get("failed", 0)
    errors = test_result.get("errors", 0)

    # If all tests pass (no failures or errors), that's a red phase violation
    if test_result.get("success", False) and failed == 0 and errors == 0 and passed > 0:
        raise ValueError(
            "TDD red phase violation: All tests pass, but implementation doesn't exist yet. "
            "Tests should fail initially (import errors, assertion failures) before implementation."
        )

    # Check for no tests (passed + failed + errors == 0)
    if passed == 0 and failed == 0 and errors == 0:
        raise ValueError(
            "No tests found. TDD requires tests to be written first."
        )

    # Valid red phase: Some failures or errors exist
    # (Import errors are expected when modules don't exist yet)


def detect_syntax_errors(pytest_output: str) -> Tuple[bool, List[str]]:
    """Detect syntax errors in test files.

    Distinguishes syntax/import errors from runtime errors
    (assertions, exceptions).

    Args:
        pytest_output: pytest stdout/stderr

    Returns:
        Tuple of (has_syntax_errors, error_details)

    Example:
        >>> output = "SyntaxError: invalid syntax on line 10"
        >>> has_errors, details = detect_syntax_errors(output)
        >>> has_errors
        True
    """
    errors = []
    has_syntax_errors = False

    # Patterns for syntax errors
    syntax_patterns = [
        r'SyntaxError:',
        r'ImportError:',
        r'ModuleNotFoundError:',
        r'IndentationError:',
        r'TabError:'
    ]

    # Search for syntax errors
    for pattern in syntax_patterns:
        matches = re.findall(f'({pattern}.*)', pytest_output, re.MULTILINE)
        if matches:
            has_syntax_errors = True
            errors.extend(matches)

    return has_syntax_errors, errors


def validate_test_syntax(test_result: Dict[str, Any]) -> None:
    """Validate test files for syntax errors.

    Blocks workflow if syntax errors detected (not runtime errors).

    Args:
        test_result: Test result from run_tests()

    Raises:
        SyntaxError: If test files contain syntax errors

    Example:
        >>> result = {"stderr": "SyntaxError: invalid syntax"}
        >>> validate_test_syntax(result)
        SyntaxError: Test files contain syntax errors
    """
    combined_output = test_result.get("stdout", "") + test_result.get("stderr", "")
    has_errors, details = detect_syntax_errors(combined_output)

    if has_errors:
        error_msg = "Test files contain syntax errors:\n" + "\n".join(details[:5])
        raise SyntaxError(error_msg)


def run_validation_gate(test_path: Path, timeout: int = 300) -> Dict[str, Any]:
    """Run validation gate before code review.

    Executes all tests and determines if commit should proceed.
    Blocks on:
    - Test failures
    - Syntax errors
    - No tests found

    Args:
        test_path: Path to test directory
        timeout: Test timeout in seconds

    Returns:
        Dict with validation results:
        {
            "gate_passed": bool,
            "all_tests_passed": bool,
            "block_commit": bool,
            "passed": int,
            "failed": int,
            "errors": int,
            "message": str
        }

    Example:
        >>> result = run_validation_gate(Path("tests"))
        >>> if not result["gate_passed"]:
        ...     print("Blocking commit")
    """
    # Run tests
    try:
        test_result = run_tests(test_path, timeout)
    except Exception as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "passed": 0,
            "failed": 0,
            "errors": 0,
            "message": f"Test execution failed: {e}"
        }

    # Check syntax errors
    try:
        validate_test_syntax(test_result)
    except SyntaxError as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "passed": test_result.get("passed", 0),
            "failed": test_result.get("failed", 0),
            "errors": test_result.get("errors", 0),
            "message": str(e)
        }

    # Check if all tests passed
    all_passed = test_result.get("success", False)
    block_commit = not all_passed

    return {
        "gate_passed": all_passed,
        "all_tests_passed": all_passed,
        "block_commit": block_commit,
        "passed": test_result.get("passed", 0),
        "failed": test_result.get("failed", 0),
        "errors": test_result.get("errors", 0),
        "message": "All tests passed" if all_passed
        else f"{test_result.get('failed', 0)} tests failed"
    }


def validate_coverage(coverage_output: str, threshold: float = 80.0) -> None:
    """Validate test coverage meets threshold.

    Parses pytest-cov output and blocks if coverage below threshold.

    Args:
        coverage_output: pytest --cov output
        threshold: Minimum coverage percentage (default 80%)

    Raises:
        ValueError: If coverage below threshold

    Example:
        >>> output = "TOTAL 100 15 85%"
        >>> validate_coverage(output, threshold=80)  # Passes (85% >= 80%)
    """
    # Parse coverage from output
    # Format: "TOTAL 100 15 85%"
    # Accept fractional percentages too ("85.5%") — coverage.py emits
    # them with --cov-report's precision option; the old \d+ pattern
    # silently skipped validation for those lines.
    pattern = r'TOTAL\s+\d+\s+\d+\s+([\d.]+)%'
    match = re.search(pattern, coverage_output)

    if not match:
        # Can't determine coverage, skip validation
        return

    coverage = float(match.group(1))

    if coverage < threshold:
        raise ValueError(
            f"Coverage below {threshold}%: {coverage:g}%. "
            f"Add more tests to reach {threshold}% coverage."
        )