TradingAgents/.claude/lib/test_validator.py

389 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Test Validator - Execute tests and validate TDD workflow.
Runs pytest, parses results, enforces TDD red phase validation, detects syntax
errors, and validates coverage thresholds. Critical for quality gates before
code review and commit.
Key Features:
1. Execute pytest with minimal verbosity (--tb=line -q, Issue #90)
2. Parse pytest output for pass/fail/error counts
3. Enforce TDD red phase (tests must fail before implementation)
4. Detect syntax errors vs runtime errors
5. Validate coverage thresholds
6. Validation gate for blocking commits
Usage:
from test_validator import (
run_tests,
validate_red_phase,
run_validation_gate
)
# Run tests
result = run_tests(Path("tests"))
# TDD red phase validation (before implementation)
validate_red_phase(result) # Raises if tests pass prematurely
# Validation gate (after implementation)
gate_result = run_validation_gate(Path("tests"))
if not gate_result["gate_passed"]:
# Block commit
Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""
import re
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
def run_tests(
    test_path: Path,
    timeout: int = 300,
    pytest_args: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Execute pytest on *test_path* and return parsed results.

    Runs pytest with minimal verbosity (--tb=line -q) to prevent subprocess
    pipe deadlock (Issue #90). Reduces output from ~2,300 lines to ~50 lines.

    Args:
        test_path: Path to test directory or file.
        timeout: Timeout in seconds (default 5 minutes).
        pytest_args: Optional custom pytest arguments; defaults to
            ["--tb=line", "-q"] when None.

    Returns:
        Dict with test results:
            {
                "success": bool,
                "passed": int,
                "failed": int,
                "errors": int,
                "skipped": int,
                "total": int,
                "stdout": str,
                "stderr": str,
                "no_tests_collected": bool
            }

    Raises:
        TimeoutError: If tests exceed timeout.
        RuntimeError: If pytest not installed.

    Example:
        >>> result = run_tests(Path("tests"))
        >>> result["passed"]
        42
    """
    # Default pytest args (minimal verbosity). Assigned per-call, so the
    # default list is never shared between invocations.
    if pytest_args is None:
        pytest_args = ["--tb=line", "-q"]

    cmd = ["pytest", str(test_path)] + pytest_args

    # Only the subprocess call can raise the exceptions we translate below,
    # so keep the try body minimal.
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False  # Handle return codes manually
        )
    except FileNotFoundError:
        raise RuntimeError(
            "pytest not installed. Install with: pip install pytest"
        )
    except subprocess.TimeoutExpired:
        raise TimeoutError(
            f"Tests exceeded timeout of {timeout} seconds ({timeout // 60} minutes)"
        )

    # Parse counts from stdout and attach the raw streams for callers.
    parsed = parse_pytest_output(result.stdout)
    parsed["stdout"] = result.stdout
    parsed["stderr"] = result.stderr

    # pytest returns 5 when no tests were collected; treat that as failure.
    if result.returncode == 5:
        parsed["no_tests_collected"] = True
        parsed["success"] = False
    else:
        parsed["no_tests_collected"] = False
        # Success only on a fully clean run (returncode 0).
        parsed["success"] = result.returncode == 0
    return parsed
def parse_pytest_output(output: str) -> Dict[str, int]:
    """Parse pytest output for test counts.

    Extracts counts from pytest summary line:
        "10 passed, 2 failed, 1 error in 1.23s"

    Args:
        output: pytest stdout.

    Returns:
        Dict with counts: {passed, failed, errors, skipped, total}

    Example:
        >>> output = "10 passed, 2 failed, 1 error in 1.23s"
        >>> parse_pytest_output(output)
        {'passed': 10, 'failed': 2, 'errors': 1, 'skipped': 0, 'total': 13}
    """
    result = {
        "passed": 0,
        "failed": 0,
        "errors": 0,
        "skipped": 0,
        "total": 0
    }

    # Find summary counts anywhere in the output.
    # Pattern: "N passed, M failed, K error in X.XXs"
    summary_pattern = r'(\d+)\s+passed|(\d+)\s+failed|(\d+)\s+error|(\d+)\s+skipped'
    matches = re.findall(summary_pattern, output, re.IGNORECASE)

    for match in matches:
        if match[0]:  # passed
            result["passed"] = int(match[0])
        elif match[1]:  # failed
            result["failed"] = int(match[1])
        elif match[2]:  # error
            result["errors"] = int(match[2])
        elif match[3]:  # skipped
            result["skipped"] = int(match[3])

    # Prefer pytest's own "collected N items" count when present.
    collected_pattern = r'collected\s+(\d+)\s+items?'
    collected_match = re.search(collected_pattern, output, re.IGNORECASE)
    if collected_match:
        result["total"] = int(collected_match.group(1))
    else:
        # Fallback: sum all outcome counts. Skipped tests were still
        # collected, so they belong in the total (previously omitted).
        result["total"] = (
            result["passed"] + result["failed"]
            + result["errors"] + result["skipped"]
        )
    return result
def validate_red_phase(test_result: Dict[str, Any]) -> None:
    """Validate TDD red phase - tests should fail before implementation.

    Ensures tests fail initially (no implementation exists yet). Blocks
    workflow if all tests pass prematurely.

    Args:
        test_result: Test result from run_tests().

    Raises:
        ValueError: If tests pass prematurely (TDD red phase violation).
        ValueError: If no tests found.

    Example:
        >>> result = {"success": True, "passed": 10, "failed": 0, "errors": 0}
        >>> validate_red_phase(result)
        ValueError: TDD red phase violation: tests should fail before implementation
    """
    # Counts default to 0 because test_result may omit fields entirely.
    passed = test_result.get("passed", 0)
    failed = test_result.get("failed", 0)
    errors = test_result.get("errors", 0)

    # Premature pass: a clean run with at least one passing test and no
    # failures or errors means the implementation already satisfies the
    # tests - a red-phase violation.
    premature_pass = (
        test_result.get("success", False)
        and passed > 0
        and failed == 0
        and errors == 0
    )
    if premature_pass:
        raise ValueError(
            "TDD red phase violation: All tests pass, but implementation doesn't exist yet. "
            "Tests should fail initially (import errors, assertion failures) before implementation."
        )

    # No outcomes at all means no tests were written.
    if not (passed or failed or errors):
        raise ValueError(
            "No tests found. TDD requires tests to be written first."
        )

    # Valid red phase: some failures or errors exist.
    # (Import errors are expected when modules don't exist yet.)
def detect_syntax_errors(pytest_output: str) -> Tuple[bool, List[str]]:
    """Detect syntax errors in test files.

    Distinguishes syntax/import errors from runtime errors (assertions,
    exceptions).

    Args:
        pytest_output: pytest stdout/stderr.

    Returns:
        Tuple of (has_syntax_errors, error_details).

    Example:
        >>> output = "SyntaxError: invalid syntax on line 10"
        >>> has_errors, details = detect_syntax_errors(output)
        >>> has_errors
        True
    """
    # Collection-time failure markers, scanned in this fixed order so the
    # returned details are grouped by error type.
    error_markers = (
        r'SyntaxError:',
        r'ImportError:',
        r'ModuleNotFoundError:',
        r'IndentationError:',
        r'TabError:',
    )

    details: List[str] = []
    for marker in error_markers:
        # Capture the marker plus the remainder of its line.
        details.extend(re.findall(f'({marker}.*)', pytest_output, re.MULTILINE))

    return bool(details), details
def validate_test_syntax(test_result: Dict[str, Any]) -> None:
    """Validate test files for syntax errors.

    Blocks workflow if syntax errors detected (not runtime errors).

    Args:
        test_result: Test result from run_tests().

    Raises:
        SyntaxError: If test files contain syntax errors.

    Example:
        >>> result = {"stderr": "SyntaxError: invalid syntax"}
        >>> validate_test_syntax(result)
        SyntaxError: Test files contain syntax errors
    """
    # Scan both streams; missing keys are treated as empty output.
    combined = test_result.get("stdout", "") + test_result.get("stderr", "")
    has_errors, details = detect_syntax_errors(combined)
    if not has_errors:
        return
    # Cap at the first five findings to keep the message readable.
    raise SyntaxError(
        "Test files contain syntax errors:\n" + "\n".join(details[:5])
    )
def run_validation_gate(test_path: Path, timeout: int = 300) -> Dict[str, Any]:
    """Run validation gate before code review.

    Executes all tests and determines if commit should proceed. Blocks on:
    - Test failures
    - Syntax errors
    - No tests found

    Args:
        test_path: Path to test directory.
        timeout: Test timeout in seconds.

    Returns:
        Dict with validation results:
            {
                "gate_passed": bool,
                "all_tests_passed": bool,
                "block_commit": bool,
                "passed": int,
                "failed": int,
                "errors": int,
                "message": str
            }

    Example:
        >>> result = run_validation_gate(Path("tests"))
        >>> if not result["gate_passed"]:
        ...     print("Blocking commit")
    """
    # Execute the test suite; any execution failure blocks the commit.
    try:
        test_result = run_tests(test_path, timeout)
    except Exception as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "passed": 0,
            "failed": 0,
            "errors": 0,
            "message": f"Test execution failed: {e}"
        }

    # Counts shared by every remaining outcome.
    counts = {
        "passed": test_result.get("passed", 0),
        "failed": test_result.get("failed", 0),
        "errors": test_result.get("errors", 0),
    }

    # Syntax errors in test files block the commit regardless of counts.
    try:
        validate_test_syntax(test_result)
    except SyntaxError as e:
        return {
            "gate_passed": False,
            "all_tests_passed": False,
            "block_commit": True,
            "message": str(e),
            **counts,
        }

    # Gate passes only when the whole run succeeded.
    all_passed = test_result.get("success", False)
    message = (
        "All tests passed"
        if all_passed
        else f"{test_result.get('failed', 0)} tests failed"
    )
    return {
        "gate_passed": all_passed,
        "all_tests_passed": all_passed,
        "block_commit": not all_passed,
        "message": message,
        **counts,
    }
def validate_coverage(coverage_output: str, threshold: float = 80.0) -> None:
    """Validate test coverage meets threshold.

    Parses pytest-cov output and blocks if coverage below threshold.

    Args:
        coverage_output: pytest --cov output.
        threshold: Minimum coverage percentage (default 80%).

    Raises:
        ValueError: If coverage below threshold.

    Example:
        >>> output = "TOTAL 100 15 85%"
        >>> validate_coverage(output, threshold=80)
        # Passes (85% >= 80%)
    """
    # pytest-cov summary row: "TOTAL <stmts> <miss> <percent>%"
    match = re.search(r'TOTAL\s+\d+\s+\d+\s+(\d+)%', coverage_output)
    if match is None:
        # Coverage summary not present; nothing to validate.
        return

    coverage = int(match.group(1))
    if coverage >= threshold:
        return

    raise ValueError(
        f"Coverage below {threshold}%: {coverage}%. "
        f"Add more tests to reach {threshold}% coverage."
    )