TradingAgents/.claude/lib/test_tier_organizer.py

#!/usr/bin/env python3
"""
Test Tier Organizer - Classify and organize tests into unit/integration/uat tiers.

Analyzes test content to determine tier (unit/integration/uat), creates tier
directory structure, and moves tests to appropriate locations.

Key Features:
1. Intelligent tier classification (content and filename analysis)
2. Directory structure creation (tests/{unit,integration,uat}/)
3. Test file organization with collision handling
4. Test pyramid validation (unit > integration > uat)
5. Statistics and reporting

Directory Structure:
    tests/
    ├── unit/           # Unit tests (70-80%)
    │   ├── lib/        # Library tests
    │   └── ...
    ├── integration/    # Integration tests (15-20%)
    └── uat/            # UAT tests (5-10%)

Usage:
    from test_tier_organizer import (
        determine_tier,
        create_tier_directories,
        organize_tests_by_tier,
        get_tier_statistics
    )

    # Create directory structure
    create_tier_directories(Path("project_root"))

    # Organize tests
    test_files = [Path("test_example.py"), ...]
    organize_tests_by_tier(test_files)

    # Get statistics
    stats = get_tier_statistics(Path("tests"))

Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""

import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple


def determine_tier(test_content: str) -> str:
    """Determine test tier from test file content.

    Analyzes test content for tier indicators:
    - UAT: pytest-bdd, Gherkin (scenario, given, when, then), @scenario/@given/@when/@then
    - Integration: multiple imports, subprocess, file I/O, "integration" in function name
    - Unit: default (single function, mocking, isolated)

    Args:
        test_content: Test file content as string

    Returns:
        Tier name: "unit", "integration", or "uat"

    Example:
        >>> content = "from pytest_bdd import scenario\\n"
        >>> determine_tier(content)
        'uat'
    """
    content_lower = test_content.lower()

    # UAT indicators (highest priority) - STRONG signals only
    # Must have pytest-bdd imports or Gherkin decorators
    strong_uat_indicators = [
        'pytest_bdd',
        'from pytest_bdd import',
        '@scenario',
        '@given',
        '@when',
        '@then',
        'def test_uat_',  # Explicit UAT naming
    ]

    for indicator in strong_uat_indicators:
        if indicator in content_lower:
            return "uat"

    # Integration indicators (medium priority)
    integration_indicators = [
        'subprocess.run',
        'subprocess.call',
        'def test_integration_',
        'test_full_pipeline',
        'test_end_to_end',
        'tmp_path',  # File I/O
        'tmpdir',
        'open(',  # File operations
        'file.write',
        'file.read'
    ]

    # Count module imports (integration tests import multiple modules)
    import_count = len(re.findall(r'^\s*from\s+\w+.*import', test_content, re.MULTILINE))
    if import_count >= 3:  # 3+ imports suggests integration
        return "integration"

    for indicator in integration_indicators:
        if indicator in content_lower:
            return "integration"

    # Default to unit
    return "unit"


def determine_tier_from_filename(filename: str) -> str:
    """Infer the test tier from markers embedded in a file name.

    The name is lower-cased and searched for tier markers, UAT first:
    - contains "test_uat_" or "_uat."                 -> uat
    - contains "test_integration_" or "_integration." -> integration
    - anything else                                   -> unit (default)

    Args:
        filename: Test filename (e.g., "test_integration_workflow.py")

    Returns:
        Tier name: "unit", "integration", or "uat"

    Example:
        >>> determine_tier_from_filename("test_integration_workflow.py")
        'integration'
    """
    lowered = filename.lower()

    # Ordered by priority: the first tier with a matching marker wins.
    tier_markers = (
        ("uat", ('test_uat_', '_uat.')),
        ("integration", ('test_integration_', '_integration.')),
    )
    for tier, markers in tier_markers:
        if any(marker in lowered for marker in markers):
            return tier

    return "unit"


def create_tier_directories(base_path: Path, subdirs: Optional[List[str]] = None) -> None:
    """Create the tests/{unit,integration,uat}/ package layout under base_path.

    Creates (all idempotently):
    - tests/
    - tests/unit/, tests/integration/, tests/uat/
    - each requested subdirectory inside every tier
    - an __init__.py in every directory created here, including the
      intermediate directories of a nested subdir such as "lib/parsers"

    Args:
        base_path: Project root directory
        subdirs: Optional list of subdirectories to create in each tier
            (e.g., ["lib"]). None or an empty list creates only the tiers.

    Raises:
        PermissionError: If directory or file creation fails due to
            permissions; the original error is attached as the cause.

    Example:
        >>> create_tier_directories(Path("/tmp/project"), subdirs=["lib"])
        # Creates: /tmp/project/tests/{unit,integration,uat}/lib/
    """
    tests_dir = base_path / "tests"

    # Collect every directory that must exist as a package, parents first.
    package_dirs = [tests_dir]
    for tier in ("unit", "integration", "uat"):
        tier_dir = tests_dir / tier
        package_dirs.append(tier_dir)

        for subdir in subdirs or ():
            leaf = tier_dir / subdir
            # Walk up from the leaf to the tier directory so intermediate
            # levels of a nested subdir also receive an __init__.py.
            chain = [leaf]
            for ancestor in leaf.parents:
                if ancestor == tier_dir:
                    break
                chain.append(ancestor)
            else:
                # The leaf is not below the tier directory (e.g. an absolute
                # subdir); only the leaf itself is created.
                chain = [leaf]
            package_dirs.extend(reversed(chain))

    try:
        for package_dir in package_dirs:
            package_dir.mkdir(parents=True, exist_ok=True)
            (package_dir / "__init__.py").touch(exist_ok=True)
    except PermissionError as e:
        raise PermissionError(f"Permission denied creating tier directories: {e}") from e


def move_test_to_tier(
    test_file: Path,
    tier: str,
    target_subdir: Optional[str] = None,
    base_path: Optional[Path] = None
) -> Path:
    """Move a test file into <base_path>/tests/<tier>/[<target_subdir>/].

    When base_path is not given it is derived from the file's location:
    - if the file sits directly in the current working directory, the file's
      own parent is used;
    - otherwise the nearest ancestor directory (the filesystem root excluded)
      that contains a tests/ entry is used;
    - if no such ancestor exists, the file's parent is used.

    Args:
        test_file: Path to test file
        tier: Target tier ("unit", "integration", "uat")
        target_subdir: Optional subdirectory within tier (e.g., "lib")
        base_path: Optional project root; derived as described above when None

    Returns:
        Path to moved file

    Raises:
        FileNotFoundError: If test_file doesn't exist
        FileExistsError: If target file already exists
        ValueError: If tier is invalid

    Example:
        >>> move_test_to_tier(Path("test_parser.py"), "unit", target_subdir="lib")
        Path("tests/unit/lib/test_parser.py")
    """
    # Validate test file exists
    if not test_file.exists():
        raise FileNotFoundError(f"Test file not found: {test_file}")

    # Validate tier
    if tier not in ("unit", "integration", "uat"):
        raise ValueError(f"Invalid tier: {tier}. Must be 'unit', 'integration', or 'uat'")

    if base_path is None:
        # Resolve the parent before comparing or walking upward. For a
        # relative path such as "tests/test_x.py" the unresolved chain ends at
        # Path("."), whose parent is itself, so no real ancestor would ever be
        # inspected and the file would land in tests/tests/<tier>/.
        start_dir = test_file.parent.resolve()

        # Default: the file's own directory, kept in the caller's (possibly
        # relative) form so returned paths keep the same shape.
        base_path = test_file.parent

        if start_dir != Path.cwd():
            candidate = start_dir
            # Stops before the filesystem root, which is never a project root.
            while candidate != candidate.parent:
                if (candidate / "tests").exists():
                    base_path = candidate
                    break
                candidate = candidate.parent

    # Build target path
    target_dir = base_path / "tests" / tier
    if target_subdir:
        target_dir = target_dir / target_subdir
    target_path = target_dir / test_file.name

    # Refuse to overwrite an existing test
    if target_path.exists():
        raise FileExistsError(f"Target file already exists: {target_path}")

    # Ensure target directory exists, then move the file into it
    target_dir.mkdir(parents=True, exist_ok=True)
    test_file.rename(target_path)

    return target_path


def organize_tests_by_tier(test_files: List[Path], base_path: Path = None) -> Dict[str, List[Path]]:
    """Organize multiple test files into tier directories.

    Analyzes each test file, determines tier, and moves to appropriate directory.

    Args:
        test_files: List of test file paths
        base_path: Optional base path for tier directories

    Returns:
        Dict mapping tier name to list of organized file paths

    Raises:
        ValueError: If path traversal is detected

    Security:
        - Validates all paths are within base_path (CWE-22 prevention)
        - Uses Path.resolve() for canonicalization
        - Rejects symlinks and parent directory references

    Example:
        >>> files = [Path("test_unit.py"), Path("test_integration.py")]
        >>> result = organize_tests_by_tier(files)
        >>> result["unit"]
        [Path("tests/unit/test_unit.py")]
    """
    result = {
        "unit": [],
        "integration": [],
        "uat": []
    }

    # Establish safe base path for path traversal prevention
    if base_path is None:
        base_path = Path.cwd()
    safe_base = base_path.resolve()

    for test_file in test_files:
        if not test_file.exists():
            continue

        # Security: Validate file is within base_path (CWE-22 prevention)
        try:
            safe_file = test_file.resolve()
            if not str(safe_file).startswith(str(safe_base)):
                raise ValueError(f"Path traversal blocked: {test_file}")
        except (OSError, ValueError) as e:
            # Skip files that can't be resolved or are outside base
            continue

        # Read file content to determine tier
        try:
            content = test_file.read_text()
        except Exception:
            # Fallback to filename analysis
            content = ""

        # Determine tier (content analysis takes precedence over filename)
        if content:
            tier = determine_tier(content)
        else:
            tier = determine_tier_from_filename(test_file.name)

        # Determine subdirectory from original path safely
        # Only check if "lib" is an actual path component (not substring)
        # e.g., tests/unit/lib/test_parser.py -> target_subdir="lib"
        target_subdir = None
        path_parts = test_file.parts
        if "lib" in path_parts:
            target_subdir = "lib"

        # Move to tier
        try:
            moved_path = move_test_to_tier(test_file, tier, target_subdir, base_path)
            result[tier].append(moved_path)
        except FileExistsError:
            # Skip files that already exist in target
            pass

    return result


def get_tier_statistics(tests_path: Path) -> Dict[str, int]:
    """Count test files in each tier directory below tests_path.

    A test file is anything matching ``test_*.py`` at any depth inside
    ``tests_path/<tier>``. Missing tier directories count as zero, and a
    missing tests_path yields all-zero statistics.

    Args:
        tests_path: Path to tests/ directory

    Returns:
        Dict with counts: {tier: count, "total": total_count}

    Example:
        >>> stats = get_tier_statistics(Path("tests"))
        >>> stats
        {"unit": 42, "integration": 10, "uat": 5, "total": 57}
    """
    tier_names = ("unit", "integration", "uat")

    counts = dict.fromkeys(tier_names, 0)
    counts["total"] = 0

    if tests_path.exists():
        for name in tier_names:
            directory = tests_path / name
            if directory.exists():
                # Recursive match so nested folders (e.g. unit/lib) are included.
                counts[name] = sum(1 for _ in directory.rglob("test_*.py"))

        counts["total"] = sum(counts[name] for name in tier_names)

    return counts


def validate_test_pyramid(tests_path: Path) -> Tuple[bool, List[str]]:
    """Check that test counts form a pyramid (unit >= integration >= uat).

    Counts come from get_tier_statistics(tests_path). One warning is produced
    for each violated rule: integration outnumbering unit, UAT outnumbering
    integration, UAT outnumbering unit, and no tests at all. The structure is
    valid only when no warning is produced.

    Args:
        tests_path: Path to tests/ directory

    Returns:
        Tuple of (is_valid, warnings)

    Example:
        >>> is_valid, warnings = validate_test_pyramid(Path("tests"))
        >>> is_valid
        False
        >>> warnings
        ["Test pyramid inverted: integration (10) > unit (5). Aim for 70-80% unit tests."]
    """
    counts = get_tier_statistics(tests_path)
    unit_count = counts["unit"]
    integration_count = counts["integration"]
    uat_count = counts["uat"]

    # (rule violated?, warning text) pairs, in reporting order.
    checks = [
        (
            integration_count > unit_count,
            f"Test pyramid inverted: integration ({integration_count}) > unit ({unit_count}). "
            "Aim for 70-80% unit tests.",
        ),
        (
            uat_count > integration_count,
            f"Test pyramid inverted: UAT ({uat_count}) > integration ({integration_count}). "
            "UAT tests should be 5-10% of total.",
        ),
        (
            uat_count > unit_count,
            f"Test pyramid severely inverted: UAT ({uat_count}) > unit ({unit_count}). "
            "Unit tests should form the base of the pyramid.",
        ),
        (counts["total"] == 0, "No tests found"),
    ]

    warnings = [message for violated, message in checks if violated]

    return not warnings, warnings