TradingAgents/.claude/lib/acceptance_criteria_parser.py

#!/usr/bin/env python3
"""
Acceptance Criteria Parser - Extract and format acceptance criteria from GitHub issues.

Fetches GitHub issue bodies via gh CLI, parses acceptance criteria sections,
and formats criteria for UAT test generation with Gherkin-style scenarios.

Key Features:
1. Fetch issue body via gh CLI (subprocess with security)
2. Parse categorized acceptance criteria (### headers)
3. Format criteria as Gherkin-style test scenarios
4. Handle malformed/missing criteria gracefully
5. Security: subprocess list args (no shell=True), input validation

Usage:
    from acceptance_criteria_parser import (
        fetch_issue_body,
        parse_acceptance_criteria,
        format_for_uat
    )

    # Full pipeline
    issue_body = fetch_issue_body(161)
    criteria = parse_acceptance_criteria(issue_body)
    uat_scenarios = format_for_uat(criteria)

Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""

import re
import subprocess
from typing import Dict, List


def fetch_issue_body(issue_number: int) -> str:
    """Fetch a GitHub issue body via the gh CLI.

    Runs ``gh issue view <n> --json body --jq .body`` in the current working
    directory and returns whatever the command printed on stdout (unmodified,
    so it normally ends with a newline).

    Args:
        issue_number: GitHub issue number. Must be a positive ``int``;
            ``bool`` values are rejected even though ``bool`` subclasses
            ``int``, because ``True`` would otherwise be sent to gh as the
            literal string "True".

    Returns:
        Issue body as string

    Raises:
        ValueError: If issue_number is not a positive integer, or gh reports
            that the issue could not be resolved / was not found.
        RuntimeError: If gh CLI is not installed, the call times out (30s),
            a network error is reported, or gh fails for any other reason.

    Security:
        - Uses subprocess.run with list args (no shell=True)
        - Validates issue_number is positive integer
        - No credential exposure

    Example:
        >>> body = fetch_issue_body(161)  # doctest: +SKIP
        >>> "Acceptance Criteria" in body  # doctest: +SKIP
        True
    """
    # Validate before anything reaches the command line. bool is checked
    # explicitly because isinstance(True, int) is True.
    if (
        isinstance(issue_number, bool)
        or not isinstance(issue_number, int)
        or issue_number <= 0
    ):
        raise ValueError(f"Invalid issue number: {issue_number}")

    cmd = [
        "gh", "issue", "view", str(issue_number),
        "--json", "body",
        "--jq", ".body",
    ]

    # Only the subprocess call itself can raise the two exceptions handled
    # here, so keep the try body down to that single call.
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,  # Return codes are classified manually below
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            "gh CLI not installed. Install with: brew install gh (macOS) or see https://cli.github.com/"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(f"Timeout fetching issue #{issue_number}") from exc

    if result.returncode == 0:
        return result.stdout

    stderr_lower = result.stderr.lower()
    # Network failures are tested first: "could not resolve host" would
    # otherwise be caught by the broader "could not resolve" check that
    # identifies a missing issue.
    if "could not resolve host" in stderr_lower or "network" in stderr_lower:
        raise RuntimeError(f"Network error fetching issue #{issue_number}: {result.stderr}")
    if "could not resolve" in stderr_lower or "not found" in stderr_lower:
        raise ValueError(f"Issue #{issue_number} not found")
    raise RuntimeError(f"gh CLI error: {result.stderr}")


# Heading that opens the section (case-insensitive). Two or more '#' are
# accepted so a section nested one level down is still recognised.
_AC_HEADING_RE = re.compile(r"^\s*#{2,}\s+Acceptance\s+Criteria\s*$", re.IGNORECASE)
# A level-2 heading ("## X") closes the section; "###" and deeper do not.
_SECTION_END_RE = re.compile(r"## [^#]")
# Level-3 (or deeper) heading inside the section names a category.
_CATEGORY_HEADING_RE = re.compile(r"^\s*#{3,}\s+(\S.*?)\s*$")
# A Markdown bullet: dash, at least one whitespace character, then content.
# Requiring the whitespace keeps "---" rules and "--flag" text out.
_BULLET_RE = re.compile(r"-\s+(.+)$")
# Leading task-list checkbox: "[ ]", "[x]" or "[X]".
_CHECKBOX_RE = re.compile(r"^\[[ xX]\]\s*")


def parse_acceptance_criteria(issue_body: str) -> Dict[str, List[str]]:
    """Parse acceptance criteria from GitHub issue body.

    Locates the "## Acceptance Criteria" heading and walks the following
    lines until the next level-2 heading (or the end of the body). Lines are
    processed one at a time, so an empty section or an empty category can
    never absorb the bullets that belong to the heading after it.

    Within the section:
    - "### <name>" headings start a category.
    - Bullet items are attached to the most recent category; bullets that
      appear before any category heading are collected under "General".
    - Categories without any criteria are omitted.
    - Items of a category name that appears more than once are merged.

    Args:
        issue_body: GitHub issue body text ("\\n" or "\\r\\n" line endings).
            ``None`` or an empty string yields an empty result.

    Returns:
        Dict mapping category name to list of criteria strings.
        Empty dict if no acceptance criteria found.

    Examples:
        Categorized:
        >>> body = '''
        ... ## Acceptance Criteria
        ... ### Fresh Install
        ... - [ ] Feature works
        ... - [ ] Tests pass
        ... '''
        >>> criteria = parse_acceptance_criteria(body)
        >>> criteria["Fresh Install"]
        ['Feature works', 'Tests pass']

        Uncategorized:
        >>> body = '''
        ... ## Acceptance Criteria
        ... - [ ] Feature works
        ... - [ ] Tests pass
        ... '''
        >>> criteria = parse_acceptance_criteria(body)
        >>> criteria["General"]
        ['Feature works', 'Tests pass']
    """
    if not issue_body:
        return {}

    lines = issue_body.split("\n")

    # Find the heading line; everything after it is candidate content.
    for heading_index, line in enumerate(lines):
        if _AC_HEADING_RE.match(line):
            break
    else:
        return {}

    result: Dict[str, List[str]] = {}
    category = "General"  # Bucket for bullets seen before any "###" heading

    for line in lines[heading_index + 1:]:
        if _SECTION_END_RE.match(line):
            break  # Next "## " section starts here

        category_heading = _CATEGORY_HEADING_RE.match(line)
        if category_heading:
            category = category_heading.group(1)
            continue

        items = _extract_criteria_items(line)
        if items:
            # setdefault keeps first-seen category order and only creates an
            # entry once a category actually has a criterion.
            result.setdefault(category, []).extend(items)

    return result


def _extract_criteria_items(text: str) -> List[str]:
    """Extract individual criteria items from text.

    Handles both checkbox format (- [ ], - [x], - [X]) and plain bullet
    format (- item). Strips checkbox markers and cleans whitespace.

    Lines are ignored when they are not a dash bullet followed by whitespace
    (so horizontal rules such as "---" are not criteria), when nothing is
    left after removing the checkbox, when the text starts with "(" (treated
    as a placeholder/note), or when it is "no criteria defined" in any case.

    Args:
        text: Text containing criteria items, one per line

    Returns:
        List of cleaned criteria strings, in input order
    """
    criteria = []

    for raw_line in text.split("\n"):
        bullet = _BULLET_RE.match(raw_line.strip())
        if not bullet:
            continue

        # Remove the checkbox separately so an empty "- [ ]" becomes an
        # empty string (and is skipped) rather than the criterion "[ ]".
        criterion = _CHECKBOX_RE.sub("", bullet.group(1), count=1).strip()

        if not criterion:
            continue
        if criterion.startswith("(") or criterion.lower() == "no criteria defined":
            continue  # Placeholder / noise entries

        criteria.append(criterion)

    return criteria


def format_for_uat(criteria: Dict[str, List[str]]) -> List[Dict[str, str]]:
    """Format acceptance criteria as UAT test scenarios.

    Converts each criterion into a Gherkin-style test scenario with:
    - category: Original category name
    - criterion: Original criterion text
    - scenario_name: Valid pytest function name (test_*)

    Scenario names are unique within the returned list: when two criteria
    normalise to the same name (identical text, text differing only in
    punctuation/case, or long text cut at the same point), later ones get a
    numeric suffix ("_2", "_3", ...) so generated test functions cannot
    shadow each other.

    Args:
        criteria: Dict mapping category to list of criteria

    Returns:
        List of scenario dicts, one per criterion, in input order

    Example:
        >>> criteria = {"Fresh Install": ["Feature works correctly"]}
        >>> scenarios = format_for_uat(criteria)
        >>> scenarios[0]["scenario_name"]
        'test_fresh_install_feature_works_correctly'
        >>> scenarios[0]["category"]
        'Fresh Install'
    """
    scenarios = []
    used_names = set()

    for category, criteria_list in criteria.items():
        for criterion in criteria_list:
            base_name = _generate_scenario_name(category, criterion)

            # Disambiguate collisions; loop so the suffixed form is also
            # checked against names already handed out.
            scenario_name = base_name
            suffix = 2
            while scenario_name in used_names:
                scenario_name = f"{base_name}_{suffix}"
                suffix += 1
            used_names.add(scenario_name)

            scenarios.append({
                "category": category,
                "criterion": criterion,
                "scenario_name": scenario_name,
            })

    return scenarios


def _generate_scenario_name(category: str, criterion: str) -> str:
    """Generate valid pytest scenario name from category and criterion.

    Lowercases "<category> <criterion>", replaces every run of characters
    outside ``a-z0-9_`` with a single underscore (non-ASCII letters are
    therefore dropped), trims and collapses underscores, truncates the
    result to 97 characters and prepends "test_".

    Args:
        category: Category name (e.g., "Fresh Install")
        criterion: Criterion text (e.g., "Feature works correctly")

    Returns:
        Valid pytest function name (e.g., "test_fresh_install_feature_works_correctly"),
        at most 102 characters long. If the input contains no ASCII letters
        or digits the result is just "test_".
    """
    max_body_length = 97  # 97 + len("test_") = 102 characters in total

    name = re.sub(r"[^a-z0-9_]+", "_", f"{category} {criterion}".lower())
    name = re.sub(r"_+", "_", name).strip("_")

    # Truncation can cut right after a word separator; strip again so the
    # name never ends with a dangling underscore.
    name = name[:max_body_length].rstrip("_")

    return f"test_{name}"