TradingAgents/.claude/lib/acceptance_criteria_parser.py

269 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
Acceptance Criteria Parser - Extract and format acceptance criteria from GitHub issues.
Fetches GitHub issue bodies via gh CLI, parses acceptance criteria sections,
and formats criteria for UAT test generation with Gherkin-style scenarios.
Key Features:
1. Fetch issue body via gh CLI (subprocess with security)
2. Parse categorized acceptance criteria (### headers)
3. Format criteria as Gherkin-style test scenarios
4. Handle malformed/missing criteria gracefully
5. Security: subprocess list args (no shell=True), input validation
Usage:
from acceptance_criteria_parser import (
fetch_issue_body,
parse_acceptance_criteria,
format_for_uat
)
# Full pipeline
issue_body = fetch_issue_body(161)
criteria = parse_acceptance_criteria(issue_body)
uat_scenarios = format_for_uat(criteria)
Date: 2025-12-25
Issue: #161 (Enhanced test-master for 3-tier coverage)
Agent: implementer
Phase: TDD Green (making tests pass)
"""
import re
import subprocess
from typing import Dict, List
def fetch_issue_body(issue_number: int) -> str:
    """Fetch a GitHub issue body via the gh CLI.

    Runs ``gh issue view <issue_number> --json body --jq .body`` and returns
    whatever the command printed on stdout.

    Args:
        issue_number: GitHub issue number. Must be a positive ``int``;
            ``bool`` values are rejected even though ``bool`` subclasses
            ``int``.

    Returns:
        Issue body as a string (gh stdout, unmodified).

    Raises:
        ValueError: If ``issue_number`` is not a positive integer, or gh
            reports that the issue could not be resolved / was not found.
        RuntimeError: If the gh CLI is not installed, the call exceeds the
            30 second timeout, gh reports a network error, or gh fails for
            any other reason.

    Security:
        - Uses subprocess.run with list args (no shell=True)
        - Validates issue_number before it is placed on the command line

    Example:
        >>> body = fetch_issue_body(161)  # doctest: +SKIP
        >>> "Acceptance Criteria" in body  # doctest: +SKIP
        True
    """
    # bool is a subclass of int, so True would otherwise pass validation and
    # be sent to gh as the literal string "True".
    if (
        isinstance(issue_number, bool)
        or not isinstance(issue_number, int)
        or issue_number <= 0
    ):
        raise ValueError(f"Invalid issue number: {issue_number}")

    cmd = [
        "gh", "issue", "view", str(issue_number),
        "--json", "body",
        "--jq", ".body",
    ]

    # Only the subprocess call itself can raise the exceptions handled here,
    # so keep the try body minimal and chain the original cause.
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,  # Return codes are classified manually below
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            "gh CLI not installed. Install with: brew install gh (macOS) or see https://cli.github.com/"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(f"Timeout fetching issue #{issue_number}") from exc

    if result.returncode != 0:
        stderr_lower = result.stderr.lower()
        # Network errors are checked first: "could not resolve host" would
        # otherwise be caught by the generic "could not resolve" test below.
        if "could not resolve host" in stderr_lower or "network" in stderr_lower:
            raise RuntimeError(f"Network error fetching issue #{issue_number}: {result.stderr}")
        if "could not resolve" in stderr_lower or "not found" in stderr_lower:
            raise ValueError(f"Issue #{issue_number} not found")
        raise RuntimeError(f"gh CLI error: {result.stderr}")

    return result.stdout
def parse_acceptance_criteria(issue_body: str) -> Dict[str, List[str]]:
    """Parse acceptance criteria from a GitHub issue body.

    Extracts criteria from the "## Acceptance Criteria" section, supporting
    both categorized (### headers) and uncategorized (- [ ] items) formats.

    In the categorized format, items that appear before the first ###
    header are kept under "General", and criteria from repeated category
    names are merged rather than overwritten.

    Args:
        issue_body: GitHub issue body text. ``None`` or an empty string is
            treated as "no criteria".

    Returns:
        Dict mapping category name to list of criteria strings.
        Empty dict if no acceptance criteria found.

    Examples:
        Categorized:
        >>> body = '''
        ... ## Acceptance Criteria
        ... ### Fresh Install
        ... - [ ] Feature works
        ... - [ ] Tests pass
        ... '''
        >>> criteria = parse_acceptance_criteria(body)
        >>> criteria["Fresh Install"]
        ['Feature works', 'Tests pass']

        Uncategorized:
        >>> body = '''
        ... ## Acceptance Criteria
        ... - [ ] Feature works
        ... - [ ] Tests pass
        ... '''
        >>> criteria = parse_acceptance_criteria(body)
        >>> criteria["General"]
        ['Feature works', 'Tests pass']
    """
    # A missing or empty body cannot contain a criteria section.
    if not issue_body:
        return {}

    # Capture everything after the heading up to the next level-2 heading
    # ("## " followed by a non-#) or the end of the text, so that "###"
    # category headers stay inside the captured section.
    ac_pattern = r"## Acceptance Criteria\s*\n(.*?)(?=\n## [^#]|\Z)"
    match = re.search(ac_pattern, issue_body, re.DOTALL | re.IGNORECASE)
    if not match:
        return {}
    ac_section = match.group(1)

    # Each category is a "### name" line plus the text up to the next "###".
    category_pattern = r"###\s+([^\n]+)\s*\n(.*?)(?=\n###|\Z)"
    category_matches = list(re.finditer(category_pattern, ac_section, re.DOTALL))

    if not category_matches:
        # Uncategorized format - all items go to "General"
        criteria = _extract_criteria_items(ac_section)
        return {"General": criteria} if criteria else {}

    result: Dict[str, List[str]] = {}

    # Items listed before the first category header would otherwise be lost.
    preamble = _extract_criteria_items(ac_section[:category_matches[0].start()])
    if preamble:
        result["General"] = preamble

    for category_match in category_matches:
        category = category_match.group(1).strip()
        criteria = _extract_criteria_items(category_match.group(2))
        if criteria:  # Only add categories with criteria
            # Merge instead of assign so a repeated header keeps earlier items.
            result.setdefault(category, []).extend(criteria)
    return result
def _extract_criteria_items(text: str) -> List[str]:
"""Extract individual criteria items from text.
Handles both checkbox format (- [ ]) and plain bullet format (-).
Strips checkbox markers and cleans whitespace.
Args:
text: Text containing criteria items
Returns:
List of cleaned criteria strings
"""
# Pattern for criteria items: - [ ] or - [x] or just -
item_pattern = r"^[\s]*-\s*(?:\[[ x]\]\s*)?(.+)$"
criteria = []
for line in text.split('\n'):
match = re.match(item_pattern, line.strip())
if match:
criterion = match.group(1).strip()
# Skip empty criteria or noise
if criterion and not criterion.startswith('(') and criterion.lower() != 'no criteria defined':
criteria.append(criterion)
return criteria
def format_for_uat(criteria: Dict[str, List[str]]) -> List[Dict[str, str]]:
    """Format acceptance criteria as UAT test scenarios.

    Converts each criterion into a scenario dict with:
    - category: Original category name
    - criterion: Original criterion text
    - scenario_name: Valid pytest function name (test_*)

    Scenario names are unique within the returned list: when two criteria
    produce the same name (identical text, text differing only in
    punctuation, or long text truncated to the same prefix), later ones get
    a numeric suffix (``_2``, ``_3``, ...) so generated test functions do
    not shadow each other.

    Args:
        criteria: Dict mapping category to list of criteria

    Returns:
        List of scenario dicts, one per criterion, in input order

    Example:
        >>> criteria = {"Fresh Install": ["Feature works correctly"]}
        >>> scenarios = format_for_uat(criteria)
        >>> scenarios[0]["scenario_name"]
        'test_fresh_install_feature_works_correctly'
        >>> scenarios[0]["category"]
        'Fresh Install'
    """
    scenarios = []
    used_names = set()
    for category, criteria_list in criteria.items():
        for criterion in criteria_list:
            # Generate pytest-compatible scenario name
            base_name = _generate_scenario_name(category, criterion)

            # Disambiguate collisions; loop because "<base>_2" may itself
            # already be taken by an earlier scenario.
            scenario_name = base_name
            suffix = 2
            while scenario_name in used_names:
                scenario_name = f"{base_name}_{suffix}"
                suffix += 1
            used_names.add(scenario_name)

            scenarios.append({
                "category": category,
                "criterion": criterion,
                "scenario_name": scenario_name,
            })
    return scenarios
def _generate_scenario_name(category: str, criterion: str) -> str:
"""Generate valid pytest scenario name from category and criterion.
Converts to snake_case, removes special characters, prepends "test_".
Args:
category: Category name (e.g., "Fresh Install")
criterion: Criterion text (e.g., "Feature works correctly")
Returns:
Valid pytest function name (e.g., "test_fresh_install_feature_works_correctly")
"""
# Combine category and criterion
combined = f"{category} {criterion}"
# Convert to lowercase
name = combined.lower()
# Replace spaces and special chars with underscores
name = re.sub(r'[^a-z0-9_]+', '_', name)
# Remove leading/trailing underscores
name = name.strip('_')
# Collapse multiple underscores
name = re.sub(r'_+', '_', name)
# Truncate to reasonable length (pytest allows long names, but 100 chars is practical)
if len(name) > 97: # 97 + "test_" = 101
name = name[:97]
# Prepend "test_"
return f"test_{name}"