"""
PROJECT.md parsing and validation.

Parses PROJECT.md to extract GOALS, SCOPE (included/excluded), and CONSTRAINTS.
Provides structured access to project governance information.
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List
|
|
|
|
|
|
class ProjectMdParser:
    """Parse PROJECT.md into goals, scope (included/excluded) and constraints.

    The file is read once in ``__init__`` and the results are stored on the
    instance as lists of strings:

    * ``goals`` - items under the ``## GOALS`` heading
    * ``scope_included`` - items under ``## SCOPE`` whose line contains '✅'
    * ``scope_excluded`` - items under ``## SCOPE`` whose line contains '❌'
    * ``constraints`` - items under the ``## CONSTRAINTS`` heading
    """

    # Start of the next level-2 heading ("## "); "###" lines do not match
    # because the two hashes must be followed by whitespace.
    _NEXT_SECTION_RE = re.compile(r"^##\s+", re.MULTILINE)

    # Start of the next subsection header: a "### " heading or a line that
    # *begins* with **bold** text.  Anchoring to the line start keeps bold
    # text inside a list item from being mistaken for a header.
    _NEXT_SUBSECTION_RE = re.compile(r"^(?:###\s+|[ \t]*\*\*.*?\*\*)", re.MULTILINE)

    # A numbered-list line ("1.", "12.", ...).
    _NUMBERED_RE = re.compile(r'^\d+\.')

    # Exactly one leading list marker (bullet or "N.") plus an optional
    # status emoji.  Stripping a single marker keeps item text such as
    # "2.5x faster" intact after the bullet has been removed.
    _MARKER_RE = re.compile(r'^(?:[-*]|\d+\.)\s*[❌✅]?\s*')

    # **bold** spans; group 1 is the text between the markers.
    _BOLD_RE = re.compile(r'\*\*(.*?)\*\*')

    def __init__(self, project_md_path: Path):
        """
        Initialize parser and parse the file immediately.

        Args:
            project_md_path: Path to PROJECT.md file (a plain string path is
                also accepted and converted to ``Path``)

        Raises:
            FileNotFoundError: If the path does not exist
        """
        project_md_path = Path(project_md_path)
        self.project_md_path = project_md_path

        if not project_md_path.exists():
            raise FileNotFoundError(f"PROJECT.md not found at: {project_md_path}")

        # Decode explicitly as UTF-8: the SCOPE filters below look for emoji,
        # which a platform-dependent default encoding may not decode.
        self.content = project_md_path.read_text(encoding="utf-8")
        self.goals = self._parse_section("GOALS")

        # SCOPE items are split into included/excluded by their emoji.
        self.scope_included = self._parse_section("SCOPE", emoji_filter='✅')
        self.scope_excluded = self._parse_section("SCOPE", emoji_filter='❌')

        self.constraints = self._parse_section("CONSTRAINTS")

    def _parse_section(
        self,
        section_name: str,
        subsection: Optional[str] = None,
        emoji_filter: Optional[str] = None
    ) -> List[str]:
        """
        Parse a section from PROJECT.md

        Args:
            section_name: Name of main section (GOALS, SCOPE, CONSTRAINTS);
                matched literally at the start of a ``## `` heading
            subsection: Optional subsection name (e.g., "In Scope"), found as
                a ``### `` heading or inside a **bold** header
            emoji_filter: Optional emoji to filter items (e.g., '✅' or '❌');
                only lines containing it are kept

        Returns:
            List of items in the section; empty if the section or the
            requested subsection is not found
        """
        # The name is escaped so it is matched literally; "(?!\w)" stops
        # "GOALS" from matching "GOALSX" while still allowing trailing text
        # such as an emoji after the name.
        section_match = re.search(
            rf"^##\s+{re.escape(section_name)}(?!\w)", self.content, re.MULTILINE
        )
        if not section_match:
            return []

        # Section body runs until the next "## " heading or end of file.
        start = section_match.end()
        next_section = self._NEXT_SECTION_RE.search(self.content, start)
        end = next_section.start() if next_section else len(self.content)
        section_content = self.content[start:end]

        if subsection:
            narrowed = self._extract_subsection(section_content, subsection)
            if narrowed is None:
                return []
            section_content = narrowed

        return self._extract_items(section_content, emoji_filter)

    @classmethod
    def _extract_subsection(cls, section_content: str, subsection: str) -> Optional[str]:
        """Return the text of one subsection, or None if it is not found."""
        escaped = re.escape(subsection)

        # Try a "### <name>" heading first.
        header = re.search(rf"^###\s+{escaped}\s*$", section_content, re.MULTILINE)

        # Fall back to a **bold** header containing the name, matched
        # case-insensitively (e.g. "**What's IN Scope**" for "In Scope").
        if not header:
            header = re.search(rf"\*\*.*?{escaped}.*?\*\*", section_content, re.IGNORECASE)

        if not header:
            return None

        start = header.end()
        next_header = cls._NEXT_SUBSECTION_RE.search(section_content, start)
        end = next_header.start() if next_header else len(section_content)
        return section_content[start:end]

    @classmethod
    def _extract_items(cls, text: str, emoji_filter: Optional[str]) -> List[str]:
        """Collect bullet and numbered list items from a block of markdown."""
        items = []
        for raw_line in text.splitlines():
            line = raw_line.strip()

            # Skip bold header lines ("**Title**" or "**Title**: ...").
            if line.startswith('**') and (':' in line or line.endswith('**')):
                continue

            # Skip horizontal rules (---, ***, etc.)
            if line.startswith(('---', '***')) or line == '--':
                continue

            if emoji_filter and emoji_filter not in line:
                continue

            # Only bullet points (-, *) or numbered lists (1., 2., ...) count.
            if not (line.startswith(('-', '*')) or cls._NUMBERED_RE.match(line)):
                continue

            item = cls._MARKER_RE.sub('', line).strip()
            item = cls._BOLD_RE.sub(r'\1', item)

            # Keep only the part before " - " ("Goal" from "Goal - explanation").
            if ' - ' in item:
                item = item.split(' - ')[0]
            item = item.strip()

            # Drop empty items and header-like items ending with a colon.
            if item and not item.endswith(':'):
                items.append(item)

        return items

    def to_dict(self) -> Dict[str, Any]:
        """Convert parsed PROJECT.md to dictionary"""
        return {
            'goals': self.goals,
            'scope': {
                'included': self.scope_included,
                'excluded': self.scope_excluded
            },
            'constraints': self.constraints
        }
|