# TradingAgents/.claude/lib/alignment_assessor.py
"""Alignment assessment for brownfield projects.
This module analyzes codebase analysis results and assesses alignment with
autonomous-dev standards. It generates PROJECT.md drafts, calculates 12-Factor
App compliance scores, identifies alignment gaps, and prioritizes remediation.
Classes:
TwelveFactorScore: 12-Factor App methodology compliance scoring
AlignmentGap: Represents a gap between current and desired state
ProjectMdDraft: Draft PROJECT.md content with confidence scoring
AssessmentResult: Complete alignment assessment results
AlignmentAssessor: Main assessment coordinator
Security:
- CWE-22: Path validation via security_utils
- CWE-117: Audit logging with sanitization
- CWE-20: Input validation for all user inputs
Related:
- GitHub Issue #59: Brownfield retrofit command implementation
Relevant Skills:
- project-alignment-validation: Gap assessment methodology, alignment checklist
- error-handling-patterns: Exception hierarchy and error handling best practices
- library-design-patterns: Standardized design patterns
"""
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, List

from .security_utils import audit_log, validate_path
from .codebase_analyzer import AnalysisReport


class Severity(Enum):
    """Gap severity levels."""

    CRITICAL = "CRITICAL"  # Blocks autonomous development
    HIGH = "HIGH"  # Major impediment, should fix soon
    MEDIUM = "MEDIUM"  # Moderate issue, can defer
    LOW = "LOW"  # Minor improvement, nice to have


@dataclass
class TwelveFactorScore:
    """12-Factor App methodology compliance score.

    Attributes:
        factors: Dict mapping factor name to score (0-10)
        total_score: Sum of all factor scores (max 120)
        compliance_percentage: Percentage compliance (0-100)
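
    Example (illustrative values; any subset of factors works the same way):
        >>> s = TwelveFactorScore(factors={"codebase": 10, "dependencies": 6})
        >>> s.total_score
        16
        >>> s.compliance_percentage
        80.0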
"""
factors: Dict[str, int] = field(default_factory=dict)
total_score: int = 0
compliance_percentage: float = 0.0
def __post_init__(self):
"""Calculate total score and compliance percentage."""
if self.factors:
self.total_score = sum(self.factors.values())
max_score = len(self.factors) * 10
self.compliance_percentage = (self.total_score / max_score * 100) if max_score > 0 else 0.0


@dataclass
class AlignmentGap:
    """Represents a gap between current and desired state.

    Attributes:
        category: Gap category (e.g., 'file-organization', 'testing')
        severity: Gap severity level
        description: Human-readable description
        current_state: Current project state
        desired_state: Target state for alignment
        fix_steps: List of remediation steps
        impact_score: Impact score (0-100, higher = more important)
        effort_hours: Estimated effort to fix (hours)
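
    Example (illustrative; mirrors the "missing tests" gap emitted by
    AlignmentAssessor.identify_alignment_gaps):
        >>> gap = AlignmentGap(
        ...     category="testing",
        ...     severity=Severity.HIGH,
        ...     description="No test files detected",
        ...     current_state="0 test files",
        ...     desired_state="Test coverage > 80%",
        ...     fix_steps=["Create tests/", "Add pytest", "Write initial suite"],
        ...     impact_score=90,
        ...     effort_hours=4.0,
        ... )
        >>> gap.severity.value
        'HIGH'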
"""
category: str
severity: Severity
description: str
current_state: str
desired_state: str
fix_steps: List[str]
impact_score: int = 0
effort_hours: float = 0.0


@dataclass
class ProjectMdDraft:
    """Draft PROJECT.md content with confidence scoring.

    Attributes:
        sections: Dict mapping section name to content
        confidence: Confidence score (0.0-1.0)
        source_files: List of files used to generate draft
    """

    sections: Dict[str, str] = field(default_factory=dict)
    confidence: float = 0.0
    source_files: List[str] = field(default_factory=list)

    def to_markdown(self) -> str:
        """Convert draft to PROJECT.md markdown format.

        Returns:
            Formatted PROJECT.md content
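
        Example (illustrative):
            >>> draft = ProjectMdDraft(sections={"GOALS": "Ship v1."})
            >>> "## GOALS" in draft.to_markdown()
            True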
"""
lines = ["# Project Overview\n"]
# Add sections in standard order
section_order = [
"GOALS",
"SCOPE",
"CONSTRAINTS",
"ARCHITECTURE",
"DEPENDENCIES",
"DEVELOPMENT",
"TESTING",
"DEPLOYMENT"
]
for section_name in section_order:
if section_name in self.sections:
lines.append(f"\n## {section_name}\n")
lines.append(self.sections[section_name])
# Add any remaining sections
for section_name, content in self.sections.items():
if section_name not in section_order:
lines.append(f"\n## {section_name}\n")
lines.append(content)
# Add metadata footer
lines.append(f"\n---\n")
lines.append(f"<!-- Generated by /align-project-retrofit -->\n")
lines.append(f"<!-- Confidence: {self.confidence:.2f} -->\n")
lines.append(f"<!-- Source files: {len(self.source_files)} -->\n")
return "\n".join(lines)


@dataclass
class AssessmentResult:
    """Complete alignment assessment results.

    Attributes:
        project_md: Draft PROJECT.md content
        twelve_factor_score: 12-Factor compliance scoring
        gaps: List of identified alignment gaps
        priority_list: Gaps sorted by priority (impact/effort)
    """

    project_md: ProjectMdDraft
    twelve_factor_score: TwelveFactorScore
    gaps: List[AlignmentGap] = field(default_factory=list)
    priority_list: List[AlignmentGap] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert to dictionary representation.

        Returns:
            Dictionary with all assessment data
        """
        return {
            "project_md": {
                "sections": self.project_md.sections,
                "confidence": self.project_md.confidence,
                "source_files": self.project_md.source_files,
            },
            "twelve_factor_score": {
                "factors": self.twelve_factor_score.factors,
                "total_score": self.twelve_factor_score.total_score,
                "compliance_percentage": self.twelve_factor_score.compliance_percentage,
            },
            "gaps": [
                {
                    "category": gap.category,
                    "severity": gap.severity.value,
                    "description": gap.description,
                    "current_state": gap.current_state,
                    "desired_state": gap.desired_state,
                    "fix_steps": gap.fix_steps,
                    "impact_score": gap.impact_score,
                    "effort_hours": gap.effort_hours,
                }
                for gap in self.gaps
            ],
            "priority_list": [
                {
                    "category": gap.category,
                    "severity": gap.severity.value,
                    "description": gap.description,
                    "impact_score": gap.impact_score,
                    "effort_hours": gap.effort_hours,
                }
                for gap in self.priority_list
            ],
        }


class AlignmentAssessor:
    """Main alignment assessment coordinator.

    Analyzes codebase analysis results and generates comprehensive alignment
    assessment including PROJECT.md drafts, 12-Factor scores, and gap analysis.
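
    Example (sketch; assumes report is an AnalysisReport produced by
    codebase_analyzer and that the project path exists):
        >>> assessor = AlignmentAssessor(Path("/path/to/project"))  # doctest: +SKIP
        >>> result = assessor.assess(report)  # doctest: +SKIP
        >>> result.twelve_factor_score.compliance_percentage  # doctest: +SKIP
        67.5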
"""
def __init__(self, project_root: Path):
"""Initialize alignment assessor.
Args:
project_root: Path to project root directory
Raises:
ValueError: If project_root invalid
"""
# Security: Validate project root path (CWE-22)
validated_root = validate_path(
project_root,
"project_root",
allow_missing=False,
)
self.project_root = Path(validated_root)
# Audit log initialization
audit_log(
"alignment_assessor_init",
project_root=str(self.project_root),
success=True
)

    def assess(self, analysis: AnalysisReport) -> AssessmentResult:
        """Perform complete alignment assessment.

        Args:
            analysis: Codebase analysis results

        Returns:
            Complete assessment results

        Raises:
            ValueError: If analysis invalid
        """
        if not analysis:
            raise ValueError("Analysis result required")

        audit_log(
            "alignment_assessment_start",
            project_root=str(self.project_root),
            has_tech_stack=bool(analysis.tech_stack),
            has_structure=bool(analysis.structure),
        )

        try:
            # Generate PROJECT.md draft
            project_md = self.generate_project_md(analysis)

            # Calculate 12-Factor compliance
            twelve_factor = self.calculate_twelve_factor_score(analysis)

            # Identify alignment gaps
            gaps = self.identify_alignment_gaps(analysis, twelve_factor)

            # Prioritize gaps
            priority_list = self.prioritize_gaps(gaps)

            result = AssessmentResult(
                project_md=project_md,
                twelve_factor_score=twelve_factor,
                gaps=gaps,
                priority_list=priority_list,
            )

            audit_log(
                "alignment_assessment_complete",
                project_root=str(self.project_root),
                gaps_found=len(gaps),
                compliance_percentage=twelve_factor.compliance_percentage,
                success=True,
            )
            return result
        except Exception as e:
            audit_log(
                "alignment_assessment_failed",
                project_root=str(self.project_root),
                error=str(e),
                success=False,
            )
            raise

    def generate_project_md(self, analysis: AnalysisReport) -> ProjectMdDraft:
        """Generate PROJECT.md draft from analysis.

        Args:
            analysis: Codebase analysis results

        Returns:
            Draft PROJECT.md content with confidence score
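
        Example (illustrative): a Python project with a detected framework,
        declared dependencies, and tests typically yields GOALS, SCOPE,
        CONSTRAINTS, ARCHITECTURE, DEPENDENCIES, and TESTING sections, and a
        confidence at the 1.0 cap (see _calculate_confidence).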
"""
sections = {}
source_files = []
# GOALS section from README/docs
goals_content = self._extract_goals(analysis)
if goals_content:
sections["GOALS"] = goals_content
source_files.extend(["README.md", "docs/"])
# SCOPE section from tech stack
if analysis.tech_stack.primary_language:
scope_lines = [
f"**Primary Language**: {analysis.tech_stack.primary_language}",
f"**Framework**: {analysis.tech_stack.framework or 'None detected'}",
f"**Package Manager**: {analysis.tech_stack.package_manager or 'None detected'}",
]
sections["SCOPE"] = "\n".join(scope_lines)
source_files.append("Tech stack detection")
# CONSTRAINTS section
constraints = self._extract_constraints(analysis)
if constraints:
sections["CONSTRAINTS"] = constraints
# ARCHITECTURE section from structure
if analysis.structure.total_files > 0:
arch_lines = [
f"**Total Files**: {analysis.structure.total_files}",
f"**Source Files**: {analysis.structure.source_files}",
f"**Test Files**: {analysis.structure.test_files}",
f"**Documentation**: {analysis.structure.doc_files} files",
]
sections["ARCHITECTURE"] = "\n".join(arch_lines)
source_files.append("File structure analysis")
# DEPENDENCIES section
if analysis.tech_stack.dependencies:
dep_lines = ["**Key Dependencies**:"]
for dep in list(analysis.tech_stack.dependencies)[:10]: # Top 10
dep_lines.append(f"- {dep}")
sections["DEPENDENCIES"] = "\n".join(dep_lines)
source_files.append("Dependency files")
# TESTING section
if analysis.structure.test_files > 0:
test_lines = [
f"**Test Framework**: {analysis.tech_stack.test_framework or 'Detected from structure'}",
f"**Test Files**: {analysis.structure.test_files}",
f"**Test Coverage**: Unknown (run tests to detect)",
]
sections["TESTING"] = "\n".join(test_lines)
source_files.append("Test structure")
# Calculate confidence score (0.0-1.0)
confidence = self._calculate_confidence(sections, analysis)
return ProjectMdDraft(
sections=sections,
confidence=confidence,
source_files=list(set(source_files)) # Deduplicate
)

    def calculate_twelve_factor_score(self, analysis: AnalysisReport) -> TwelveFactorScore:
        """Calculate 12-Factor App compliance score.

        Each factor scored 0-10:
        - 10: Full compliance
        - 7-9: Partial compliance
        - 4-6: Minimal compliance
        - 0-3: Non-compliant

        Args:
            analysis: Codebase analysis results

        Returns:
            12-Factor compliance scoring
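
        Example (worked through this method's own heuristics): a project
        with a .git directory and a detected package manager, but none of
        the other signals, scores 10 + 10 + 5 + 6 + 5 + 7 + 6 + 7 + 7 + 5
        + 6 + 7 = 81 of 120, i.e. 67.5% compliance.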
"""
factors = {}
# I. Codebase - Single codebase in version control
has_git = (self.project_root / ".git").is_dir()
factors["codebase"] = 10 if has_git else 3
# II. Dependencies - Explicitly declared
has_deps = bool(analysis.tech_stack.package_manager)
factors["dependencies"] = 10 if has_deps else 4
# III. Config - Store in environment
has_env = any(f for f in analysis.structure.config_files if ".env" in f.lower())
factors["config"] = 8 if has_env else 5
# IV. Backing services - Treat as attached resources
# Heuristic: Check for database/cache config
has_backing = any(
tech in str(analysis.tech_stack.dependencies).lower()
for tech in ["postgres", "redis", "mysql", "mongo"]
)
factors["backing_services"] = 8 if has_backing else 6
# V. Build, release, run - Strict separation
has_ci = any(f for f in analysis.structure.config_files if "ci" in f.lower() or "github" in f.lower())
factors["build_release_run"] = 9 if has_ci else 5
# VI. Processes - Execute as stateless processes
# Heuristic: No obvious state storage detected
factors["processes"] = 7 # Default moderate score
# VII. Port binding - Export via port binding
# Heuristic: Check for web framework
has_web = analysis.tech_stack.framework in ["flask", "django", "fastapi", "express"]
factors["port_binding"] = 9 if has_web else 6
# VIII. Concurrency - Scale via process model
factors["concurrency"] = 7 # Default moderate score
# IX. Disposability - Fast startup/graceful shutdown
factors["disposability"] = 7 # Default moderate score
# X. Dev/prod parity - Keep similar
has_docker = any(f for f in analysis.structure.config_files if "docker" in f.lower())
factors["dev_prod_parity"] = 9 if has_docker else 5
# XI. Logs - Treat as event streams
has_logging = any(
tech in str(analysis.tech_stack.dependencies).lower()
for tech in ["logging", "logger", "log"]
)
factors["logs"] = 8 if has_logging else 6
# XII. Admin processes - Run as one-off processes
has_scripts = analysis.structure.total_files > 0 # Has any scripts
factors["admin_processes"] = 7 if has_scripts else 5
return TwelveFactorScore(factors=factors)

    def identify_alignment_gaps(
        self,
        analysis: AnalysisReport,
        twelve_factor: TwelveFactorScore,
    ) -> List[AlignmentGap]:
        """Identify alignment gaps between current and desired state.

        Args:
            analysis: Codebase analysis results
            twelve_factor: 12-Factor compliance score

        Returns:
            List of identified gaps
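
        Example (illustrative): a project with no test files yields a HIGH
        "testing" gap with impact_score=90 and effort_hours=4.0; each
        12-Factor score below 7 yields an additional "twelve-factor" gap.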
"""
gaps = []
# Gap: Missing PROJECT.md
if not (self.project_root / ".claude" / "PROJECT.md").exists():
gaps.append(AlignmentGap(
category="documentation",
severity=Severity.CRITICAL,
description="Missing .claude/PROJECT.md file",
current_state="No PROJECT.md exists",
desired_state="PROJECT.md defines GOALS, SCOPE, CONSTRAINTS",
fix_steps=[
"Create .claude/ directory",
"Generate PROJECT.md from analysis",
"Review and customize content"
],
impact_score=100,
effort_hours=0.5
))
# Gap: Poor file organization
if not analysis.structure.has_src_dir and analysis.structure.source_files > 10:
gaps.append(AlignmentGap(
category="file-organization",
severity=Severity.HIGH,
description="No src/ directory structure",
current_state=f"{analysis.structure.source_files} files in root",
desired_state="Organized src/ directory structure",
fix_steps=[
"Create src/ directory",
"Move source files to src/",
"Update import paths"
],
impact_score=80,
effort_hours=2.0
))
# Gap: Missing tests
if analysis.structure.test_files == 0:
gaps.append(AlignmentGap(
category="testing",
severity=Severity.HIGH,
description="No test files detected",
current_state="0 test files",
desired_state="Test coverage > 80%",
fix_steps=[
"Create tests/ directory",
"Add test framework (pytest recommended)",
"Write initial test suite"
],
impact_score=90,
effort_hours=4.0
))
# Gap: Low test coverage
elif analysis.structure.test_files < analysis.structure.source_files * 0.5:
gaps.append(AlignmentGap(
category="testing",
severity=Severity.MEDIUM,
description="Insufficient test coverage",
current_state=f"{analysis.structure.test_files} test files vs {analysis.structure.source_files} source files",
desired_state="Test coverage > 80%",
fix_steps=[
"Identify untested modules",
"Add tests for critical paths",
"Set up coverage reporting"
],
impact_score=70,
effort_hours=8.0
))
# Gap: Missing CI/CD
has_ci = any(f for f in analysis.structure.config_files if "ci" in f.lower())
if not has_ci:
gaps.append(AlignmentGap(
category="automation",
severity=Severity.MEDIUM,
description="No CI/CD configuration",
current_state="No CI/CD detected",
desired_state="Automated testing and deployment",
fix_steps=[
"Add .github/workflows/ directory",
"Create test workflow",
"Configure deployment pipeline"
],
impact_score=75,
effort_hours=3.0
))
# Gap: 12-Factor compliance issues
for factor_name, score in twelve_factor.factors.items():
if score < 7: # Below good compliance threshold
gaps.append(AlignmentGap(
category="twelve-factor",
severity=Severity.LOW if score >= 4 else Severity.MEDIUM,
description=f"Low 12-Factor score: {factor_name}",
current_state=f"Score: {score}/10",
desired_state=f"Score: 8+/10",
fix_steps=[
f"Review 12-Factor methodology for '{factor_name}'",
f"Implement recommended practices",
f"Verify compliance"
],
impact_score=50 + score * 2, # Higher impact for lower scores
effort_hours=1.0 + (10 - score) * 0.5
))
return gaps

    def prioritize_gaps(self, gaps: List[AlignmentGap]) -> List[AlignmentGap]:
        """Prioritize gaps by impact/effort ratio.

        Args:
            gaps: List of alignment gaps

        Returns:
            Gaps sorted by priority (highest first)
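
        Example (worked): a CRITICAL gap with impact 100 and 0.5h effort
        scores 100 + 100/0.5 = 300; a MEDIUM gap with impact 70 and 8h
        effort scores 25 + 70/8 = 33.75, so the CRITICAL gap sorts first.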
"""
# Calculate priority score for each gap
def priority_score(gap: AlignmentGap) -> float:
# Severity weight
severity_weight = {
Severity.CRITICAL: 100,
Severity.HIGH: 50,
Severity.MEDIUM: 25,
Severity.LOW: 10
}
# Impact/effort ratio (higher is better)
effort = max(gap.effort_hours, 0.1) # Avoid division by zero
ratio = gap.impact_score / effort
# Combined score
return severity_weight[gap.severity] + ratio
# Sort by priority score (highest first)
return sorted(gaps, key=priority_score, reverse=True)

    # Private helper methods

    def _extract_goals(self, analysis: AnalysisReport) -> str:
        """Extract goals from README or documentation.

        Args:
            analysis: Codebase analysis results

        Returns:
            Goals content, or a TODO placeholder if nothing could be extracted
        """
        readme_path = self.project_root / "README.md"
        if readme_path.exists():
            try:
                content = readme_path.read_text(encoding="utf-8")
                # Look for common goal-related sections
                for marker in ["## Goals", "## Purpose", "## Objectives"]:
                    if marker in content:
                        # Extract section content (simplified: first 500 chars)
                        return f"*Extracted from README.md*\n\n{content[:500]}..."
            except Exception:
                pass
        return "**TODO**: Define project goals and objectives"

    def _extract_constraints(self, analysis: AnalysisReport) -> str:
        """Extract constraints from tech stack.

        Args:
            analysis: Codebase analysis results

        Returns:
            Constraints content
        """
        constraints = []
        if analysis.tech_stack.primary_language:
            constraints.append(f"- **Language**: {analysis.tech_stack.primary_language}")
        if analysis.tech_stack.framework:
            constraints.append(f"- **Framework**: {analysis.tech_stack.framework}")

        # Add default constraints
        constraints.append("- **Code Quality**: 80%+ test coverage required")
        constraints.append("- **Security**: No secrets in version control")
        constraints.append("- **Documentation**: Keep CLAUDE.md and PROJECT.md in sync")
        return "\n".join(constraints)

    def _calculate_confidence(self, sections: Dict[str, str], analysis: AnalysisReport) -> float:
        """Calculate confidence score for generated PROJECT.md.

        Args:
            sections: Generated sections
            analysis: Codebase analysis results

        Returns:
            Confidence score (0.0-1.0)
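
        Example (worked): five sections (0.5) plus detected language (0.15),
        framework (0.15), dependencies (0.1), and tests (0.1) sums to 1.0,
        the cap.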
"""
score = 0.0
# Base score from sections generated
score += len(sections) * 0.1 # 0.1 per section
# Bonus for tech stack detection
if analysis.tech_stack.primary_language:
score += 0.15
# Bonus for framework detection
if analysis.tech_stack.framework:
score += 0.15
# Bonus for dependencies
if analysis.tech_stack.dependencies:
score += 0.1
# Bonus for tests
if analysis.structure.test_files > 0:
score += 0.1
# Cap at 1.0
return min(score, 1.0)