527 lines
17 KiB
Python
527 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Workflow State Tracking for Preference Learning - Issue #155
|
|
|
|
Tracks quality workflow steps taken/skipped, detects user corrections,
|
|
and learns preferences over time to improve Claude's workflow decisions.
|
|
|
|
Key Features:
|
|
- Step tracking: Records which quality steps were taken vs skipped
|
|
- Correction detection: Parses user feedback for improvement signals
|
|
- Preference learning: Derives preferences from patterns over time
|
|
- Privacy-preserving: Local storage only, no cloud sync
|
|
- Atomic persistence: State is written to a temp file and atomically renamed; an in-process thread lock guards updates
|
|
|
|
State File Location:
|
|
- ~/.autonomous-dev/workflow_state.json (user-level preferences)
|
|
|
|
Usage:
|
|
from workflow_tracker import WorkflowTracker, detect_correction
|
|
|
|
# Track workflow steps
|
|
tracker = WorkflowTracker()
|
|
tracker.start_session()
|
|
tracker.record_step("research", taken=True)
|
|
tracker.record_step("testing", taken=False, reason="quick fix")
|
|
tracker.save()
|
|
|
|
# Detect corrections in user feedback
|
|
correction = detect_correction("you should have researched first")
|
|
if correction:
|
|
tracker.record_correction(correction["step"], correction["text"])
|
|
|
|
# Get learned preferences
|
|
prefs = tracker.get_preferences()
|
|
recommended = tracker.get_recommended_steps()
|
|
"""
|
|
|
|
import json
import os
import re
import tempfile
import threading
import uuid
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
|
|
|
|
# ============================================================================
|
|
# Configuration
|
|
# ============================================================================
|
|
|
|
# Session history is capped at this many entries so the state file stays small.
MAX_SESSIONS: int = 50

# A step counts as "emphasized" once it has this many recorded corrections.
CORRECTION_THRESHOLD: int = 3

# Age limit, in days, for corrections kept by preference decay.
PREFERENCE_DECAY_DAYS: int = 30

# Per-user state file used when the caller does not supply a path.
DEFAULT_STATE_FILE: Path = Path.home() / ".autonomous-dev" / "workflow_state.json"

# Template for a brand-new state document.
DEFAULT_WORKFLOW_STATE: Dict[str, Any] = {
    "version": "1.0",
    "sessions": [],
    "preferences": {
        # step -> correction_count
        "emphasized_steps": {},
        # task_type -> {step -> priority}
        "task_type_preferences": {},
    },
    # List of correction records
    "corrections": [],
    "metadata": {
        "created_at": None,
        "updated_at": None,
    },
}

# Names of the quality workflow steps, with what each one covers.
WORKFLOW_STEPS: List[str] = [
    "alignment",       # PROJECT.md alignment check
    "research",        # Codebase/web research
    "planning",        # Implementation planning
    "testing",         # TDD tests
    "implementation",  # Code implementation
    "review",          # Code review
    "security",        # Security audit
    "documentation",   # Doc updates
]
|
|
|
|
|
|
# ============================================================================
|
|
# Correction Detection Patterns
|
|
# ============================================================================
|
|
|
|
# Patterns that signal a user correction.
# Each entry is (regex, pattern_name); capture group 1 is the word that
# follows the trigger phrase and is later mapped to a workflow step.
CORRECTION_PATTERNS = [
    # "you should have X"
    (r"\byou\s+should\s+have\s+(\w+)", "should_have"),
    # "need to X"
    (r"\bneed\s+to\s+(\w+)", "need_to"),
    # "forgot to X"
    (r"\bforgot\s+to\s+(\w+)", "forgot"),
    # "should always X"
    (r"\bshould\s+always\s+(\w+)", "always_should"),
    # "didn't X" / "didnt X"
    (r"\bdidn'?t\s+(\w+)", "didnt"),
    # "should X first" / "should X before"
    (r"\bshould\s+(\w+)\s+(?:first|before)", "should_before"),
]

# Step keyword mapping (step -> words that indicate it).
# Dict order matters: the first step with a matching keyword wins.
STEP_KEYWORDS = {
    "research": ["research", "searched", "looked", "checked", "investigated"],
    "testing": ["test", "tested", "tests", "tdd", "unittest", "write"],  # "write tests"
    "planning": ["plan", "planned", "planning", "design"],
    "review": ["review", "reviewed", "check", "checked"],
    "security": ["security", "secure", "audit", "audited", "vulnerability", "run"],  # "run security"
    "documentation": ["document", "documented", "docs", "readme"],
    "alignment": ["align", "aligned", "project", "goals"],
    "implementation": ["implement", "implemented", "code", "coded"],
}

# Shortest captured word that may match a keyword by being its prefix.
# Without this floor, words like "go" or "do" would match "goals"/"document".
_MIN_STEM_LENGTH = 3


def _extract_step_from_keyword(keyword: str) -> Optional[str]:
    """
    Map a captured word to a workflow step.

    A word matches a step keyword when the keyword occurs inside the word
    ("researched" contains "research"), or when the word is a prefix of the
    keyword and is at least ``_MIN_STEM_LENGTH`` characters long
    ("check" -> "checked").

    Args:
        keyword: Word captured by one of the correction patterns

    Returns:
        Step name, or None when the word is empty or matches no keyword
    """
    word = keyword.strip().lower() if keyword else ""
    if not word:
        # An empty string is a substring of everything; never treat it as a hit.
        return None

    allow_stem = len(word) >= _MIN_STEM_LENGTH
    for step, keywords in STEP_KEYWORDS.items():
        for kw in keywords:
            if kw in word or (allow_stem and kw.startswith(word)):
                return step
    return None


def detect_correction(user_input: Optional[str]) -> Optional[Dict[str, str]]:
    """
    Detect if user input contains a correction signal.

    Looks for patterns like:
    - "you should have researched first"
    - "need to write tests before implementing"
    - "forgot to check for duplicates"
    - "should always run security checks"

    Patterns are tried in ``CORRECTION_PATTERNS`` order, and every occurrence
    of a pattern is examined, so an unrelated early phrase ("didn't xyz")
    does not hide a later valid one ("didn't test").

    Args:
        user_input: User's message text

    Returns:
        Dict with 'step', 'text' (the original input), 'pattern' (pattern
        name) and 'keyword' (the captured word) if a correction is detected,
        None otherwise

    Example:
        >>> detect_correction("you should have researched first")
        {'step': 'research', 'text': 'you should have researched first', 'pattern': 'should_have', 'keyword': 'researched'}
    """
    if not user_input or not isinstance(user_input, str):
        return None

    text = user_input.lower()

    for pattern, pattern_name in CORRECTION_PATTERNS:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            keyword = match.group(1)
            step = _extract_step_from_keyword(keyword)
            if step:
                return {
                    "step": step,
                    "text": user_input,
                    "pattern": pattern_name,
                    "keyword": keyword,
                }

    return None
|
|
|
|
|
|
# ============================================================================
|
|
# Session Management
|
|
# ============================================================================
|
|
|
|
def create_session() -> Dict[str, Any]:
    """
    Create a new workflow session record.

    The start timestamp is UTC in ISO-8601 form with a fixed six-digit
    fraction and a trailing "Z" (e.g. "2024-01-01T12:00:00.000000Z"), so
    timestamps always have the same width and sort chronologically as text.

    Returns:
        Dict with a random session_id (UUID4 string), started_at timestamp,
        ended_at (None), an empty steps list, and task_type (None)

    Example:
        >>> session = create_session()
        >>> session["session_id"]
        'abc123-def456-...'
    """
    # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
    # tzinfo so the serialized form keeps the existing "...Z" layout.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return {
        "session_id": str(uuid.uuid4()),
        "started_at": now_utc.isoformat(timespec="microseconds") + "Z",
        "ended_at": None,
        "steps": [],
        "task_type": None,  # feature, bugfix, docs, etc.
    }
|
|
|
|
|
|
# ============================================================================
|
|
# Workflow Tracker Class
|
|
# ============================================================================
|
|
|
|
class WorkflowTracker:
    """
    Tracks workflow steps, corrections, and learned preferences.

    Mutating methods serialize on a re-entrant thread lock. ``save()``
    writes to a temporary file and atomically renames it over the state
    file, so a reader never sees a partially written file. No inter-process
    file lock is taken: if several processes share one state file, the last
    writer wins.

    Attributes:
        state_file: Path to workflow state JSON file
        _state: In-memory state dict
        _current_session: Current active session dict (None when idle)
        _lock: Re-entrant thread lock guarding mutation and saving
    """

    def __init__(self, state_file: Optional[Path] = None):
        """
        Initialize workflow tracker.

        Args:
            state_file: Optional custom state file path; a plain string is
                also accepted (default: ~/.autonomous-dev/workflow_state.json)
        """
        self.state_file = Path(state_file) if state_file else DEFAULT_STATE_FILE
        self._lock = threading.RLock()
        self._state = self._load_state()
        self._current_session: Optional[Dict[str, Any]] = None

    # ========================================================================
    # Time Helpers
    # ========================================================================

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current UTC time as a naive datetime."""
        # datetime.utcnow() is deprecated; derive the same value from an
        # aware "now" so stored timestamps keep their existing layout.
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @classmethod
    def _timestamp(cls) -> str:
        """Return the current UTC time as a fixed-width ISO string ending in "Z"."""
        return cls._utc_now().isoformat(timespec="microseconds") + "Z"

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Parse a stored timestamp into a naive UTC datetime, or None if invalid."""
        if not isinstance(value, str) or not value:
            return None
        # fromisoformat() only understands a trailing "Z" from Python 3.11 on.
        text = value[:-1] if value.endswith("Z") else value
        try:
            parsed = datetime.fromisoformat(text)
        except ValueError:
            return None
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    # ========================================================================
    # Persistence
    # ========================================================================

    def _load_state(self) -> Dict[str, Any]:
        """
        Load state from file or return defaults.

        A missing, unreadable, undecodable, or non-object state file yields a
        fresh default state. Top-level keys that are absent, or whose
        container type is wrong, are filled with independent copies of the
        defaults so the module-level template is never shared or mutated.
        """
        try:
            if self.state_file.exists():
                loaded = json.loads(self.state_file.read_text(encoding="utf-8"))
                if isinstance(loaded, dict):
                    for key, default in DEFAULT_WORKFLOW_STATE.items():
                        wrong_container = isinstance(
                            default, (dict, list)
                        ) and not isinstance(loaded.get(key), type(default))
                        if key not in loaded or wrong_container:
                            # JSON round-trip = deep copy of the default value
                            loaded[key] = json.loads(json.dumps(default))
                    return loaded
        except (ValueError, OSError):
            # ValueError covers both invalid JSON and invalid UTF-8;
            # fall through to defaults in either case.
            pass

        # Return copy of defaults
        state = json.loads(json.dumps(DEFAULT_WORKFLOW_STATE))
        state["metadata"]["created_at"] = self._timestamp()
        return state

    def save(self) -> bool:
        """
        Save state to file using atomic write.

        Returns:
            True if save succeeded, False if an OSError occurred

        Raises:
            TypeError, ValueError: If the state is not JSON-serializable
                (re-raised after the temporary file is removed)
        """
        with self._lock:
            try:
                # Ensure directory exists
                self.state_file.parent.mkdir(parents=True, exist_ok=True)

                # Update timestamp
                self._state["metadata"]["updated_at"] = self._timestamp()

                # Atomic write: temp file in the same directory, then rename
                fd, temp_path = tempfile.mkstemp(
                    dir=self.state_file.parent,
                    suffix=".tmp",
                )
                try:
                    with os.fdopen(fd, "w", encoding="utf-8") as handle:
                        json.dump(self._state, handle, indent=2)
                    os.replace(temp_path, self.state_file)
                except Exception:
                    # Clean up temp file on error
                    try:
                        os.unlink(temp_path)
                    except OSError:
                        pass
                    raise
                return True
            except OSError:
                return False

    # ========================================================================
    # Session Management
    # ========================================================================

    def start_session(self, task_type: Optional[str] = None) -> str:
        """
        Start a new workflow session.

        Any session already in progress is replaced without being recorded.

        Args:
            task_type: Optional task type (feature, bugfix, docs, etc.)

        Returns:
            Session ID
        """
        with self._lock:
            self._current_session = create_session()
            self._current_session["task_type"] = task_type
            return self._current_session["session_id"]

    def end_session(self) -> None:
        """End current session, add it to history, and save.

        History is trimmed to the newest MAX_SESSIONS entries. The state is
        saved even when no session was active.
        """
        with self._lock:
            if self._current_session:
                self._current_session["ended_at"] = self._timestamp()
                self._state["sessions"].append(self._current_session)

                # Trim to max sessions
                if len(self._state["sessions"]) > MAX_SESSIONS:
                    self._state["sessions"] = self._state["sessions"][-MAX_SESSIONS:]

                self._current_session = None
            self.save()

    def get_sessions(self) -> List[Dict[str, Any]]:
        """Get all recorded sessions (the live internal list, not a copy)."""
        return self._state.get("sessions", [])

    def get_current_session_steps(self) -> List[Dict[str, Any]]:
        """Get steps from current session, or an empty list when none is active."""
        if self._current_session:
            return self._current_session.get("steps", [])
        return []

    # ========================================================================
    # Step Tracking
    # ========================================================================

    def record_step(
        self,
        step: str,
        taken: bool,
        reason: Optional[str] = None,
    ) -> None:
        """
        Record a workflow step.

        Starts a session automatically when none is active. The step is only
        held in memory until the session ends or ``save()`` is called.

        Args:
            step: Step name (research, testing, etc.)
            taken: True if step was taken, False if skipped
            reason: Optional reason for skipping
        """
        with self._lock:
            if not self._current_session:
                self.start_session()

            step_record = {
                "step": step,
                "taken": taken,
                "timestamp": self._timestamp(),
            }
            if reason:
                step_record["reason"] = reason

            self._current_session["steps"].append(step_record)

    # ========================================================================
    # Correction Tracking
    # ========================================================================

    def record_correction(
        self,
        step: str,
        text: str,
        task_type: Optional[str] = None,
    ) -> None:
        """
        Record a user correction, update preference counts, and save.

        Args:
            step: Step that was corrected (research, testing, etc.)
            text: Original user text
            task_type: Optional task type for context
        """
        with self._lock:
            correction = {
                "step": step,
                "text": text,
                "timestamp": self._timestamp(),
                "task_type": task_type,
            }
            self._state["corrections"].append(correction)

            preferences = self._state["preferences"]

            # Update emphasized steps
            emphasized = preferences.get("emphasized_steps", {})
            emphasized[step] = emphasized.get(step, 0) + 1
            preferences["emphasized_steps"] = emphasized

            # Update task-type preferences if provided
            if task_type:
                task_prefs = preferences.get("task_type_preferences", {})
                per_task = task_prefs.setdefault(task_type, {})
                per_task[step] = per_task.get(step, 0) + 1
                preferences["task_type_preferences"] = task_prefs

            self.save()

    def get_corrections(self) -> List[Dict[str, Any]]:
        """Get all recorded corrections (the live internal list, not a copy)."""
        return self._state.get("corrections", [])

    # ========================================================================
    # Preference Learning
    # ========================================================================

    def get_preferences(self) -> Dict[str, Any]:
        """Get learned preferences (the live internal dict, not a copy)."""
        return self._state.get("preferences", {})

    def get_recommended_steps(self, task_type: Optional[str] = None) -> List[str]:
        """
        Get recommended workflow steps based on preferences.

        Steps whose correction count reaches CORRECTION_THRESHOLD are
        emphasized.

        Args:
            task_type: Optional task type for context-specific recommendations

        Returns:
            List of recommended step names, ordered by overall correction
            count (most corrections first)
        """
        emphasized = self._state["preferences"].get("emphasized_steps", {})

        # Get steps above correction threshold
        high_priority = [
            step for step, count in emphasized.items()
            if count >= CORRECTION_THRESHOLD
        ]

        # Add task-type specific steps if available
        if task_type:
            task_prefs = self._state["preferences"].get("task_type_preferences", {})
            for step, count in task_prefs.get(task_type, {}).items():
                if count >= CORRECTION_THRESHOLD and step not in high_priority:
                    high_priority.append(step)

        # Return in priority order (most corrections first)
        return sorted(
            high_priority,
            key=lambda s: emphasized.get(s, 0),
            reverse=True,
        )

    def apply_preference_decay(self, decay_days: Optional[int] = None) -> None:
        """
        Apply time-based decay to preferences, then save.

        Corrections older than the decay window (or without a parseable
        timestamp) are dropped, and both the overall and the per-task-type
        correction counts are rebuilt from the corrections that remain, so
        preferences can evolve over time.

        Args:
            decay_days: Window length in days (default: PREFERENCE_DECAY_DAYS)
        """
        window = PREFERENCE_DECAY_DAYS if decay_days is None else decay_days

        with self._lock:
            cutoff = self._utc_now() - timedelta(days=window)

            # Filter recent corrections (compare parsed times, not strings)
            recent = []
            for correction in self._state.get("corrections", []):
                recorded_at = self._parse_timestamp(correction.get("timestamp"))
                if recorded_at is not None and recorded_at >= cutoff:
                    recent.append(correction)

            # Rebuild both count tables from recent corrections only
            emphasized: Dict[str, int] = {}
            task_prefs: Dict[str, Dict[str, int]] = {}
            for correction in recent:
                step = correction.get("step")
                if not step:
                    continue
                emphasized[step] = emphasized.get(step, 0) + 1
                task_type = correction.get("task_type")
                if task_type:
                    per_task = task_prefs.setdefault(task_type, {})
                    per_task[step] = per_task.get(step, 0) + 1

            preferences = self._state["preferences"]
            preferences["emphasized_steps"] = emphasized
            preferences["task_type_preferences"] = task_prefs
            self._state["corrections"] = recent

            self.save()
|
|
|
|
|
|
# ============================================================================
|
|
# CLI Entry Point
|
|
# ============================================================================
|
|
|
|
def main(argv: Optional[List[str]] = None) -> None:
    """
    CLI entry point for testing.

    Commands:
        detect <text>  - report whether the text contains a correction
        preferences    - print learned preferences as JSON
        sessions       - print the number of recorded sessions

    Args:
        argv: Full argument vector shaped like ``sys.argv`` (program name
            first). Defaults to ``sys.argv`` when omitted.

    Raises:
        SystemExit: With status 1 when no command or an unknown command
            is given
    """
    import sys

    args = sys.argv if argv is None else list(argv)

    if len(args) < 2:
        print("Usage: python workflow_tracker.py <command> [args]")
        print("Commands:")
        print(" detect <text> - Detect correction in text")
        print(" preferences - Show learned preferences")
        print(" sessions - Show session count")
        sys.exit(1)

    command = args[1]

    if command == "detect":
        # Everything after the command is treated as one message.
        text = " ".join(args[2:])
        result = detect_correction(text)
        if result:
            print("Correction detected:")
            print(f" Step: {result['step']}")
            print(f" Pattern: {result['pattern']}")
        else:
            print("No correction detected")

    elif command == "preferences":
        tracker = WorkflowTracker()
        prefs = tracker.get_preferences()
        print("Learned preferences:")
        print(json.dumps(prefs, indent=2))

    elif command == "sessions":
        tracker = WorkflowTracker()
        sessions = tracker.get_sessions()
        print(f"Sessions recorded: {len(sessions)}")

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|