Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
342 lines
14 KiB
Python
342 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Error Pattern Analyzer
|
|
|
|
Analyzes system issues to identify systemic patterns:
|
|
- Groups issues by root cause
|
|
- Calculates frequency and impact
|
|
- Recommends systemic fixes
|
|
- Identifies precursors and prevention strategies
|
|
"""
|
|
|
|
import time
|
|
from typing import List, Dict, Tuple
|
|
from collections import defaultdict
|
|
|
|
|
|
class ErrorPatternAnalyzer:
    """Analyze error patterns to identify systemic issues."""

    # Known systemic patterns, keyed by pattern id. Each entry describes the
    # failure mode, likely root causes, the indicator strings emitted by the
    # health checkers, the match threshold, the impact, and the recommended fix.
    PATTERNS = {
        'incomplete_research_blocking': {
            'description': 'Research sessions ask user question, never resume',
            'root_causes': ['Research agent ends without follow-up', 'User question not resumed'],
            'indicators': ['unresolved_question', 'claude_no_conclusion'],
            'frequency_threshold': 5,  # Per 30 days
            'impact': 'KG quality degradation, user confusion',
            'prevention': 'Block session completion if unresolved questions exist'
        },
        'task_stalling_under_load': {
            'description': 'Long-running tasks timeout heartbeat updates',
            'root_causes': ['Heartbeat updates blocked', 'Task exceeds timeout', 'Process hangs'],
            'indicators': ['heartbeat_timeout', 'process_not_found'],
            'frequency_threshold': 3,  # Per 30 days
            'impact': 'Tasks marked running indefinitely, resources held',
            'prevention': 'Increase heartbeat timeout or add intermediate progress signals'
        },
        'disk_pressure_growth': {
            'description': 'Old conductor tasks accumulating, not archived',
            'root_causes': ['No automatic archival', 'Task cleanup not running', 'Large task logs'],
            'indicators': ['disk_usage_high', 'old_tasks_accumulating'],
            'frequency_threshold': 5,  # %/month growth
            'impact': 'Approaching critical capacity, performance degradation',
            'prevention': 'Implement automatic archival of >30 day tasks'
        },
        'missing_documentation': {
            'description': 'Research findings incomplete or not documented',
            'root_causes': ['No mandatory documentation', 'Findings not extracted', 'Synthesis missing'],
            'indicators': ['incomplete_duration', 'missing_findings'],
            'frequency_threshold': 8,  # Per 30 days
            'impact': 'Knowledge loss, difficult to track progress',
            'prevention': 'Require structured findings section before completion'
        },
        'script_quality_drift': {
            'description': 'Script quality degrades over time',
            'root_causes': ['No validation on commit', 'Dependencies change', 'Type hints missing'],
            'indicators': ['syntax_error', 'unused_import', 'low_type_coverage'],
            'frequency_threshold': 3,  # Issues per week
            'impact': 'Fragility, hard to maintain, bugs increase',
            'prevention': 'Enforce validation in pre-commit hooks'
        }
    }

    def __init__(self) -> None:
        """Initialize error pattern analyzer."""
        # Raw issue records accumulated by callers (not read by the analysis
        # methods below; retained for future incremental logging).
        self.issues_log: List[Dict] = []
        # Pattern id -> list of matched issue records.
        self.pattern_matches: Dict[str, List[Dict]] = defaultdict(list)

    def analyze_kg_issues(self, kg_findings: List[Dict]) -> Dict:
        """
        Analyze KG findings for error patterns.

        Args:
            kg_findings: List of findings from KGHealthChecker

        Returns:
            Dict with pattern analysis (pattern id -> match details);
            empty when no pattern reaches its frequency threshold.
        """
        patterns: Dict = {}

        # Pattern 1: Incomplete Research Blocking
        unresolved = [f for f in kg_findings if f.get('pattern') == 'unresolved_question']
        if len(unresolved) >= self.PATTERNS['incomplete_research_blocking']['frequency_threshold']:
            patterns['incomplete_research_blocking'] = {
                'matched': True,
                'evidence_count': len(unresolved),
                'examples': unresolved[:3],
                'severity': 'high' if len(unresolved) > 10 else 'medium',
                'frequency_30d': len(unresolved),
                'root_cause_analysis': self._analyze_incomplete_research(unresolved),
                'recommended_fix': self.PATTERNS['incomplete_research_blocking']['prevention']
            }

        # Pattern 2: Missing Documentation
        no_conclusion = [f for f in kg_findings if f.get('pattern') == 'claude_no_conclusion']
        if len(no_conclusion) >= self.PATTERNS['missing_documentation']['frequency_threshold']:
            patterns['missing_documentation'] = {
                'matched': True,
                'evidence_count': len(no_conclusion),
                'examples': no_conclusion[:3],
                'severity': 'medium',
                # Consistency with the other KG pattern's reporting fields.
                'frequency_30d': len(no_conclusion),
                'root_cause_analysis': 'Claude responses present but missing synthesis/conclusions',
                'recommended_fix': 'Add validation requiring "Conclusion:" or "Summary:" section'
            }

        return patterns

    def analyze_conductor_issues(self, conductor_stalled: List[Dict], disk_usage_pct: float) -> Dict:
        """
        Analyze conductor issues for error patterns.

        Args:
            conductor_stalled: List of stalled tasks
            disk_usage_pct: Disk usage percentage

        Returns:
            Dict with pattern analysis (pattern id -> match details)
        """
        patterns: Dict = {}

        # Pattern 1: Task Stalling Under Load
        if len(conductor_stalled) >= self.PATTERNS['task_stalling_under_load']['frequency_threshold']:
            patterns['task_stalling_under_load'] = {
                'matched': True,
                'evidence_count': len(conductor_stalled),
                'examples': conductor_stalled[:3],
                'severity': 'high' if len(conductor_stalled) > 5 else 'medium',
                'root_cause_analysis': self._analyze_stalled_tasks(conductor_stalled),
                'recommended_fix': self.PATTERNS['task_stalling_under_load']['prevention']
            }

        # Pattern 2: Disk Pressure Growth — triggers above 80% usage.
        if disk_usage_pct > 80:
            patterns['disk_pressure_growth'] = {
                'matched': True,
                'current_usage_pct': disk_usage_pct,
                'severity': 'critical' if disk_usage_pct > 90 else 'high' if disk_usage_pct > 85 else 'medium',
                'estimated_growth_pct_month': 5,  # Historical average
                # Days until 95% assuming the 5%/month historical growth rate.
                'days_until_critical': max(0, int((95 - disk_usage_pct) / 5 * 30)),
                'root_cause_analysis': 'Old conductor tasks accumulating without archival',
                'recommended_fix': self.PATTERNS['disk_pressure_growth']['prevention']
            }

        return patterns

    def analyze_script_issues(self, script_health: Dict) -> Dict:
        """
        Analyze script quality for error patterns.

        Args:
            script_health: Script health report data

        Returns:
            Dict with pattern analysis (pattern id -> match details)
        """
        patterns: Dict = {}

        # Pattern 1: Script Quality Drift.
        # Use .get() so malformed records without a 'status' key don't raise.
        problematic_scripts = [s for s in script_health.get('scripts', [])
                               if s.get('status') in ['syntax_error', 'issues']]

        if len(problematic_scripts) >= self.PATTERNS['script_quality_drift']['frequency_threshold']:
            patterns['script_quality_drift'] = {
                'matched': True,
                'problematic_count': len(problematic_scripts),
                'examples': [{'script': s.get('script'), 'status': s.get('status')}
                             for s in problematic_scripts[:3]],
                'severity': 'high' if len(problematic_scripts) > 5 else 'medium',
                'root_cause_analysis': 'No pre-commit validation enforcing script quality',
                'recommended_fix': self.PATTERNS['script_quality_drift']['prevention']
            }

        return patterns

    def run_full_pattern_analysis(self, all_health_data: Dict) -> Dict:
        """
        Run comprehensive pattern analysis across all systems.

        Args:
            all_health_data: Complete health data from orchestrator

        Returns:
            Dict with all identified patterns, a summary, systemic
            recommendations, and a timestamp.
        """
        all_patterns: Dict = {}

        # Analyze KG issues
        kg_issues = self._extract_kg_issues(all_health_data)
        kg_patterns = self.analyze_kg_issues(kg_issues)
        all_patterns.update(kg_patterns)

        # Analyze conductor issues
        conductor_stalled = self._extract_conductor_stalled(all_health_data)
        disk_usage = all_health_data.get('capacity', {}).get('disk', {}).get('usage_pct', 0)
        conductor_patterns = self.analyze_conductor_issues(conductor_stalled, disk_usage)
        all_patterns.update(conductor_patterns)

        # Analyze script issues
        script_patterns = self.analyze_script_issues(all_health_data)
        all_patterns.update(script_patterns)

        return {
            'total_patterns': len(all_patterns),
            'patterns': all_patterns,
            'summary': self._generate_pattern_summary(all_patterns),
            'systemic_recommendations': self._generate_systemic_recommendations(all_patterns),
            'timestamp': time.time()
        }

    def _analyze_incomplete_research(self, unresolved_findings: List[Dict]) -> str:
        """Generate detailed root cause analysis for incomplete research."""
        if not unresolved_findings:
            return "No data available"

        # Analyze pattern
        avg_duration = sum(f.get('duration_secs', 0) for f in unresolved_findings) / len(unresolved_findings)

        analysis = f"""
Root Cause: Research agent creates initial analysis but asks user question.
User answer is expected but session is marked complete anyway.

Evidence:
- {len(unresolved_findings)} sessions ended with unresolved questions
- Average session duration: {int(avg_duration)}s
- Pattern: Initial research → Claude analysis → "What do you think?" → END

Impact:
- User confusion (unclear next steps)
- Knowledge incomplete (user input never captured)
- KG quality degraded (research marked done but unresolved)

Systemic Issue:
Research workflow doesn't enforce follow-up on user questions.
Sessions can complete even with pending decisions.
"""
        return analysis.strip()

    def _analyze_stalled_tasks(self, stalled_tasks: List[Dict]) -> str:
        """Generate detailed root cause analysis for stalled tasks."""
        if not stalled_tasks:
            return "No data available"

        heartbeat_timeouts = [t for t in stalled_tasks if t.get('stall_reason') == 'heartbeat_timeout']
        process_missing = [t for t in stalled_tasks if t.get('stall_reason') == 'process_not_found']

        analysis = f"""
Root Cause: Long-running tasks exceed heartbeat timeout window.
No intermediate progress updates during execution.

Evidence:
- {len(heartbeat_timeouts)} tasks with heartbeat timeout
- {len(process_missing)} tasks with missing process
- Pattern: Task starts → no heartbeat update → marked stalled after 300s

Impact:
- Resources held indefinitely
- Tasks can't recover automatically
- System capacity wasted

Systemic Issue:
Heartbeat mechanism assumes short tasks (< 5 min).
Long-running tasks (> 10 min) always timeout regardless of progress.
No intermediate signal for slow but progressing tasks.
"""
        return analysis.strip()

    def _generate_pattern_summary(self, patterns: Dict) -> Dict:
        """Generate summary statistics for all patterns."""
        summary = {
            'total_patterns_detected': len(patterns),
            # Fix: disk-pressure patterns can carry severity 'critical'; the
            # previous counter silently dropped them. Counted separately so
            # existing consumers of high/medium keys are unaffected.
            'critical_severity': 0,
            'high_severity': 0,
            'medium_severity': 0,
            'total_evidence_items': 0
        }

        for pattern_data in patterns.values():
            if not pattern_data.get('matched'):
                continue
            severity = pattern_data.get('severity', 'medium')
            if severity == 'critical':
                summary['critical_severity'] += 1
            elif severity == 'high':
                summary['high_severity'] += 1
            elif severity == 'medium':
                summary['medium_severity'] += 1

            summary['total_evidence_items'] += pattern_data.get('evidence_count', 1)

        return summary

    def _generate_systemic_recommendations(self, patterns: Dict) -> List[str]:
        """Generate systemic recommendations from identified patterns."""
        recommendations: List[str] = []

        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                severity = pattern_data.get('severity', 'medium')
                # Fix: 'critical' severity previously fell through to
                # "[WARNING]"; it deserves the urgent prefix at least as much
                # as 'high' does.
                prefix = "[URGENT]" if severity in ('critical', 'high') else "[WARNING]"

                recommendations.append(
                    f"{prefix} {pattern_data.get('recommended_fix', 'Fix this issue')}"
                )

        # Add forward-looking recommendations
        if recommendations:
            recommendations.append("\nLong-term Systemic Fixes:")
            recommendations.append("  1. Implement pre-commit validation for script quality")
            recommendations.append("  2. Add mandatory documentation sections for research")
            recommendations.append("  3. Increase heartbeat timeout or add intermediate signals")
            recommendations.append("  4. Implement automatic archival for old tasks")

        return recommendations

    def _extract_kg_issues(self, health_data: Dict) -> List[Dict]:
        """Extract KG issues from health data."""
        # Stub: would be populated from actual KG checker results.
        return []

    def _extract_conductor_stalled(self, health_data: Dict) -> List[Dict]:
        """Extract stalled conductor tasks from health data."""
        # Stub: would be populated from actual conductor checker results.
        return []
|
|
|
|
|
|
if __name__ == '__main__':
    # Demo run: analyze a sample health snapshot and print the findings.
    sample_data = {
        'capacity': {'disk': {'usage_pct': 82}},
        'integration': {}
    }
    result = ErrorPatternAnalyzer().run_full_pattern_analysis(sample_data)

    banner = "=" * 70
    print(banner)
    print("ERROR PATTERN ANALYSIS")
    print(banner)

    summary = result['summary']
    print(f"\nPatterns detected: {result['total_patterns']}")
    print(f"High severity: {summary['high_severity']}")
    print(f"Medium severity: {summary['medium_severity']}")

    print(f"\nSystemic Recommendations:")
    for rec in result['systemic_recommendations']:
        print(f"  {rec}")
|