Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Added DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
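For orientation, a minimal sketch of how an agent task might drive the controller described above. The DockerTmuxController class itself is not part of the diff below, so the import path, constructor argument, and parameter names here are assumptions, not the actual API:

    # Hypothetical usage sketch -- all names and signatures below are assumed.
    import subprocess

    from lib.docker_tmux_controller import DockerTmuxController  # assumed module path

    # Pre-task git snapshot before agent execution (the workflow change above).
    subprocess.run(['git', 'add', '-A'], check=True)
    subprocess.run(['git', 'commit', '--allow-empty', '-m', 'snapshot: pre-task state'], check=True)

    ctl = DockerTmuxController(session='agent-main')   # assumed constructor signature
    ctl.send_keys('pytest -q', delay_enter=0.5)        # configurable pause around the trailing Enter
    ctl.wait_for_prompt(r'\d+ (passed|failed)')        # pattern-based completion detection
    ctl.wait_for_idle()                                # content-hash-based idle detection
    output = ctl.capture_pane()                        # retrieve pane output
    ctl.wait_for_shell_prompt()                        # confirm the shell prompt is back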
341 lib/error_pattern_analyzer.py Normal file
@@ -0,0 +1,341 @@
#!/usr/bin/env python3
"""
Error Pattern Analyzer

Analyzes system issues to identify systemic patterns:
- Groups issues by root cause
- Calculates frequency and impact
- Recommends systemic fixes
- Identifies precursors and prevention strategies
"""

import time
from typing import List, Dict
from collections import defaultdict


class ErrorPatternAnalyzer:
    """Analyze error patterns to identify systemic issues."""

    # Known systemic patterns
    PATTERNS = {
        'incomplete_research_blocking': {
            'description': 'Research sessions ask a user question, then never resume',
            'root_causes': ['Research agent ends without follow-up', 'User question not resumed'],
            'indicators': ['unresolved_question', 'claude_no_conclusion'],
            'frequency_threshold': 5,  # Occurrences per 30 days
            'impact': 'KG quality degradation, user confusion',
            'prevention': 'Block session completion if unresolved questions exist'
        },
        'task_stalling_under_load': {
            'description': 'Long-running tasks time out their heartbeat updates',
            'root_causes': ['Heartbeat updates blocked', 'Task exceeds timeout', 'Process hangs'],
            'indicators': ['heartbeat_timeout', 'process_not_found'],
            'frequency_threshold': 3,  # Occurrences per 30 days
            'impact': 'Tasks marked running indefinitely, resources held',
            'prevention': 'Increase heartbeat timeout or add intermediate progress signals'
        },
        'disk_pressure_growth': {
            'description': 'Old conductor tasks accumulating, not archived',
            'root_causes': ['No automatic archival', 'Task cleanup not running', 'Large task logs'],
            'indicators': ['disk_usage_high', 'old_tasks_accumulating'],
            'frequency_threshold': 5,  # Growth in % per month
            'impact': 'Approaching critical capacity, performance degradation',
            'prevention': 'Implement automatic archival of >30 day tasks'
        },
        'missing_documentation': {
            'description': 'Research findings incomplete or not documented',
            'root_causes': ['No mandatory documentation', 'Findings not extracted', 'Synthesis missing'],
            'indicators': ['incomplete_duration', 'missing_findings'],
            'frequency_threshold': 8,  # Occurrences per 30 days
            'impact': 'Knowledge loss, difficult to track progress',
            'prevention': 'Require structured findings section before completion'
        },
        'script_quality_drift': {
            'description': 'Script quality degrades over time',
            'root_causes': ['No validation on commit', 'Dependencies change', 'Type hints missing'],
            'indicators': ['syntax_error', 'unused_import', 'low_type_coverage'],
            'frequency_threshold': 3,  # Issues per week
            'impact': 'Fragility, hard to maintain, bugs increase',
            'prevention': 'Enforce validation in pre-commit hooks'
        }
    }

    def __init__(self):
        """Initialize error pattern analyzer."""
        self.issues_log: List[Dict] = []
        self.pattern_matches: Dict[str, List[Dict]] = defaultdict(list)

    def analyze_kg_issues(self, kg_findings: List[Dict]) -> Dict:
        """
        Analyze KG findings for error patterns.

        Args:
            kg_findings: List of findings from KGHealthChecker

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Incomplete Research Blocking
        unresolved = [f for f in kg_findings if f.get('pattern') == 'unresolved_question']
        if len(unresolved) >= self.PATTERNS['incomplete_research_blocking']['frequency_threshold']:
            patterns['incomplete_research_blocking'] = {
                'matched': True,
                'evidence_count': len(unresolved),
                'examples': unresolved[:3],
                'severity': 'high' if len(unresolved) > 10 else 'medium',
                'frequency_30d': len(unresolved),
                'root_cause_analysis': self._analyze_incomplete_research(unresolved),
                'recommended_fix': self.PATTERNS['incomplete_research_blocking']['prevention']
            }

        # Pattern 2: Missing Documentation
        no_conclusion = [f for f in kg_findings if f.get('pattern') == 'claude_no_conclusion']
        if len(no_conclusion) >= self.PATTERNS['missing_documentation']['frequency_threshold']:
            patterns['missing_documentation'] = {
                'matched': True,
                'evidence_count': len(no_conclusion),
                'examples': no_conclusion[:3],
                'severity': 'medium',
                'root_cause_analysis': 'Claude responses present but missing synthesis/conclusions',
                'recommended_fix': 'Add validation requiring "Conclusion:" or "Summary:" section'
            }

        return patterns

    def analyze_conductor_issues(self, conductor_stalled: List[Dict], disk_usage_pct: float) -> Dict:
        """
        Analyze conductor issues for error patterns.

        Args:
            conductor_stalled: List of stalled tasks
            disk_usage_pct: Disk usage percentage

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Task Stalling Under Load
        if len(conductor_stalled) >= self.PATTERNS['task_stalling_under_load']['frequency_threshold']:
            patterns['task_stalling_under_load'] = {
                'matched': True,
                'evidence_count': len(conductor_stalled),
                'examples': conductor_stalled[:3],
                'severity': 'high' if len(conductor_stalled) > 5 else 'medium',
                'root_cause_analysis': self._analyze_stalled_tasks(conductor_stalled),
                'recommended_fix': self.PATTERNS['task_stalling_under_load']['prevention']
            }

        # Pattern 2: Disk Pressure Growth
        if disk_usage_pct > 80:
            patterns['disk_pressure_growth'] = {
                'matched': True,
                'current_usage_pct': disk_usage_pct,
                'severity': 'critical' if disk_usage_pct > 90 else 'high' if disk_usage_pct > 85 else 'medium',
                'estimated_growth_pct_month': 5,  # Historical average
                # Days to reach the 95% critical mark, assuming ~5%/month linear growth
                'days_until_critical': max(0, int((95 - disk_usage_pct) / 5 * 30)),
                'root_cause_analysis': 'Old conductor tasks accumulating without archival',
                'recommended_fix': self.PATTERNS['disk_pressure_growth']['prevention']
            }

        return patterns

    def analyze_script_issues(self, script_health: Dict) -> Dict:
        """
        Analyze script quality for error patterns.

        Args:
            script_health: Script health report data

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Script Quality Drift
        problematic_scripts = [s for s in script_health.get('scripts', [])
                               if s['status'] in ['syntax_error', 'issues']]

        if len(problematic_scripts) >= self.PATTERNS['script_quality_drift']['frequency_threshold']:
            patterns['script_quality_drift'] = {
                'matched': True,
                'problematic_count': len(problematic_scripts),
                'examples': [{'script': s['script'], 'status': s['status']} for s in problematic_scripts[:3]],
                'severity': 'high' if len(problematic_scripts) > 5 else 'medium',
                'root_cause_analysis': 'No pre-commit validation enforcing script quality',
                'recommended_fix': self.PATTERNS['script_quality_drift']['prevention']
            }

        return patterns

    def run_full_pattern_analysis(self, all_health_data: Dict) -> Dict:
        """
        Run comprehensive pattern analysis across all systems.

        Args:
            all_health_data: Complete health data from orchestrator

        Returns:
            Dict with all identified patterns
        """
        all_patterns = {}

        # Analyze KG issues
        kg_issues = self._extract_kg_issues(all_health_data)
        kg_patterns = self.analyze_kg_issues(kg_issues)
        all_patterns.update(kg_patterns)

        # Analyze conductor issues
        conductor_stalled = self._extract_conductor_stalled(all_health_data)
        disk_usage = all_health_data.get('capacity', {}).get('disk', {}).get('usage_pct', 0)
        conductor_patterns = self.analyze_conductor_issues(conductor_stalled, disk_usage)
        all_patterns.update(conductor_patterns)

        # Analyze script issues
        script_patterns = self.analyze_script_issues(all_health_data)
        all_patterns.update(script_patterns)

        return {
            'total_patterns': len(all_patterns),
            'patterns': all_patterns,
            'summary': self._generate_pattern_summary(all_patterns),
            'systemic_recommendations': self._generate_systemic_recommendations(all_patterns),
            'timestamp': time.time()
        }

    def _analyze_incomplete_research(self, unresolved_findings: List[Dict]) -> str:
        """Generate detailed root cause analysis for incomplete research."""
        if not unresolved_findings:
            return "No data available"

        # Analyze pattern
        avg_duration = sum(f.get('duration_secs', 0) for f in unresolved_findings) / len(unresolved_findings)

        analysis = f"""
Root Cause: The research agent produces an initial analysis, then asks the user a question.
An answer is expected, but the session is marked complete anyway.

Evidence:
- {len(unresolved_findings)} sessions ended with unresolved questions
- Average session duration: {int(avg_duration)}s
- Pattern: Initial research → Claude analysis → "What do you think?" → END

Impact:
- User confusion (unclear next steps)
- Knowledge incomplete (user input never captured)
- KG quality degraded (research marked done but unresolved)

Systemic Issue:
The research workflow doesn't enforce follow-up on user questions.
Sessions can complete even with pending decisions.
"""
        return analysis.strip()

    def _analyze_stalled_tasks(self, stalled_tasks: List[Dict]) -> str:
        """Generate detailed root cause analysis for stalled tasks."""
        if not stalled_tasks:
            return "No data available"

        heartbeat_timeouts = [t for t in stalled_tasks if t.get('stall_reason') == 'heartbeat_timeout']
        process_missing = [t for t in stalled_tasks if t.get('stall_reason') == 'process_not_found']

        analysis = f"""
Root Cause: Long-running tasks exceed the heartbeat timeout window.
No intermediate progress updates are sent during execution.

Evidence:
- {len(heartbeat_timeouts)} tasks with heartbeat timeout
- {len(process_missing)} tasks with missing process
- Pattern: Task starts → no heartbeat update → marked stalled after 300s

Impact:
- Resources held indefinitely
- Tasks can't recover automatically
- System capacity wasted

Systemic Issue:
The heartbeat mechanism assumes short tasks (< 5 min).
Long-running tasks (> 10 min) always time out regardless of progress.
There is no intermediate signal for slow but progressing tasks.
"""
        return analysis.strip()

    def _generate_pattern_summary(self, patterns: Dict) -> Dict:
        """Generate summary statistics for all patterns."""
        summary = {
            'total_patterns_detected': len(patterns),
            'critical_severity': 0,
            'high_severity': 0,
            'medium_severity': 0,
            'total_evidence_items': 0
        }

        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                # Disk pressure can report 'critical', so count it explicitly
                severity = pattern_data.get('severity', 'medium')
                if severity == 'critical':
                    summary['critical_severity'] += 1
                elif severity == 'high':
                    summary['high_severity'] += 1
                elif severity == 'medium':
                    summary['medium_severity'] += 1

                summary['total_evidence_items'] += pattern_data.get('evidence_count', 1)

        return summary

    def _generate_systemic_recommendations(self, patterns: Dict) -> List[str]:
        """Generate systemic recommendations from identified patterns."""
        recommendations = []

        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                severity = pattern_data.get('severity', 'medium')
                # 'critical' findings deserve the urgent prefix too
                prefix = "[URGENT]" if severity in ('critical', 'high') else "[WARNING]"

                recommendations.append(
                    f"{prefix} {pattern_data.get('recommended_fix', 'Fix this issue')}"
                )

        # Add forward-looking recommendations
        if recommendations:
            recommendations.append("\nLong-term Systemic Fixes:")
            recommendations.append("  1. Implement pre-commit validation for script quality")
            recommendations.append("  2. Add mandatory documentation sections for research")
            recommendations.append("  3. Increase heartbeat timeout or add intermediate signals")
            recommendations.append("  4. Implement automatic archival for old tasks")

        return recommendations

    def _extract_kg_issues(self, health_data: Dict) -> List[Dict]:
        """Extract KG issues from health data."""
        # This would be populated from actual KG checker results
        return []

    def _extract_conductor_stalled(self, health_data: Dict) -> List[Dict]:
        """Extract stalled conductor tasks from health data."""
        # This would be populated from actual conductor checker results
        return []


if __name__ == '__main__':
    analyzer = ErrorPatternAnalyzer()

    # Example: Run pattern analysis with sample data
    sample_data = {
        'capacity': {'disk': {'usage_pct': 82}},
        'integration': {}
    }

    result = analyzer.run_full_pattern_analysis(sample_data)

    print("=" * 70)
    print("ERROR PATTERN ANALYSIS")
    print("=" * 70)
    print(f"\nPatterns detected: {result['total_patterns']}")
    print(f"High severity: {result['summary']['high_severity']}")
    print(f"Medium severity: {result['summary']['medium_severity']}")

    print("\nSystemic Recommendations:")
    for rec in result['systemic_recommendations']:
        print(f"  {rec}")
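For reference, with the sample data above (disk at 82%, no KG or conductor findings supplied), only the disk_pressure_growth pattern matches, at medium severity, so the demo prints roughly:

    ======================================================================
    ERROR PATTERN ANALYSIS
    ======================================================================

    Patterns detected: 1
    High severity: 0
    Medium severity: 1

    Systemic Recommendations:
      [WARNING] Implement automatic archival of >30 day tasks

    Long-term Systemic Fixes:
        1. Implement pre-commit validation for script quality
        2. Add mandatory documentation sections for research
        3. Increase heartbeat timeout or add intermediate signals
        4. Implement automatic archival for old tasks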