Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController, this refactor:

- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection (interface sketched below)
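
For reference, a minimal sketch of that controller surface, assuming docker exec plus the stock tmux CLI; the container/session wiring, defaults, and timeouts here are illustrative assumptions, not the committed implementation:

import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Sketch only: drive a tmux session inside a Docker container."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container  # illustrative; actual wiring may differ
        self.session = session

    def _tmux(self, *args: str) -> str:
        # docker exec <container> tmux <args...>
        result = subprocess.run(
            ["docker", "exec", self.container, "tmux", *args],
            capture_output=True, text=True, check=True,
        )
        return result.stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Send keystrokes; optionally pause before Enter so slow REPLs settle.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # -p prints the pane contents to stdout
        return self._tmux("capture-pane", "-t", self.session, "-p")

    def wait_for_prompt(self, pattern: str, interval: float = 1.0,
                        timeout: float = 60.0) -> bool:
        # Pattern-based completion detection against captured output.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(interval)
        return False

    def wait_for_idle(self, interval: float = 1.0, timeout: float = 60.0) -> bool:
        # Content-hash idle detection: the pane counts as idle once two
        # consecutive captures hash to the same digest.
        last = None
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            digest = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if digest == last:
                return True
            last = digest
            time.sleep(interval)
        return False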

Also includes workflow improvements:
- Pre-task git snapshot before agent execution (see the sketch after this list)
- Post-task commit protocol in agent guidelines
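
A minimal sketch of the pre-task snapshot step, assuming a plain git checkout; the function name and commit-message format are illustrative assumptions:

import subprocess


def _git(repo_dir: str, *args: str) -> str:
    result = subprocess.run(["git", *args], cwd=repo_dir,
                            capture_output=True, text=True, check=True)
    return result.stdout.strip()


def pre_task_snapshot(repo_dir: str, task_id: str) -> str:
    """Commit the current tree as a restore point; returns the snapshot SHA."""
    _git(repo_dir, "add", "-A")
    # --allow-empty: even a clean tree gets a marker commit to roll back to
    _git(repo_dir, "commit", "--allow-empty", "-m", f"snapshot: pre-task {task_id}")
    return _git(repo_dir, "rev-parse", "HEAD")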

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit ec33ac1936
Author: admin
Date:   2026-01-14 10:42:16 -03:00

265 changed files with 92011 additions and 0 deletions


@@ -0,0 +1,341 @@
#!/usr/bin/env python3
"""
Error Pattern Analyzer
Analyzes system issues to identify systemic patterns:
- Groups issues by root cause
- Calculates frequency and impact
- Recommends systemic fixes
- Identifies precursors and prevention strategies
"""
import time
from collections import defaultdict
from typing import Dict, List


class ErrorPatternAnalyzer:
    """Analyze error patterns to identify systemic issues."""

    # Known systemic patterns
    PATTERNS = {
        'incomplete_research_blocking': {
            'description': 'Research sessions ask the user a question and never resume',
            'root_causes': ['Research agent ends without follow-up', 'User question not resumed'],
            'indicators': ['unresolved_question', 'claude_no_conclusion'],
            'frequency_threshold': 5,  # Per 30 days
            'impact': 'KG quality degradation, user confusion',
            'prevention': 'Block session completion if unresolved questions exist'
        },
        'task_stalling_under_load': {
            'description': 'Long-running tasks exceed the heartbeat timeout',
            'root_causes': ['Heartbeat updates blocked', 'Task exceeds timeout', 'Process hangs'],
            'indicators': ['heartbeat_timeout', 'process_not_found'],
            'frequency_threshold': 3,  # Per 30 days
            'impact': 'Tasks marked running indefinitely, resources held',
            'prevention': 'Increase heartbeat timeout or add intermediate progress signals'
        },
        'disk_pressure_growth': {
            'description': 'Old conductor tasks accumulate without being archived',
            'root_causes': ['No automatic archival', 'Task cleanup not running', 'Large task logs'],
            'indicators': ['disk_usage_high', 'old_tasks_accumulating'],
            'frequency_threshold': 5,  # %/month growth
            'impact': 'Approaching critical capacity, performance degradation',
            'prevention': 'Implement automatic archival of tasks older than 30 days'
        },
        'missing_documentation': {
            'description': 'Research findings incomplete or not documented',
            'root_causes': ['No mandatory documentation', 'Findings not extracted', 'Synthesis missing'],
            'indicators': ['incomplete_duration', 'missing_findings'],
            'frequency_threshold': 8,  # Per 30 days
            'impact': 'Knowledge loss, difficult to track progress',
            'prevention': 'Require structured findings section before completion'
        },
        'script_quality_drift': {
            'description': 'Script quality degrades over time',
            'root_causes': ['No validation on commit', 'Dependencies change', 'Type hints missing'],
            'indicators': ['syntax_error', 'unused_import', 'low_type_coverage'],
            'frequency_threshold': 3,  # Issues per week
            'impact': 'Fragility, hard to maintain, bugs increase',
            'prevention': 'Enforce validation in pre-commit hooks'
        }
    }

    def __init__(self):
        """Initialize error pattern analyzer."""
        self.issues_log: List[Dict] = []
        self.pattern_matches: Dict[str, List[Dict]] = defaultdict(list)

    def analyze_kg_issues(self, kg_findings: List[Dict]) -> Dict:
        """
        Analyze KG findings for error patterns.

        Args:
            kg_findings: List of findings from KGHealthChecker

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Incomplete Research Blocking
        unresolved = [f for f in kg_findings if f.get('pattern') == 'unresolved_question']
        if len(unresolved) >= self.PATTERNS['incomplete_research_blocking']['frequency_threshold']:
            patterns['incomplete_research_blocking'] = {
                'matched': True,
                'evidence_count': len(unresolved),
                'examples': unresolved[:3],
                'severity': 'high' if len(unresolved) > 10 else 'medium',
                'frequency_30d': len(unresolved),
                'root_cause_analysis': self._analyze_incomplete_research(unresolved),
                'recommended_fix': self.PATTERNS['incomplete_research_blocking']['prevention']
            }

        # Pattern 2: Missing Documentation
        no_conclusion = [f for f in kg_findings if f.get('pattern') == 'claude_no_conclusion']
        if len(no_conclusion) >= self.PATTERNS['missing_documentation']['frequency_threshold']:
            patterns['missing_documentation'] = {
                'matched': True,
                'evidence_count': len(no_conclusion),
                'examples': no_conclusion[:3],
                'severity': 'medium',
                'root_cause_analysis': 'Claude responses present but missing synthesis/conclusions',
                'recommended_fix': 'Add validation requiring "Conclusion:" or "Summary:" section'
            }

        return patterns

    def analyze_conductor_issues(self, conductor_stalled: List[Dict], disk_usage_pct: float) -> Dict:
        """
        Analyze conductor issues for error patterns.

        Args:
            conductor_stalled: List of stalled tasks
            disk_usage_pct: Disk usage percentage

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Task Stalling Under Load
        if len(conductor_stalled) >= self.PATTERNS['task_stalling_under_load']['frequency_threshold']:
            patterns['task_stalling_under_load'] = {
                'matched': True,
                'evidence_count': len(conductor_stalled),
                'examples': conductor_stalled[:3],
                'severity': 'high' if len(conductor_stalled) > 5 else 'medium',
                'root_cause_analysis': self._analyze_stalled_tasks(conductor_stalled),
                'recommended_fix': self.PATTERNS['task_stalling_under_load']['prevention']
            }

        # Pattern 2: Disk Pressure Growth
        if disk_usage_pct > 80:
            patterns['disk_pressure_growth'] = {
                'matched': True,
                'current_usage_pct': disk_usage_pct,
                'severity': 'critical' if disk_usage_pct > 90 else 'high' if disk_usage_pct > 85 else 'medium',
                'estimated_growth_pct_month': 5,  # Historical average
                # Days left before hitting 95% usage, assuming ~5%/month linear growth
                'days_until_critical': max(0, int((95 - disk_usage_pct) / 5 * 30)),
                'root_cause_analysis': 'Old conductor tasks accumulating without archival',
                'recommended_fix': self.PATTERNS['disk_pressure_growth']['prevention']
            }

        return patterns

    def analyze_script_issues(self, script_health: Dict) -> Dict:
        """
        Analyze script quality for error patterns.

        Args:
            script_health: Script health report data

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Script Quality Drift
        problematic_scripts = [s for s in script_health.get('scripts', [])
                               if s['status'] in ['syntax_error', 'issues']]
        if len(problematic_scripts) >= self.PATTERNS['script_quality_drift']['frequency_threshold']:
            patterns['script_quality_drift'] = {
                'matched': True,
                'problematic_count': len(problematic_scripts),
                'examples': [{'script': s['script'], 'status': s['status']} for s in problematic_scripts[:3]],
                'severity': 'high' if len(problematic_scripts) > 5 else 'medium',
                'root_cause_analysis': 'No pre-commit validation enforcing script quality',
                'recommended_fix': self.PATTERNS['script_quality_drift']['prevention']
            }

        return patterns

    def run_full_pattern_analysis(self, all_health_data: Dict) -> Dict:
        """
        Run comprehensive pattern analysis across all systems.

        Args:
            all_health_data: Complete health data from orchestrator

        Returns:
            Dict with all identified patterns
        """
        all_patterns = {}

        # Analyze KG issues
        kg_issues = self._extract_kg_issues(all_health_data)
        kg_patterns = self.analyze_kg_issues(kg_issues)
        all_patterns.update(kg_patterns)

        # Analyze conductor issues
        conductor_stalled = self._extract_conductor_stalled(all_health_data)
        disk_usage = all_health_data.get('capacity', {}).get('disk', {}).get('usage_pct', 0)
        conductor_patterns = self.analyze_conductor_issues(conductor_stalled, disk_usage)
        all_patterns.update(conductor_patterns)

        # Analyze script issues
        script_patterns = self.analyze_script_issues(all_health_data)
        all_patterns.update(script_patterns)

        return {
            'total_patterns': len(all_patterns),
            'patterns': all_patterns,
            'summary': self._generate_pattern_summary(all_patterns),
            'systemic_recommendations': self._generate_systemic_recommendations(all_patterns),
            'timestamp': time.time()
        }

    def _analyze_incomplete_research(self, unresolved_findings: List[Dict]) -> str:
        """Generate detailed root cause analysis for incomplete research."""
        if not unresolved_findings:
            return "No data available"

        # Analyze pattern
        avg_duration = sum(f.get('duration_secs', 0) for f in unresolved_findings) / len(unresolved_findings)
        analysis = f"""
Root Cause: Research agent produces an initial analysis, then asks the user a question.
An answer is expected, but the session is marked complete anyway.

Evidence:
- {len(unresolved_findings)} sessions ended with unresolved questions
- Average session duration: {int(avg_duration)}s
- Pattern: Initial research → Claude analysis → "What do you think?" → END

Impact:
- User confusion (unclear next steps)
- Knowledge incomplete (user input never captured)
- KG quality degraded (research marked done but unresolved)

Systemic Issue:
Research workflow doesn't enforce follow-up on user questions.
Sessions can complete even with pending decisions.
"""
        return analysis.strip()

    def _analyze_stalled_tasks(self, stalled_tasks: List[Dict]) -> str:
        """Generate detailed root cause analysis for stalled tasks."""
        if not stalled_tasks:
            return "No data available"

        heartbeat_timeouts = [t for t in stalled_tasks if t.get('stall_reason') == 'heartbeat_timeout']
        process_missing = [t for t in stalled_tasks if t.get('stall_reason') == 'process_not_found']
        analysis = f"""
Root Cause: Long-running tasks exceed heartbeat timeout window.
No intermediate progress updates during execution.

Evidence:
- {len(heartbeat_timeouts)} tasks with heartbeat timeout
- {len(process_missing)} tasks with missing process
- Pattern: Task starts → no heartbeat update → marked stalled after 300s

Impact:
- Resources held indefinitely
- Tasks can't recover automatically
- System capacity wasted

Systemic Issue:
Heartbeat mechanism assumes short tasks (< 5 min).
Long-running tasks (> 10 min) always timeout regardless of progress.
No intermediate signal for slow but progressing tasks.
"""
        return analysis.strip()

    def _generate_pattern_summary(self, patterns: Dict) -> Dict:
        """Generate summary statistics for all patterns."""
        summary = {
            'total_patterns_detected': len(patterns),
            'critical_severity': 0,
            'high_severity': 0,
            'medium_severity': 0,
            'total_evidence_items': 0
        }
        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                severity = pattern_data.get('severity', 'medium')
                if severity == 'critical':
                    summary['critical_severity'] += 1
                elif severity == 'high':
                    summary['high_severity'] += 1
                elif severity == 'medium':
                    summary['medium_severity'] += 1
                summary['total_evidence_items'] += pattern_data.get('evidence_count', 1)
        return summary

    def _generate_systemic_recommendations(self, patterns: Dict) -> List[str]:
        """Generate systemic recommendations from identified patterns."""
        recommendations = []
        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                severity = pattern_data.get('severity', 'medium')
                prefix = "[URGENT]" if severity in ('critical', 'high') else "[WARNING]"
                recommendations.append(
                    f"{prefix} {pattern_data.get('recommended_fix', 'Fix this issue')}"
                )

        # Add forward-looking recommendations
        if recommendations:
            recommendations.append("\nLong-term Systemic Fixes:")
            recommendations.append("  1. Implement pre-commit validation for script quality")
            recommendations.append("  2. Add mandatory documentation sections for research")
            recommendations.append("  3. Increase heartbeat timeout or add intermediate signals")
            recommendations.append("  4. Implement automatic archival for old tasks")
        return recommendations

    def _extract_kg_issues(self, health_data: Dict) -> List[Dict]:
        """Extract KG issues from health data."""
        # This would be populated from actual KG checker results
        return []

    def _extract_conductor_stalled(self, health_data: Dict) -> List[Dict]:
        """Extract stalled conductor tasks from health data."""
        # This would be populated from actual conductor checker results
        return []


if __name__ == '__main__':
    analyzer = ErrorPatternAnalyzer()

    # Example: run pattern analysis with sample data
    sample_data = {
        'capacity': {'disk': {'usage_pct': 82}},
        'integration': {}
    }
    result = analyzer.run_full_pattern_analysis(sample_data)

    print("=" * 70)
    print("ERROR PATTERN ANALYSIS")
    print("=" * 70)
    print(f"\nPatterns detected: {result['total_patterns']}")
    print(f"High severity: {result['summary']['high_severity']}")
    print(f"Medium severity: {result['summary']['medium_severity']}")
    print("\nSystemic Recommendations:")
    for rec in result['systemic_recommendations']:
        print(f"  {rec}")