Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Added DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
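For orientation, a minimal sketch of how an agent task might drive the controller described above. The DockerTmuxController class itself is not part of the diff below, so the import path, constructor argument, and parameter names here are assumptions, not the actual API:

    # Hypothetical usage sketch -- all names and signatures below are assumed.
    import subprocess

    from lib.docker_tmux_controller import DockerTmuxController  # assumed module path

    # Pre-task git snapshot before agent execution (the workflow change above).
    subprocess.run(['git', 'add', '-A'], check=True)
    subprocess.run(['git', 'commit', '--allow-empty', '-m', 'snapshot: pre-task state'], check=True)

    ctl = DockerTmuxController(session='agent-main')   # assumed constructor signature
    ctl.send_keys('pytest -q', delay_enter=0.5)        # configurable pause around the trailing Enter
    ctl.wait_for_prompt(r'\d+ (passed|failed)')        # pattern-based completion detection
    ctl.wait_for_idle()                                # content-hash-based idle detection
    output = ctl.capture_pane()                        # retrieve pane output
    ctl.wait_for_shell_prompt()                        # confirm the shell prompt is back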
341 lib/error_pattern_analyzer.py Normal file
@@ -0,0 +1,341 @@
#!/usr/bin/env python3
"""
Error Pattern Analyzer

Analyzes system issues to identify systemic patterns:
- Groups issues by root cause
- Calculates frequency and impact
- Recommends systemic fixes
- Identifies precursors and prevention strategies
"""

import time
from typing import List, Dict
from collections import defaultdict


class ErrorPatternAnalyzer:
    """Analyze error patterns to identify systemic issues."""

    # Known systemic patterns
    PATTERNS = {
        'incomplete_research_blocking': {
            'description': 'Research sessions ask a user question, then never resume',
            'root_causes': ['Research agent ends without follow-up', 'User question not resumed'],
            'indicators': ['unresolved_question', 'claude_no_conclusion'],
            'frequency_threshold': 5,  # Occurrences per 30 days
            'impact': 'KG quality degradation, user confusion',
            'prevention': 'Block session completion if unresolved questions exist'
        },
        'task_stalling_under_load': {
            'description': 'Long-running tasks time out their heartbeat updates',
            'root_causes': ['Heartbeat updates blocked', 'Task exceeds timeout', 'Process hangs'],
            'indicators': ['heartbeat_timeout', 'process_not_found'],
            'frequency_threshold': 3,  # Occurrences per 30 days
            'impact': 'Tasks marked running indefinitely, resources held',
            'prevention': 'Increase heartbeat timeout or add intermediate progress signals'
        },
        'disk_pressure_growth': {
            'description': 'Old conductor tasks accumulating, not archived',
            'root_causes': ['No automatic archival', 'Task cleanup not running', 'Large task logs'],
            'indicators': ['disk_usage_high', 'old_tasks_accumulating'],
            'frequency_threshold': 5,  # Growth in % per month
            'impact': 'Approaching critical capacity, performance degradation',
            'prevention': 'Implement automatic archival of >30 day tasks'
        },
        'missing_documentation': {
            'description': 'Research findings incomplete or not documented',
            'root_causes': ['No mandatory documentation', 'Findings not extracted', 'Synthesis missing'],
            'indicators': ['incomplete_duration', 'missing_findings'],
            'frequency_threshold': 8,  # Occurrences per 30 days
            'impact': 'Knowledge loss, difficult to track progress',
            'prevention': 'Require structured findings section before completion'
        },
        'script_quality_drift': {
            'description': 'Script quality degrades over time',
            'root_causes': ['No validation on commit', 'Dependencies change', 'Type hints missing'],
            'indicators': ['syntax_error', 'unused_import', 'low_type_coverage'],
            'frequency_threshold': 3,  # Issues per week
            'impact': 'Fragility, hard to maintain, bugs increase',
            'prevention': 'Enforce validation in pre-commit hooks'
        }
    }

    def __init__(self):
        """Initialize error pattern analyzer."""
        self.issues_log: List[Dict] = []
        self.pattern_matches: Dict[str, List[Dict]] = defaultdict(list)

    def analyze_kg_issues(self, kg_findings: List[Dict]) -> Dict:
        """
        Analyze KG findings for error patterns.

        Args:
            kg_findings: List of findings from KGHealthChecker

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Incomplete Research Blocking
        unresolved = [f for f in kg_findings if f.get('pattern') == 'unresolved_question']
        if len(unresolved) >= self.PATTERNS['incomplete_research_blocking']['frequency_threshold']:
            patterns['incomplete_research_blocking'] = {
                'matched': True,
                'evidence_count': len(unresolved),
                'examples': unresolved[:3],
                'severity': 'high' if len(unresolved) > 10 else 'medium',
                'frequency_30d': len(unresolved),
                'root_cause_analysis': self._analyze_incomplete_research(unresolved),
                'recommended_fix': self.PATTERNS['incomplete_research_blocking']['prevention']
            }

        # Pattern 2: Missing Documentation
        no_conclusion = [f for f in kg_findings if f.get('pattern') == 'claude_no_conclusion']
        if len(no_conclusion) >= self.PATTERNS['missing_documentation']['frequency_threshold']:
            patterns['missing_documentation'] = {
                'matched': True,
                'evidence_count': len(no_conclusion),
                'examples': no_conclusion[:3],
                'severity': 'medium',
                'root_cause_analysis': 'Claude responses present but missing synthesis/conclusions',
                'recommended_fix': 'Add validation requiring "Conclusion:" or "Summary:" section'
            }

        return patterns

    def analyze_conductor_issues(self, conductor_stalled: List[Dict], disk_usage_pct: float) -> Dict:
        """
        Analyze conductor issues for error patterns.

        Args:
            conductor_stalled: List of stalled tasks
            disk_usage_pct: Disk usage percentage

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Task Stalling Under Load
        if len(conductor_stalled) >= self.PATTERNS['task_stalling_under_load']['frequency_threshold']:
            patterns['task_stalling_under_load'] = {
                'matched': True,
                'evidence_count': len(conductor_stalled),
                'examples': conductor_stalled[:3],
                'severity': 'high' if len(conductor_stalled) > 5 else 'medium',
                'root_cause_analysis': self._analyze_stalled_tasks(conductor_stalled),
                'recommended_fix': self.PATTERNS['task_stalling_under_load']['prevention']
            }

        # Pattern 2: Disk Pressure Growth
        if disk_usage_pct > 80:
            patterns['disk_pressure_growth'] = {
                'matched': True,
                'current_usage_pct': disk_usage_pct,
                'severity': 'critical' if disk_usage_pct > 90 else 'high' if disk_usage_pct > 85 else 'medium',
                'estimated_growth_pct_month': 5,  # Historical average
                # Days to reach the 95% critical mark, assuming ~5%/month linear growth
                'days_until_critical': max(0, int((95 - disk_usage_pct) / 5 * 30)),
                'root_cause_analysis': 'Old conductor tasks accumulating without archival',
                'recommended_fix': self.PATTERNS['disk_pressure_growth']['prevention']
            }

        return patterns

    def analyze_script_issues(self, script_health: Dict) -> Dict:
        """
        Analyze script quality for error patterns.

        Args:
            script_health: Script health report data

        Returns:
            Dict with pattern analysis
        """
        patterns = {}

        # Pattern 1: Script Quality Drift
        problematic_scripts = [s for s in script_health.get('scripts', [])
                               if s['status'] in ['syntax_error', 'issues']]

        if len(problematic_scripts) >= self.PATTERNS['script_quality_drift']['frequency_threshold']:
            patterns['script_quality_drift'] = {
                'matched': True,
                'problematic_count': len(problematic_scripts),
                'examples': [{'script': s['script'], 'status': s['status']} for s in problematic_scripts[:3]],
                'severity': 'high' if len(problematic_scripts) > 5 else 'medium',
                'root_cause_analysis': 'No pre-commit validation enforcing script quality',
                'recommended_fix': self.PATTERNS['script_quality_drift']['prevention']
            }

        return patterns

    def run_full_pattern_analysis(self, all_health_data: Dict) -> Dict:
        """
        Run comprehensive pattern analysis across all systems.

        Args:
            all_health_data: Complete health data from orchestrator

        Returns:
            Dict with all identified patterns
        """
        all_patterns = {}

        # Analyze KG issues
        kg_issues = self._extract_kg_issues(all_health_data)
        kg_patterns = self.analyze_kg_issues(kg_issues)
        all_patterns.update(kg_patterns)

        # Analyze conductor issues
        conductor_stalled = self._extract_conductor_stalled(all_health_data)
        disk_usage = all_health_data.get('capacity', {}).get('disk', {}).get('usage_pct', 0)
        conductor_patterns = self.analyze_conductor_issues(conductor_stalled, disk_usage)
        all_patterns.update(conductor_patterns)

        # Analyze script issues
        script_patterns = self.analyze_script_issues(all_health_data)
        all_patterns.update(script_patterns)

        return {
            'total_patterns': len(all_patterns),
            'patterns': all_patterns,
            'summary': self._generate_pattern_summary(all_patterns),
            'systemic_recommendations': self._generate_systemic_recommendations(all_patterns),
            'timestamp': time.time()
        }

    def _analyze_incomplete_research(self, unresolved_findings: List[Dict]) -> str:
        """Generate detailed root cause analysis for incomplete research."""
        if not unresolved_findings:
            return "No data available"

        # Analyze pattern
        avg_duration = sum(f.get('duration_secs', 0) for f in unresolved_findings) / len(unresolved_findings)

        analysis = f"""
Root Cause: The research agent produces an initial analysis, then asks the user a question.
An answer is expected, but the session is marked complete anyway.

Evidence:
- {len(unresolved_findings)} sessions ended with unresolved questions
- Average session duration: {int(avg_duration)}s
- Pattern: Initial research → Claude analysis → "What do you think?" → END

Impact:
- User confusion (unclear next steps)
- Knowledge incomplete (user input never captured)
- KG quality degraded (research marked done but unresolved)

Systemic Issue:
The research workflow doesn't enforce follow-up on user questions.
Sessions can complete even with pending decisions.
"""
        return analysis.strip()

    def _analyze_stalled_tasks(self, stalled_tasks: List[Dict]) -> str:
        """Generate detailed root cause analysis for stalled tasks."""
        if not stalled_tasks:
            return "No data available"

        heartbeat_timeouts = [t for t in stalled_tasks if t.get('stall_reason') == 'heartbeat_timeout']
        process_missing = [t for t in stalled_tasks if t.get('stall_reason') == 'process_not_found']

        analysis = f"""
Root Cause: Long-running tasks exceed the heartbeat timeout window.
No intermediate progress updates are sent during execution.

Evidence:
- {len(heartbeat_timeouts)} tasks with heartbeat timeout
- {len(process_missing)} tasks with missing process
- Pattern: Task starts → no heartbeat update → marked stalled after 300s

Impact:
- Resources held indefinitely
- Tasks can't recover automatically
- System capacity wasted

Systemic Issue:
The heartbeat mechanism assumes short tasks (< 5 min).
Long-running tasks (> 10 min) always time out regardless of progress.
There is no intermediate signal for slow but progressing tasks.
"""
        return analysis.strip()

    def _generate_pattern_summary(self, patterns: Dict) -> Dict:
        """Generate summary statistics for all patterns."""
        summary = {
            'total_patterns_detected': len(patterns),
            'critical_severity': 0,
            'high_severity': 0,
            'medium_severity': 0,
            'total_evidence_items': 0
        }

        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                # Disk pressure can report 'critical', so count it explicitly
                severity = pattern_data.get('severity', 'medium')
                if severity == 'critical':
                    summary['critical_severity'] += 1
                elif severity == 'high':
                    summary['high_severity'] += 1
                elif severity == 'medium':
                    summary['medium_severity'] += 1

                summary['total_evidence_items'] += pattern_data.get('evidence_count', 1)

        return summary

    def _generate_systemic_recommendations(self, patterns: Dict) -> List[str]:
        """Generate systemic recommendations from identified patterns."""
        recommendations = []

        for pattern_data in patterns.values():
            if pattern_data.get('matched'):
                severity = pattern_data.get('severity', 'medium')
                # 'critical' findings deserve the urgent prefix too
                prefix = "[URGENT]" if severity in ('critical', 'high') else "[WARNING]"

                recommendations.append(
                    f"{prefix} {pattern_data.get('recommended_fix', 'Fix this issue')}"
                )

        # Add forward-looking recommendations
        if recommendations:
            recommendations.append("\nLong-term Systemic Fixes:")
            recommendations.append("  1. Implement pre-commit validation for script quality")
            recommendations.append("  2. Add mandatory documentation sections for research")
            recommendations.append("  3. Increase heartbeat timeout or add intermediate signals")
            recommendations.append("  4. Implement automatic archival for old tasks")

        return recommendations

    def _extract_kg_issues(self, health_data: Dict) -> List[Dict]:
        """Extract KG issues from health data."""
        # This would be populated from actual KG checker results
        return []

    def _extract_conductor_stalled(self, health_data: Dict) -> List[Dict]:
        """Extract stalled conductor tasks from health data."""
        # This would be populated from actual conductor checker results
        return []


if __name__ == '__main__':
    analyzer = ErrorPatternAnalyzer()

    # Example: Run pattern analysis with sample data
    sample_data = {
        'capacity': {'disk': {'usage_pct': 82}},
        'integration': {}
    }

    result = analyzer.run_full_pattern_analysis(sample_data)

    print("=" * 70)
    print("ERROR PATTERN ANALYSIS")
    print("=" * 70)
    print(f"\nPatterns detected: {result['total_patterns']}")
    print(f"High severity: {result['summary']['high_severity']}")
    print(f"Medium severity: {result['summary']['medium_severity']}")

    print("\nSystemic Recommendations:")
    for rec in result['systemic_recommendations']:
        print(f"  {rec}")
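For reference, with the sample data above (disk at 82%, no KG or conductor findings supplied), only the disk_pressure_growth pattern matches, at medium severity, so the demo prints roughly:

    ======================================================================
    ERROR PATTERN ANALYSIS
    ======================================================================

    Patterns detected: 1
    High severity: 0
    Medium severity: 1

    Systemic Recommendations:
      [WARNING] Implement automatic archival of >30 day tasks

    Long-term Systemic Fixes:
        1. Implement pre-commit validation for script quality
        2. Add mandatory documentation sections for research
        3. Increase heartbeat timeout or add intermediate signals
        4. Implement automatic archival for old tasks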