Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:
- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with a configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
374
lib/kg_health_checker.py
Normal file
374
lib/kg_health_checker.py
Normal file
@@ -0,0 +1,374 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Knowledge Graph Health Checker
|
||||
|
||||
Provides comprehensive KG health assessment including:
|
||||
- Pattern detection for incomplete research
|
||||
- Health score generation (0-100)
|
||||
- Issue categorization and severity assessment
|
||||
- Recommendations for fixing issues
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple
|
||||
|
||||
from kg_pattern_detector import KGPatternDetector
|
||||
|
||||
|
||||
class KGHealthChecker:
    """Check and report on Knowledge Graph health status."""

    def __init__(self):
        """Set up the KG database list and the pattern detector."""
        # Databases are probed in order; non-existent files are skipped
        # by the helpers that read them.
        self.kg_db_paths = [
            '/etc/luz-knowledge/research.db',
            '/etc/zen-swarm/memory/research.db',
        ]
        self.detector = KGPatternDetector()
|
||||
|
||||
def check_kg_completeness(self, time_scope_days: int = 30, verbose: bool = False) -> Dict:
    """Run a quick KG completeness audit over the recent time window.

    Args:
        time_scope_days: How many days back to audit (default 30).
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Dict with:
        - 'status': 'healthy' | 'degraded' | 'critical'
        - 'incomplete_count': Number of incomplete research sessions
        - 'total_sessions': Total sessions in time scope
        - 'completeness_pct': Percentage of complete research
        - 'findings': List of specific issues
        - 'summary': Pattern breakdown
    """
    detection = self.detector.find_all_incomplete_research(
        kg_db_paths=self.kg_db_paths,
        time_scope_days=time_scope_days,
    )

    incomplete = detection['findings']
    total = self._count_total_sessions(time_scope_days)

    # max(total, 1) guards the division when no sessions were recorded.
    pct = 100 - (len(incomplete) / max(total, 1) * 100)

    # Map the completeness percentage onto a coarse status band.
    status = 'healthy' if pct >= 95 else 'degraded' if pct >= 80 else 'critical'

    return {
        'status': status,
        'incomplete_count': len(incomplete),
        'total_sessions': total,
        'completeness_pct': round(pct, 1),
        'findings': incomplete,
        'summary': detection['summary'],
        'timestamp': time.time(),
        'time_scope_days': time_scope_days,
    }
|
||||
|
||||
def check_research_patterns(self, time_scope_days: int = 30, verbose: bool = False) -> Dict:
    """Produce a detailed per-pattern analysis of incomplete research.

    Covers the three tracked pattern types: unresolved_question,
    incomplete_duration, and claude_no_conclusion.

    Args:
        time_scope_days: How many days back to analyze (default 30).
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Dict with:
        - 'pattern_analysis': Breakdown by each pattern type
        - 'severity_breakdown': High/Medium/Low counts
        - 'recommendations': Specific fixes for each pattern
        - 'health_score': 0-100 KG health rating
    """
    detection = self.detector.find_all_incomplete_research(
        kg_db_paths=self.kg_db_paths,
        time_scope_days=time_scope_days,
    )
    findings = detection['findings']
    summary = detection['summary']

    # Per-pattern breakdown with up to three sample findings each.
    tracked = ('unresolved_question', 'incomplete_duration', 'claude_no_conclusion')
    pattern_analysis = {}
    for kind in tracked:
        matches = [f for f in findings if f['pattern'] == kind]
        pattern_analysis[kind] = {
            'count': len(matches),
            'examples': matches[:3],  # First 3 examples
            'recommendation': self._get_pattern_recommendation(kind),
        }

    score = self._calculate_kg_health_score(summary, len(findings))

    return {
        'pattern_analysis': pattern_analysis,
        'severity_breakdown': summary['by_severity'],
        'total_findings': len(findings),
        'health_score': score,
        'health_status': 'healthy' if score >= 80 else 'degraded' if score >= 60 else 'critical',
        'recommendations': self._generate_recommendations(summary),
        'timestamp': time.time(),
    }
|
||||
|
||||
def mark_incomplete_for_review(self, findings: List[Dict] = None,
                               time_scope_days: int = 30,
                               auto_mark: bool = False) -> Dict:
    """Flag incomplete research sessions for review (never auto-fixes).

    Args:
        findings: Findings to mark. When None, runs detection first.
        time_scope_days: Time scope used for detection.
        auto_mark: When True, write the review markers to disk; when
            False, return a preview of the actions without writing.

    Returns:
        Dict with:
        - 'marked_count': Number of sessions marked for review
        - 'review_queue_path': Path to review queue
        - 'actions': List of marking actions
    """
    if findings is None:
        findings = self.detector.find_all_incomplete_research(
            kg_db_paths=self.kg_db_paths,
            time_scope_days=time_scope_days,
        )['findings']

    # The review queue lives on disk; ensure it exists even for previews.
    queue_dir = Path('/home/admin/conductor/review')
    queue_dir.mkdir(parents=True, exist_ok=True)

    actions = []
    for finding in findings:
        # Only KG database entities get review markers.
        if finding['source'] != 'kg_database':
            continue

        marker_path = queue_dir / f"{finding['id']}_review.json"

        marker = {
            'entity_id': finding['id'],
            'entity_name': finding.get('name', 'unknown'),
            'pattern': finding['pattern'],
            'severity': finding['severity'],
            'example': finding['example'],
            'marked_at': datetime.now().isoformat(),
            'reason': 'Incomplete research: needs user follow-up',
            'action_required': 'Review and complete research session',
        }

        if auto_mark:
            marker_path.write_text(json.dumps(marker, indent=2))

        actions.append({
            'entity_id': finding['id'],
            'review_file': str(marker_path),
            'status': 'marked' if auto_mark else 'preview',
        })

    return {
        'marked_count': len(actions),
        'review_queue_path': str(queue_dir),
        'actions': actions,
        'auto_mark': auto_mark,
        'timestamp': time.time(),
    }
|
||||
|
||||
def generate_health_score(self, time_scope_days: int = 30) -> Dict:
    """Compute a weighted overall KG health score with component breakdown.

    Args:
        time_scope_days: Look-back window for the underlying audits.

    Returns:
        Dict with:
        - 'overall_score': 0-100 health rating
        - 'component_scores': Breakdown by metric
        - 'findings_by_severity': Severity counts from the audit
        - 'actionable_fixes': Recommended actions
    """
    audit = self.check_kg_completeness(time_scope_days)
    patterns = self.check_research_patterns(time_scope_days)
    findings = audit['findings']

    # Each component is on a 0-100 scale.
    components = {
        'completeness': audit['completeness_pct'],
        'pattern_quality': patterns['health_score'],
        'recency': self._calculate_recency_score(findings, time_scope_days),
        'metadata_integrity': self._calculate_metadata_integrity(findings),
    }

    # Weighted blend: completeness and pattern quality dominate (35% each),
    # recency 20%, metadata integrity 10%.
    weights = {
        'completeness': 0.35,
        'pattern_quality': 0.35,
        'recency': 0.20,
        'metadata_integrity': 0.10,
    }
    overall = sum(components[name] * w for name, w in weights.items())

    return {
        'overall_score': round(overall, 1),
        'component_scores': {k: round(v, 1) for k, v in components.items()},
        'status': 'healthy' if overall >= 80 else 'degraded' if overall >= 60 else 'critical',
        'total_findings': len(findings),
        'findings_by_severity': audit['summary']['by_severity'],
        'actionable_fixes': patterns['recommendations'],
        'timestamp': time.time(),
    }
|
||||
|
||||
def _count_total_sessions(self, time_scope_days: int) -> int:
|
||||
"""Count total research sessions in time scope."""
|
||||
cutoff_time = time.time() - (time_scope_days * 86400)
|
||||
total = 0
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
for db_path in self.kg_db_paths:
|
||||
if not Path(db_path).exists():
|
||||
continue
|
||||
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*)
|
||||
FROM entities
|
||||
WHERE type = 'session' AND domain = 'research'
|
||||
AND updated_at > ?
|
||||
""", (cutoff_time,))
|
||||
count = cursor.fetchone()[0]
|
||||
total += count
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return total
|
||||
|
||||
def _calculate_kg_health_score(self, summary: Dict, finding_count: int) -> float:
|
||||
"""Calculate KG health score based on issue summary."""
|
||||
# Start with 100
|
||||
score = 100.0
|
||||
|
||||
# Deduct for each finding type
|
||||
high_severity = summary['by_severity'].get('high', 0)
|
||||
medium_severity = summary['by_severity'].get('medium', 0)
|
||||
|
||||
score -= high_severity * 5 # -5 per high severity issue
|
||||
score -= medium_severity * 2 # -2 per medium severity issue
|
||||
|
||||
return max(0, min(100, score))
|
||||
|
||||
def _calculate_recency_score(self, findings: List[Dict], time_scope_days: int) -> float:
|
||||
"""Score based on age of incomplete research (older = worse)."""
|
||||
if not findings:
|
||||
return 100.0
|
||||
|
||||
cutoff_time = time.time() - (time_scope_days * 86400)
|
||||
now = time.time()
|
||||
|
||||
avg_age = sum(now - f['timestamp'] for f in findings) / len(findings)
|
||||
avg_age_days = avg_age / 86400
|
||||
|
||||
# Score decreases with age
|
||||
if avg_age_days <= 3:
|
||||
return 90.0
|
||||
elif avg_age_days <= 7:
|
||||
return 75.0
|
||||
elif avg_age_days <= 14:
|
||||
return 60.0
|
||||
else:
|
||||
return 40.0
|
||||
|
||||
def _calculate_metadata_integrity(self, findings: List[Dict]) -> float:
|
||||
"""Score based on completeness of finding metadata."""
|
||||
if not findings:
|
||||
return 100.0
|
||||
|
||||
required_fields = {'source', 'pattern', 'severity', 'example', 'timestamp'}
|
||||
valid_count = 0
|
||||
|
||||
for finding in findings:
|
||||
if required_fields.issubset(set(finding.keys())):
|
||||
valid_count += 1
|
||||
|
||||
return (valid_count / len(findings)) * 100
|
||||
|
||||
def _get_pattern_recommendation(self, pattern: str) -> str:
|
||||
"""Get specific recommendation for a pattern."""
|
||||
recommendations = {
|
||||
'unresolved_question': 'Resume research session with user input; complete analysis and synthesis',
|
||||
'incomplete_duration': 'Research ended prematurely; needs deeper investigation or additional findings',
|
||||
'claude_no_conclusion': 'Assistant analysis present but missing final conclusions; add summary section',
|
||||
}
|
||||
return recommendations.get(pattern, 'Review and complete research session')
|
||||
|
||||
def _generate_recommendations(self, summary: Dict) -> List[str]:
|
||||
"""Generate ranked recommendations based on findings."""
|
||||
recommendations = []
|
||||
|
||||
high_count = summary['by_severity'].get('high', 0)
|
||||
if high_count > 0:
|
||||
recommendations.append(
|
||||
f"[URGENT] Address {high_count} high-severity incomplete research sessions"
|
||||
)
|
||||
|
||||
pattern_counts = summary['by_pattern']
|
||||
if pattern_counts.get('unresolved_question', 0) > 0:
|
||||
recommendations.append(
|
||||
"Resume incomplete research with user follow-up and complete analysis"
|
||||
)
|
||||
|
||||
if pattern_counts.get('claude_no_conclusion', 0) > 0:
|
||||
recommendations.append(
|
||||
"Add missing conclusion/synthesis sections to Claude analysis"
|
||||
)
|
||||
|
||||
if pattern_counts.get('incomplete_duration', 0) > 0:
|
||||
recommendations.append(
|
||||
"Investigate incomplete sessions with minimal duration; may need deeper research"
|
||||
)
|
||||
|
||||
recommendations.append(
|
||||
"Implement validation: block research completion if unresolved questions remain"
|
||||
)
|
||||
|
||||
return recommendations
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke-run all three reports against the live KG databases.
    divider = "=" * 70
    checker = KGHealthChecker()

    print(divider)
    print("KG COMPLETENESS AUDIT")
    print(divider)
    audit = checker.check_kg_completeness()
    print(f"Status: {audit['status'].upper()}")
    print(f"Completeness: {audit['completeness_pct']}% ({audit['incomplete_count']}/{audit['total_sessions']})")
    print(f"Issues by pattern: {audit['summary']['by_pattern']}")

    print("\n" + divider)
    print("KG HEALTH SCORE")
    print(divider)
    health = checker.generate_health_score()
    print(f"Overall Score: {health['overall_score']}/100 ({health['status'].upper()})")
    print(f"Component Scores: {health['component_scores']}")
    print(f"\nRecommendations:")
    for idx, fix in enumerate(health['actionable_fixes'], 1):
        print(f" {idx}. {fix}")

    print("\n" + divider)
    print("REVIEW MARKING (PREVIEW)")
    print(divider)
    preview = checker.mark_incomplete_for_review(auto_mark=False)
    print(f"Sessions to mark for review: {preview['marked_count']}")
    print(f"Review queue path: {preview['review_queue_path']}")
||||
Reference in New Issue
Block a user