Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController, this refactor:
- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00
commit ec33ac1936
265 changed files with 92011 additions and 0 deletions
--- a/lib/research_agent.py
+++ b/lib/research_agent.py
@@ -0,0 +1,408 @@
+#!/usr/bin/env python3
+"""
+Luzia Research Agent - Smart Task Routing and Analysis
+
+Routes research tasks to appropriate Zen tools based on:
+- Security implications
+- Speed requirements
+- Complexity/depth needed
+
+Stores findings in research KG.
+"""
+
+import json
+import sqlite3
+import uuid
+import time
+from pathlib import Path
+from datetime import datetime
+from typing import Optional, Dict, List, Tuple
+from enum import Enum
+
+
+class SecurityLevel(Enum):
+    """How sensitive a task is, from least (PUBLIC) to most (CRITICAL)."""
+
+    # No sensitive data
+    PUBLIC = "public"
+    # Internal infrastructure
+    INTERNAL = "internal"
+    # Auth, credentials, compliance
+    SENSITIVE = "sensitive"
+    # Full infrastructure control
+    CRITICAL = "critical"
+
+
+class SpeedRequirement(Enum):
+    """How quickly an answer to the task is expected."""
+
+    # <2 seconds (chat)
+    INTERACTIVE = "interactive"
+    # <10 seconds (debug, chat)
+    RESPONSIVE = "responsive"
+    # <60 seconds (thinkdeep, consensus)
+    THOROUGH = "thorough"
+    # No time limit (deep investigation)
+    RESEARCH = "research"
+
+
+class ComplexityLevel(Enum):
+    """How much reasoning depth a task is expected to need."""
+
+    # Simple answer
+    TRIVIAL = "trivial"
+    # Clear problem, known solution
+    STRAIGHTFORWARD = "straightforward"
+    # Multiple considerations
+    COMPLEX = "complex"
+    # Unknown territory
+    EXPLORATORY = "exploratory"
+
+
+class TaskFilter:
+    """Keyword-based classifier for a task's security, speed and complexity.
+
+    Each ``evaluate_*`` method lowercases the task text and looks for the
+    keywords of the matching ``*_KEYWORDS`` table as plain substrings.
+
+    NOTE(review): matching is by substring, so short keywords also hit inside
+    longer words (e.g. 'now' in 'unknown', 'auth' in 'author'). Confirm whether
+    that is intended before tightening it.
+    """
+
+    SECURITY_KEYWORDS = {
+        'critical': SecurityLevel.CRITICAL,
+        'infrastructure': SecurityLevel.SENSITIVE,
+        'credentials': SecurityLevel.CRITICAL,
+        'auth': SecurityLevel.SENSITIVE,
+        'permission': SecurityLevel.SENSITIVE,
+        'rbac': SecurityLevel.SENSITIVE,
+        'vulnerability': SecurityLevel.CRITICAL,
+        'exploit': SecurityLevel.CRITICAL,
+        'secret': SecurityLevel.CRITICAL,
+        'token': SecurityLevel.SENSITIVE,
+        'api key': SecurityLevel.CRITICAL,
+        'password': SecurityLevel.CRITICAL,
+        'deploy': SecurityLevel.SENSITIVE,
+        'production': SecurityLevel.SENSITIVE,
+    }
+
+    SPEED_KEYWORDS = {
+        'urgent': SpeedRequirement.INTERACTIVE,
+        'asap': SpeedRequirement.INTERACTIVE,
+        'now': SpeedRequirement.INTERACTIVE,
+        'blocking': SpeedRequirement.RESPONSIVE,
+        'quick': SpeedRequirement.INTERACTIVE,
+        'quick answer': SpeedRequirement.INTERACTIVE,
+        'fast': SpeedRequirement.RESPONSIVE,
+        'slow': SpeedRequirement.THOROUGH,
+        'analyze': SpeedRequirement.THOROUGH,
+        'research': SpeedRequirement.RESEARCH,
+        'explore': SpeedRequirement.RESEARCH,
+        'investigate': SpeedRequirement.RESEARCH,
+        'comprehensive': SpeedRequirement.THOROUGH,
+    }
+
+    COMPLEXITY_KEYWORDS = {
+        'simple': ComplexityLevel.TRIVIAL,
+        'quick answer': ComplexityLevel.TRIVIAL,
+        'obvious': ComplexityLevel.TRIVIAL,
+        'tradeoff': ComplexityLevel.COMPLEX,
+        'decision': ComplexityLevel.COMPLEX,
+        'architecture': ComplexityLevel.COMPLEX,
+        'design': ComplexityLevel.COMPLEX,
+        'bug': ComplexityLevel.STRAIGHTFORWARD,
+        'error': ComplexityLevel.STRAIGHTFORWARD,
+        'fix': ComplexityLevel.STRAIGHTFORWARD,
+        'explore': ComplexityLevel.EXPLORATORY,
+        'research': ComplexityLevel.EXPLORATORY,
+        'unknown': ComplexityLevel.EXPLORATORY,
+        'investigation': ComplexityLevel.EXPLORATORY,
+    }
+
+    @staticmethod
+    def evaluate_security(task: str) -> SecurityLevel:
+        """Return the highest security level implied by the task text.
+
+        Every keyword in ``SECURITY_KEYWORDS`` is checked; CRITICAL wins over
+        SENSITIVE regardless of where the keywords sit in the table or in the
+        text. If no keyword matches, a short list of infrastructure words
+        yields SENSITIVE; otherwise the result is INTERNAL.
+        """
+        task_lower = task.lower()
+
+        # Collect the levels of *all* matching keywords before deciding, so a
+        # sensitive keyword listed earlier (e.g. 'auth') cannot mask a critical
+        # one listed later (e.g. 'password').
+        matched_levels = {
+            level
+            for keyword, level in TaskFilter.SECURITY_KEYWORDS.items()
+            if keyword in task_lower
+        }
+        if SecurityLevel.CRITICAL in matched_levels:
+            return SecurityLevel.CRITICAL
+        if SecurityLevel.SENSITIVE in matched_levels:
+            return SecurityLevel.SENSITIVE
+
+        # Check for infrastructure-related tasks
+        if any(word in task_lower for word in ['deploy', 'systemd', 'service', 'nginx', 'database', 'firewall']):
+            return SecurityLevel.SENSITIVE
+
+        return SecurityLevel.INTERNAL
+
+    @staticmethod
+    def evaluate_speed(task: str) -> SpeedRequirement:
+        """Return the speed requirement of the first matching speed keyword.
+
+        Keywords are tried in ``SPEED_KEYWORDS`` definition order; tasks with
+        no matching keyword default to THOROUGH.
+        """
+        task_lower = task.lower()
+
+        for keyword, level in TaskFilter.SPEED_KEYWORDS.items():
+            if keyword in task_lower:
+                return level
+
+        # Default to thorough for unknown tasks
+        return SpeedRequirement.THOROUGH
+
+    @staticmethod
+    def evaluate_complexity(task: str) -> ComplexityLevel:
+        """Return the complexity of the first matching complexity keyword.
+
+        Keywords are tried in ``COMPLEXITY_KEYWORDS`` definition order. With no
+        keyword match, tasks longer than 100 words are COMPLEX and everything
+        else is STRAIGHTFORWARD.
+        """
+        task_lower = task.lower()
+
+        for keyword, level in TaskFilter.COMPLEXITY_KEYWORDS.items():
+            if keyword in task_lower:
+                return level
+
+        # Check task length as proxy for complexity
+        word_count = len(task.split())
+        if word_count > 100:
+            return ComplexityLevel.COMPLEX
+
+        return ComplexityLevel.STRAIGHTFORWARD
+
+
+class ToolRouter:
+    """Picks a Zen tool from a task's security/speed/complexity classification."""
+
+    @staticmethod
+    def recommend_tools(
+        security: SecurityLevel,
+        speed: SpeedRequirement,
+        complexity: ComplexityLevel,
+        task_text: str = ""
+    ) -> Tuple[str, str]:
+        """
+        Choose the Zen tool for a classified task.
+
+        Rules are evaluated top to bottom and the first one that applies wins.
+
+        Returns: (primary_tool, reason)
+        """
+
+        def mentions(*needles):
+            # Lowercased lazily so the text is only touched by rules that need it.
+            lowered = task_text.lower()
+            return any(needle in lowered for needle in needles)
+
+        is_complex = complexity == ComplexityLevel.COMPLEX
+
+        # Critical security always goes to codereview; only the reason differs.
+        if security == SecurityLevel.CRITICAL:
+            if is_complex:
+                return "codereview", "Critical security + complex design requires security review and deep thinking"
+            return "codereview", "Critical security implications require thorough code/design review"
+
+        # Time-critical work always goes to chat (fastest); only the reason differs.
+        if speed == SpeedRequirement.INTERACTIVE:
+            if complexity == ComplexityLevel.TRIVIAL:
+                return "chat", "Simple answer needed immediately"
+            return "chat", "Time critical - using fastest response tool"
+
+        # Architecture/design decisions get multi-perspective consensus.
+        if is_complex and mentions("design", "decision"):
+            return "consensus", "Complex architectural decision needs multi-perspective analysis"
+
+        # Exploratory research gets deep investigation.
+        if complexity == ComplexityLevel.EXPLORATORY or speed == SpeedRequirement.RESEARCH:
+            return "thinkdeep", "Exploratory research needs deep investigation and analysis"
+
+        # Bug/error diagnosis goes to debug.
+        if complexity == ComplexityLevel.STRAIGHTFORWARD and mentions("bug", "error", "fix"):
+            return "debug", "Systematic debugging and error diagnosis"
+
+        # Complex work on sensitive infrastructure.
+        if is_complex and security == SecurityLevel.SENSITIVE:
+            return "thinkdeep", "Complex infrastructure task needs thorough analysis"
+
+        # General thorough analysis.
+        if speed == SpeedRequirement.THOROUGH:
+            return "thinkdeep", "Thorough analysis and deep reasoning needed"
+
+        # Nothing more specific applied.
+        return "thinkdeep", "Comprehensive analysis and reasoning"
+
+    @staticmethod
+    def get_routing_summary(
+        security: SecurityLevel,
+        speed: SpeedRequirement,
+        complexity: ComplexityLevel,
+        tool: str,
+        reason: str
+    ) -> str:
+        """Render the classification and the chosen tool as a multi-line text block."""
+        summary = f"""
+📊 Task Analysis:
+  🔒 Security:    {security.value}
+  ⚡ Speed:       {speed.value}
+  🧠 Complexity:  {complexity.value}
+
+🎯 Routing Decision:
+  Tool:    {tool}
+  Reason:  {reason}
+"""
+        return summary
+
+
+class LuziaResearchAgent:
+    """Luzia research agent with smart filtering and routing.
+
+    Classifies incoming tasks, recommends a Zen tool, optionally asks for
+    clarification, and records findings in the research knowledge-graph
+    SQLite database at ``self.research_kg``.
+    """
+
+    def __init__(self):
+        """Set the KG database and log file paths and create the log directory."""
+        self.research_kg = Path("/etc/luz-knowledge/research.db")
+        self.log_file = Path("/opt/server-agents/logs/research-agent.log")
+        self.log_file.parent.mkdir(parents=True, exist_ok=True)
+
+    def log(self, message):
+        """Append a timestamped line to the log file and echo the message to stdout."""
+        timestamp = datetime.now().isoformat()
+        log_entry = f"[{timestamp}] {message}\n"
+        # Messages contain emoji, so pin the encoding instead of relying on the
+        # locale default, which may not be able to encode them.
+        with open(self.log_file, 'a', encoding='utf-8') as f:
+            f.write(log_entry)
+        print(message)
+
+    def analyze_task(self, task: str) -> Dict:
+        """
+        Analyze incoming research task.
+
+        Returns a dict with the keys 'security', 'speed', 'complexity' (enum
+        values as strings), 'recommended_tool', 'reasoning' and
+        'routing_summary'.
+        """
+        security = TaskFilter.evaluate_security(task)
+        speed = TaskFilter.evaluate_speed(task)
+        complexity = TaskFilter.evaluate_complexity(task)
+
+        tool, reason = ToolRouter.recommend_tools(security, speed, complexity, task)
+
+        return {
+            'security': security.value,
+            'speed': speed.value,
+            'complexity': complexity.value,
+            'recommended_tool': tool,
+            'reasoning': reason,
+            'routing_summary': ToolRouter.get_routing_summary(security, speed, complexity, tool, reason),
+        }
+
+    def clarify_task(self, task: str, analysis: Dict) -> Optional[Dict]:
+        """
+        Determine if clarification is needed based on analysis.
+
+        ``analysis`` must provide the 'security', 'speed' and 'complexity' keys
+        produced by ``analyze_task``. ``task`` itself is not inspected.
+
+        Returns ``{'needs_clarification': True, 'questions': [...]}`` or None
+        if ready to proceed.
+        """
+        questions = []
+
+        # Clarify sensitive tasks
+        if analysis['security'] in ['sensitive', 'critical']:
+            questions.append("🔒 Is this for production infrastructure? (yes/no)")
+
+        # Clarify timing for quick tasks
+        if analysis['speed'] == 'interactive':
+            questions.append("⚡ Is this blocking other work? (yes/no)")
+
+        # Clarify scope for exploratory work
+        if analysis['complexity'] == 'exploratory':
+            questions.append("🧭 What's the scope of research? (e.g., feasibility study, comparison, deep investigation)")
+
+        if questions:
+            return {
+                'needs_clarification': True,
+                'questions': questions,
+            }
+
+        return None
+
+    def store_research_finding(
+        self,
+        task: str,
+        tool_used: str,
+        finding: str,
+        tags: Optional[List[str]] = None,
+    ) -> bool:
+        """Store research finding in research KG.
+
+        Inserts one row into the ``entities`` table with type
+        ``research_<tool_used>``. Returns True on success; any exception is
+        logged and reported as False rather than raised.
+        """
+        try:
+            conn = sqlite3.connect(self.research_kg)
+            try:
+                cursor = conn.cursor()
+
+                entity_id = str(uuid.uuid4())
+                now = time.time()
+
+                # Create finding entity
+                cursor.execute("""
+                    INSERT INTO entities
+                    (id, name, type, domain, content, metadata, created_at, updated_at, source)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """, (
+                    entity_id,
+                    f"Research: {task[:50]}",  # name
+                    f"research_{tool_used}",   # type
+                    'research',                # domain
+                    finding,                   # content
+                    json.dumps({
+                        'task': task,
+                        'tool_used': tool_used,
+                        'tags': tags or [],
+                    }),
+                    now,
+                    now,
+                    'research_agent'
+                ))
+
+                conn.commit()
+            finally:
+                # Close even when the insert fails so the connection is not leaked.
+                conn.close()
+
+            self.log(f"✅ Stored finding: {task[:40]}...")
+            return True
+
+        except Exception as e:
+            self.log(f"❌ Error storing finding: {e}")
+            return False
+
+    def process_research_task(self, task: str) -> Dict:
+        """
+        Main entry point: analyze task and provide routing recommendation.
+
+        Returns a dict with 'task', 'analysis', 'clarification' (or None) and
+        'status' ('ready' or 'needs_clarification').
+        """
+        self.log(f"🔍 Processing research task: {task[:60]}...")
+
+        # Analyze the task
+        analysis = self.analyze_task(task)
+        self.log(analysis['routing_summary'])
+
+        # Check if clarification needed
+        clarification = self.clarify_task(task, analysis)
+
+        return {
+            'task': task,
+            'analysis': analysis,
+            'clarification': clarification,
+            'status': 'ready' if not clarification else 'needs_clarification',
+        }
+
+    def get_summary(self) -> Dict:
+        """Get summary of research findings stored.
+
+        Returns a dict with 'total_research_findings', 'findings_by_tool'
+        (tool name -> count) and 'tools_used'. Any exception is logged and an
+        empty dict is returned instead.
+        """
+        try:
+            conn = sqlite3.connect(self.research_kg)
+            try:
+                cursor = conn.cursor()
+
+                # Count research findings by tool
+                cursor.execute("""
+                    SELECT type, COUNT(*) as count
+                    FROM entities
+                    WHERE type LIKE 'research_%'
+                    GROUP BY type
+                """)
+
+                findings_by_tool = {row[0].replace('research_', ''): row[1] for row in cursor.fetchall()}
+
+                # Count total research entities
+                cursor.execute("SELECT COUNT(*) FROM entities WHERE type LIKE 'research_%'")
+                total_research = cursor.fetchone()[0]
+            finally:
+                # Close even when a query fails so the connection is not leaked.
+                conn.close()
+
+            return {
+                'total_research_findings': total_research,
+                'findings_by_tool': findings_by_tool,
+                'tools_used': list(findings_by_tool.keys()),
+            }
+
+        except Exception as e:
+            self.log(f"❌ Error getting summary: {e}")
+            return {}
+
+
+if __name__ == '__main__':
+    # Demonstration run: route a handful of sample tasks and let the agent
+    # log its analysis of each one.
+    agent = LuziaResearchAgent()
+
+    # Example tasks
+    test_tasks = (
+        "quick answer: what's the difference between async and await?",
+        "urgent critical security review needed for authentication implementation",
+        "research and explore different approaches to distributed caching",
+        "fix the bug in the zen-proxy max_tokens handling",
+        "design decision: should we use REST or GraphQL API?",
+    )
+
+    divider = "=" * 70
+    print(divider)
+    print("LUZIA RESEARCH AGENT - SMART FILTER DEMONSTRATION")
+    print(divider)
+
+    for sample in test_tasks:
+        result = agent.process_research_task(sample)
+        # Blank line between the logged summaries of consecutive tasks.
+        print()