Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
402
lib/web_search_integrator.py
Normal file
402
lib/web_search_integrator.py
Normal file
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Web Search Integrator - Context enhancement via web search
|
||||
|
||||
Features:
|
||||
1. Detect when web search would be helpful
|
||||
2. Query Stack Overflow for solutions
|
||||
3. Fetch and summarize reference docs
|
||||
4. Track learned solutions
|
||||
5. Integrate references into prompts
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
@dataclass
class WebReference:
    """A reference found via web search"""
    title: str        # human-readable title of the page/answer
    url: str          # link to the reference
    source: str       # stackoverflow, docs, blog, etc
    snippet: str      # short excerpt to surface in prompts
    relevance: float  # 0-1 score
    topic: str        # query/topic this reference addresses
    found_at: str     # ISO-8601 timestamp of when it was found


@dataclass
class LearningResult:
    """A solution learned from web search"""
    problem: str           # problem description
    solution: str          # solution description
    references: List[str]  # supporting reference URLs
    tags: List[str]        # topic tags used for lookup
    learned_at: str        # ISO-8601 timestamp
    confidence: float      # How confident in this solution (0-1)


class WebSearchIntegrator:
    """Integrates web search for context enhancement.

    Maintains a JSON-backed "learning database" of previously solved
    problems under ``cache_dir`` and provides helpers to decide when a web
    search would help, build reference objects, and render them into
    prompt sections.
    """

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize web search integrator

        Args:
            cache_dir: Optional directory for caching search results
                (defaults to /tmp/.luzia-web-cache; created if missing)
        """
        self.cache_dir = cache_dir or Path("/tmp/.luzia-web-cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.learning_db: List[LearningResult] = []
        self.search_history: List[Dict[str, Any]] = []
        self.load_learning_db()

    def load_learning_db(self) -> None:
        """Load learned solutions from cache.

        Best-effort: a missing file is fine; a corrupt or unreadable one
        only logs a warning instead of aborting construction.
        """
        db_file = self.cache_dir / "learning.json"
        if db_file.exists():
            try:
                data = json.loads(db_file.read_text())
                self.learning_db = [LearningResult(**item) for item in data.get("learned", [])]
            # Narrowed from a bare Exception: OSError (read failure),
            # ValueError (malformed JSON), TypeError (schema drift in
            # stored records passed to LearningResult(**item)).
            except (OSError, ValueError, TypeError) as e:
                print(f"[Warning] Failed to load learning DB: {e}")

    def save_learning_db(self) -> None:
        """Save learned solutions to cache as pretty-printed JSON."""
        db_file = self.cache_dir / "learning.json"
        db_file.write_text(json.dumps({
            "learned": [asdict(item) for item in self.learning_db],
            "timestamp": datetime.now().isoformat()
        }, indent=2))

    def should_search(self, task: str, error: Optional[str] = None) -> Tuple[bool, str]:
        """Determine if web search would be helpful

        Args:
            task: Task description
            error: Optional error message

        Returns:
            Tuple of (should_search, search_query); query is "" when no
            trigger pattern matches.
        """
        # (pattern, category) pairs; category is informational only for
        # now — kept so future callers can branch on trigger type.
        search_triggers = [
            # Error investigation
            (r"error|exception|failed|problem", "error_investigation"),
            # How-to tasks
            (r"how\s+to|guide|tutorial|learn", "how_to"),
            # Library/tool questions
            (r"npm|pip|cargo|ruby", "package_mgmt"),
            # Framework questions
            (r"react|vue|angular|django|flask", "framework"),
            # Integration/setup
            (r"integrate|setup|configure|install", "setup"),
            # Best practices
            (r"best practice|pattern|architecture", "architecture"),
        ]

        combined = f"{task} {error or ''}".lower()

        for pattern, _category in search_triggers:
            if re.search(pattern, combined):
                if "error" in combined:
                    # For errors, strip any leading "...error...:" prefix
                    # so the query is the message itself. NOTE(review):
                    # the sub is case-sensitive against the original text,
                    # so "TypeError:" is not stripped — confirm intended.
                    search_query = re.sub(r".*error.*?:\s*", "", error or task)[:80]
                else:
                    search_query = task[:100]
                return True, search_query

        return False, ""

    def find_stackoverflow_answer(self, query: str) -> Optional[WebReference]:
        """Find Stack Overflow answer for query

        This is a reference implementation. In production, would use
        Stack Overflow API or web search.

        Args:
            query: Search query

        Returns:
            Best matching reference, or None
        """
        # In actual implementation, would call web search API
        # For now, return structure for documentation
        return WebReference(
            title="Relevant Stack Overflow Answer",
            url="https://stackoverflow.com/search?q=...",
            source="stackoverflow",
            snippet="[Search result snippet would appear here]",
            relevance=0.8,
            topic=query,
            found_at=datetime.now().isoformat()
        )

    def fetch_documentation(self, library: str, topic: str) -> Optional[WebReference]:
        """Fetch documentation for a library/topic

        Args:
            library: Library name (npm package, python module, etc);
                matched case-insensitively against the known doc sites
            topic: Specific topic within library (appended to the base URL)

        Returns:
            Reference to documentation, or None for unknown libraries
        """
        # Common documentation URLs
        doc_patterns = {
            "react": "https://react.dev/reference/",
            "nodejs": "https://nodejs.org/api/",
            "python": "https://docs.python.org/3/",
            "typescript": "https://www.typescriptlang.org/docs/",
            "rust": "https://doc.rust-lang.org/",
            "django": "https://docs.djangoproject.com/",
            "flask": "https://flask.palletsprojects.com/",
        }

        base_url = doc_patterns.get(library.lower())
        if not base_url:
            return None

        return WebReference(
            title=f"{library} Documentation - {topic}",
            url=f"{base_url}{topic}/",
            source="official_docs",
            snippet=f"Official documentation for {library} {topic}",
            relevance=0.95,
            topic=topic,
            found_at=datetime.now().isoformat()
        )

    def detect_tech_stack(self, task: str) -> List[str]:
        """Detect technology stack from task description

        Args:
            task: Task description

        Returns:
            List of detected technologies, in the (fixed) order of the
            pattern table below
        """
        tech_patterns = {
            "React": r"react|jsx",
            "TypeScript": r"typescript|\.ts",
            "Node.js": r"node|npm|javascript",
            "Python": r"python|pip|py",
            "Rust": r"rust|cargo",
            "Docker": r"docker|container",
            "PostgreSQL": r"postgres|sql",
            "MongoDB": r"mongo|mongodb",
            "Redis": r"redis",
            "Kubernetes": r"k8s|kubernetes",
            "GraphQL": r"graphql|apollo",
            "REST": r"rest|api",
            "WebSocket": r"websocket|ws",
        }

        task_lower = task.lower()
        # Dict preserves insertion order, so output order is stable.
        return [tech for tech, pattern in tech_patterns.items()
                if re.search(pattern, task_lower)]

    def generate_context_section(self, references: List[WebReference]) -> str:
        """Generate a context section with web references

        Args:
            references: List of web references

        Returns:
            Markdown section to add to prompt ("" when no references)
        """
        if not references:
            return ""

        sections = ["# Web References and Context\n"]

        for ref in references:
            sections.append(f"\n## {ref.title}")
            sections.append(f"**Source:** {ref.source}")
            sections.append(f"**URL:** {ref.url}")
            sections.append(f"**Relevance:** {ref.relevance:.1%}")
            sections.append(f"\n{ref.snippet}\n")

        return "\n".join(sections)

    def learn_solution(self, problem: str, solution: str,
                       references: List[str], tags: List[str],
                       confidence: float = 0.8) -> None:
        """Record a learned solution for future reference

        Persists the updated database to disk immediately.

        Args:
            problem: Problem description
            solution: Solution description
            references: List of reference URLs
            tags: Topic tags
            confidence: Confidence in this solution (0-1)
        """
        learning = LearningResult(
            problem=problem,
            solution=solution,
            references=references,
            tags=tags,
            learned_at=datetime.now().isoformat(),
            confidence=confidence
        )
        self.learning_db.append(learning)
        self.save_learning_db()

    def search_learned_solutions(self, query: str) -> List[LearningResult]:
        """Search previously learned solutions

        Args:
            query: Search query (case-insensitive substring match)

        Returns:
            Matching solutions, sorted by confidence then recency (desc)
        """
        query_lower = query.lower()

        # Substring match in problem, solution, and tags.
        matches = [
            result for result in self.learning_db
            if (query_lower in result.problem.lower() or
                query_lower in result.solution.lower() or
                any(query_lower in tag.lower() for tag in result.tags))
        ]

        # Sort by confidence and recency
        matches.sort(
            key=lambda r: (r.confidence, datetime.fromisoformat(r.learned_at)),
            reverse=True
        )

        return matches

    def get_reference_for_technology(self, tech: str) -> Optional[WebReference]:
        """Get reference documentation for a technology

        Args:
            tech: Technology name (key into the table below)

        Returns:
            Reference to documentation, or None for unknown technologies
        """
        # NOTE: all entries are built eagerly on each call; cheap since
        # these are pure in-memory constructions.
        refs = {
            "React": self.fetch_documentation("react", "introduction"),
            "TypeScript": self.fetch_documentation("typescript", "handbook"),
            "Node.js": self.fetch_documentation("nodejs", "api"),
            "Python": self.fetch_documentation("python", "tutorial"),
            "Docker": WebReference(
                title="Docker Documentation",
                url="https://docs.docker.com/",
                source="official_docs",
                snippet="Official Docker documentation",
                relevance=1.0,
                topic="Docker",
                found_at=datetime.now().isoformat()
            ),
        }
        return refs.get(tech)

    def generate_research_prompt(self, task: str, tech_stack: List[str],
                                 error: Optional[str] = None) -> str:
        """Generate a prompt for web research

        Args:
            task: Task description
            tech_stack: List of technologies involved
            error: Optional error message

        Returns:
            Research prompt (markdown)
        """
        sections = [
            "# Research Task\n",
            f"**Task:** {task}\n",
        ]

        if error:
            sections.append(f"**Error:** {error}\n")

        if tech_stack:
            sections.append(f"**Technologies:** {', '.join(tech_stack)}\n")

        # Surface up to three previously learned solutions for this task.
        learned = self.search_learned_solutions(task)
        if learned:
            sections.append("\n## Previously Learned Solutions\n")
            for i, result in enumerate(learned[:3], 1):
                sections.append(f"{i}. **{result.problem}**")
                sections.append(f"   - Solution: {result.solution}")
                sections.append(f"   - Tags: {', '.join(result.tags)}")
                sections.append(f"   - Confidence: {result.confidence:.0%}\n")

        sections.append("\n## Research Approach\n")
        sections.append("1. Check previously learned solutions")
        sections.append("2. Search Stack Overflow for similar issues")
        sections.append("3. Check official documentation")
        sections.append("4. Look for blog posts or tutorials")
        sections.append("5. Synthesize findings into solution")

        return "\n".join(sections)

    def export_learning_data(self, output_path: Path) -> None:
        """Export learning database for analysis

        Writes a JSON document with totals, per-tag counts, average
        confidence, and the full solution list.

        Args:
            output_path: Path to write export to (parents created)
        """
        export_data = {
            "total_learned": len(self.learning_db),
            "by_topic": {},
            "average_confidence": 0,
            "solutions": [asdict(item) for item in self.learning_db]
        }

        # Calculate statistics
        if self.learning_db:
            export_data["average_confidence"] = (
                sum(r.confidence for r in self.learning_db) / len(self.learning_db)
            )

            # Group by tags
            by_topic: Dict[str, int] = {}
            for result in self.learning_db:
                for tag in result.tags:
                    by_topic[tag] = by_topic.get(tag, 0) + 1
            export_data["by_topic"] = by_topic

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(export_data, indent=2))

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics about web search usage

        Returns:
            Statistics dict; "topics" is sorted so output is
            deterministic (previously it was built from a set, giving an
            arbitrary order per run).
        """
        if not self.learning_db:
            return {
                "total_learned": 0,
                "average_confidence": 0,
                "searches_performed": len(self.search_history)
            }

        avg_confidence = sum(r.confidence for r in self.learning_db) / len(self.learning_db)

        return {
            "total_learned": len(self.learning_db),
            "average_confidence": avg_confidence,
            "searches_performed": len(self.search_history),
            "topics": sorted(set(
                tag for result in self.learning_db
                for tag in result.tags
            ))
        }
|
||||
Reference in New Issue
Block a user