Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with a configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in the agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
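For context, here is a minimal sketch of what the DockerTmuxController interface described above might look like. It is inferred only from the method names in the commit message, not taken from the actual implementation; the container/session parameters, polling interval, and prompt heuristic are assumptions.

import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Sketch: drive a tmux session inside a Docker container via `docker exec`."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux subcommand inside the container and return its stdout.
        result = subprocess.run(
            ["docker", "exec", self.container, "tmux", *args],
            capture_output=True, text=True, check=True,
        )
        return result.stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Retrieve the current pane contents as text.
        return self._tmux("capture-pane", "-p", "-t", self.session)

    def wait_for_prompt(self, pattern: str, timeout: float = 60.0) -> bool:
        # Pattern-based completion detection: poll until the pattern appears.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(0.5)
        return False

    def wait_for_idle(self, quiet_seconds: float = 2.0, timeout: float = 60.0) -> bool:
        # Content-hash-based idle detection: the pane is "idle" once the hash
        # of its captured content stops changing for quiet_seconds.
        deadline = time.monotonic() + timeout
        last_hash, last_change = None, time.monotonic()
        while time.monotonic() < deadline:
            h = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if h != last_hash:
                last_hash, last_change = h, time.monotonic()
            elif time.monotonic() - last_change >= quiet_seconds:
                return True
            time.sleep(0.5)
        return False

    def wait_for_shell_prompt(self, timeout: float = 60.0) -> bool:
        # Shell prompt detection: treat a trailing "$" or "#" on the last
        # non-empty line as a shell prompt (assumed default PS1).
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            lines = [l for l in self.capture_pane().splitlines() if l.strip()]
            if lines and lines[-1].rstrip().endswith(("$", "#")):
                return True
            time.sleep(0.5)
        return False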
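The pre-task git snapshot could be as simple as a commit taken before the agent runs, so its changes can be diffed or rolled back afterwards. This helper is hypothetical (the function name and commit-message format are invented here) and only illustrates the idea.

import subprocess


def pre_task_snapshot(repo_path: str, task_id: str) -> None:
    """Hypothetical helper: commit the working tree before an agent task."""
    subprocess.run(["git", "-C", repo_path, "add", "-A"], check=True)
    subprocess.run(
        ["git", "-C", repo_path, "commit", "--allow-empty",
         "-m", f"pre-task snapshot: {task_id}"],
        check=True,
    )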
tests/test_skill_learning.py (new file, 433 lines)
@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Tests for skill learning system.

Tests the complete pipeline:
1. Task execution analysis
2. Skill extraction
3. Learning storage in KG
4. Skill recommendations
5. QA integration
"""

import pytest
import json
import sys
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch

# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))

from skill_learning_engine import (
    TaskAnalyzer, SkillExtractor, LearningEngine,
    SkillRecommender, SkillLearningSystem,
    TaskExecution, ExtractedSkill
)


class TestTaskAnalyzer:
    """Test task analysis and pattern extraction."""

    def test_analyze_valid_task(self):
        """Test analyzing a valid task execution."""
        analyzer = TaskAnalyzer()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor database schema",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read", "Edit"],
            "duration": 45.2,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        execution = analyzer.analyze_task(task_data)

        assert execution is not None
        assert execution.task_id == "test_001"
        assert execution.project == "overbits"
        assert execution.status == "success"
        assert len(execution.tools_used) == 3

    def test_extract_patterns(self):
        """Test pattern extraction from multiple tasks."""
        analyzer = TaskAnalyzer()

        # Add multiple tasks
        executions = []
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": "Test task",
                "project": "overbits",
                "status": "success" if i < 2 else "failed",
                "tools_used": ["Bash", "Read"],
                "duration": 30.0 + i,
                "result_summary": "Test",
                "qa_passed": i < 2,
                "timestamp": datetime.now().isoformat()
            }
            # Avoid shadowing the `exec` builtin
            execution = analyzer.analyze_task(task_data)
            if execution:
                executions.append(execution)

        patterns = analyzer.extract_patterns(executions)

        assert "success_rate" in patterns
        assert "average_duration" in patterns
        assert "common_tools" in patterns
        assert patterns["success_rate"] == pytest.approx(2 / 3)


class TestSkillExtractor:
    """Test skill extraction from tasks and QA results."""

    def test_extract_from_task(self):
        """Test skill extraction from task execution."""
        extractor = SkillExtractor()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Debug authentication flow for users",
            project="overbits",
            status="success",
            tools_used=["Read", "Bash", "Edit"],
            duration=30.0,
            result_summary="Fixed login issue",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = extractor.extract_from_task(execution)

        assert len(skills) > 0
        # Should have tool skills
        tool_skills = [s for s in skills if s.category == "tool_usage"]
        assert len(tool_skills) >= 3
        # Should have decision patterns
        decision_skills = [s for s in skills if s.category == "decision"]
        assert len(decision_skills) > 0

    def test_extract_from_qa_results(self):
        """Test skill extraction from QA results."""
        extractor = SkillExtractor()

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
                "command_docs": True,
            },
            "task_id": "test_001"
        }

        skills = extractor.extract_from_qa_results(qa_results)

        assert len(skills) == 3
        assert all(s.category == "pattern" for s in skills)
        assert all(s.confidence == 0.9 for s in skills)

    def test_extract_decision_patterns(self):
        """Test decision pattern extraction."""
        extractor = SkillExtractor()

        test_cases = [
            ("Optimize database query", "optimization"),
            ("Debug authentication issue", "debugging"),
            ("Write documentation for API", "documentation"),
            ("Test new feature", "testing"),
            ("Refactor old code", "refactoring"),
        ]

        for prompt, expected_pattern in test_cases:
            skills = extractor._extract_decision_patterns(prompt)
            pattern_names = [s.name for s in skills]
            assert any(expected_pattern in name for name in pattern_names)

    def test_aggregate_skills(self):
        """Test skill aggregation."""
        extractor = SkillExtractor()

        skills = [
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Read"},
                source_task_id="task_1",
                evidence="Used Read tool"
            ),
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.85,
                context={"tool": "Read"},
                source_task_id="task_2",
                evidence="Used Read tool again"
            ),
        ]

        aggregated = extractor.aggregate_skills(skills)

        assert "tool_read" in aggregated
        assert aggregated["tool_read"]["occurrences"] == 2
        # approx avoids brittle exact float equality on (0.8 + 0.85) / 2
        assert aggregated["tool_read"]["average_confidence"] == pytest.approx(0.825)


class TestLearningEngine:
    """Test learning extraction and storage."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning(self, mock_kg):
        """Test learning extraction."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Refactor database schema for performance",
            project="overbits",
            status="success",
            tools_used=["Bash", "Read", "Edit"],
            duration=45.0,
            result_summary="Schema refactored successfully",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = [
            ExtractedSkill(
                name="tool_bash",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Bash"},
                source_task_id="test_001",
                evidence="Used Bash"
            ),
        ]

        qa_results = {
            "passed": True,
            "results": {"syntax": True},
            "summary": {"errors": 0}
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is not None
        assert len(learning.skill_names) > 0
        assert learning.confidence > 0
        assert "overbits" in learning.applicability

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning_failed_qa(self, mock_kg):
        """Test that learning is not extracted if QA fails."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Test task",
            project="test",
            status="success",
            tools_used=["Read"],
            duration=10.0,
            result_summary="Test",
            qa_passed=False,
            timestamp=datetime.now()
        )

        skills = []

        qa_results = {
            "passed": False,
            "results": {"syntax": False},
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is None


class TestSkillRecommender:
    """Test skill recommendation system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_recommend_for_task(self, mock_kg):
        """Test getting recommendations for a task."""
        recommender = SkillRecommender()

        # Mock KG search to return test learnings
        mock_kg.return_value.search.return_value = [
            {
                "name": "learning_001",
                "type": "finding",
                "metadata": {
                    "skills": ["tool_bash", "pattern_optimization"],
                    "confidence": 0.85,
                    "applicability": ["overbits", "general"],
                }
            },
        ]

        recommendations = recommender.recommend_for_task(
            "Optimize database performance",
            project="overbits"
        )

        assert len(recommendations) > 0
        assert recommendations[0]["confidence"] > 0

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_skill_profile(self, mock_kg):
        """Test getting skill profile."""
        recommender = SkillRecommender()

        mock_kg.return_value.list_entities.return_value = [
            {
                "name": "skill_001",
                "type": "finding",
                "metadata": {
                    "category": "tool_usage",
                    "skills": ["tool_bash", "tool_read"],
                }
            },
        ]

        profile = recommender.get_skill_profile()

        assert "total_learnings" in profile
        assert "by_category" in profile
        assert "top_skills" in profile


class TestSkillLearningSystem:
    """Test integrated skill learning system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_process_task_completion(self, mock_kg):
        """Test full task completion processing."""
        system = SkillLearningSystem()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor authentication module",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Read", "Edit", "Bash"],
            "duration": 60.0,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
            },
            "summary": {"errors": 0, "warnings": 0, "info": 2}
        }

        result = system.process_task_completion(task_data, qa_results)

        assert result["success"]
        assert result["skills_extracted"] > 0
        assert result["learning_created"]

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_recommendations(self, mock_kg):
        """Test getting recommendations from system."""
        system = SkillLearningSystem()

        # Mock recommender
        mock_kg.return_value.search.return_value = []

        recommendations = system.get_recommendations(
            "Debug authentication issue",
            project="overbits"
        )

        assert isinstance(recommendations, list)


class TestIntegration:
    """Integration tests for complete workflows."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_complete_learning_pipeline(self, mock_kg):
        """Test complete pipeline from task to recommendation."""
        system = SkillLearningSystem()

        # Process a task
        task_data = {
            "task_id": "pipeline_test",
            "prompt": "Optimize API endpoint performance",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read"],
            "duration": 30.0,
            "result_summary": "30% performance improvement",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {"syntax": True, "routes": True},
            "summary": {"errors": 0}
        }

        # Process task
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]

        # Get recommendations
        recommendations = system.get_recommendations(
            "Improve API performance",
            project="overbits"
        )

        # Should be able to get recommendations
        assert isinstance(recommendations, list)

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_skill_profile_evolution(self, mock_kg):
        """Test how skill profile evolves with multiple tasks."""
        system = SkillLearningSystem()

        # Process multiple tasks
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": f"Test task {i}",
                "project": "overbits",
                "status": "success",
                "tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
                "duration": 20.0 + i,
                "result_summary": f"Task {i} completed",
                "qa_passed": True,
                "timestamp": datetime.now().isoformat()
            }

            qa_results = {
                "passed": True,
                "results": {"syntax": True},
                "summary": {"errors": 0}
            }

            system.process_task_completion(task_data, qa_results)

        # Get profile
        profile = system.get_learning_summary()

        assert profile["total_learnings"] >= 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])