Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController, this refactor:
- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00
commit ec33ac1936
265 changed files with 92011 additions and 0 deletions
--- a/tests/test_plugin_system.py
+++ b/tests/test_plugin_system.py
@@ -0,0 +1,470 @@
+#!/usr/bin/env python3
+"""
+Test Suite - Plugin Marketplace System
+
+Tests for:
+1. Plugin marketplace registry and loading
+2. Plugin skill generation and matching
+3. Dispatcher integration with plugins
+4. Knowledge graph exports
+5. Plugin-aware task dispatch
+"""
+
+import json
+import sys
+import tempfile
+from pathlib import Path
+from typing import Dict, List, Any
+
+# Add lib to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
+
+from plugin_marketplace import (
+    PluginMarketplaceRegistry,
+    PluginCapabilityMatcher,
+    get_marketplace_registry
+)
+from plugin_skill_loader import (
+    PluginSkillLoader,
+    get_plugin_skill_loader
+)
+from dispatcher_plugin_integration import (
+    DispatcherPluginBridge,
+    PluginAwareTaskDispatcher
+)
+from plugin_kg_integration import (
+    PluginKnowledgeGraphExporter,
+    export_plugins_to_kg
+)
+
+
+class TestResults:
+    """Collect PASS/FAIL outcomes for one suite, echoing each as it is recorded."""
+    __test__ = False  # helper, not a test class: keeps pytest from collecting it
+
+    def __init__(self) -> None:
+        self.tests: List[Dict[str, Any]] = []  # one name/status/details dict per check
+        self.passed = self.failed = 0
+
+    def add_test(self, name: str, passed: bool, details: str = "") -> None:
+        """Record one check under ``name`` and print its ``[PASS]``/``[FAIL]`` line."""
+        status = "PASS" if passed else "FAIL"
+        self.tests.append({'name': name, 'status': status, 'details': details})
+        if passed:
+            self.passed += 1
+        else:
+            self.failed += 1
+        print(f"[{status}] {name}" + (f": {details}" if details else ""))
+
+    def summary(self) -> str:
+        """Return the passed/failed/total tally, prefixed with a newline."""
+        return f"\nTest Summary: {self.passed} passed, {self.failed} failed out of {self.passed + self.failed}"
+
+
+def test_plugin_registry() -> TestResults:
+    """Exercise the marketplace registry and report each check.
+
+    Covers loading, lookup of a single plugin, category filtering, task matching
+    and the knowledge-graph export. If the registry cannot be obtained the suite
+    stops after the first check, because every later check uses it.
+
+    Returns:
+        TestResults holding one entry per check that was run.
+    """
+    results = TestResults()
+    print("\n=== Testing Plugin Marketplace Registry ===\n")
+
+    # Test 1: Registry initialization (an exception here ends the suite)
+    label = "Registry initialization"
+    try:
+        registry = get_marketplace_registry()
+        loaded = len(registry.plugins)
+        results.add_test(label, loaded > 0, f"Loaded {loaded} plugins")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+        return results
+
+    # Test 2: Plugin retrieval
+    # The 'code-simplifier' entry is expected to carry the name 'Code Simplifier'.
+    label = "Plugin retrieval"
+    try:
+        plugin = registry.get_plugin('code-simplifier')
+        found = plugin is not None and plugin.name == 'Code Simplifier'
+        results.add_test(label, found, f"Retrieved: {plugin.name if plugin else 'None'}")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 3: List plugins by category
+    label = "Filter plugins by category"
+    try:
+        in_category = registry.list_plugins('code-analysis')
+        count = len(in_category)
+        results.add_test(label, count > 0, f"Found {count} code-analysis plugins")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 4: Find plugins for task
+    # The keyword list is supplied explicitly alongside the task text.
+    label = "Find plugins for task"
+    try:
+        task = "Review my code for security vulnerabilities"
+        matches = registry.find_plugins_for_task(task, ['security', 'review', 'code'])
+        results.add_test(label, len(matches) > 0, f"Found {len(matches)} matching plugins")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 5: Export plugin data
+    # Only the presence of both top-level keys is checked, not their contents.
+    label = "Export for knowledge graph"
+    try:
+        export_data = registry.export_for_knowledge_graph()
+        has_keys = 'plugins' in export_data and 'categories' in export_data
+        results.add_test(label, has_keys, f"Exported {len(export_data.get('plugins', {}))} plugins")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    return results
+
+
+def test_plugin_skills() -> TestResults:
+    """Exercise the plugin skill loader and report each check.
+
+    Covers skill generation, listing (all and by category), task matching and
+    the dispatcher / knowledge-graph exports. If the loader cannot be obtained
+    the suite stops after the first check, because every later check uses it.
+
+    Returns:
+        TestResults holding one entry per check that was run.
+    """
+    results = TestResults()
+    print("\n=== Testing Plugin Skill System ===\n")
+
+    # Test 1: Skill loader initialization (an exception here ends the suite)
+    label = "Skill loader initialization"
+    try:
+        loader = get_plugin_skill_loader()
+        results.add_test(label, loader is not None, "Initialized successfully")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+        return results
+
+    # Test 2: Generate skills from plugins
+    label = "Generate skills from plugins"
+    try:
+        skills = loader.generate_skills_from_plugins()
+        generated = len(skills)
+        results.add_test(label, generated > 0, f"Generated {generated} skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 3: List all skills
+    label = "List all skills"
+    try:
+        all_skills = loader.list_skills()
+        listed = len(all_skills)
+        results.add_test(label, listed > 0, f"Listed {listed} skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 4: Filter skills by category
+    label = "Filter skills by category"
+    try:
+        code_skills = loader.list_skills(category='code-analysis')
+        in_category = len(code_skills)
+        results.add_test(label, in_category > 0, f"Found {in_category} code-analysis skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 5: Find skills for task
+    # min_relevance=0.3 is the cut-off handed to the loader for this query.
+    label = "Find skills for task"
+    try:
+        task = "Simplify and optimize this Python function"
+        matched = loader.find_skills_for_task(task, min_relevance=0.3)
+        hits = len(matched)
+        results.add_test(label, hits > 0, f"Found {hits} matching skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 6: Export for dispatcher
+    # The 'skill_count' key must be present and hold a positive number.
+    label = "Export for dispatcher"
+    try:
+        dispatch_export = loader.export_for_dispatcher()
+        populated = 'skill_count' in dispatch_export and dispatch_export['skill_count'] > 0
+        results.add_test(
+            label, populated, f"Exported {dispatch_export.get('skill_count', 0)} skills"
+        )
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 7: Export for knowledge graph
+    # The 'total_skills' key must be present and hold a positive number.
+    label = "Export for knowledge graph"
+    try:
+        kg_export = loader.export_for_knowledge_graph()
+        populated = 'total_skills' in kg_export and kg_export['total_skills'] > 0
+        results.add_test(
+            label, populated, f"Exported {kg_export.get('total_skills', 0)} skills"
+        )
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    return results
+
+
+def test_dispatcher_integration() -> TestResults:
+    """Exercise the dispatcher/plugin bridge and the plugin-aware dispatcher.
+
+    Detail messages are built from ``.get`` lookups and from each check's
+    outcome, so a missing key shows up as a FAIL with a readable message rather
+    than as the text of a ``KeyError`` or as a "successful" note.
+
+    Returns:
+        TestResults holding one entry per check that was run.
+    """
+    results = TestResults()
+    print("\n=== Testing Dispatcher Integration ===\n")
+
+    # Test 1: Bridge initialization (an exception here ends the suite)
+    label = "Bridge initialization"
+    try:
+        bridge = DispatcherPluginBridge()
+        loaded = len(bridge.skill_loader.skills)
+        results.add_test(label, bridge is not None and loaded > 0, f"Loaded {loaded} skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+        return results
+
+    # Test 2: Enhance task context
+    label = "Enhance task context"
+    try:
+        task = "Review this code for security issues and performance"
+        context = bridge.enhance_task_context(task, "test-project", "job-123")
+        analysis = context.get('plugin_analysis') or {}  # tolerate a missing entry
+        ok = 'plugin_analysis' in context and 'matched_skills' in analysis
+        results.add_test(label, ok, f"Found {len(analysis.get('matched_skills', []))} skills")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 3: Generate recommendations
+    # A missing 'primary_skill' falls back to {} so the name renders as 'None'.
+    label = "Generate recommendations"
+    try:
+        task = "Simplify and refactor this code"
+        context = bridge.enhance_task_context(task, "test-project", "job-456")
+        recommendations = context.get('recommended_plugins', {})
+        primary_name = recommendations.get('primary_skill', {}).get('name', 'None')
+        results.add_test(label, 'primary_skill' in recommendations, f"Primary skill: {primary_name}")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 4: Plugin-aware task dispatcher
+    label = "Plugin-aware dispatch"
+    try:
+        dispatcher = PluginAwareTaskDispatcher(bridge)
+        dispatch_result = dispatcher.dispatch_with_plugin_context(
+            "Review code quality", "test-project", "job-789"
+        )
+        ok = bool(dispatch_result.get('plugin_enhanced')) and 'plugin_context' in dispatch_result
+        note = "Dispatch successful with plugin context"
+        if not ok:  # never attach the success note to a FAIL line
+            note = "Result lacks a truthy 'plugin_enhanced' or a 'plugin_context' entry"
+        results.add_test(label, ok, note)
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 5: Get dispatch recommendations
+    label = "Get dispatch recommendations"
+    try:
+        dispatcher = PluginAwareTaskDispatcher(bridge)
+        dispatcher.dispatch_with_plugin_context("Analyze code performance", "test-project", "job-999")
+        recommendations = dispatcher.get_dispatch_recommendations("job-999")
+        ok = recommendations is not None and 'primary_skill' in recommendations
+        note = "Retrieved recommendations successfully"
+        if not ok:  # never attach the success note to a FAIL line
+            note = "No recommendations containing 'primary_skill' for job-999"
+        results.add_test(label, ok, note)
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    return results
+
+
+def test_capability_matching() -> TestResults:
+    """Exercise PluginCapabilityMatcher and report each check.
+
+    Covers construction, keyword extraction, task-to-plugin matching and a
+    comparison of match counts for two different task descriptions.
+
+    Returns:
+        TestResults holding one entry per check that was run.
+    """
+    results = TestResults()
+    print("\n=== Testing Capability Matching ===\n")
+
+    # Test 1: Matcher initialization (an exception here ends the suite)
+    label = "Matcher initialization"
+    try:
+        registry = get_marketplace_registry()
+        matcher = PluginCapabilityMatcher(registry)
+        results.add_test(label, matcher is not None, "Initialized successfully")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+        return results
+
+    # Test 2: Extract keywords
+    label = "Extract keywords"
+    try:
+        task = "Find security vulnerabilities in this code"
+        keywords = matcher.extract_task_keywords(task)
+        has_security = len(keywords) > 0 and 'security' in keywords
+        results.add_test(label, has_security, f"Extracted keywords: {keywords}")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 3: Match plugins to task
+    label = "Match plugins to task"
+    try:
+        task = "Review code for performance issues"
+        matches = matcher.match_plugins(task, min_relevance=0.3)
+        matched = len(matches)
+        results.add_test(label, matched > 0, f"Matched {matched} plugins")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 4: Relevance scoring
+    # Passes when the security task matches something and matches at least as
+    # many plugins as the deploy task; only counts are compared, not scores.
+    label = "Relevance scoring"
+    try:
+        security_matches = matcher.match_plugins("Review code for security")
+        deploy_matches = matcher.match_plugins("Deploy application")
+        outscored = len(security_matches) > 0 and (
+            len(deploy_matches) == 0 or len(security_matches) >= len(deploy_matches)
+        )
+        results.add_test(label, outscored, "Security task has more relevant plugins than deploy task")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    return results
+
+
+def test_knowledge_graph_export() -> TestResults:
+    """Exercise PluginKnowledgeGraphExporter and report each check.
+
+    Detail messages use ``.get`` lookups so that an export lacking an expected
+    key is reported as a FAIL with a zero count rather than as the text of a
+    ``KeyError`` raised while formatting the message.
+
+    Returns:
+        TestResults holding one entry per check that was run.
+    """
+    results = TestResults()
+    print("\n=== Testing Knowledge Graph Export ===\n")
+
+    # Test 1: Exporter initialization (an exception here ends the suite)
+    label = "Exporter initialization"
+    try:
+        exporter = PluginKnowledgeGraphExporter()
+        results.add_test(label, exporter is not None, "Initialized successfully")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+        return results
+
+    # Test 2: Export plugins as entities
+    # An absent 'entities' key counts as zero items, which fails the check.
+    label = "Export plugins as entities"
+    try:
+        exported = exporter.export_plugins_as_entities()
+        count = len(exported.get('entities', []))
+        results.add_test(label, count > 0, f"Exported {count} plugin entities")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 3: Export skills as entities
+    label = "Export skills as entities"
+    try:
+        exported = exporter.export_plugin_skills_as_entities()
+        count = len(exported.get('entities', []))
+        results.add_test(label, count > 0, f"Exported {count} skill entities")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 4: Export relationships
+    label = "Export relationships"
+    try:
+        exported = exporter.export_plugin_relationships()
+        count = len(exported.get('relations', []))
+        results.add_test(label, count > 0, f"Exported {count} relationships")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 5: Complete export
+    label = "Complete KG export"
+    try:
+        complete = exporter.export_for_shared_kg()
+        has_sections = all(key in complete for key in ('plugins', 'skills', 'categories'))
+        n_plugins = len(complete.get('plugins', []))
+        n_skills = len(complete.get('skills', []))
+        results.add_test(label, has_sections, f"Plugins: {n_plugins}, Skills: {n_skills}")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    # Test 6: Save exports
+    # NOTE(review): save_exports() is given no target, so the files go wherever
+    # the exporter writes by default rather than to a throwaway directory. If it
+    # accepts an output directory, pass one created with
+    # tempfile.TemporaryDirectory() -- TODO confirm against the exporter.
+    label = "Save exports to files"
+    try:
+        saved = exporter.save_exports()
+        written = len(saved)
+        results.add_test(label, written >= 3, f"Saved {written} export files")
+    except Exception as exc:
+        results.add_test(label, False, str(exc))
+
+    return results
+
+
+def run_all_tests() -> None:
+    """Run every suite, print a combined report and exit the process.
+
+    Exits via ``sys.exit`` with status 0 when no check failed and 1 otherwise,
+    so this function does not return normally.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("PLUGIN SYSTEM TEST SUITE")
+    print(banner)
+
+    # Run test suites in this fixed order, collecting one TestResults per suite.
+    suites = (test_plugin_registry, test_plugin_skills, test_capability_matching,
+              test_dispatcher_integration, test_knowledge_graph_export)
+    all_results = [suite() for suite in suites]
+
+    # Print overall summary
+    print("\n" + banner)
+    print("OVERALL TEST SUMMARY")
+    print(banner)
+
+    total_passed = sum(r.passed for r in all_results)
+    total_failed = sum(r.failed for r in all_results)
+
+    print(f"\nTotal: {total_passed}/{total_passed + total_failed} tests passed")
+
+    if total_failed > 0:
+        # List every failed check together with its recorded details.
+        print(f"\n{total_failed} tests failed:")
+        failures = (t for rs in all_results for t in rs.tests if t['status'] == 'FAIL')
+        for failure in failures:
+            print(f"  - {failure['name']}: {failure['details']}")
+
+    print("\n" + banner)
+    exit_code = 1 if total_failed else 0
+    print(f"Exit code: {exit_code}")
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':  # run the suites only when executed as a script
+    run_all_tests()