Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with a configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in the agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
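For context, here is a minimal sketch of what the DockerTmuxController interface described above might look like. It is inferred only from the method names in the commit message, not taken from the actual implementation; the container/session parameters, polling interval, and prompt heuristic are assumptions.

import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Sketch: drive a tmux session inside a Docker container via `docker exec`."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux subcommand inside the container and return its stdout.
        result = subprocess.run(
            ["docker", "exec", self.container, "tmux", *args],
            capture_output=True, text=True, check=True,
        )
        return result.stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Retrieve the current pane contents as text.
        return self._tmux("capture-pane", "-p", "-t", self.session)

    def wait_for_prompt(self, pattern: str, timeout: float = 60.0) -> bool:
        # Pattern-based completion detection: poll until the pattern appears.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(0.5)
        return False

    def wait_for_idle(self, quiet_seconds: float = 2.0, timeout: float = 60.0) -> bool:
        # Content-hash-based idle detection: the pane is "idle" once the hash
        # of its captured content stops changing for quiet_seconds.
        deadline = time.monotonic() + timeout
        last_hash, last_change = None, time.monotonic()
        while time.monotonic() < deadline:
            h = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if h != last_hash:
                last_hash, last_change = h, time.monotonic()
            elif time.monotonic() - last_change >= quiet_seconds:
                return True
            time.sleep(0.5)
        return False

    def wait_for_shell_prompt(self, timeout: float = 60.0) -> bool:
        # Shell prompt detection: treat a trailing "$" or "#" on the last
        # non-empty line as a shell prompt (assumed default PS1).
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            lines = [l for l in self.capture_pane().splitlines() if l.strip()]
            if lines and lines[-1].rstrip().endswith(("$", "#")):
                return True
            time.sleep(0.5)
        return False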
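The pre-task git snapshot could be as simple as a commit taken before the agent runs, so its changes can be diffed or rolled back afterwards. This helper is hypothetical (the function name and commit-message format are invented here) and only illustrates the idea.

import subprocess


def pre_task_snapshot(repo_path: str, task_id: str) -> None:
    """Hypothetical helper: commit the working tree before an agent task."""
    subprocess.run(["git", "-C", repo_path, "add", "-A"], check=True)
    subprocess.run(
        ["git", "-C", repo_path, "commit", "--allow-empty",
         "-m", f"pre-task snapshot: {task_id}"],
        check=True,
    )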
tests/test_skill_learning.py (new file, 433 lines)
@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Tests for skill learning system.

Tests the complete pipeline:
1. Task execution analysis
2. Skill extraction
3. Learning storage in KG
4. Skill recommendations
5. QA integration
"""

import pytest
import json
import sys
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch

# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))

from skill_learning_engine import (
    TaskAnalyzer, SkillExtractor, LearningEngine,
    SkillRecommender, SkillLearningSystem,
    TaskExecution, ExtractedSkill
)


class TestTaskAnalyzer:
    """Test task analysis and pattern extraction."""

    def test_analyze_valid_task(self):
        """Test analyzing a valid task execution."""
        analyzer = TaskAnalyzer()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor database schema",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read", "Edit"],
            "duration": 45.2,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        execution = analyzer.analyze_task(task_data)

        assert execution is not None
        assert execution.task_id == "test_001"
        assert execution.project == "overbits"
        assert execution.status == "success"
        assert len(execution.tools_used) == 3

    def test_extract_patterns(self):
        """Test pattern extraction from multiple tasks."""
        analyzer = TaskAnalyzer()

        # Add multiple tasks
        executions = []
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": "Test task",
                "project": "overbits",
                "status": "success" if i < 2 else "failed",
                "tools_used": ["Bash", "Read"],
                "duration": 30.0 + i,
                "result_summary": "Test",
                "qa_passed": i < 2,
                "timestamp": datetime.now().isoformat()
            }
            # Avoid shadowing the `exec` builtin
            execution = analyzer.analyze_task(task_data)
            if execution:
                executions.append(execution)

        patterns = analyzer.extract_patterns(executions)

        assert "success_rate" in patterns
        assert "average_duration" in patterns
        assert "common_tools" in patterns
        assert patterns["success_rate"] == pytest.approx(2 / 3)


class TestSkillExtractor:
    """Test skill extraction from tasks and QA results."""

    def test_extract_from_task(self):
        """Test skill extraction from task execution."""
        extractor = SkillExtractor()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Debug authentication flow for users",
            project="overbits",
            status="success",
            tools_used=["Read", "Bash", "Edit"],
            duration=30.0,
            result_summary="Fixed login issue",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = extractor.extract_from_task(execution)

        assert len(skills) > 0
        # Should have tool skills
        tool_skills = [s for s in skills if s.category == "tool_usage"]
        assert len(tool_skills) >= 3
        # Should have decision patterns
        decision_skills = [s for s in skills if s.category == "decision"]
        assert len(decision_skills) > 0

    def test_extract_from_qa_results(self):
        """Test skill extraction from QA results."""
        extractor = SkillExtractor()

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
                "command_docs": True,
            },
            "task_id": "test_001"
        }

        skills = extractor.extract_from_qa_results(qa_results)

        assert len(skills) == 3
        assert all(s.category == "pattern" for s in skills)
        assert all(s.confidence == 0.9 for s in skills)

    def test_extract_decision_patterns(self):
        """Test decision pattern extraction."""
        extractor = SkillExtractor()

        test_cases = [
            ("Optimize database query", "optimization"),
            ("Debug authentication issue", "debugging"),
            ("Write documentation for API", "documentation"),
            ("Test new feature", "testing"),
            ("Refactor old code", "refactoring"),
        ]

        for prompt, expected_pattern in test_cases:
            skills = extractor._extract_decision_patterns(prompt)
            pattern_names = [s.name for s in skills]
            assert any(expected_pattern in name for name in pattern_names)

    def test_aggregate_skills(self):
        """Test skill aggregation."""
        extractor = SkillExtractor()

        skills = [
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Read"},
                source_task_id="task_1",
                evidence="Used Read tool"
            ),
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.85,
                context={"tool": "Read"},
                source_task_id="task_2",
                evidence="Used Read tool again"
            ),
        ]

        aggregated = extractor.aggregate_skills(skills)

        assert "tool_read" in aggregated
        assert aggregated["tool_read"]["occurrences"] == 2
        # approx avoids brittle exact float equality on (0.8 + 0.85) / 2
        assert aggregated["tool_read"]["average_confidence"] == pytest.approx(0.825)


class TestLearningEngine:
    """Test learning extraction and storage."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning(self, mock_kg):
        """Test learning extraction."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Refactor database schema for performance",
            project="overbits",
            status="success",
            tools_used=["Bash", "Read", "Edit"],
            duration=45.0,
            result_summary="Schema refactored successfully",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = [
            ExtractedSkill(
                name="tool_bash",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Bash"},
                source_task_id="test_001",
                evidence="Used Bash"
            ),
        ]

        qa_results = {
            "passed": True,
            "results": {"syntax": True},
            "summary": {"errors": 0}
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is not None
        assert len(learning.skill_names) > 0
        assert learning.confidence > 0
        assert "overbits" in learning.applicability

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning_failed_qa(self, mock_kg):
        """Test that learning is not extracted if QA fails."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Test task",
            project="test",
            status="success",
            tools_used=["Read"],
            duration=10.0,
            result_summary="Test",
            qa_passed=False,
            timestamp=datetime.now()
        )

        skills = []

        qa_results = {
            "passed": False,
            "results": {"syntax": False},
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is None


class TestSkillRecommender:
    """Test skill recommendation system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_recommend_for_task(self, mock_kg):
        """Test getting recommendations for a task."""
        recommender = SkillRecommender()

        # Mock KG search to return test learnings
        mock_kg.return_value.search.return_value = [
            {
                "name": "learning_001",
                "type": "finding",
                "metadata": {
                    "skills": ["tool_bash", "pattern_optimization"],
                    "confidence": 0.85,
                    "applicability": ["overbits", "general"],
                }
            },
        ]

        recommendations = recommender.recommend_for_task(
            "Optimize database performance",
            project="overbits"
        )

        assert len(recommendations) > 0
        assert recommendations[0]["confidence"] > 0

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_skill_profile(self, mock_kg):
        """Test getting skill profile."""
        recommender = SkillRecommender()

        mock_kg.return_value.list_entities.return_value = [
            {
                "name": "skill_001",
                "type": "finding",
                "metadata": {
                    "category": "tool_usage",
                    "skills": ["tool_bash", "tool_read"],
                }
            },
        ]

        profile = recommender.get_skill_profile()

        assert "total_learnings" in profile
        assert "by_category" in profile
        assert "top_skills" in profile


class TestSkillLearningSystem:
    """Test integrated skill learning system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_process_task_completion(self, mock_kg):
        """Test full task completion processing."""
        system = SkillLearningSystem()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor authentication module",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Read", "Edit", "Bash"],
            "duration": 60.0,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
            },
            "summary": {"errors": 0, "warnings": 0, "info": 2}
        }

        result = system.process_task_completion(task_data, qa_results)

        assert result["success"]
        assert result["skills_extracted"] > 0
        assert result["learning_created"]

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_recommendations(self, mock_kg):
        """Test getting recommendations from system."""
        system = SkillLearningSystem()

        # Mock recommender
        mock_kg.return_value.search.return_value = []

        recommendations = system.get_recommendations(
            "Debug authentication issue",
            project="overbits"
        )

        assert isinstance(recommendations, list)


class TestIntegration:
    """Integration tests for complete workflows."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_complete_learning_pipeline(self, mock_kg):
        """Test complete pipeline from task to recommendation."""
        system = SkillLearningSystem()

        # Process a task
        task_data = {
            "task_id": "pipeline_test",
            "prompt": "Optimize API endpoint performance",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read"],
            "duration": 30.0,
            "result_summary": "30% performance improvement",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {"syntax": True, "routes": True},
            "summary": {"errors": 0}
        }

        # Process task
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]

        # Get recommendations
        recommendations = system.get_recommendations(
            "Improve API performance",
            project="overbits"
        )

        # Should be able to get recommendations
        assert isinstance(recommendations, list)

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_skill_profile_evolution(self, mock_kg):
        """Test how skill profile evolves with multiple tasks."""
        system = SkillLearningSystem()

        # Process multiple tasks
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": f"Test task {i}",
                "project": "overbits",
                "status": "success",
                "tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
                "duration": 20.0 + i,
                "result_summary": f"Task {i} completed",
                "qa_passed": True,
                "timestamp": datetime.now().isoformat()
            }

            qa_results = {
                "passed": True,
                "results": {"syntax": True},
                "summary": {"errors": 0}
            }

            system.process_task_completion(task_data, qa_results)

        # Get profile
        profile = system.get_learning_summary()

        assert profile["total_learnings"] >= 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])