luzia/tests/test_skill_learning.py
admin ec33ac1936 Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds a DockerTmuxController class for robust tmux session management (see the sketch after this commit message)
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00
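
The DockerTmuxController itself lives outside this test file, so only its surface is known from the commit message above. Below is a minimal, hypothetical sketch of what such a controller might look like, driving tmux inside a Docker container through the docker and tmux CLIs; the method names come from the commit message, while the signatures, defaults, and polling logic are illustrative assumptions only.

import hashlib
import subprocess
import time


class DockerTmuxController:
    """Hypothetical sketch: drive a tmux session running inside a Docker container."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux subcommand inside the container and return its stdout.
        cmd = ["docker", "exec", self.container, "tmux", *args]
        return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Return the current contents of the session's active pane.
        return self._tmux("capture-pane", "-t", self.session, "-p")

    def wait_for_prompt(self, pattern: str, timeout: float = 60.0) -> bool:
        # Poll the pane until the pattern appears (completion detection).
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if pattern in self.capture_pane():
                return True
            time.sleep(1.0)
        return False

    def wait_for_idle(self, interval: float = 2.0, stable_rounds: int = 3, timeout: float = 120.0) -> bool:
        # Treat the pane as idle once its content hash is unchanged for several polls.
        deadline = time.monotonic() + timeout
        last, stable = None, 0
        while time.monotonic() < deadline:
            digest = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            stable = stable + 1 if digest == last else 1
            if stable >= stable_rounds:
                return True
            last = digest
            time.sleep(interval)
        return False

A typical use, under these assumptions, would be send_keys() to dispatch a command, then wait_for_idle() or wait_for_prompt() before capture_pane() to collect the output.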


#!/usr/bin/env python3
"""
Tests for skill learning system.
Tests the complete pipeline:
1. Task execution analysis
2. Skill extraction
3. Learning storage in KG
4. Skill recommendations
5. QA integration
"""
import pytest
import json
import sys
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch
# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
from skill_learning_engine import (
    TaskAnalyzer, SkillExtractor, LearningEngine,
    SkillRecommender, SkillLearningSystem,
    TaskExecution, ExtractedSkill
)


class TestTaskAnalyzer:
    """Test task analysis and pattern extraction."""

    def test_analyze_valid_task(self):
        """Test analyzing a valid task execution."""
        analyzer = TaskAnalyzer()
        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor database schema",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read", "Edit"],
            "duration": 45.2,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }
        execution = analyzer.analyze_task(task_data)
        assert execution is not None
        assert execution.task_id == "test_001"
        assert execution.project == "overbits"
        assert execution.status == "success"
        assert len(execution.tools_used) == 3

    def test_extract_patterns(self):
        """Test pattern extraction from multiple tasks."""
        analyzer = TaskAnalyzer()
        # Add multiple tasks
        executions = []
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": "Test task",
                "project": "overbits",
                "status": "success" if i < 2 else "failed",
                "tools_used": ["Bash", "Read"],
                "duration": 30.0 + i,
                "result_summary": "Test",
                "qa_passed": i < 2,
                "timestamp": datetime.now().isoformat()
            }
            execution = analyzer.analyze_task(task_data)
            if execution:
                executions.append(execution)
        patterns = analyzer.extract_patterns(executions)
        assert "success_rate" in patterns
        assert "average_duration" in patterns
        assert "common_tools" in patterns
        assert patterns["success_rate"] == 2/3


class TestSkillExtractor:
    """Test skill extraction from tasks and QA results."""

    def test_extract_from_task(self):
        """Test skill extraction from task execution."""
        extractor = SkillExtractor()
        execution = TaskExecution(
            task_id="test_001",
            prompt="Debug authentication flow for users",
            project="overbits",
            status="success",
            tools_used=["Read", "Bash", "Edit"],
            duration=30.0,
            result_summary="Fixed login issue",
            qa_passed=True,
            timestamp=datetime.now()
        )
        skills = extractor.extract_from_task(execution)
        assert len(skills) > 0
        # Should have tool skills
        tool_skills = [s for s in skills if s.category == "tool_usage"]
        assert len(tool_skills) >= 3
        # Should have decision patterns
        decision_skills = [s for s in skills if s.category == "decision"]
        assert len(decision_skills) > 0

    def test_extract_from_qa_results(self):
        """Test skill extraction from QA results."""
        extractor = SkillExtractor()
        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
                "command_docs": True,
            },
            "task_id": "test_001"
        }
        skills = extractor.extract_from_qa_results(qa_results)
        assert len(skills) == 3
        assert all(s.category == "pattern" for s in skills)
        assert all(s.confidence == 0.9 for s in skills)

    def test_extract_decision_patterns(self):
        """Test decision pattern extraction."""
        extractor = SkillExtractor()
        test_cases = [
            ("Optimize database query", "optimization"),
            ("Debug authentication issue", "debugging"),
            ("Write documentation for API", "documentation"),
            ("Test new feature", "testing"),
            ("Refactor old code", "refactoring"),
        ]
        for prompt, expected_pattern in test_cases:
            skills = extractor._extract_decision_patterns(prompt)
            pattern_names = [s.name for s in skills]
            assert any(expected_pattern in name for name in pattern_names)

    def test_aggregate_skills(self):
        """Test skill aggregation."""
        extractor = SkillExtractor()
        skills = [
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Read"},
                source_task_id="task_1",
                evidence="Used Read tool"
            ),
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.85,
                context={"tool": "Read"},
                source_task_id="task_2",
                evidence="Used Read tool again"
            ),
        ]
        aggregated = extractor.aggregate_skills(skills)
        assert "tool_read" in aggregated
        assert aggregated["tool_read"]["occurrences"] == 2
        # Use approx: (0.8 + 0.85) / 2 is not exactly 0.825 in floating point.
        assert aggregated["tool_read"]["average_confidence"] == pytest.approx(0.825)


class TestLearningEngine:
    """Test learning extraction and storage."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning(self, mock_kg):
        """Test learning extraction."""
        engine = LearningEngine()
        execution = TaskExecution(
            task_id="test_001",
            prompt="Refactor database schema for performance",
            project="overbits",
            status="success",
            tools_used=["Bash", "Read", "Edit"],
            duration=45.0,
            result_summary="Schema refactored successfully",
            qa_passed=True,
            timestamp=datetime.now()
        )
        skills = [
            ExtractedSkill(
                name="tool_bash",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Bash"},
                source_task_id="test_001",
                evidence="Used Bash"
            ),
        ]
        qa_results = {
            "passed": True,
            "results": {"syntax": True},
            "summary": {"errors": 0}
        }
        learning = engine.extract_learning(execution, skills, qa_results)
        assert learning is not None
        assert len(learning.skill_names) > 0
        assert learning.confidence > 0
        assert "overbits" in learning.applicability

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning_failed_qa(self, mock_kg):
        """Test that learning is not extracted if QA fails."""
        engine = LearningEngine()
        execution = TaskExecution(
            task_id="test_001",
            prompt="Test task",
            project="test",
            status="success",
            tools_used=["Read"],
            duration=10.0,
            result_summary="Test",
            qa_passed=False,
            timestamp=datetime.now()
        )
        skills = []
        qa_results = {
            "passed": False,
            "results": {"syntax": False},
        }
        learning = engine.extract_learning(execution, skills, qa_results)
        assert learning is None


class TestSkillRecommender:
    """Test skill recommendation system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_recommend_for_task(self, mock_kg):
        """Test getting recommendations for a task."""
        recommender = SkillRecommender()
        # Mock KG search to return test learnings
        mock_kg.return_value.search.return_value = [
            {
                "name": "learning_001",
                "type": "finding",
                "metadata": {
                    "skills": ["tool_bash", "pattern_optimization"],
                    "confidence": 0.85,
                    "applicability": ["overbits", "general"],
                }
            },
        ]
        recommendations = recommender.recommend_for_task(
            "Optimize database performance",
            project="overbits"
        )
        assert len(recommendations) > 0
        assert recommendations[0]["confidence"] > 0

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_skill_profile(self, mock_kg):
        """Test getting skill profile."""
        recommender = SkillRecommender()
        mock_kg.return_value.list_entities.return_value = [
            {
                "name": "skill_001",
                "type": "finding",
                "metadata": {
                    "category": "tool_usage",
                    "skills": ["tool_bash", "tool_read"],
                }
            },
        ]
        profile = recommender.get_skill_profile()
        assert "total_learnings" in profile
        assert "by_category" in profile
        assert "top_skills" in profile


class TestSkillLearningSystem:
    """Test integrated skill learning system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_process_task_completion(self, mock_kg):
        """Test full task completion processing."""
        system = SkillLearningSystem()
        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor authentication module",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Read", "Edit", "Bash"],
            "duration": 60.0,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }
        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
            },
            "summary": {"errors": 0, "warnings": 0, "info": 2}
        }
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]
        assert result["skills_extracted"] > 0
        assert result["learning_created"]

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_recommendations(self, mock_kg):
        """Test getting recommendations from system."""
        system = SkillLearningSystem()
        # Mock recommender
        mock_kg.return_value.search.return_value = []
        recommendations = system.get_recommendations(
            "Debug authentication issue",
            project="overbits"
        )
        assert isinstance(recommendations, list)


class TestIntegration:
    """Integration tests for complete workflows."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_complete_learning_pipeline(self, mock_kg):
        """Test complete pipeline from task to recommendation."""
        system = SkillLearningSystem()
        # Process a task
        task_data = {
            "task_id": "pipeline_test",
            "prompt": "Optimize API endpoint performance",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read"],
            "duration": 30.0,
            "result_summary": "30% performance improvement",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }
        qa_results = {
            "passed": True,
            "results": {"syntax": True, "routes": True},
            "summary": {"errors": 0}
        }
        # Process task
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]
        # Get recommendations
        recommendations = system.get_recommendations(
            "Improve API performance",
            project="overbits"
        )
        # Should be able to get recommendations
        assert isinstance(recommendations, list)

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_skill_profile_evolution(self, mock_kg):
        """Test how skill profile evolves with multiple tasks."""
        system = SkillLearningSystem()
        # Process multiple tasks
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": f"Test task {i}",
                "project": "overbits",
                "status": "success",
                "tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
                "duration": 20.0 + i,
                "result_summary": f"Task {i} completed",
                "qa_passed": True,
                "timestamp": datetime.now().isoformat()
            }
            qa_results = {
                "passed": True,
                "results": {"syntax": True},
                "summary": {"errors": 0}
            }
            system.process_task_completion(task_data, qa_results)
        # Get profile
        profile = system.get_learning_summary()
        assert profile["total_learnings"] >= 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])