#!/usr/bin/env python3 """ Tests for skill learning system. Tests the complete pipeline: 1. Task execution analysis 2. Skill extraction 3. Learning storage in KG 4. Skill recommendations 5. QA integration """ import pytest import json import sys from pathlib import Path from datetime import datetime from unittest.mock import MagicMock, patch # Add lib to path sys.path.insert(0, str(Path(__file__).parent.parent / "lib")) from skill_learning_engine import ( TaskAnalyzer, SkillExtractor, LearningEngine, SkillRecommender, SkillLearningSystem, TaskExecution, ExtractedSkill ) class TestTaskAnalyzer: """Test task analysis and pattern extraction.""" def test_analyze_valid_task(self): """Test analyzing a valid task execution.""" analyzer = TaskAnalyzer() task_data = { "task_id": "test_001", "prompt": "Refactor database schema", "project": "overbits", "status": "success", "tools_used": ["Bash", "Read", "Edit"], "duration": 45.2, "result_summary": "Successfully refactored", "qa_passed": True, "timestamp": datetime.now().isoformat() } execution = analyzer.analyze_task(task_data) assert execution is not None assert execution.task_id == "test_001" assert execution.project == "overbits" assert execution.status == "success" assert len(execution.tools_used) == 3 def test_extract_patterns(self): """Test pattern extraction from multiple tasks.""" analyzer = TaskAnalyzer() # Add multiple tasks executions = [] for i in range(3): task_data = { "task_id": f"task_{i}", "prompt": "Test task", "project": "overbits", "status": "success" if i < 2 else "failed", "tools_used": ["Bash", "Read"], "duration": 30.0 + i, "result_summary": "Test", "qa_passed": i < 2, "timestamp": datetime.now().isoformat() } exec = analyzer.analyze_task(task_data) if exec: executions.append(exec) patterns = analyzer.extract_patterns(executions) assert "success_rate" in patterns assert "average_duration" in patterns assert "common_tools" in patterns assert patterns["success_rate"] == 2/3 class TestSkillExtractor: """Test skill extraction from tasks and QA results.""" def test_extract_from_task(self): """Test skill extraction from task execution.""" extractor = SkillExtractor() execution = TaskExecution( task_id="test_001", prompt="Debug authentication flow for users", project="overbits", status="success", tools_used=["Read", "Bash", "Edit"], duration=30.0, result_summary="Fixed login issue", qa_passed=True, timestamp=datetime.now() ) skills = extractor.extract_from_task(execution) assert len(skills) > 0 # Should have tool skills tool_skills = [s for s in skills if s.category == "tool_usage"] assert len(tool_skills) >= 3 # Should have decision patterns decision_skills = [s for s in skills if s.category == "decision"] assert len(decision_skills) > 0 def test_extract_from_qa_results(self): """Test skill extraction from QA results.""" extractor = SkillExtractor() qa_results = { "passed": True, "results": { "syntax": True, "routes": True, "command_docs": True, }, "task_id": "test_001" } skills = extractor.extract_from_qa_results(qa_results) assert len(skills) == 3 assert all(s.category == "pattern" for s in skills) assert all(s.confidence == 0.9 for s in skills) def test_extract_decision_patterns(self): """Test decision pattern extraction.""" extractor = SkillExtractor() test_cases = [ ("Optimize database query", "optimization"), ("Debug authentication issue", "debugging"), ("Write documentation for API", "documentation"), ("Test new feature", "testing"), ("Refactor old code", "refactoring"), ] for prompt, expected_pattern in test_cases: skills = 
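
    # Hedged sketch of negative-path coverage, added as an illustration.
    # Assumption (not confirmed by skill_learning_engine): analyze_task()
    # returns a falsy value rather than raising when fields are missing,
    # which is what the `if execution:` guard above already relies on.
    def test_analyze_incomplete_task(self):
        """Sketch: incomplete task data should not crash the analyzer."""
        analyzer = TaskAnalyzer()

        # Deliberately omit most fields.
        incomplete = {"task_id": "edge_001", "prompt": "Partial data"}

        execution = analyzer.analyze_task(incomplete)

        # Either the record is rejected (falsy) or defaults are filled in
        # and a TaskExecution for the right task comes back.
        assert not execution or execution.task_id == "edge_001"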

class TestSkillExtractor:
    """Test skill extraction from tasks and QA results."""

    def test_extract_from_task(self):
        """Test skill extraction from a task execution."""
        extractor = SkillExtractor()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Debug authentication flow for users",
            project="overbits",
            status="success",
            tools_used=["Read", "Bash", "Edit"],
            duration=30.0,
            result_summary="Fixed login issue",
            qa_passed=True,
            timestamp=datetime.now(),
        )

        skills = extractor.extract_from_task(execution)

        assert len(skills) > 0

        # One tool-usage skill per tool used.
        tool_skills = [s for s in skills if s.category == "tool_usage"]
        assert len(tool_skills) >= 3

        # The "Debug ..." prompt should yield at least one decision pattern.
        decision_skills = [s for s in skills if s.category == "decision"]
        assert len(decision_skills) > 0

    def test_extract_from_qa_results(self):
        """Test skill extraction from QA results."""
        extractor = SkillExtractor()

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
                "command_docs": True,
            },
            "task_id": "test_001",
        }

        skills = extractor.extract_from_qa_results(qa_results)

        assert len(skills) == 3
        assert all(s.category == "pattern" for s in skills)
        assert all(s.confidence == 0.9 for s in skills)

    def test_extract_decision_patterns(self):
        """Test decision pattern extraction."""
        extractor = SkillExtractor()

        test_cases = [
            ("Optimize database query", "optimization"),
            ("Debug authentication issue", "debugging"),
            ("Write documentation for API", "documentation"),
            ("Test new feature", "testing"),
            ("Refactor old code", "refactoring"),
        ]

        for prompt, expected_pattern in test_cases:
            skills = extractor._extract_decision_patterns(prompt)
            pattern_names = [s.name for s in skills]
            assert any(expected_pattern in name for name in pattern_names)

    def test_aggregate_skills(self):
        """Test skill aggregation."""
        extractor = SkillExtractor()

        skills = [
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Read"},
                source_task_id="task_1",
                evidence="Used Read tool",
            ),
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.85,
                context={"tool": "Read"},
                source_task_id="task_2",
                evidence="Used Read tool again",
            ),
        ]

        aggregated = extractor.aggregate_skills(skills)

        assert "tool_read" in aggregated
        assert aggregated["tool_read"]["occurrences"] == 2
        # (0.8 + 0.85) / 2; compare floats with approx.
        assert aggregated["tool_read"]["average_confidence"] == pytest.approx(0.825)


class TestLearningEngine:
    """Test learning extraction and storage."""

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_extract_learning(self, mock_kg):
        """Test learning extraction."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Refactor database schema for performance",
            project="overbits",
            status="success",
            tools_used=["Bash", "Read", "Edit"],
            duration=45.0,
            result_summary="Schema refactored successfully",
            qa_passed=True,
            timestamp=datetime.now(),
        )

        skills = [
            ExtractedSkill(
                name="tool_bash",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Bash"},
                source_task_id="test_001",
                evidence="Used Bash",
            ),
        ]

        qa_results = {
            "passed": True,
            "results": {"syntax": True},
            "summary": {"errors": 0},
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is not None
        assert len(learning.skill_names) > 0
        assert learning.confidence > 0
        assert "overbits" in learning.applicability

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_extract_learning_failed_qa(self, mock_kg):
        """Test that no learning is extracted when QA fails."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Test task",
            project="test",
            status="success",
            tools_used=["Read"],
            duration=10.0,
            result_summary="Test",
            qa_passed=False,
            timestamp=datetime.now(),
        )

        skills = []
        qa_results = {
            "passed": False,
            "results": {"syntax": False},
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is None


class TestSkillRecommender:
    """Test the skill recommendation system."""

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_recommend_for_task(self, mock_kg):
        """Test getting recommendations for a task."""
        recommender = SkillRecommender()

        # Mock KG search to return test learnings.
        mock_kg.return_value.search.return_value = [
            {
                "name": "learning_001",
                "type": "finding",
                "metadata": {
                    "skills": ["tool_bash", "pattern_optimization"],
                    "confidence": 0.85,
                    "applicability": ["overbits", "general"],
                },
            },
        ]

        recommendations = recommender.recommend_for_task(
            "Optimize database performance",
            project="overbits",
        )

        assert len(recommendations) > 0
        assert recommendations[0]["confidence"] > 0

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_get_skill_profile(self, mock_kg):
        """Test getting a skill profile."""
        recommender = SkillRecommender()

        mock_kg.return_value.list_entities.return_value = [
            {
                "name": "skill_001",
                "type": "finding",
                "metadata": {
                    "category": "tool_usage",
                    "skills": ["tool_bash", "tool_read"],
                },
            },
        ]

        profile = recommender.get_skill_profile()

        assert "total_learnings" in profile
        assert "by_category" in profile
        assert "top_skills" in profile
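
    # Hedged sketch: graceful degradation when the KG is empty. The
    # system-level test_get_recommendations below exercises the same
    # empty-search path; expecting an empty list here (rather than an
    # exception) is an assumption about SkillRecommender, not confirmed
    # behaviour of the module.
    @patch("skill_learning_engine.KnowledgeGraph")
    def test_recommend_with_empty_kg(self, mock_kg):
        """Sketch: an empty KG yields no recommendations."""
        recommender = SkillRecommender()

        mock_kg.return_value.search.return_value = []

        recommendations = recommender.recommend_for_task(
            "Completely novel task",
            project="overbits",
        )

        # Assumption: empty KG -> empty (but valid) recommendation list.
        assert recommendations == []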

class TestSkillLearningSystem:
    """Test the integrated skill learning system."""

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_process_task_completion(self, mock_kg):
        """Test full task-completion processing."""
        system = SkillLearningSystem()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor authentication module",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Read", "Edit", "Bash"],
            "duration": 60.0,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat(),
        }

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
            },
            "summary": {"errors": 0, "warnings": 0, "info": 2},
        }

        result = system.process_task_completion(task_data, qa_results)

        assert result["success"]
        assert result["skills_extracted"] > 0
        assert result["learning_created"]

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_get_recommendations(self, mock_kg):
        """Test getting recommendations from the system."""
        system = SkillLearningSystem()

        # An empty KG search means there are no stored learnings to draw on.
        mock_kg.return_value.search.return_value = []

        recommendations = system.get_recommendations(
            "Debug authentication issue",
            project="overbits",
        )

        assert isinstance(recommendations, list)


class TestIntegration:
    """Integration tests for complete workflows."""

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_complete_learning_pipeline(self, mock_kg):
        """Test the complete pipeline from task to recommendation."""
        system = SkillLearningSystem()

        task_data = {
            "task_id": "pipeline_test",
            "prompt": "Optimize API endpoint performance",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read"],
            "duration": 30.0,
            "result_summary": "30% performance improvement",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat(),
        }

        qa_results = {
            "passed": True,
            "results": {"syntax": True, "routes": True},
            "summary": {"errors": 0},
        }

        # Process the task...
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]

        # ...then ask for recommendations on a related prompt.
        recommendations = system.get_recommendations(
            "Improve API performance",
            project="overbits",
        )

        assert isinstance(recommendations, list)

    @patch("skill_learning_engine.KnowledgeGraph")
    def test_skill_profile_evolution(self, mock_kg):
        """Test how the skill profile evolves over multiple tasks."""
        system = SkillLearningSystem()

        # Process several tasks with alternating tool sets.
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": f"Test task {i}",
                "project": "overbits",
                "status": "success",
                "tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
                "duration": 20.0 + i,
                "result_summary": f"Task {i} completed",
                "qa_passed": True,
                "timestamp": datetime.now().isoformat(),
            }
            qa_results = {
                "passed": True,
                "results": {"syntax": True},
                "summary": {"errors": 0},
            }
            system.process_task_completion(task_data, qa_results)

        profile = system.get_learning_summary()

        assert profile["total_learnings"] >= 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
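
# Usage note: run this module directly (it invokes pytest via the guard
# above) or through pytest, e.g. `pytest -v -k "pipeline"` to select just
# the integration test. Every KnowledgeGraph interaction is patched out, so
# the suite is hermetic and needs no live knowledge-graph backend.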