luzia/lib/skill_learning_engine.py
admin ec33ac1936 Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Added DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00

#!/usr/bin/env python3
"""
Skill and Knowledge Learning System for Luzia Orchestrator
Automatically extracts learnings from completed tasks and QA passes,
storing them in the knowledge graph for future skill recommendations
and decision-making improvements.
Architecture:
1. TaskAnalyzer: Extracts patterns from task execution
2. SkillExtractor: Identifies skills used and outcomes
3. LearningEngine: Processes learnings and stores in KG
4. SkillRecommender: Suggests skills for future tasks
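Example (illustrative sketch; mirrors the built-in "test" CLI command below,
with task_data/qa_results shaped as described in TaskAnalyzer.analyze_task):
    system = SkillLearningSystem()
    result = system.process_task_completion(task_data, qa_results)
    recommendations = system.get_recommendations("Optimize database performance", "overbits")
    summary = system.get_learning_summary()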
"""
import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime, timedelta
from dataclasses import dataclass
import hashlib
# Import our modules
import sys
sys.path.insert(0, str(Path(__file__).parent))
from knowledge_graph import KnowledgeGraph, KG_PATHS
@dataclass
class TaskExecution:
"""Records a single task execution."""
task_id: str
prompt: str
project: str
status: str # "success", "failed", "partial"
tools_used: List[str]
duration: float
result_summary: str
qa_passed: bool
timestamp: datetime
@dataclass
class ExtractedSkill:
"""A skill extracted from task execution."""
name: str
category: str # "tool_usage", "pattern", "decision", "architecture"
confidence: float # 0.0-1.0
context: Dict[str, Any]
source_task_id: str
evidence: str
@dataclass
class Learning:
"""A learning extracted from successful task completion."""
title: str
description: str
skill_names: List[str]
pattern: str
applicability: List[str] # Project types, task patterns it applies to
confidence: float
source_qa_results: Dict[str, Any]
related_learnings: List[str]
class TaskAnalyzer:
"""Analyzes task execution to extract patterns and metadata."""
def __init__(self):
self.execution_history: List[TaskExecution] = []
def analyze_task(self, task_data: Dict[str, Any]) -> Optional[TaskExecution]:
"""
Analyze a single task execution.
Expected task_data structure:
{
"task_id": str,
"prompt": str,
"project": str,
"status": "success|failed|partial",
"tools_used": [str],
"duration": float,
"result_summary": str,
"qa_passed": bool,
"timestamp": str (ISO format)
}
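        Example (illustrative values only):
        {
            "task_id": "abc123",
            "prompt": "Fix failing login test",
            "project": "general",
            "status": "success",
            "tools_used": ["Read", "Edit"],
            "duration": 12.5,
            "result_summary": "Patched the auth check",
            "qa_passed": True,
            "timestamp": "2026-01-14T10:42:16"
        }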
"""
try:
execution = TaskExecution(
task_id=task_data.get("task_id", self._generate_task_id()),
prompt=task_data.get("prompt", ""),
project=task_data.get("project", "general"),
status=task_data.get("status", "unknown"),
tools_used=task_data.get("tools_used", []),
duration=task_data.get("duration", 0.0),
result_summary=task_data.get("result_summary", ""),
qa_passed=task_data.get("qa_passed", False),
timestamp=datetime.fromisoformat(task_data.get("timestamp", datetime.now().isoformat()))
)
self.execution_history.append(execution)
return execution
except Exception as e:
print(f"Error analyzing task: {e}")
return None
def extract_patterns(self, executions: List[TaskExecution]) -> Dict[str, Any]:
"""Extract patterns from multiple task executions."""
if not executions:
return {}
patterns = {
"success_rate": self._calculate_success_rate(executions),
"average_duration": sum(e.duration for e in executions) / len(executions),
"common_tools": self._extract_common_tools(executions),
"project_distribution": self._extract_project_distribution(executions),
"time_range": {
"oldest": min(e.timestamp for e in executions).isoformat(),
"newest": max(e.timestamp for e in executions).isoformat(),
}
}
return patterns
def _calculate_success_rate(self, executions: List[TaskExecution]) -> float:
"""Calculate success rate of task executions."""
if not executions:
return 0.0
successful = sum(1 for e in executions if e.status == "success")
return successful / len(executions)
def _extract_common_tools(self, executions: List[TaskExecution]) -> Dict[str, int]:
"""Extract most commonly used tools."""
tool_counts = {}
for execution in executions:
for tool in execution.tools_used:
tool_counts[tool] = tool_counts.get(tool, 0) + 1
return dict(sorted(tool_counts.items(), key=lambda x: x[1], reverse=True))
def _extract_project_distribution(self, executions: List[TaskExecution]) -> Dict[str, int]:
"""Extract project distribution of tasks."""
projects = {}
for execution in executions:
projects[execution.project] = projects.get(execution.project, 0) + 1
return dict(sorted(projects.items(), key=lambda x: x[1], reverse=True))
def _generate_task_id(self) -> str:
"""Generate unique task ID."""
return hashlib.md5(
f"{datetime.now().isoformat()}".encode()
).hexdigest()[:12]
class SkillExtractor:
"""Extracts skills from task executions and QA results."""
def extract_from_task(self, execution: TaskExecution) -> List[ExtractedSkill]:
"""Extract skills from a single task execution."""
skills = []
# Extract tool usage skills
for tool in execution.tools_used:
skills.append(ExtractedSkill(
name=f"tool_{tool.lower()}",
category="tool_usage",
confidence=0.8,
context={
"tool": tool,
"project": execution.project,
"frequency": 1
},
source_task_id=execution.task_id,
evidence=f"Tool '{tool}' used in task: {execution.prompt[:100]}"
))
# Extract decision patterns from prompt
decision_skills = self._extract_decision_patterns(execution.prompt)
skills.extend(decision_skills)
# Extract project-specific skills
project_skill = ExtractedSkill(
name=f"project_{execution.project}",
category="architecture",
confidence=0.7,
context={"project": execution.project},
source_task_id=execution.task_id,
evidence=f"Task executed for project: {execution.project}"
)
skills.append(project_skill)
return skills
def extract_from_qa_results(self, qa_results: Dict[str, Any]) -> List[ExtractedSkill]:
"""Extract skills from QA validation results."""
skills = []
if not qa_results.get("passed", False):
return skills
# Success in validation categories
for category, passed in qa_results.get("results", {}).items():
if passed:
skills.append(ExtractedSkill(
name=f"qa_pass_{category}",
category="pattern",
confidence=0.9,
context={"qa_category": category},
source_task_id=qa_results.get("task_id", "unknown"),
evidence=f"QA passed for category: {category}"
))
return skills
def _extract_decision_patterns(self, prompt: str) -> List[ExtractedSkill]:
"""Extract decision-making patterns from task prompt."""
skills = []
patterns = {
"optimization": r"(optimiz|improves?|faster|efficient)",
"debugging": r"(debug|troubleshoot|fix|error)",
"documentation": r"(document|document|docstring|comment)",
"testing": r"(test|validate|check|verify)",
"refactoring": r"(refactor|clean|simplify|reorganize)",
"integration": r"(integrat|connect|link|sync)",
"automation": r"(automat|cron|schedule|batch)",
}
for pattern_name, pattern_regex in patterns.items():
if re.search(pattern_regex, prompt, re.IGNORECASE):
skills.append(ExtractedSkill(
name=f"pattern_{pattern_name}",
category="decision",
confidence=0.6,
context={"pattern_type": pattern_name},
source_task_id="",
evidence=f"Pattern '{pattern_name}' detected in prompt"
))
return skills
def aggregate_skills(self, skills: List[ExtractedSkill]) -> Dict[str, Dict[str, Any]]:
"""Aggregate multiple skill extractions."""
aggregated = {}
for skill in skills:
if skill.name not in aggregated:
aggregated[skill.name] = {
"name": skill.name,
"category": skill.category,
"occurrences": 0,
"total_confidence": 0.0,
"contexts": [],
}
aggregated[skill.name]["occurrences"] += 1
aggregated[skill.name]["total_confidence"] += skill.confidence
aggregated[skill.name]["contexts"].append(skill.context)
# Calculate average confidence
for skill_name, data in aggregated.items():
if data["occurrences"] > 0:
data["average_confidence"] = data["total_confidence"] / data["occurrences"]
return aggregated
class LearningEngine:
"""Processes and stores learnings in the knowledge graph."""
def __init__(self):
self.kg = KnowledgeGraph("research", skip_permission_check=True)
def extract_learning(
self,
execution: TaskExecution,
skills: List[ExtractedSkill],
qa_results: Dict[str, Any]
) -> Optional[Learning]:
"""Extract a learning from successful task completion."""
if execution.status != "success" or not qa_results.get("passed", False):
return None
# Build learning from components
skill_names = [s.name for s in skills]
learning = Learning(
title=self._generate_title(execution),
description=self._generate_description(execution, skills),
skill_names=skill_names,
pattern=self._extract_pattern(execution),
applicability=self._determine_applicability(execution, skills),
confidence=self._calculate_confidence(skills, qa_results),
source_qa_results=qa_results,
related_learnings=[]
)
return learning
def store_learning(self, learning: Learning) -> str:
"""Store learning in knowledge graph."""
# Create learning entity
learning_name = f"learning_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{learning.title[:30]}"
content = f"""Title: {learning.title}
Description: {learning.description}
Skills Used: {', '.join(learning.skill_names)}
Pattern: {learning.pattern}
Applicability:
{chr(10).join(f' - {a}' for a in learning.applicability)}
Confidence: {learning.confidence:.2%}
QA Results Summary:
{json.dumps(learning.source_qa_results.get('summary', {}), indent=2)}
"""
metadata = {
"skills": learning.skill_names,
"pattern": learning.pattern,
"confidence": learning.confidence,
"applicability": learning.applicability,
"extraction_time": datetime.now().isoformat(),
}
entity_id = self.kg.add_entity(
name=learning_name,
entity_type="finding",
content=content,
metadata=metadata,
source="skill_learning_engine"
)
# Store each skill relationship
for skill_name in learning.skill_names:
try:
self.kg.add_relation(
learning_name,
skill_name,
"references", # Changed from "uses" to valid relation type
f"Learning demonstrates use of {skill_name}"
)
            except Exception:
                # Skill may not yet exist as an entity; skip the relation
                pass
return entity_id
def create_skill_entity(self, skill: ExtractedSkill) -> str:
"""Create or update skill entity in KG."""
content = f"""Category: {skill.category}
Confidence: {skill.confidence:.2%}
Context:
{json.dumps(skill.context, indent=2)}
Evidence: {skill.evidence}
"""
metadata = {
"category": skill.category,
"confidence": skill.confidence,
"source_task": skill.source_task_id,
}
return self.kg.add_entity(
name=skill.name,
entity_type="finding",
content=content,
metadata=metadata,
source="skill_extractor"
)
def _generate_title(self, execution: TaskExecution) -> str:
"""Generate a learning title from task execution."""
# Extract key concepts from prompt
words = execution.prompt.split()[:5]
return " ".join(words).title()
def _generate_description(self, execution: TaskExecution, skills: List[ExtractedSkill]) -> str:
"""Generate learning description."""
skill_summary = ", ".join([s.name for s in skills[:3]])
return f"""Task: {execution.prompt[:150]}...
Project: {execution.project}
Status: {execution.status}
Tools: {', '.join(execution.tools_used[:3])}
Key Skills: {skill_summary}
"""
def _extract_pattern(self, execution: TaskExecution) -> str:
"""Extract the core pattern from task execution."""
# Simplified pattern extraction
if "debug" in execution.prompt.lower():
return "debugging_pattern"
elif "refactor" in execution.prompt.lower():
return "refactoring_pattern"
elif "integrat" in execution.prompt.lower():
return "integration_pattern"
else:
return "general_task_pattern"
def _determine_applicability(self, execution: TaskExecution, skills: List[ExtractedSkill]) -> List[str]:
"""Determine which contexts this learning applies to."""
applicability = [
execution.project,
f"tool_{execution.tools_used[0].lower()}" if execution.tools_used else "general",
]
# Add skill categories
categories = set(s.category for s in skills)
applicability.extend(list(categories))
return list(set(applicability))
def _calculate_confidence(self, skills: List[ExtractedSkill], qa_results: Dict[str, Any]) -> float:
"""Calculate overall learning confidence."""
# Average skill confidence
skill_confidence = sum(s.confidence for s in skills) / len(skills) if skills else 0.5
# QA pass rate
qa_confidence = 0.9 if qa_results.get("passed", False) else 0.3
# Weighted average
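        # e.g. (illustrative): mean skill confidence 0.8 with QA passed
        # -> 0.8 * 0.6 + 0.9 * 0.4 = 0.84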
return (skill_confidence * 0.6) + (qa_confidence * 0.4)
class SkillRecommender:
"""Recommends skills for future tasks based on learnings."""
def __init__(self):
self.kg = KnowledgeGraph("research", skip_permission_check=True)
def recommend_for_task(self, task_prompt: str, project: str = "general") -> List[Dict[str, Any]]:
"""
Recommend skills for a given task.
Returns list of recommended skills with confidence scores.
"""
recommendations = []
# Search for relevant learnings
query_terms = " ".join(task_prompt.split()[:5])
learnings = self.kg.search(query_terms, limit=10)
for learning in learnings:
if learning.get("error"):
continue
metadata = learning.get("metadata", {})
# Handle metadata as either dict or JSON string
if isinstance(metadata, str):
                try:
                    metadata = json.loads(metadata)
                except (ValueError, TypeError):
                    metadata = {}
if metadata.get("applicability") and project not in metadata.get("applicability", []):
continue
# Extract skills from learning
skills = metadata.get("skills", [])
confidence = metadata.get("confidence", 0.5)
for skill in skills:
recommendations.append({
"skill": skill,
"source_learning": learning.get("name"),
"confidence": confidence,
"applicability": metadata.get("applicability", []),
})
# Sort by confidence
recommendations.sort(key=lambda x: x["confidence"], reverse=True)
return recommendations[:10] # Top 10 recommendations
def get_skill_profile(self) -> Dict[str, Any]:
"""Get overall profile of learned skills."""
skills = self.kg.list_entities(entity_type="finding")
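        # Note: both skill entities and learning entities are stored with
        # entity_type="finding" (see create_skill_entity and store_learning),
        # so this count covers both kinds of records.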
profile = {
"total_learnings": len(skills),
"by_category": {},
"top_skills": [],
"extraction_time": datetime.now().isoformat(),
}
# Categorize
for skill in skills:
metadata = skill.get("metadata", {})
# Handle metadata as either dict or JSON string
if isinstance(metadata, str):
                try:
                    metadata = json.loads(metadata)
                except (ValueError, TypeError):
                    metadata = {}
category = metadata.get("category", "unknown")
if category not in profile["by_category"]:
profile["by_category"][category] = 0
profile["by_category"][category] += 1
# Top skills by frequency
skill_counts = {}
for skill in skills:
metadata = skill.get("metadata", {})
# Handle metadata as either dict or JSON string
if isinstance(metadata, str):
                try:
                    metadata = json.loads(metadata)
                except (ValueError, TypeError):
                    metadata = {}
for skill_name in metadata.get("skills", []):
skill_counts[skill_name] = skill_counts.get(skill_name, 0) + 1
profile["top_skills"] = sorted(
skill_counts.items(),
key=lambda x: x[1],
reverse=True
)[:10]
return profile
class SkillLearningSystem:
"""
Unified system for skill learning and knowledge extraction.
Orchestrates the full pipeline: task execution → analysis →
learning extraction → knowledge graph storage → recommendations.
"""
def __init__(self):
self.analyzer = TaskAnalyzer()
self.extractor = SkillExtractor()
self.learning_engine = LearningEngine()
self.recommender = SkillRecommender()
def process_task_completion(
self,
task_data: Dict[str, Any],
qa_results: Dict[str, Any]
) -> Dict[str, Any]:
"""
Full pipeline: process a completed task and extract learnings.
Args:
task_data: Task execution data
qa_results: QA validation results
Returns:
Dict with extraction results and learning IDs
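        Example return (illustrative values):
            {"success": True, "task_id": "abc123", "skills_extracted": 7,
             "skills_stored": 7, "learning_created": True, "learning_id": "...",
             "skill_ids": [...], "timestamp": "2026-01-14T10:42:16"}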
"""
# 1. Analyze task
execution = self.analyzer.analyze_task(task_data)
if not execution:
return {"error": "Failed to analyze task"}
# 2. Extract skills
task_skills = self.extractor.extract_from_task(execution)
qa_skills = self.extractor.extract_from_qa_results(qa_results)
all_skills = task_skills + qa_skills
# 3. Store skills in KG
skill_ids = []
for skill in all_skills:
try:
skill_id = self.learning_engine.create_skill_entity(skill)
skill_ids.append(skill_id)
except Exception as e:
print(f"Failed to store skill: {e}")
# 4. Extract learning
learning = self.learning_engine.extract_learning(execution, all_skills, qa_results)
learning_id = None
if learning:
try:
learning_id = self.learning_engine.store_learning(learning)
except Exception as e:
print(f"Failed to store learning: {e}")
return {
"success": True,
"task_id": execution.task_id,
"skills_extracted": len(all_skills),
"skills_stored": len(skill_ids),
"learning_created": learning_id is not None,
"learning_id": learning_id,
"skill_ids": skill_ids,
"timestamp": datetime.now().isoformat(),
}
def get_recommendations(self, task_prompt: str, project: str = "general") -> List[Dict[str, Any]]:
"""Get skill recommendations for a task."""
return self.recommender.recommend_for_task(task_prompt, project)
def get_learning_summary(self) -> Dict[str, Any]:
"""Get summary of all learnings and skill profile."""
return self.recommender.get_skill_profile()
# --- CLI ---
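# Example invocations (illustrative; script name taken from the repository path,
# task.json / qa.json are placeholder file names):
#   python3 skill_learning_engine.py test
#   python3 skill_learning_engine.py process --task-data task.json --qa-results qa.json
#   python3 skill_learning_engine.py recommend --task-prompt "Optimize database performance" --project overbits
#   python3 skill_learning_engine.py summary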
def main():
import argparse
parser = argparse.ArgumentParser(description="Skill Learning Engine")
parser.add_argument("command", choices=["process", "recommend", "summary", "test"])
parser.add_argument("--task-data", help="JSON file with task data")
parser.add_argument("--qa-results", help="JSON file with QA results")
parser.add_argument("--task-prompt", help="Task prompt for recommendations")
parser.add_argument("--project", default="general", help="Project name")
args = parser.parse_args()
system = SkillLearningSystem()
if args.command == "process":
if not args.task_data or not args.qa_results:
print("Error: --task-data and --qa-results required")
            sys.exit(1)
task_data = json.loads(Path(args.task_data).read_text())
qa_results = json.loads(Path(args.qa_results).read_text())
result = system.process_task_completion(task_data, qa_results)
print(json.dumps(result, indent=2))
elif args.command == "recommend":
if not args.task_prompt:
print("Error: --task-prompt required")
            sys.exit(1)
recommendations = system.get_recommendations(args.task_prompt, args.project)
print(json.dumps(recommendations, indent=2))
elif args.command == "summary":
summary = system.get_learning_summary()
print(json.dumps(summary, indent=2))
elif args.command == "test":
print("=== Testing Skill Learning System ===\n")
# Test task data
test_task = {
"task_id": "test_001",
"prompt": "Refactor and optimize the database schema for better performance",
"project": "overbits",
"status": "success",
"tools_used": ["Bash", "Read", "Edit"],
"duration": 45.2,
"result_summary": "Successfully refactored schema with 40% query improvement",
"qa_passed": True,
"timestamp": datetime.now().isoformat()
}
test_qa = {
"passed": True,
"results": {
"syntax": True,
"routes": True,
"documentation": True,
},
"summary": {
"errors": 0,
"warnings": 0,
"info": 3,
},
"timestamp": datetime.now().isoformat()
}
print("Processing test task...")
result = system.process_task_completion(test_task, test_qa)
print(json.dumps(result, indent=2))
print("\nGetting recommendations...")
recommendations = system.get_recommendations(
"Optimize database performance",
"overbits"
)
print(json.dumps(recommendations, indent=2))
print("\nLearning summary...")
summary = system.get_learning_summary()
print(json.dumps(summary, indent=2))
if __name__ == "__main__":
main()