Files
luzia/lib/project_knowledge_loader.py
admin ec33ac1936 Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Added DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00

623 lines
21 KiB
Python

"""
Project Knowledge Loader - Per-project RAG context injection for Luzia.
Industry Standard Implementation:
- .knowledge/ directory in each project (similar to LlamaIndex storage/, LangChain vector_store/)
- entities.json: Project-specific facts and definitions
- relations.json: Connections between concepts
- context.md: Human-readable project context (like CLAUDE.md)
- vectors/: Optional embeddings for semantic search
Usage:
    from project_knowledge_loader import ProjectKnowledgeLoader
    loader = ProjectKnowledgeLoader()
    context = loader.format_for_prompt("musica", task_query)
"""
import json
import logging
import os
import sqlite3
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# Root logging is configured as a side effect of importing this module;
# records are rendered as "<LEVEL>: <message>" at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
)

# Module-level logger shared by every class and function in this file.
logger = logging.getLogger(__name__)
# =============================================================================
# DATA STRUCTURES
# =============================================================================
@dataclass
class KnowledgeEntity:
    """Single knowledge entity in project KG.

    Instances are built from the records stored under the ``"entities"``
    key of a project's ``entities.json`` (``KnowledgeEntity(**record)``).

    ``tags`` and ``metadata`` default to empty containers so that a record
    which omits them can still be constructed instead of raising
    ``TypeError``. Positional construction with all fields is unchanged.
    """

    id: str
    name: str
    type: str  # component, api, config, pattern, decision, etc.
    description: str
    # default_factory gives every instance its own list/dict; a shared
    # mutable default would leak edits between entities.
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Timestamps are plain strings; an empty string means "not recorded".
    created_at: str = ""
    updated_at: str = ""
@dataclass
class KnowledgeRelation:
    """Labelled link from one entity to another within a project KG.

    ``source`` and ``target`` are compared against entity ids when the
    loader resolves related entities.
    """

    source: str
    relation: str  # uses, depends_on, implements, extends, etc.
    target: str
    context: str = ""
    weight: float = 1.0
@dataclass
class ProjectKnowledge:
    """Everything the loader holds for one project.

    Bundles the parsed entities and relations with the raw text of the
    project's context.md file.
    """

    project: str
    version: str
    entities: List[KnowledgeEntity]
    relations: List[KnowledgeRelation]
    context_md: str  # Human-readable context
    last_updated: str
# =============================================================================
# KNOWLEDGE STRUCTURE TEMPLATE
# =============================================================================
# Skeleton of a project knowledge document: a version marker, empty
# project/description slots, one placeholder root entity, no relations,
# and an empty "context" section.
KNOWLEDGE_TEMPLATE = {
    "version": "1.0",
    "project": "",
    "description": "",
    "entities": [
        {
            "id": "project_root",
            "name": "Project Root",
            "type": "component",
            "description": "Main project structure",
            "tags": ["structure"],
            "metadata": {},
        },
    ],
    "relations": [],
    "context": {
        "focus": "",
        "tech_stack": [],
        "conventions": [],
        "important_files": [],
        "common_tasks": [],
    },
}
# Markdown skeleton for a project's context.md file.
# Rendered with str.format(); the placeholders are {project}, {description},
# {tech_stack}, {directories}, {tasks} and {patterns}, and all six must be
# supplied or format() raises KeyError.
CONTEXT_MD_TEMPLATE = """# {project} Project Knowledge
## Overview
{description}
## Tech Stack
{tech_stack}
## Key Directories
{directories}
## Common Tasks
{tasks}
## Important Patterns
{patterns}
---
*Auto-generated by Luzia. Edit to customize project context.*
"""
# =============================================================================
# PROJECT KNOWLEDGE LOADER
# =============================================================================
class ProjectKnowledgeLoader:
    """Load and manage per-project knowledge graphs.

    Each configured project may carry a ``.knowledge/`` directory holding
    ``entities.json``, ``relations.json`` and ``context.md``. This class
    resolves that directory from the Luzia config, parses the files into a
    ``ProjectKnowledge`` object (cached per project), and offers keyword
    search, relation traversal and prompt formatting on top of it.
    """

    # Standard paths Luzia expects in each project
    KNOWLEDGE_DIR = ".knowledge"
    ENTITIES_FILE = "entities.json"
    RELATIONS_FILE = "relations.json"
    CONTEXT_FILE = "context.md"
    KG_DB_FILE = "knowledge.db"  # not referenced by any method of this class

    def __init__(self, config_path: str = "/opt/server-agents/orchestrator/config.json"):
        """Read the project table from ``config_path`` and start with an empty cache."""
        self.config_path = config_path
        self.projects = self._load_projects()
        self._cache: Dict[str, "ProjectKnowledge"] = {}
        logger.debug(f"ProjectKnowledgeLoader initialized with {len(self.projects)} projects")

    def _load_projects(self) -> Dict[str, Dict]:
        """Load project configurations from Luzia config.

        Returns the ``"projects"`` mapping of the JSON config, or an empty
        dict when the file cannot be read/parsed or the value is not a
        mapping (best effort: a broken config must not prevent construction).
        """
        try:
            with open(self.config_path, 'r', encoding="utf-8") as f:
                config = json.load(f)
            projects = config.get("projects", {})
        except Exception as e:
            logger.warning(f"Could not load config: {e}")
            return {}
        if not isinstance(projects, dict):
            # e.g. "projects": null would otherwise break len()/lookups later.
            logger.warning("Could not load config: 'projects' is not a mapping")
            return {}
        return projects

    @staticmethod
    def _read_json_if_present(path: Path) -> Dict[str, Any]:
        """Return the parsed JSON document at ``path``, or ``{}`` if the file is absent."""
        if not path.exists():
            return {}
        with open(path, 'r', encoding="utf-8") as f:
            return json.load(f)

    def get_knowledge_path(self, project: str) -> Optional[Path]:
        """Get the .knowledge/ path for a project.

        Returns ``None`` for a project that is not in the config. A project
        without an explicit ``"path"`` is assumed to live in ``/home/<project>``.
        """
        if project not in self.projects:
            return None
        project_path = self.projects[project].get("path", f"/home/{project}")
        return Path(project_path) / self.KNOWLEDGE_DIR

    def has_knowledge(self, project: str) -> bool:
        """Check if a project has a .knowledge/ directory."""
        kg_path = self.get_knowledge_path(project)
        if kg_path is None:
            return False
        try:
            return kg_path.exists()
        except PermissionError:
            # Can't access the directory (not our project)
            return False

    def load_project_knowledge(self, project: str, force_reload: bool = False) -> Optional["ProjectKnowledge"]:
        """Load all knowledge for a project.

        Args:
            project: Project name as it appears in the config.
            force_reload: Bypass the in-memory cache and re-read the files.

        Returns:
            The parsed ``ProjectKnowledge``, or ``None`` when the project is
            unknown, has no accessible ``.knowledge/`` directory, or any of
            its files fails to load (the error is logged as a warning).
        """
        # Check cache
        if not force_reload and project in self._cache:
            return self._cache[project]

        # has_knowledge() also covers unknown projects and directories we are
        # not permitted to inspect, so an unreadable project degrades to
        # "no knowledge" instead of raising.
        if not self.has_knowledge(project):
            logger.debug(f"No .knowledge/ for project {project}")
            return None
        kg_path = self.get_knowledge_path(project)

        try:
            # Load entities
            entities_doc = self._read_json_if_present(kg_path / self.ENTITIES_FILE)
            entities = [KnowledgeEntity(**e) for e in entities_doc.get("entities", [])]

            # Load relations
            relations_doc = self._read_json_if_present(kg_path / self.RELATIONS_FILE)
            relations = [KnowledgeRelation(**r) for r in relations_doc.get("relations", [])]

            # Load context.md
            context_md = ""
            context_file = kg_path / self.CONTEXT_FILE
            if context_file.exists():
                with open(context_file, 'r', encoding="utf-8") as f:
                    context_md = f.read()

            # Create ProjectKnowledge
            knowledge = ProjectKnowledge(
                project=project,
                version="1.0",
                entities=entities,
                relations=relations,
                context_md=context_md,
                last_updated=datetime.now().isoformat(),
            )

            # Cache it
            self._cache[project] = knowledge
            logger.debug(f"Loaded knowledge for {project}: {len(entities)} entities, {len(relations)} relations")
            return knowledge
        except Exception as e:
            logger.warning(f"Error loading knowledge for {project}: {e}")
            return None

    def search_project_knowledge(self, project: str, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Search project knowledge for relevant context.

        Scoring per entity: +3 if the query is a substring of the name, +2 if
        it is a substring of the description, +1 per tag that contains or is
        contained in the query, +0.5 per whitespace-separated word shared
        between the query and the name/description. Matching is
        case-insensitive.

        Args:
            project: Project name.
            query: Free-text query; surrounding whitespace is ignored.
            top_k: Maximum number of results.

        Returns:
            Up to ``top_k`` result dicts sorted by descending ``"relevance"``.
            Empty when the project has no knowledge, the query is blank, or
            ``top_k`` is not positive.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop the tail instead.
            return []
        query_lower = query.strip().lower()
        if not query_lower:
            # "" is a substring of every string, so a blank query would
            # otherwise "match" every entity with a high score.
            return []

        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return []

        results = []
        query_words = set(query_lower.split())

        # Score entities by relevance
        for entity in knowledge.entities:
            name_lower = entity.name.lower()
            description_lower = entity.description.lower()
            score = 0

            # Check name match
            if query_lower in name_lower:
                score += 3
            # Check description match
            if query_lower in description_lower:
                score += 2
            # Check tag matches (empty tags are skipped: "" is in everything)
            for tag in entity.tags:
                tag_lower = tag.lower()
                if tag_lower and (tag_lower in query_lower or query_lower in tag_lower):
                    score += 1
            # Word overlap scoring
            entity_words = set(name_lower.split() + description_lower.split())
            score += len(query_words & entity_words) * 0.5

            if score > 0:
                results.append({
                    "entity_id": entity.id,
                    "name": entity.name,
                    "type": entity.type,
                    "description": entity.description,
                    "tags": entity.tags,
                    "relevance": score,
                    "source": "project_kg",
                })

        # Sort by relevance and return top_k
        results.sort(key=lambda x: x["relevance"], reverse=True)
        return results[:top_k]

    def get_related_entities(self, project: str, entity_id: str, depth: int = 1) -> List[Dict[str, Any]]:
        """Get entities related to a given entity.

        Args:
            project: Project name.
            entity_id: Id of the entity to start from.
            depth: How many relation hops to follow. Values below 1 are
                treated as 1 (direct relations only).

        Returns:
            A list of ``{"entity", "relation", "direction"}`` dicts. The first
            hop lists every relation touching ``entity_id`` in file order.
            Further hops list relations that lead to entities not reached on
            an earlier hop; ``"direction"`` is then relative to the
            intermediate entity the relation was followed from. Relations
            whose other endpoint is not a known entity are skipped.
        """
        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return []

        related = []
        entity_map = {e.id: e for e in knowledge.entities}
        visited = {entity_id}
        frontier = {entity_id}
        used_relations = set()  # indexes of relations already reported

        for hop in range(max(depth, 1)):
            next_frontier = set()
            for index, relation in enumerate(knowledge.relations):
                if index in used_relations:
                    continue
                if relation.source in frontier:
                    other_id, direction = relation.target, "outgoing"
                elif relation.target in frontier:
                    other_id, direction = relation.source, "incoming"
                else:
                    continue
                other = entity_map.get(other_id)
                if other is None:
                    continue
                if hop > 0 and other_id in visited:
                    # Deeper hops only surface newly reached entities.
                    continue
                used_relations.add(index)
                related.append({
                    "entity": asdict(other),
                    "relation": relation.relation,
                    "direction": direction,
                })
                if other_id not in visited:
                    next_frontier.add(other_id)
            if not next_frontier:
                break
            visited |= next_frontier
            frontier = next_frontier

        return related

    def format_for_prompt(self, project: str, query: str, max_tokens: int = 2000) -> str:
        """Format project knowledge for prompt injection.

        Combines the first 1000 characters of context.md with up to five
        entities relevant to ``query``. The text is cut at ``max_tokens * 4``
        characters (rough char-to-token ratio) and a truncation marker is
        appended after the cut. Returns ``""`` when the project has no
        knowledge.
        """
        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return ""

        sections = []

        # Add context.md summary (prioritize human-written context)
        if knowledge.context_md:
            context_preview = knowledge.context_md[:1000]
            if len(knowledge.context_md) > 1000:
                context_preview += "\n..."
            sections.append(f"## Project Context\n{context_preview}")

        # Add relevant entities based on query
        relevant = self.search_project_knowledge(project, query, top_k=5)
        if relevant:
            bullet_lines = [
                f"- **{item['name']}** ({item['type']}): {item['description'][:100]}\n"
                for item in relevant
            ]
            sections.append("## Relevant Project Knowledge\n" + "".join(bullet_lines))

        # Combine sections
        result = "\n\n".join(sections)

        # Truncate if needed
        max_chars = max_tokens * 4  # rough char to token ratio
        if len(result) > max_chars:
            result = result[:max_chars] + "\n...(truncated)"
        return result

    def initialize_project_knowledge(self, project: str, overwrite: bool = False) -> bool:
        """Create .knowledge/ directory with template files for a project.

        Args:
            project: Project name; must be present in the config.
            overwrite: Rewrite the template files even if the directory exists.

        Returns:
            True when the files were written; False for an unknown project,
            an existing directory without ``overwrite``, or a write failure.
        """
        kg_path = self.get_knowledge_path(project)
        if not kg_path:
            logger.error(f"Unknown project: {project}")
            return False

        # has_knowledge() shields the existence probe against PermissionError.
        if self.has_knowledge(project) and not overwrite:
            logger.info(f"Knowledge already exists for {project}. Use overwrite=True to replace.")
            return False

        try:
            # Create directory
            kg_path.mkdir(parents=True, exist_ok=True)

            # Get project info
            project_config = self.projects.get(project, {})
            description = project_config.get("description", "")
            focus = project_config.get("focus", "")

            # Create entities.json
            now = datetime.now().isoformat()
            entities_data = {
                "version": "1.0",
                "project": project,
                "entities": [
                    {
                        "id": "project_overview",
                        "name": project,
                        "type": "project",
                        "description": description or f"{project} project",
                        "tags": ["root", "overview"],
                        "metadata": {"focus": focus},
                        "created_at": now,
                        "updated_at": now,
                    }
                ],
            }
            with open(kg_path / self.ENTITIES_FILE, 'w', encoding="utf-8") as f:
                json.dump(entities_data, f, indent=2)

            # Create relations.json
            relations_data = {
                "version": "1.0",
                "project": project,
                "relations": [],
            }
            with open(kg_path / self.RELATIONS_FILE, 'w', encoding="utf-8") as f:
                json.dump(relations_data, f, indent=2)

            # Create context.md
            context_content = CONTEXT_MD_TEMPLATE.format(
                project=project,
                description=description or "Project description here",
                tech_stack="- Add tech stack items",
                directories="- /src - Source code\n- /docs - Documentation",
                tasks="- Build: `npm run build`\n- Test: `npm test`",
                patterns="- Add important patterns and conventions",
            )
            with open(kg_path / self.CONTEXT_FILE, 'w', encoding="utf-8") as f:
                f.write(context_content)

            # The files on disk changed; drop any previously cached view.
            self._cache.pop(project, None)
            logger.info(f"Initialized .knowledge/ for {project} at {kg_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize knowledge for {project}: {e}")
            return False

    def sync_from_claude_md(self, project: str) -> bool:
        """Sync knowledge from existing CLAUDE.md file.

        Copies ``<project path>/CLAUDE.md`` into ``.knowledge/context.md``
        below a short generated header, replacing any existing context.md.

        Returns:
            True on success; False when the project has no configured path,
            CLAUDE.md is missing, or reading/writing fails.
        """
        project_path = self.projects.get(project, {}).get("path")
        if not project_path:
            return False
        kg_path = self.get_knowledge_path(project)
        if not kg_path:
            return False

        claude_md_path = Path(project_path) / "CLAUDE.md"
        try:
            # Probed inside the try so an inaccessible project directory is
            # reported as a failed sync rather than an uncaught exception.
            if not claude_md_path.exists():
                logger.debug(f"No CLAUDE.md found for {project}")
                return False

            # Ensure .knowledge/ exists
            kg_path.mkdir(parents=True, exist_ok=True)

            # Read CLAUDE.md
            with open(claude_md_path, 'r', encoding="utf-8") as f:
                claude_content = f.read()

            # Write to context.md (preserving CLAUDE.md content)
            context_file = kg_path / self.CONTEXT_FILE
            with open(context_file, 'w', encoding="utf-8") as f:
                f.write(f"# {project} Project Knowledge\n\n")
                f.write("*Synced from CLAUDE.md*\n\n")
                f.write(claude_content)

            # context.md changed; make the next load re-read it.
            self._cache.pop(project, None)
            logger.info(f"Synced CLAUDE.md to .knowledge/context.md for {project}")
            return True
        except Exception as e:
            logger.error(f"Failed to sync CLAUDE.md for {project}: {e}")
            return False

    def list_projects_with_knowledge(self) -> List[Dict[str, Any]]:
        """List all projects and their knowledge status.

        Each entry carries the project name, configured path and description,
        whether a ``.knowledge/`` directory exists and where. When the
        knowledge loads successfully the entry also has ``entity_count``,
        ``relation_count`` and ``has_context_md``.
        """
        results = []
        for project, config in self.projects.items():
            has_kg = self.has_knowledge(project)
            kg_path = self.get_knowledge_path(project)
            info = {
                "project": project,
                "path": config.get("path", ""),
                "description": config.get("description", ""),
                "has_knowledge": has_kg,
                "knowledge_path": str(kg_path) if kg_path else None,
            }
            if has_kg:
                knowledge = self.load_project_knowledge(project)
                if knowledge:
                    info["entity_count"] = len(knowledge.entities)
                    info["relation_count"] = len(knowledge.relations)
                    info["has_context_md"] = bool(knowledge.context_md)
            results.append(info)
        return results
# =============================================================================
# RAG CONTEXT BUILDER (For prompt injection)
# =============================================================================
class ProjectRAGContext:
    """Build RAG-enhanced context for task dispatch."""

    def __init__(self, loader: Optional["ProjectKnowledgeLoader"] = None):
        """Create the builder.

        Args:
            loader: Knowledge loader to query. When omitted, a
                ``ProjectKnowledgeLoader`` with its default config path is
                created, which is what the no-argument constructor always did.
        """
        self.loader = loader if loader is not None else ProjectKnowledgeLoader()

    def build_context(self, project: str, task: str, include_global: bool = True) -> Dict[str, Any]:
        """
        Build complete RAG context for a project task.

        Args:
            project: Project name.
            task: Task description used as the search query.
            include_global: Also try the global knowledge graph retriever.
                Any failure there (including the retriever module not being
                importable) is logged at debug level and ignored.

        Returns:
            {
                "project_context": str,           # Formatted project knowledge
                "relevant_entities": List[Dict],  # Relevant knowledge items
                "context_source": str,            # "project_kg", "global_kg", "hybrid", "none"
                "metadata": Dict                  # Additional context info
            }
        """
        result = {
            "project_context": "",
            "relevant_entities": [],
            "context_source": "none",
            "metadata": {},
        }

        # Try to load project-specific knowledge
        project_context = self.loader.format_for_prompt(project, task)
        if project_context:
            result["project_context"] = project_context
            result["relevant_entities"] = self.loader.search_project_knowledge(project, task)
            result["context_source"] = "project_kg"
            result["metadata"]["project"] = project
            result["metadata"]["entities_found"] = len(result["relevant_entities"])

        # Optionally include global knowledge (from /etc/luz-knowledge/)
        if include_global:
            try:
                # Imported lazily so this module works without the retriever.
                from langchain_kg_retriever import KnowledgeGraphRetriever
                global_retriever = KnowledgeGraphRetriever()
                global_results = global_retriever.retrieve(f"{project} {task}", top_k=3)
                if global_results:
                    global_text = "\n## Global Knowledge\n"
                    for item in global_results:
                        global_text += f"- {item['name']}: {item.get('content', '')[:100]}\n"
                    # Only mutate the result once the whole section rendered.
                    result["project_context"] += global_text
                    result["metadata"]["global_results"] = len(global_results)
                    if result["context_source"] == "none":
                        result["context_source"] = "global_kg"
                    else:
                        result["context_source"] = "hybrid"
            except Exception as e:
                logger.debug(f"Global KG retrieval failed: {e}")

        return result
# =============================================================================
# CLI INTERFACE
# =============================================================================
def main():
    """CLI for project knowledge management.

    Reads the command and its arguments from ``sys.argv`` and prints the
    outcome. Run without arguments for the command list. A known command
    invoked with too few arguments is reported as such, and ``init-all``
    reports the real outcome of each initialization.
    """
    import sys

    loader = ProjectKnowledgeLoader()

    if len(sys.argv) < 2:
        print("Usage: project_knowledge_loader.py <command> [args]")
        print("")
        print("Commands:")
        print(" list - List all projects and knowledge status")
        print(" init <project> - Initialize .knowledge/ for a project")
        print(" sync <project> - Sync from CLAUDE.md")
        print(" search <project> <query> - Search project knowledge")
        print(" context <project> <task> - Get RAG context for a task")
        print(" init-all - Initialize knowledge for all projects")
        return

    command = sys.argv[1]
    # Minimum number of arguments each argument-taking command needs.
    required_args = {"init": 1, "sync": 1, "search": 2, "context": 2}
    if command in required_args and len(sys.argv) - 2 < required_args[command]:
        print(f"Missing arguments for command: {command}")
        print("Run without args for help")
        return

    if command == "list":
        projects = loader.list_projects_with_knowledge()
        print(f"\n{'Project':<15} {'Has KG':<10} {'Entities':<10} {'Description'}")
        print("-" * 70)
        for p in projects:
            has_kg = "Yes" if p["has_knowledge"] else "No"
            entities = p.get("entity_count", "-")
            print(f"{p['project']:<15} {has_kg:<10} {str(entities):<10} {p['description'][:30]}")

    elif command == "init":
        project = sys.argv[2]
        if loader.initialize_project_knowledge(project):
            print(f"Initialized .knowledge/ for {project}")
        else:
            print(f"Failed to initialize knowledge for {project}")

    elif command == "sync":
        project = sys.argv[2]
        if loader.sync_from_claude_md(project):
            print(f"Synced CLAUDE.md to .knowledge/ for {project}")
        else:
            print("Failed to sync (no CLAUDE.md or error)")

    elif command == "search":
        project = sys.argv[2]
        query = " ".join(sys.argv[3:])
        results = loader.search_project_knowledge(project, query)
        print(f"\nSearch results for '{query}' in {project}:")
        for r in results:
            print(f" - {r['name']} ({r['type']}): {r['description'][:50]}... [score: {r['relevance']:.2f}]")

    elif command == "context":
        project = sys.argv[2]
        task = " ".join(sys.argv[3:])
        rag = ProjectRAGContext()
        context = rag.build_context(project, task)
        print(f"\nRAG Context for {project} - '{task}':")
        print(f"Source: {context['context_source']}")
        print(f"Entities found: {len(context['relevant_entities'])}")
        print("\n--- Context ---")
        print(context['project_context'][:2000])

    elif command == "init-all":
        for project in loader.projects:
            if loader.has_knowledge(project):
                print(f"Skipped (exists): {project}")
            elif loader.initialize_project_knowledge(project):
                print(f"Initialized: {project}")
            else:
                # Previously reported as "Initialized" even when nothing was written.
                print(f"Failed to initialize knowledge for {project}")

    else:
        print(f"Unknown command: {command}")
        print("Run without args for help")


if __name__ == "__main__":
    main()