Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
623 lines
21 KiB
Python
623 lines
21 KiB
Python
"""
|
|
Project Knowledge Loader - Per-project RAG context injection for Luzia.
|
|
|
|
Industry Standard Implementation:
|
|
- .knowledge/ directory in each project (similar to LlamaIndex storage/, LangChain vector_store/)
|
|
- entities.json: Project-specific facts and definitions
|
|
- relations.json: Connections between concepts
|
|
- context.md: Human-readable project context (like CLAUDE.md)
|
|
- vectors/: Optional embeddings for semantic search
|
|
|
|
Usage:
|
|
from project_knowledge_loader import ProjectKnowledgeLoader
|
|
|
|
loader = ProjectKnowledgeLoader()
|
|
context = loader.load_project_context("musica", task_query)
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import logging
|
|
from typing import Dict, List, Any, Optional
|
|
from dataclasses import dataclass, asdict
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Root logging is configured at import time: INFO and above, rendered as
# "LEVEL: message". All code in this module logs through `logger`.
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# =============================================================================
|
|
# DATA STRUCTURES
|
|
# =============================================================================
|
|
|
|
@dataclass
class KnowledgeEntity:
    """One node of a project's knowledge graph.

    The first six fields are required; the two timestamps default to the
    empty string when a record does not carry them.
    """

    # Identifier that relations use to refer to this entity.
    id: str
    # Display name.
    name: str
    # Category label: component, api, config, pattern, decision, etc.
    type: str
    # Free-text explanation of the entity.
    description: str
    # Keywords attached to the entity.
    tags: List[str]
    # Arbitrary extra key/value data.
    metadata: Dict[str, Any]
    # Timestamps kept as plain strings; empty when unknown.
    created_at: str = ""
    updated_at: str = ""
|
|
|
|
|
|
@dataclass
class KnowledgeRelation:
    """A directed, labelled edge between two knowledge entities."""

    # Id of the entity the edge starts from.
    source: str
    # Edge label: uses, depends_on, implements, extends, etc.
    relation: str
    # Id of the entity the edge points to.
    target: str
    # Optional free-text note about the edge.
    context: str = ""
    # Numeric weight of the edge; 1.0 unless the record says otherwise.
    weight: float = 1.0
|
|
|
|
|
|
@dataclass
class ProjectKnowledge:
    """Everything loaded from one project's knowledge directory."""

    # Project name this knowledge belongs to.
    project: str
    # Version string of the knowledge format.
    version: str
    # Parsed entity records.
    entities: List[KnowledgeEntity]
    # Parsed relation records.
    relations: List[KnowledgeRelation]
    # Human-readable context
    context_md: str
    # Timestamp string recorded when the object was built.
    last_updated: str
|
|
|
|
|
|
# =============================================================================
|
|
# KNOWLEDGE STRUCTURE TEMPLATE
|
|
# =============================================================================
|
|
|
|
# Skeleton of a project knowledge document: format version, project identity,
# one placeholder root entity, no relations, and an empty "context" section.
KNOWLEDGE_TEMPLATE = {
    "version": "1.0",
    "project": "",
    "description": "",
    "entities": [
        # Placeholder entity standing for the project root.
        {
            "id": "project_root",
            "name": "Project Root",
            "type": "component",
            "description": "Main project structure",
            "tags": ["structure"],
            "metadata": {},
        },
    ],
    "relations": [],
    "context": {
        "focus": "",
        "tech_stack": [],
        "conventions": [],
        "important_files": [],
        "common_tasks": [],
    },
}
|
|
|
|
CONTEXT_MD_TEMPLATE = """# {project} Project Knowledge
|
|
|
|
## Overview
|
|
{description}
|
|
|
|
## Tech Stack
|
|
{tech_stack}
|
|
|
|
## Key Directories
|
|
{directories}
|
|
|
|
## Common Tasks
|
|
{tasks}
|
|
|
|
## Important Patterns
|
|
{patterns}
|
|
|
|
---
|
|
*Auto-generated by Luzia. Edit to customize project context.*
|
|
"""
|
|
|
|
|
|
# =============================================================================
|
|
# PROJECT KNOWLEDGE LOADER
|
|
# =============================================================================
|
|
|
|
class ProjectKnowledgeLoader:
    """Load and manage per-project knowledge graphs.

    Project locations come from the ``projects`` mapping of the Luzia config
    file. A project may carry a ``.knowledge/`` directory containing
    ``entities.json``, ``relations.json`` and ``context.md``. Parsed knowledge
    is cached per project on the instance and dropped again whenever this
    class rewrites the project's knowledge files.
    """

    # Standard paths Luzia expects in each project
    KNOWLEDGE_DIR = ".knowledge"
    ENTITIES_FILE = "entities.json"
    RELATIONS_FILE = "relations.json"
    CONTEXT_FILE = "context.md"
    KG_DB_FILE = "knowledge.db"  # not referenced by any method of this class

    def __init__(self, config_path: str = "/opt/server-agents/orchestrator/config.json"):
        """Read the project table from *config_path* and start with an empty cache."""
        self.config_path = config_path
        self.projects = self._load_projects()
        self._cache: Dict[str, "ProjectKnowledge"] = {}
        logger.debug("ProjectKnowledgeLoader initialized with %d projects", len(self.projects))

    def _load_projects(self) -> Dict[str, Dict]:
        """Load project configurations from Luzia config.

        Returns:
            The ``projects`` mapping of the config file, or ``{}`` when the
            file is missing, unreadable, not valid JSON, or does not hold a
            mapping under ``projects``. Problems are logged as warnings.
        """
        try:
            with open(self.config_path, 'r', encoding="utf-8") as f:
                config = json.load(f)
        except (OSError, ValueError, TypeError) as e:
            # ValueError covers JSON and text-decoding errors.
            logger.warning("Could not load config: %s", e)
            return {}

        projects = config.get("projects", {}) if isinstance(config, dict) else None
        if not isinstance(projects, dict):
            # Every other method indexes self.projects by project name.
            logger.warning("Could not load config: %s", "'projects' is not a JSON object")
            return {}
        return projects

    def get_knowledge_path(self, project: str) -> Optional[Path]:
        """Get the .knowledge/ path for a project.

        Returns ``None`` for a project that is not in the config. A project
        without a ``path`` entry falls back to ``/home/<project>``.
        """
        if project not in self.projects:
            return None

        project_path = self.projects[project].get("path", f"/home/{project}")
        return Path(project_path) / self.KNOWLEDGE_DIR

    def has_knowledge(self, project: str) -> bool:
        """Check if a project has a .knowledge/ directory.

        Returns ``False`` for unknown projects and when the path cannot be
        inspected (for example, permission denied).
        """
        kg_path = self.get_knowledge_path(project)
        if kg_path is None:
            return False
        try:
            return kg_path.exists()
        except OSError:
            # Can't access the directory (not our project)
            return False

    @staticmethod
    def _load_records(path: Path, key: str, factory) -> List[Any]:
        """Build *factory* objects from the list stored under *key* in a JSON file.

        A missing file yields an empty list. A record that *factory* rejects
        (missing or unexpected fields, not a mapping) is skipped with a
        warning so one bad record does not discard the rest.
        """
        if not path.exists():
            return []
        with open(path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        records = []
        for raw in data.get(key, []):
            try:
                records.append(factory(**raw))
            except TypeError as e:
                logger.warning("Skipping malformed %s record in %s: %s", key, path, e)
        return records

    def load_project_knowledge(self, project: str, force_reload: bool = False) -> Optional["ProjectKnowledge"]:
        """Load all knowledge for a project.

        Args:
            project: Project name as it appears in the config.
            force_reload: Ignore the cached copy and re-read the files.

        Returns:
            The parsed knowledge, or ``None`` when the project is unknown, has
            no accessible ``.knowledge/`` directory, or its files cannot be
            read (the failure is logged as a warning).
        """
        # Check cache
        if not force_reload and project in self._cache:
            return self._cache[project]

        # has_knowledge() also absorbs permission errors from the existence check.
        if not self.has_knowledge(project):
            logger.debug("No .knowledge/ for project %s", project)
            return None
        kg_path = self.get_knowledge_path(project)

        try:
            entities = self._load_records(kg_path / self.ENTITIES_FILE, "entities", KnowledgeEntity)
            relations = self._load_records(kg_path / self.RELATIONS_FILE, "relations", KnowledgeRelation)

            # Load context.md
            context_md = ""
            context_file = kg_path / self.CONTEXT_FILE
            if context_file.exists():
                context_md = context_file.read_text(encoding="utf-8")

            knowledge = ProjectKnowledge(
                project=project,
                version="1.0",
                entities=entities,
                relations=relations,
                context_md=context_md,
                last_updated=datetime.now().isoformat(),
            )
        except Exception as e:
            # Best-effort boundary: callers treat None as "no knowledge".
            logger.warning("Error loading knowledge for %s: %s", project, e)
            return None

        # Cache it
        self._cache[project] = knowledge
        logger.debug(
            "Loaded knowledge for %s: %d entities, %d relations",
            project, len(entities), len(relations),
        )
        return knowledge

    def search_project_knowledge(self, project: str, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Search project knowledge for relevant context.

        Scoring per entity: +3 if the query is a substring of the name, +2 if
        it is a substring of the description, +1 per tag that contains or is
        contained in the query, +0.5 per query word that also occurs in the
        name or description. Entities scoring 0 are dropped.

        Args:
            project: Project name.
            query: Free-text query; surrounding whitespace is ignored.
            top_k: Maximum number of results.

        Returns:
            Up to *top_k* result dicts sorted by descending ``relevance``.
            Empty when the project has no knowledge, the query is blank, or
            *top_k* is not positive.
        """
        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return []

        query_lower = (query or "").strip().lower()
        # A blank query is a substring of every string and would match all entities.
        if not query_lower or top_k <= 0:
            return []
        query_words = set(query_lower.split())

        results = []
        for entity in knowledge.entities:
            name_lower = (entity.name or "").lower()
            description_lower = (entity.description or "").lower()
            score = 0

            # Check name match
            if query_lower in name_lower:
                score += 3

            # Check description match
            if query_lower in description_lower:
                score += 2

            # Check tag matches (an empty tag would otherwise match any query)
            for tag in entity.tags or []:
                tag_lower = tag.lower()
                if tag_lower and (tag_lower in query_lower or query_lower in tag_lower):
                    score += 1

            # Word overlap scoring
            entity_words = set(name_lower.split() + description_lower.split())
            score += len(query_words & entity_words) * 0.5

            if score > 0:
                results.append({
                    "entity_id": entity.id,
                    "name": entity.name,
                    "type": entity.type,
                    "description": entity.description,
                    "tags": entity.tags,
                    "relevance": score,
                    "source": "project_kg",
                })

        # Sort by relevance and return top_k
        results.sort(key=lambda x: x["relevance"], reverse=True)
        return results[:top_k]

    def get_related_entities(self, project: str, entity_id: str, depth: int = 1) -> List[Dict[str, Any]]:
        """Get entities related to a given entity.

        Relations are followed in both directions, breadth-first, for up to
        *depth* hops. Each relation is reported at most once.

        Args:
            project: Project name.
            entity_id: Id of the starting entity.
            depth: Number of hops to follow. Values below 1 are treated as 1,
                which returns the direct neighbours only.

        Returns:
            A list of dicts with keys ``entity`` (the neighbour as a dict),
            ``relation`` (the relation label) and ``direction``: ``"outgoing"``
            when the edge leaves the node it was reached from, ``"incoming"``
            when it points at that node. Relations whose other end is not a
            known entity are omitted.
        """
        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return []

        entity_map = {e.id: e for e in knowledge.entities}
        related = []
        frontier = {entity_id}   # nodes whose edges are expanded in this hop
        visited = {entity_id}    # nodes already expanded or queued
        reported = set()         # indexes of relations already emitted

        for _ in range(max(depth, 1)):
            next_frontier = set()
            for index, relation in enumerate(knowledge.relations):
                if index in reported:
                    continue
                if relation.source in frontier:
                    neighbor_id, direction = relation.target, "outgoing"
                elif relation.target in frontier:
                    neighbor_id, direction = relation.source, "incoming"
                else:
                    continue

                neighbor = entity_map.get(neighbor_id)
                if not neighbor:
                    continue

                reported.add(index)
                related.append({
                    "entity": asdict(neighbor),
                    "relation": relation.relation,
                    "direction": direction,
                })
                if neighbor_id not in visited:
                    visited.add(neighbor_id)
                    next_frontier.add(neighbor_id)

            if not next_frontier:
                break
            frontier = next_frontier

        return related

    def format_for_prompt(self, project: str, query: str, max_tokens: int = 2000) -> str:
        """Format project knowledge for prompt injection.

        The result holds up to two sections separated by a blank line: the
        first 1000 characters of ``context.md`` and a bullet list of the top 5
        entities matching *query*. Output longer than ``max_tokens * 4``
        characters is cut and suffixed with a truncation marker.

        Returns:
            The formatted text, or ``""`` when the project has no knowledge.
        """
        knowledge = self.load_project_knowledge(project)
        if not knowledge:
            return ""

        sections = []

        # Add context.md summary (prioritize human-written context)
        if knowledge.context_md:
            context_preview = knowledge.context_md[:1000]
            if len(knowledge.context_md) > 1000:
                context_preview += "\n..."
            sections.append(f"## Project Context\n{context_preview}")

        # Add relevant entities based on query
        relevant = self.search_project_knowledge(project, query, top_k=5)
        if relevant:
            bullets = "".join(
                f"- **{item['name']}** ({item['type']}): {(item['description'] or '')[:100]}\n"
                for item in relevant
            )
            sections.append("## Relevant Project Knowledge\n" + bullets)

        result = "\n\n".join(sections)

        # Truncate if needed
        char_budget = max_tokens * 4  # rough char to token ratio
        if len(result) > char_budget:
            result = result[:char_budget] + "\n...(truncated)"

        return result

    def initialize_project_knowledge(self, project: str, overwrite: bool = False) -> bool:
        """Create .knowledge/ directory with template files for a project.

        Writes ``entities.json`` (one overview entity), ``relations.json``
        (empty) and ``context.md`` (rendered from ``CONTEXT_MD_TEMPLATE``).

        Args:
            project: Project name from the config.
            overwrite: Replace the files when the directory already exists.

        Returns:
            ``True`` when the files were written; ``False`` for an unknown
            project, an existing directory without *overwrite*, or a write
            failure (logged as an error).
        """
        kg_path = self.get_knowledge_path(project)
        if not kg_path:
            logger.error("Unknown project: %s", project)
            return False

        if not overwrite and self.has_knowledge(project):
            logger.info("Knowledge already exists for %s. Use overwrite=True to replace.", project)
            return False

        try:
            # Create directory
            kg_path.mkdir(parents=True, exist_ok=True)

            # Get project info
            project_config = self.projects.get(project, {})
            description = project_config.get("description", "")
            focus = project_config.get("focus", "")

            # Create entities.json
            now = datetime.now().isoformat()
            entities_data = {
                "version": "1.0",
                "project": project,
                "entities": [
                    {
                        "id": "project_overview",
                        "name": project,
                        "type": "project",
                        "description": description or f"{project} project",
                        "tags": ["root", "overview"],
                        "metadata": {"focus": focus},
                        "created_at": now,
                        "updated_at": now,
                    }
                ],
            }
            (kg_path / self.ENTITIES_FILE).write_text(
                json.dumps(entities_data, indent=2), encoding="utf-8"
            )

            # Create relations.json
            relations_data = {
                "version": "1.0",
                "project": project,
                "relations": [],
            }
            (kg_path / self.RELATIONS_FILE).write_text(
                json.dumps(relations_data, indent=2), encoding="utf-8"
            )

            # Create context.md
            context_content = CONTEXT_MD_TEMPLATE.format(
                project=project,
                description=description or "Project description here",
                tech_stack="- Add tech stack items",
                directories="- /src - Source code\n- /docs - Documentation",
                tasks="- Build: `npm run build`\n- Test: `npm test`",
                patterns="- Add important patterns and conventions",
            )
            (kg_path / self.CONTEXT_FILE).write_text(context_content, encoding="utf-8")
        except Exception as e:
            # Best-effort boundary: the CLI only needs a success flag.
            logger.error("Failed to initialize knowledge for %s: %s", project, e)
            return False
        finally:
            # Files may have been (partly) rewritten; never serve a stale copy.
            self._cache.pop(project, None)

        logger.info("Initialized .knowledge/ for %s at %s", project, kg_path)
        return True

    def sync_from_claude_md(self, project: str) -> bool:
        """Sync knowledge from existing CLAUDE.md file.

        Copies ``<project root>/CLAUDE.md`` into ``.knowledge/context.md``
        under a short header, replacing any existing ``context.md``. The
        project root is the parent of the path from ``get_knowledge_path``,
        so both always refer to the same directory.

        Returns:
            ``True`` on success; ``False`` for an unknown project, a missing
            CLAUDE.md, or an I/O failure (logged as an error).
        """
        kg_path = self.get_knowledge_path(project)
        if not kg_path:
            return False
        claude_md_path = kg_path.parent / "CLAUDE.md"

        try:
            if not claude_md_path.exists():
                logger.debug("No CLAUDE.md found for %s", project)
                return False

            # Ensure .knowledge/ exists
            kg_path.mkdir(parents=True, exist_ok=True)

            # Read CLAUDE.md
            claude_content = claude_md_path.read_text(encoding="utf-8")

            # Write to context.md (preserving CLAUDE.md content)
            (kg_path / self.CONTEXT_FILE).write_text(
                f"# {project} Project Knowledge\n\n"
                "*Synced from CLAUDE.md*\n\n"
                f"{claude_content}",
                encoding="utf-8",
            )
        except Exception as e:
            logger.error("Failed to sync CLAUDE.md for %s: %s", project, e)
            return False

        # context.md changed on disk; drop the cached copy.
        self._cache.pop(project, None)
        logger.info("Synced CLAUDE.md to .knowledge/context.md for %s", project)
        return True

    def list_projects_with_knowledge(self) -> List[Dict[str, Any]]:
        """List all projects and their knowledge status.

        Returns:
            One dict per configured project with ``project``, ``path``,
            ``description``, ``has_knowledge`` and ``knowledge_path``. When
            the knowledge loads, ``entity_count``, ``relation_count`` and
            ``has_context_md`` are added.
        """
        results = []
        for project, config in self.projects.items():
            has_kg = self.has_knowledge(project)
            kg_path = self.get_knowledge_path(project)

            info = {
                "project": project,
                "path": config.get("path", ""),
                "description": config.get("description", ""),
                "has_knowledge": has_kg,
                "knowledge_path": str(kg_path) if kg_path else None,
            }

            if has_kg:
                knowledge = self.load_project_knowledge(project)
                if knowledge:
                    info["entity_count"] = len(knowledge.entities)
                    info["relation_count"] = len(knowledge.relations)
                    info["has_context_md"] = bool(knowledge.context_md)

            results.append(info)

        return results
|
|
|
|
|
|
# =============================================================================
|
|
# RAG CONTEXT BUILDER (For prompt injection)
|
|
# =============================================================================
|
|
|
|
class ProjectRAGContext:
    """Build RAG-enhanced context for task dispatch."""

    def __init__(self, loader: Optional["ProjectKnowledgeLoader"] = None):
        """Create the builder.

        Args:
            loader: Knowledge loader to query. When omitted, a
                ``ProjectKnowledgeLoader`` with its default config path is
                created, as before.
        """
        self.loader = loader if loader is not None else ProjectKnowledgeLoader()

    def build_context(self, project: str, task: str, include_global: bool = True) -> Dict[str, Any]:
        """
        Build complete RAG context for a project task.

        Args:
            project: Project name.
            task: Task description, used as the search query.
            include_global: Also try the global knowledge graph via
                ``langchain_kg_retriever``. Any failure there, including the
                module being unavailable, is logged at debug level and
                otherwise ignored.

        Returns:
            {
                "project_context": str,           # Formatted project knowledge
                "relevant_entities": List[Dict],  # Relevant knowledge items
                "context_source": str,            # "project_kg", "global_kg", "hybrid", "none"
                "metadata": Dict                  # Additional context info
            }
        """
        result = {
            "project_context": "",
            "relevant_entities": [],
            "context_source": "none",
            "metadata": {},
        }

        # Try to load project-specific knowledge
        project_context = self.loader.format_for_prompt(project, task)
        if project_context:
            relevant = self.loader.search_project_knowledge(project, task)
            result["project_context"] = project_context
            result["relevant_entities"] = relevant
            result["context_source"] = "project_kg"
            result["metadata"]["project"] = project
            result["metadata"]["entities_found"] = len(relevant)

        # Optionally include global knowledge (from /etc/luz-knowledge/)
        if include_global:
            try:
                # Imported here so a missing module only disables this section.
                from langchain_kg_retriever import KnowledgeGraphRetriever

                global_results = KnowledgeGraphRetriever().retrieve(f"{project} {task}", top_k=3)

                if global_results:
                    # Build the full section first so a bad item leaves
                    # `result` untouched.
                    global_text = "\n## Global Knowledge\n" + "".join(
                        f"- {item['name']}: {item.get('content', '')[:100]}\n"
                        for item in global_results
                    )

                    result["project_context"] += global_text
                    result["metadata"]["global_results"] = len(global_results)
                    result["context_source"] = (
                        "global_kg" if result["context_source"] == "none" else "hybrid"
                    )
            except Exception as e:
                logger.debug("Global KG retrieval failed: %s", e)

        return result
|
|
|
|
|
|
# =============================================================================
|
|
# CLI INTERFACE
|
|
# =============================================================================
|
|
|
|
def main():
    """CLI for project knowledge management.

    Reads the command and its arguments from ``sys.argv`` and prints the
    outcome. With no arguments it prints the usage text. A known command that
    lacks its required arguments is reported as such rather than as an
    unknown command.
    """
    import sys

    loader = ProjectKnowledgeLoader()

    if len(sys.argv) < 2:
        print("Usage: project_knowledge_loader.py <command> [args]")
        print("")
        print("Commands:")
        print(" list - List all projects and knowledge status")
        print(" init <project> - Initialize .knowledge/ for a project")
        print(" sync <project> - Sync from CLAUDE.md")
        print(" search <project> <query> - Search project knowledge")
        print(" context <project> <task> - Get RAG context for a task")
        print(" init-all - Initialize knowledge for all projects")
        return

    command = sys.argv[1]
    extra_args = sys.argv[2:]

    if command == "list":
        projects = loader.list_projects_with_knowledge()
        print(f"\n{'Project':<15} {'Has KG':<10} {'Entities':<10} {'Description'}")
        print("-" * 70)
        for p in projects:
            has_kg = "Yes" if p["has_knowledge"] else "No"
            entities = p.get("entity_count", "-")
            # The description may be null in the config.
            description = (p["description"] or "")[:30]
            print(f"{p['project']:<15} {has_kg:<10} {str(entities):<10} {description}")

    elif command == "init" and len(extra_args) >= 1:
        project = extra_args[0]
        if loader.initialize_project_knowledge(project):
            print(f"Initialized .knowledge/ for {project}")
        else:
            print(f"Failed to initialize knowledge for {project}")

    elif command == "sync" and len(extra_args) >= 1:
        project = extra_args[0]
        if loader.sync_from_claude_md(project):
            print(f"Synced CLAUDE.md to .knowledge/ for {project}")
        else:
            print("Failed to sync (no CLAUDE.md or error)")

    elif command == "search" and len(extra_args) >= 2:
        project = extra_args[0]
        query = " ".join(extra_args[1:])
        results = loader.search_project_knowledge(project, query)
        print(f"\nSearch results for '{query}' in {project}:")
        for r in results:
            print(f" - {r['name']} ({r['type']}): {r['description'][:50]}... [score: {r['relevance']:.2f}]")

    elif command == "context" and len(extra_args) >= 2:
        project = extra_args[0]
        task = " ".join(extra_args[1:])
        rag = ProjectRAGContext()
        context = rag.build_context(project, task)
        print(f"\nRAG Context for {project} - '{task}':")
        print(f"Source: {context['context_source']}")
        print(f"Entities found: {len(context['relevant_entities'])}")
        print("\n--- Context ---")
        print(context['project_context'][:2000])

    elif command == "init-all":
        for project in loader.projects:
            if loader.has_knowledge(project):
                print(f"Skipped (exists): {project}")
            elif loader.initialize_project_knowledge(project):
                print(f"Initialized: {project}")
            else:
                # Report the failure instead of claiming success.
                print(f"Failed to initialize: {project}")

    elif command in ("init", "sync", "search", "context"):
        # Known command, but the argument checks above did not match.
        print(f"Missing arguments for command: {command}")
        print("Run without args for help")

    else:
        print(f"Unknown command: {command}")
        print("Run without args for help")


if __name__ == "__main__":
    main()
|