""" Semantic Router - Route queries to domain-specific context using keyword detection. Phase 3 of Luzia modernization: Intelligent domain-aware context selection. """ import json import logging from typing import List, Dict, Any, Optional from dataclasses import dataclass logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') logger = logging.getLogger(__name__) @dataclass class DomainContext: """Context specific to a task domain.""" name: str keywords: List[str] system_instructions: str best_practices: List[str] reasoning_enabled: bool class SemanticRouter: """Route tasks to appropriate domain contexts.""" def __init__(self): self.domains = self._initialize_domains() logger.info(f"✓ Semantic router initialized with {len(self.domains)} domains") def _initialize_domains(self) -> Dict[str, DomainContext]: """Initialize domain-specific context templates.""" return { "backend": DomainContext( name="Backend Development", keywords=["api", "server", "database", "endpoint", "migration", "authentication", "performance", "cache", "queue", "async", "goroutine", "websocket"], system_instructions="""You are a backend engineer. Focus on: - API design and implementation - Database schema and migrations - Authentication and authorization - Performance optimization - Asynchronous processing - Error handling and logging - Documentation and testing""", best_practices=[ "Start with schema design", "Test database migrations", "Validate all inputs", "Log important operations", "Consider backward compatibility" ], reasoning_enabled=True ), "frontend": DomainContext( name="Frontend Development", keywords=["ui", "component", "state", "react", "vue", "angular", "html", "css", "layout", "animation", "responsive", "accessibility", "form"], system_instructions="""You are a frontend engineer. Focus on: - Component design and reusability - State management - Performance and rendering - Accessibility (a11y) - Responsive design - User experience - Testing and documentation""", best_practices=[ "Think components-first", "Manage state cleanly", "Test user interactions", "Consider performance", "Ensure accessibility" ], reasoning_enabled=True ), "devops": DomainContext( name="DevOps & Infrastructure", keywords=["docker", "kubernetes", "deployment", "ci/cd", "terraform", "aws", "gcp", "monitoring", "logging", "infrastructure", "service", "container"], system_instructions="""You are a DevOps engineer. Focus on: - Infrastructure as code - Containerization and orchestration - CI/CD pipeline design - Monitoring and alerting - Security and compliance - Disaster recovery - Cost optimization""", best_practices=[ "Use IaC for everything", "Automate deployments", "Monitor all metrics", "Plan for failures", "Document procedures" ], reasoning_enabled=False # Usually procedural ), "research": DomainContext( name="Research & Analysis", keywords=["research", "analyze", "investigate", "find", "study", "explore", "learn", "understand", "architecture", "design", "pattern"], system_instructions="""You are a research analyst. Focus on: - Deep investigation - Architecture understanding - Design pattern analysis - Literature research - Knowledge synthesis - Alternative approaches - Risk assessment""", best_practices=[ "Start with questions", "Gather multiple sources", "Cross-reference findings", "Consider tradeoffs", "Document assumptions" ], reasoning_enabled=True ), "security": DomainContext( name="Security & Compliance", keywords=["security", "vulnerability", "auth", "encryption", "permission", "access", "compliance", "audit", "breach", "token", "hash", "ssl", "https"], system_instructions="""You are a security engineer. Focus on: - Threat modeling - Vulnerability assessment - Authentication/authorization - Encryption and hashing - Compliance requirements - Security testing - Incident response""", best_practices=[ "Assume worst-case", "Defense in depth", "Audit everything", "Test thoroughly", "Keep secrets secret" ], reasoning_enabled=True ), "system": DomainContext( name="System Administration", keywords=["admin", "system", "user", "permission", "group", "file", "process", "service", "config", "log", "troubleshoot", "diagnose"], system_instructions="""You are a system administrator. Focus on: - User and permission management - System configuration - Service management - Log analysis - Performance tuning - Troubleshooting - Maintenance procedures""", best_practices=[ "Document configurations", "Test before deploying", "Monitor systematically", "Plan for growth", "Prepare for emergencies" ], reasoning_enabled=False ) } def route(self, task_query: str) -> Dict[str, Any]: """ Analyze task query and route to appropriate domain(s). Returns domain name, confidence, and context. """ query_lower = task_query.lower() # Score each domain domain_scores = {} for domain_name, domain_context in self.domains.items(): # Count keyword matches matches = sum(1 for keyword in domain_context.keywords if keyword in query_lower) confidence = min(1.0, matches / max(1, len(domain_context.keywords)) * 0.5) domain_scores[domain_name] = { "confidence": confidence, "matches": matches, "context": domain_context } # Find best match best_domain = max(domain_scores.items(), key=lambda x: x[1]["confidence"]) return { "primary_domain": best_domain[0], "confidence": best_domain[1]["confidence"], "all_scores": {k: v["confidence"] for k, v in domain_scores.items()}, "system_instructions": best_domain[1]["context"].system_instructions, "best_practices": best_domain[1]["context"].best_practices, "reasoning_enabled": best_domain[1]["context"].reasoning_enabled, "context_object": best_domain[1]["context"] } def get_domain_context(self, domain_name: str) -> Optional[DomainContext]: """Get context for specific domain.""" return self.domains.get(domain_name) class ContextAssembler: """Assemble 4-bucket context with dynamic domain-aware selection.""" def __init__(self, router: SemanticRouter, kg_retriever: Any): self.router = router self.kg_retriever = kg_retriever logger.info("✓ Context assembler initialized") def assemble_context(self, task_query: str, max_tokens: int = 2000) -> Dict[str, Any]: """ Assemble 4-bucket context: 1. Identity (static - global CLAUDE.md + skills) 2. Grounding (static - project-specific) 3. Intelligence (dynamic - KG retrieval + domain context) 4. Task (dynamic - original query + auto-detected domain context) """ # Step 1: Route to domain routing = self.router.route(task_query) primary_domain = routing["primary_domain"] # Step 2: Retrieve relevant KG entries if self.kg_retriever: kg_context = self.kg_retriever.retrieve(task_query, top_k=5) else: kg_context = [] # Step 3: Assemble buckets context = { "bucket_1_identity": { "type": "identity", "source": "global", "role": "system_identity", "content": "You are Claude, Anthropic's AI assistant. You specialize in software engineering." }, "bucket_2_grounding": { "type": "grounding", "source": "project", "role": "project_constraints", "content": "Current project context and constraints will be injected here at dispatch time." }, "bucket_3_intelligence": { "type": "intelligence", "source": "dynamic_retrieval", "domain": primary_domain, "kg_results": kg_context, "domain_practices": routing["best_practices"], "reasoning_enabled": routing["reasoning_enabled"] }, "bucket_4_task": { "type": "task", "source": "user", "original_query": task_query, "detected_domain": primary_domain, "domain_confidence": routing["confidence"], "system_instructions": routing["system_instructions"] } } return context # Testing if __name__ == "__main__": logger.info("=" * 60) logger.info("PHASE 3: Semantic Router") logger.info("=" * 60) router = SemanticRouter() # Test queries test_queries = [ "Build a REST API for user authentication", "Fix React component performance issue", "Deploy Kubernetes cluster with monitoring", "Research architecture patterns for microservices", "Audit security of password storage", "Configure Linux user permissions" ] for query in test_queries: logger.info(f"\nQuery: '{query}'") result = router.route(query) logger.info(f" Domain: {result['primary_domain']} (confidence: {result['confidence']:.2f})") logger.info(f" Reasoning: {result['reasoning_enabled']}") logger.info("\n" + "=" * 60) logger.info("✅ PHASE 3 COMPLETE: Semantic router ready") logger.info("=" * 60)