# luzia/lib/semantic_router.py

"""
Semantic Router - Route queries to domain-specific context using keyword detection.
Phase 3 of Luzia modernization: Intelligent domain-aware context selection.
"""

import json
import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

# NOTE: this configures the root logger as a side effect of importing the
# module (INFO level, timestamped format). Per the logging docs, basicConfig
# does nothing if the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
# Module-level logger shared by every class in this file.
logger = logging.getLogger(__name__)


@dataclass
class DomainContext:
    """Static description of one task domain used for routing.

    Instances are created by ``SemanticRouter._initialize_domains`` and are
    handed back to callers through the routing result.
    """
    # Human-readable domain title, e.g. "Backend Development".
    name: str
    # Lowercase terms looked up in the lowercased query to score this domain.
    keywords: List[str]
    # Prompt text returned as "system_instructions" when this domain wins.
    system_instructions: str
    # Short guidance strings returned as "best_practices" when this domain wins.
    best_practices: List[str]
    # Flag passed through unchanged in the routing result; its consumer is not
    # in this file (presumably toggles an extended-reasoning mode - confirm).
    reasoning_enabled: bool


class SemanticRouter:
    """Route task queries to a domain context by keyword scoring.

    Every domain in ``self.domains`` carries a keyword list. ``route`` counts
    how many of a domain's keywords appear in the query (at the start of a
    word) and selects the domain with the highest normalized score.
    """

    def __init__(self):
        """Build the built-in domain table and log how many domains it has."""
        self.domains = self._initialize_domains()
        logger.info(f"✓ Semantic router initialized with {len(self.domains)} domains")

    def _initialize_domains(self) -> Dict[str, DomainContext]:
        """Return the built-in domain templates keyed by short domain id.

        Dict insertion order matters: ``route`` breaks score ties in favour of
        the domain that appears first here.
        """

        return {
            "backend": DomainContext(
                name="Backend Development",
                keywords=["api", "server", "database", "endpoint", "migration", "authentication",
                         "performance", "cache", "queue", "async", "goroutine", "websocket"],
                system_instructions="""You are a backend engineer. Focus on:
- API design and implementation
- Database schema and migrations
- Authentication and authorization
- Performance optimization
- Asynchronous processing
- Error handling and logging
- Documentation and testing""",
                best_practices=[
                    "Start with schema design",
                    "Test database migrations",
                    "Validate all inputs",
                    "Log important operations",
                    "Consider backward compatibility"
                ],
                reasoning_enabled=True
            ),

            "frontend": DomainContext(
                name="Frontend Development",
                keywords=["ui", "component", "state", "react", "vue", "angular", "html", "css",
                         "layout", "animation", "responsive", "accessibility", "form"],
                system_instructions="""You are a frontend engineer. Focus on:
- Component design and reusability
- State management
- Performance and rendering
- Accessibility (a11y)
- Responsive design
- User experience
- Testing and documentation""",
                best_practices=[
                    "Think components-first",
                    "Manage state cleanly",
                    "Test user interactions",
                    "Consider performance",
                    "Ensure accessibility"
                ],
                reasoning_enabled=True
            ),

            "devops": DomainContext(
                name="DevOps & Infrastructure",
                keywords=["docker", "kubernetes", "deployment", "ci/cd", "terraform", "aws", "gcp",
                         "monitoring", "logging", "infrastructure", "service", "container"],
                system_instructions="""You are a DevOps engineer. Focus on:
- Infrastructure as code
- Containerization and orchestration
- CI/CD pipeline design
- Monitoring and alerting
- Security and compliance
- Disaster recovery
- Cost optimization""",
                best_practices=[
                    "Use IaC for everything",
                    "Automate deployments",
                    "Monitor all metrics",
                    "Plan for failures",
                    "Document procedures"
                ],
                reasoning_enabled=False  # Usually procedural
            ),

            "research": DomainContext(
                name="Research & Analysis",
                keywords=["research", "analyze", "investigate", "find", "study", "explore", "learn",
                         "understand", "architecture", "design", "pattern"],
                system_instructions="""You are a research analyst. Focus on:
- Deep investigation
- Architecture understanding
- Design pattern analysis
- Literature research
- Knowledge synthesis
- Alternative approaches
- Risk assessment""",
                best_practices=[
                    "Start with questions",
                    "Gather multiple sources",
                    "Cross-reference findings",
                    "Consider tradeoffs",
                    "Document assumptions"
                ],
                reasoning_enabled=True
            ),

            "security": DomainContext(
                name="Security & Compliance",
                keywords=["security", "vulnerability", "auth", "encryption", "permission", "access",
                         "compliance", "audit", "breach", "token", "hash", "ssl", "https"],
                system_instructions="""You are a security engineer. Focus on:
- Threat modeling
- Vulnerability assessment
- Authentication/authorization
- Encryption and hashing
- Compliance requirements
- Security testing
- Incident response""",
                best_practices=[
                    "Assume worst-case",
                    "Defense in depth",
                    "Audit everything",
                    "Test thoroughly",
                    "Keep secrets secret"
                ],
                reasoning_enabled=True
            ),

            "system": DomainContext(
                name="System Administration",
                keywords=["admin", "system", "user", "permission", "group", "file", "process",
                         "service", "config", "log", "troubleshoot", "diagnose"],
                system_instructions="""You are a system administrator. Focus on:
- User and permission management
- System configuration
- Service management
- Log analysis
- Performance tuning
- Troubleshooting
- Maintenance procedures""",
                best_practices=[
                    "Document configurations",
                    "Test before deploying",
                    "Monitor systematically",
                    "Plan for growth",
                    "Prepare for emergencies"
                ],
                reasoning_enabled=False
            )
        }

    @staticmethod
    def _count_keyword_matches(keywords: List[str], text: str) -> int:
        """Count how many distinct keywords occur in ``text`` at a word start.

        A keyword must begin where a word begins (no letter, digit or
        underscore directly before it) but may be followed by anything, so
        "permission" still matches "permissions" and "auth" still matches
        "authentication", while "ui" no longer matches inside "build" and
        "form" no longer matches inside "performance".

        Args:
            keywords: Candidate keywords; empty entries are ignored.
            text: Already-lowercased text to search.

        Returns:
            Number of keywords found (each keyword counts at most once).
        """
        return sum(
            1
            for keyword in keywords
            # re.escape keeps keywords such as "ci/cd" literal.
            if keyword and re.search(r"(?<!\w)" + re.escape(keyword.lower()), text)
        )

    def route(self, task_query: str) -> Dict[str, Any]:
        """Score every domain against the query and return the best match.

        Scoring: ``confidence = matched_keywords / total_keywords * 0.5``, so
        values range from 0.0 to 0.5 (the ``min(1.0, ...)`` is only a safety
        clamp). When scores tie, including the case where nothing matches and
        every score is 0.0, the domain listed first in ``self.domains`` wins.

        Args:
            task_query: Free-text task description. ``None`` or an empty
                string is treated as a query with no keyword matches.

        Returns:
            Dict with keys ``primary_domain`` (domain id), ``confidence``,
            ``all_scores`` (domain id -> confidence), ``system_instructions``,
            ``best_practices``, ``reasoning_enabled`` and ``context_object``
            (the winning domain's context).

        Raises:
            ValueError: If ``self.domains`` is empty.
        """

        # Matching is case-insensitive; tolerate a missing query instead of
        # failing on ``None.lower()``.
        query_lower = (task_query or "").lower()

        # Score each domain
        domain_scores = {}
        for domain_name, domain_context in self.domains.items():
            matches = self._count_keyword_matches(domain_context.keywords, query_lower)
            # max(1, ...) guards against a domain with an empty keyword list.
            confidence = min(1.0, matches / max(1, len(domain_context.keywords)) * 0.5)

            domain_scores[domain_name] = {
                "confidence": confidence,
                "matches": matches,
                "context": domain_context
            }

        # max() returns the first maximal item, which gives the documented
        # insertion-order tie-break.
        best_name, best_entry = max(
            domain_scores.items(), key=lambda item: item[1]["confidence"]
        )
        best_context = best_entry["context"]

        return {
            "primary_domain": best_name,
            "confidence": best_entry["confidence"],
            "all_scores": {k: v["confidence"] for k, v in domain_scores.items()},
            "system_instructions": best_context.system_instructions,
            "best_practices": best_context.best_practices,
            "reasoning_enabled": best_context.reasoning_enabled,
            "context_object": best_context
        }

    def get_domain_context(self, domain_name: str) -> Optional[DomainContext]:
        """Return the context registered under ``domain_name``, or ``None``."""
        return self.domains.get(domain_name)


class ContextAssembler:
    """Build the four-bucket context dict for a task, guided by the router's domain pick."""

    def __init__(self, router: SemanticRouter, kg_retriever: Any):
        """Keep references to the router and the (optional) knowledge-graph retriever.

        A falsy ``kg_retriever`` disables knowledge-graph lookup in
        ``assemble_context``.
        """
        self.router = router
        self.kg_retriever = kg_retriever
        logger.info("✓ Context assembler initialized")

    def assemble_context(self, task_query: str, max_tokens: int = 2000) -> Dict[str, Any]:
        """
        Produce the context for ``task_query`` as four named buckets:

        1. ``bucket_1_identity``     - fixed identity statement
        2. ``bucket_2_grounding``    - fixed placeholder for project constraints
        3. ``bucket_3_intelligence`` - KG results plus the routed domain's practices
        4. ``bucket_4_task``         - the query, detected domain, confidence and
                                       the domain's system instructions

        ``max_tokens`` is accepted for interface compatibility but is not
        consulted anywhere in this method.
        """

        # Domain detection runs first; the retriever is queried afterwards.
        route_info = self.router.route(task_query)
        domain = route_info["primary_domain"]

        # Up to five KG entries when a retriever is available, otherwise none.
        retriever = self.kg_retriever
        kg_hits = retriever.retrieve(task_query, top_k=5) if retriever else []

        identity_bucket = {
            "type": "identity",
            "source": "global",
            "role": "system_identity",
            "content": "You are Claude, Anthropic's AI assistant. You specialize in software engineering.",
        }

        grounding_bucket = {
            "type": "grounding",
            "source": "project",
            "role": "project_constraints",
            "content": "Current project context and constraints will be injected here at dispatch time.",
        }

        intelligence_bucket = {
            "type": "intelligence",
            "source": "dynamic_retrieval",
            "domain": domain,
            "kg_results": kg_hits,
            "domain_practices": route_info["best_practices"],
            "reasoning_enabled": route_info["reasoning_enabled"],
        }

        task_bucket = {
            "type": "task",
            "source": "user",
            "original_query": task_query,
            "detected_domain": domain,
            "domain_confidence": route_info["confidence"],
            "system_instructions": route_info["system_instructions"],
        }

        return {
            "bucket_1_identity": identity_bucket,
            "bucket_2_grounding": grounding_bucket,
            "bucket_3_intelligence": intelligence_bucket,
            "bucket_4_task": task_bucket,
        }


# Testing
if __name__ == "__main__":
    # Manual smoke run: route a handful of sample queries and log the outcome.
    banner = "=" * 60

    logger.info(banner)
    logger.info("PHASE 3: Semantic Router")
    logger.info(banner)

    demo_router = SemanticRouter()

    # One sample query per built-in domain.
    sample_queries = (
        "Build a REST API for user authentication",
        "Fix React component performance issue",
        "Deploy Kubernetes cluster with monitoring",
        "Research architecture patterns for microservices",
        "Audit security of password storage",
        "Configure Linux user permissions",
    )

    for sample in sample_queries:
        logger.info(f"\nQuery: '{sample}'")
        routed = demo_router.route(sample)
        logger.info(f"  Domain: {routed['primary_domain']} (confidence: {routed['confidence']:.2f})")
        logger.info(f"  Reasoning: {routed['reasoning_enabled']}")

    logger.info("\n" + banner)
    logger.info("✅ PHASE 3 COMPLETE: Semantic router ready")
    logger.info(banner)