#!/usr/bin/env python3
"""
Web Search Integrator - Context enhancement via web search

Features:
1. Detect when web search would be helpful
2. Query Stack Overflow for solutions
3. Fetch and summarize reference docs
4. Track learned solutions
5. Integrate references into prompts
"""

import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
from datetime import datetime
from dataclasses import dataclass, asdict


@dataclass
class WebReference:
    """A reference found via web search"""
    title: str
    url: str
    source: str  # stackoverflow, docs, blog, etc.
    snippet: str
    relevance: float  # 0-1 score
    topic: str
    found_at: str


@dataclass
class LearningResult:
    """A solution learned from web search"""
    problem: str
    solution: str
    references: List[str]
    tags: List[str]
    learned_at: str
    confidence: float  # How confident we are in this solution


class WebSearchIntegrator:
    """Integrates web search for context enhancement"""

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize the web search integrator

        Args:
            cache_dir: Optional directory for caching search results
        """
        self.cache_dir = cache_dir or Path("/tmp/.luzia-web-cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.learning_db: List[LearningResult] = []
        self.search_history: List[Dict[str, Any]] = []
        self.load_learning_db()

    def load_learning_db(self) -> None:
        """Load learned solutions from cache"""
        db_file = self.cache_dir / "learning.json"
        if db_file.exists():
            try:
                data = json.loads(db_file.read_text())
                self.learning_db = [
                    LearningResult(**item) for item in data.get("learned", [])
                ]
            except Exception as e:
                print(f"[Warning] Failed to load learning DB: {e}")

    def save_learning_db(self) -> None:
        """Save learned solutions to cache"""
        db_file = self.cache_dir / "learning.json"
        db_file.write_text(json.dumps({
            "learned": [asdict(item) for item in self.learning_db],
            "timestamp": datetime.now().isoformat()
        }, indent=2))

    def should_search(self, task: str, error: Optional[str] = None) -> Tuple[bool, str]:
        """Determine if web search would be helpful

        Args:
            task: Task description
            error: Optional error message

        Returns:
            Tuple of (should_search, search_query)
        """
        search_triggers = [
            # Error investigation
            (r"error|exception|failed|problem", "error_investigation"),
            # How-to tasks
            (r"how\s+to|guide|tutorial|learn", "how_to"),
            # Library/tool questions
            (r"npm|pip|cargo|ruby", "package_mgmt"),
            # Framework questions
            (r"react|vue|angular|django|flask", "framework"),
            # Integration/setup
            (r"integrate|setup|configure|install", "setup"),
            # Best practices
            (r"best practice|pattern|architecture", "architecture"),
        ]

        combined = f"{task} {error or ''}".lower()

        for pattern, category in search_triggers:
            if re.search(pattern, combined):
                # Extract a search query
                if "error" in combined:
                    # For errors, strip any leading "SomethingError:" prefix
                    # and search on the message itself
                    search_query = re.sub(
                        r".*error.*?:\s*", "", error or task, flags=re.IGNORECASE
                    )[:80]
                else:
                    search_query = task[:100]
                return True, search_query

        return False, ""
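
    # Illustrative behavior of should_search() (example inputs are
    # hypothetical; outcomes follow from the trigger patterns above):
    #
    #   should_search("How to configure nginx as a reverse proxy")
    #   -> (True, "How to configure nginx as a reverse proxy")
    #
    #   should_search("Rename a local variable")
    #   -> (False, "")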

    def find_stackoverflow_answer(self, query: str) -> Optional[WebReference]:
        """Find a Stack Overflow answer for a query

        This is a reference implementation; in production it would use the
        Stack Overflow API or a web search service.

        Args:
            query: Search query

        Returns:
            Best matching reference, or None
        """
        # A real implementation would call a search API here; for now,
        # return a placeholder structure that documents the expected shape.
        return WebReference(
            title="Relevant Stack Overflow Answer",
            url="https://stackoverflow.com/search?q=...",
            source="stackoverflow",
            snippet="[Search result snippet would appear here]",
            relevance=0.8,
            topic=query,
            found_at=datetime.now().isoformat()
        )

    def fetch_documentation(self, library: str, topic: str) -> Optional[WebReference]:
        """Fetch documentation for a library/topic

        Args:
            library: Library name (npm package, python module, etc.)
            topic: Specific topic within the library

        Returns:
            Reference to the documentation, or None
        """
        # Common documentation URLs
        doc_patterns = {
            "react": "https://react.dev/reference/",
            "nodejs": "https://nodejs.org/api/",
            "python": "https://docs.python.org/3/",
            "typescript": "https://www.typescriptlang.org/docs/",
            "rust": "https://doc.rust-lang.org/",
            "django": "https://docs.djangoproject.com/",
            "flask": "https://flask.palletsprojects.com/",
        }

        base_url = doc_patterns.get(library.lower())
        if not base_url:
            return None

        return WebReference(
            title=f"{library} Documentation - {topic}",
            url=f"{base_url}{topic}/",
            source="official_docs",
            snippet=f"Official documentation for {library} {topic}",
            relevance=0.95,
            topic=topic,
            found_at=datetime.now().isoformat()
        )

    def detect_tech_stack(self, task: str) -> List[str]:
        """Detect the technology stack from a task description

        Args:
            task: Task description

        Returns:
            List of detected technologies
        """
        # Loose keyword heuristics; short tokens such as "py", "ws", or "api"
        # can over-match in longer descriptions.
        tech_patterns = {
            "React": r"react|jsx",
            "TypeScript": r"typescript|\.ts",
            "Node.js": r"node|npm|javascript",
            "Python": r"python|pip|py",
            "Rust": r"rust|cargo",
            "Docker": r"docker|container",
            "PostgreSQL": r"postgres|sql",
            "MongoDB": r"mongo|mongodb",
            "Redis": r"redis",
            "Kubernetes": r"k8s|kubernetes",
            "GraphQL": r"graphql|apollo",
            "REST": r"rest|api",
            "WebSocket": r"websocket|ws",
        }

        detected = []
        task_lower = task.lower()

        for tech, pattern in tech_patterns.items():
            if re.search(pattern, task_lower):
                detected.append(tech)

        return detected

    def generate_context_section(self, references: List[WebReference]) -> str:
        """Generate a context section with web references

        Args:
            references: List of web references

        Returns:
            Markdown section to add to the prompt
        """
        if not references:
            return ""

        sections = ["# Web References and Context\n"]

        for ref in references:
            sections.append(f"\n## {ref.title}")
            sections.append(f"**Source:** {ref.source}")
            sections.append(f"**URL:** {ref.url}")
            sections.append(f"**Relevance:** {ref.relevance:.1%}")
            sections.append(f"\n{ref.snippet}\n")

        return "\n".join(sections)

    def learn_solution(self, problem: str, solution: str,
                       references: List[str], tags: List[str],
                       confidence: float = 0.8) -> None:
        """Record a learned solution for future reference

        Args:
            problem: Problem description
            solution: Solution description
            references: List of reference URLs
            tags: Topic tags
            confidence: Confidence in this solution (0-1)
        """
        learning = LearningResult(
            problem=problem,
            solution=solution,
            references=references,
            tags=tags,
            learned_at=datetime.now().isoformat(),
            confidence=confidence
        )
        self.learning_db.append(learning)
        self.save_learning_db()

    def search_learned_solutions(self, query: str) -> List[LearningResult]:
        """Search previously learned solutions

        Args:
            query: Search query

        Returns:
            List of matching learned solutions
        """
        matches = []
        query_lower = query.lower()

        for result in self.learning_db:
            # Search in problem, solution, and tags
            if (query_lower in result.problem.lower()
                    or query_lower in result.solution.lower()
                    or any(query_lower in tag.lower() for tag in result.tags)):
                matches.append(result)

        # Sort by confidence and recency
        matches.sort(
            key=lambda r: (r.confidence, datetime.fromisoformat(r.learned_at)),
            reverse=True
        )

        return matches
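
    # Illustrative round trip for the learning store (all values are
    # hypothetical):
    #
    #   integrator.learn_solution(
    #       problem="CORS errors when calling the API from the browser",
    #       solution="Enable CORS middleware and whitelist the frontend origin",
    #       references=["https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS"],
    #       tags=["cors", "http"],
    #   )
    #   integrator.search_learned_solutions("cors")  # -> [LearningResult(...)]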

    def get_reference_for_technology(self, tech: str) -> Optional[WebReference]:
        """Get reference documentation for a technology

        Args:
            tech: Technology name

        Returns:
            Reference to documentation, or None
        """
        refs = {
            "React": self.fetch_documentation("react", "introduction"),
            "TypeScript": self.fetch_documentation("typescript", "handbook"),
            "Node.js": self.fetch_documentation("nodejs", "api"),
            "Python": self.fetch_documentation("python", "tutorial"),
            "Docker": WebReference(
                title="Docker Documentation",
                url="https://docs.docker.com/",
                source="official_docs",
                snippet="Official Docker documentation",
                relevance=1.0,
                topic="Docker",
                found_at=datetime.now().isoformat()
            ),
        }
        return refs.get(tech)

    def generate_research_prompt(self, task: str, tech_stack: List[str],
                                 error: Optional[str] = None) -> str:
        """Generate a prompt for web research

        Args:
            task: Task description
            tech_stack: List of technologies involved
            error: Optional error message

        Returns:
            Research prompt
        """
        sections = [
            "# Research Task\n",
            f"**Task:** {task}\n",
        ]

        if error:
            sections.append(f"**Error:** {error}\n")

        if tech_stack:
            sections.append(f"**Technologies:** {', '.join(tech_stack)}\n")

        # Previously learned solutions
        learned = self.search_learned_solutions(task)
        if learned:
            sections.append("\n## Previously Learned Solutions\n")
            for i, result in enumerate(learned[:3], 1):
                sections.append(f"{i}. **{result.problem}**")
                sections.append(f"   - Solution: {result.solution}")
                sections.append(f"   - Tags: {', '.join(result.tags)}")
                sections.append(f"   - Confidence: {result.confidence:.0%}\n")

        sections.append("\n## Research Approach\n")
        sections.append("1. Check previously learned solutions")
        sections.append("2. Search Stack Overflow for similar issues")
        sections.append("3. Check official documentation")
        sections.append("4. Look for blog posts or tutorials")
        sections.append("5. Synthesize findings into a solution")

        return "\n".join(sections)

    def export_learning_data(self, output_path: Path) -> None:
        """Export the learning database for analysis

        Args:
            output_path: Path to write the export to
        """
        export_data = {
            "total_learned": len(self.learning_db),
            "by_topic": {},
            "average_confidence": 0,
            "solutions": [asdict(item) for item in self.learning_db]
        }

        # Calculate statistics
        if self.learning_db:
            export_data["average_confidence"] = (
                sum(r.confidence for r in self.learning_db) / len(self.learning_db)
            )

        # Group by tags
        by_topic = {}
        for result in self.learning_db:
            for tag in result.tags:
                if tag not in by_topic:
                    by_topic[tag] = 0
                by_topic[tag] += 1
        export_data["by_topic"] = by_topic

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(export_data, indent=2))

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics about web search usage

        Returns:
            Statistics dict
        """
        if not self.learning_db:
            return {
                "total_learned": 0,
                "average_confidence": 0,
                "searches_performed": len(self.search_history)
            }

        avg_confidence = sum(r.confidence for r in self.learning_db) / len(self.learning_db)

        return {
            "total_learned": len(self.learning_db),
            "average_confidence": avg_confidence,
            "searches_performed": len(self.search_history),
            "topics": list(set(
                tag for result in self.learning_db
                for tag in result.tags
            ))
        }
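

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only). The task text below is
    # hypothetical, the default /tmp cache directory is used, and
    # find_stackoverflow_answer() returns placeholder data until a real
    # search backend is wired in.
    integrator = WebSearchIntegrator()

    task = "How to configure Docker networking for a Flask app"
    needed, query = integrator.should_search(task)
    print(f"Search needed: {needed} (query: {query!r})")

    if needed:
        stack = integrator.detect_tech_stack(task)
        print(f"Detected stack: {stack}")

        refs = [
            ref for ref in (integrator.get_reference_for_technology(t) for t in stack)
            if ref
        ]
        print(integrator.generate_research_prompt(task, stack))
        print(integrator.generate_context_section(refs))

    print(json.dumps(integrator.get_stats(), indent=2))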