Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> [NOTE(review): this summary describes tmux-controller work and does not match the module below, which implements WebSearchIntegrator — verify the commit metadata.]
403 lines · 13 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Web Search Integrator - Context enhancement via web search
|
|
|
|
Features:
|
|
1. Detect when web search would be helpful
|
|
2. Query Stack Overflow for solutions
|
|
3. Fetch and summarize reference docs
|
|
4. Track learned solutions
|
|
5. Integrate references into prompts
|
|
"""
|
|
|
|
import json
import re
from collections import Counter
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
|
|
|
|
@dataclass
class WebReference:
    """A reference found via web search"""
    title: str  # human-readable title of the result
    url: str  # link to the resource
    source: str  # stackoverflow, docs, blog, etc
    snippet: str  # short excerpt, rendered into prompt context sections
    relevance: float  # 0-1 score
    topic: str  # query or subject this reference answers
    found_at: str  # ISO-8601 timestamp of when the reference was found
|
|
@dataclass
class LearningResult:
    """A solution learned from web search"""
    problem: str  # problem description (searched by substring match)
    solution: str  # solution description (searched by substring match)
    references: List[str]  # URLs supporting the solution
    tags: List[str]  # topic tags, used for grouping and search
    learned_at: str  # ISO-8601 timestamp of when it was recorded
    confidence: float  # How confident in this solution
|
|
|
class WebSearchIntegrator:
    """Integrates web search for context enhancement.

    Maintains an on-disk database of learned solutions (``learning.json``
    under *cache_dir*) and provides helpers to decide when a search is
    worthwhile, locate references, and fold prior learnings into prompts.
    """

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize web search integrator

        Args:
            cache_dir: Optional directory for caching search results
                (defaults to /tmp/.luzia-web-cache; created if missing)
        """
        self.cache_dir = cache_dir or Path("/tmp/.luzia-web-cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Annotations are quoted forward references — behavior-neutral, and
        # keeps the class importable independently of the dataclasses.
        self.learning_db: List["LearningResult"] = []
        # NOTE(review): search_history is reported by get_stats() but is
        # never appended to anywhere in this module — confirm where it is
        # meant to be populated.
        self.search_history: List[Dict[str, Any]] = []
        self.load_learning_db()

    def load_learning_db(self) -> None:
        """Load learned solutions from cache (best effort).

        A missing or corrupt cache is not fatal: the DB simply stays empty.
        """
        db_file = self.cache_dir / "learning.json"
        if not db_file.exists():
            return
        try:
            data = json.loads(db_file.read_text())
            self.learning_db = [
                LearningResult(**item) for item in data.get("learned", [])
            ]
        except (OSError, ValueError, TypeError) as e:
            # Narrowed from broad Exception: I/O failure, malformed JSON
            # (JSONDecodeError subclasses ValueError), or schema drift
            # (TypeError from unexpected dataclass kwargs).
            print(f"[Warning] Failed to load learning DB: {e}")

    def save_learning_db(self) -> None:
        """Persist learned solutions to the cache as indented JSON."""
        db_file = self.cache_dir / "learning.json"
        db_file.write_text(json.dumps({
            "learned": [asdict(item) for item in self.learning_db],
            "timestamp": datetime.now().isoformat()
        }, indent=2))

    def should_search(self, task: str, error: Optional[str] = None) -> Tuple[bool, str]:
        """Determine if web search would be helpful

        Args:
            task: Task description
            error: Optional error message

        Returns:
            Tuple of (should_search, search_query); the query is empty
            when no trigger pattern matched.
        """
        # (pattern, category) pairs; the category is currently informational
        # only, but kept so callers/analytics can use it later.
        search_triggers = [
            # Error investigation
            (r"error|exception|failed|problem", "error_investigation"),
            # How-to tasks
            (r"how\s+to|guide|tutorial|learn", "how_to"),
            # Library/tool questions
            (r"npm|pip|cargo|ruby", "package_mgmt"),
            # Framework questions
            (r"react|vue|angular|django|flask", "framework"),
            # Integration/setup
            (r"integrate|setup|configure|install", "setup"),
            # Best practices
            (r"best practice|pattern|architecture", "architecture"),
        ]

        combined = f"{task} {error or ''}".lower()

        for pattern, _category in search_triggers:
            if not re.search(pattern, combined):
                continue
            if "error" in combined:
                # Strip a leading "... error ...:" prefix to isolate the
                # message itself; cap the length for a usable query.
                search_query = re.sub(r".*error.*?:\s*", "", error or task)[:80]
            else:
                search_query = task[:100]
            return True, search_query

        return False, ""

    def find_stackoverflow_answer(self, query: str) -> Optional["WebReference"]:
        """Find Stack Overflow answer for query

        This is a reference implementation. In production, would use
        Stack Overflow API or web search.

        Args:
            query: Search query

        Returns:
            Best matching reference, or None
        """
        # In actual implementation, would call web search API
        # For now, return structure for documentation
        return WebReference(
            title="Relevant Stack Overflow Answer",
            url="https://stackoverflow.com/search?q=...",
            source="stackoverflow",
            snippet="[Search result snippet would appear here]",
            relevance=0.8,
            topic=query,
            found_at=datetime.now().isoformat()
        )

    def fetch_documentation(self, library: str, topic: str) -> Optional["WebReference"]:
        """Fetch documentation for a library/topic

        Args:
            library: Library name (npm package, python module, etc)
            topic: Specific topic within library

        Returns:
            Reference to documentation, or None for unknown libraries
        """
        # Common documentation URLs (lookup is case-insensitive on library)
        doc_patterns = {
            "react": "https://react.dev/reference/",
            "nodejs": "https://nodejs.org/api/",
            "python": "https://docs.python.org/3/",
            "typescript": "https://www.typescriptlang.org/docs/",
            "rust": "https://doc.rust-lang.org/",
            "django": "https://docs.djangoproject.com/",
            "flask": "https://flask.palletsprojects.com/",
        }

        base_url = doc_patterns.get(library.lower())
        if not base_url:
            return None

        return WebReference(
            title=f"{library} Documentation - {topic}",
            url=f"{base_url}{topic}/",
            source="official_docs",
            snippet=f"Official documentation for {library} {topic}",
            relevance=0.95,
            topic=topic,
            found_at=datetime.now().isoformat()
        )

    def detect_tech_stack(self, task: str) -> List[str]:
        """Detect technology stack from task description

        Args:
            task: Task description

        Returns:
            List of detected technologies, in pattern-table order
        """
        # Short/ambiguous tokens are word-boundary anchored to avoid the
        # false positives the old substring patterns produced, e.g. "py"
        # matching "copy", "sql" matching "graphql", "rest" matching
        # "restore", "rust" matching "trust".
        tech_patterns = {
            "React": r"react|jsx",
            "TypeScript": r"typescript|\.ts",
            "Node.js": r"\bnode\b|\bnpm\b|javascript",
            "Python": r"python|\bpip\b|\bpy\b",
            "Rust": r"\brust\b|cargo",
            "Docker": r"docker|container",
            "PostgreSQL": r"postgres|\bsql\b",
            "MongoDB": r"mongo|mongodb",
            "Redis": r"redis",
            "Kubernetes": r"\bk8s\b|kubernetes",
            "GraphQL": r"graphql|apollo",
            "REST": r"\brest\b|\bapi\b",
            "WebSocket": r"websocket|\bws\b",
        }

        task_lower = task.lower()
        return [
            tech for tech, pattern in tech_patterns.items()
            if re.search(pattern, task_lower)
        ]

    def generate_context_section(self, references: List["WebReference"]) -> str:
        """Generate a context section with web references

        Args:
            references: List of web references

        Returns:
            Markdown section to add to prompt (empty string if no refs)
        """
        if not references:
            return ""

        sections = ["# Web References and Context\n"]

        for ref in references:
            sections.append(f"\n## {ref.title}")
            sections.append(f"**Source:** {ref.source}")
            sections.append(f"**URL:** {ref.url}")
            sections.append(f"**Relevance:** {ref.relevance:.1%}")
            sections.append(f"\n{ref.snippet}\n")

        return "\n".join(sections)

    def learn_solution(self, problem: str, solution: str,
                       references: List[str], tags: List[str],
                       confidence: float = 0.8) -> None:
        """Record a learned solution for future reference

        Persists the updated DB to disk immediately.

        Args:
            problem: Problem description
            solution: Solution description
            references: List of reference URLs
            tags: Topic tags
            confidence: Confidence in this solution (0-1)
        """
        learning = LearningResult(
            problem=problem,
            solution=solution,
            references=references,
            tags=tags,
            learned_at=datetime.now().isoformat(),
            confidence=confidence
        )
        self.learning_db.append(learning)
        self.save_learning_db()

    def search_learned_solutions(self, query: str) -> List["LearningResult"]:
        """Search previously learned solutions

        Case-insensitive substring match over problem, solution and tags.

        Args:
            query: Search query

        Returns:
            Matching solutions, highest-confidence and most recent first
        """
        query_lower = query.lower()

        matches = [
            result for result in self.learning_db
            if (query_lower in result.problem.lower()
                or query_lower in result.solution.lower()
                or any(query_lower in tag.lower() for tag in result.tags))
        ]

        # Sort by confidence and recency
        matches.sort(
            key=lambda r: (r.confidence, datetime.fromisoformat(r.learned_at)),
            reverse=True
        )

        return matches

    def get_reference_for_technology(self, tech: str) -> Optional["WebReference"]:
        """Get reference documentation for a technology

        Args:
            tech: Technology name (case-sensitive key, e.g. "React")

        Returns:
            Reference to documentation, or None for unknown technologies
        """
        # Lazy thunks: the original eagerly built every reference (four
        # fetch_documentation calls plus a WebReference) on each lookup
        # just to return at most one of them.
        builders = {
            "React": lambda: self.fetch_documentation("react", "introduction"),
            "TypeScript": lambda: self.fetch_documentation("typescript", "handbook"),
            "Node.js": lambda: self.fetch_documentation("nodejs", "api"),
            "Python": lambda: self.fetch_documentation("python", "tutorial"),
            "Docker": lambda: WebReference(
                title="Docker Documentation",
                url="https://docs.docker.com/",
                source="official_docs",
                snippet="Official Docker documentation",
                relevance=1.0,
                topic="Docker",
                found_at=datetime.now().isoformat()
            ),
        }
        builder = builders.get(tech)
        return builder() if builder else None

    def generate_research_prompt(self, task: str, tech_stack: List[str],
                                 error: Optional[str] = None) -> str:
        """Generate a prompt for web research

        Args:
            task: Task description
            tech_stack: List of technologies involved
            error: Optional error message

        Returns:
            Research prompt (markdown)
        """
        sections = [
            f"# Research Task\n",
            f"**Task:** {task}\n",
        ]

        if error:
            sections.append(f"**Error:** {error}\n")

        if tech_stack:
            sections.append(f"**Technologies:** {', '.join(tech_stack)}\n")

        # Fold in up to three previously learned solutions
        learned = self.search_learned_solutions(task)
        if learned:
            sections.append("\n## Previously Learned Solutions\n")
            for i, result in enumerate(learned[:3], 1):
                sections.append(f"{i}. **{result.problem}**")
                sections.append(f"   - Solution: {result.solution}")
                sections.append(f"   - Tags: {', '.join(result.tags)}")
                sections.append(f"   - Confidence: {result.confidence:.0%}\n")

        sections.append("\n## Research Approach\n")
        sections.append("1. Check previously learned solutions")
        sections.append("2. Search Stack Overflow for similar issues")
        sections.append("3. Check official documentation")
        sections.append("4. Look for blog posts or tutorials")
        sections.append("5. Synthesize findings into solution")

        return "\n".join(sections)

    def _average_confidence(self) -> float:
        """Mean confidence across learned solutions; 0 when there are none."""
        if not self.learning_db:
            return 0
        return sum(r.confidence for r in self.learning_db) / len(self.learning_db)

    def export_learning_data(self, output_path: Path) -> None:
        """Export learning database for analysis

        Args:
            output_path: Path to write export to (parents created as needed)
        """
        export_data = {
            "total_learned": len(self.learning_db),
            "by_topic": {},
            "average_confidence": 0,
            "solutions": [asdict(item) for item in self.learning_db]
        }

        if self.learning_db:
            export_data["average_confidence"] = self._average_confidence()
            # Counter replaces the hand-rolled tag-frequency loop.
            export_data["by_topic"] = dict(Counter(
                tag for result in self.learning_db for tag in result.tags
            ))

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(export_data, indent=2))

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics about web search usage

        Returns:
            Statistics dict; the "topics" key is present only when at
            least one solution has been learned.
        """
        if not self.learning_db:
            return {
                "total_learned": 0,
                "average_confidence": 0,
                "searches_performed": len(self.search_history)
            }

        return {
            "total_learned": len(self.learning_db),
            "average_confidence": self._average_confidence(),
            "searches_performed": len(self.search_history),
            "topics": list(set(
                tag for result in self.learning_db
                for tag in result.tags
            ))
        }
|