Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
402
lib/web_search_integrator.py
Normal file
402
lib/web_search_integrator.py
Normal file
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Web Search Integrator - Context enhancement via web search
|
||||
|
||||
Features:
|
||||
1. Detect when web search would be helpful
|
||||
2. Query Stack Overflow for solutions
|
||||
3. Fetch and summarize reference docs
|
||||
4. Track learned solutions
|
||||
5. Integrate references into prompts
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
@dataclass
class WebReference:
    """A reference found via web search"""
    title: str        # human-readable title of the page/answer
    url: str          # link to the reference
    source: str       # stackoverflow, docs, blog, etc
    snippet: str      # short excerpt to surface in prompts
    relevance: float  # 0-1 score
    topic: str        # query/topic this reference addresses
    found_at: str     # ISO-8601 timestamp of when it was found


@dataclass
class LearningResult:
    """A solution learned from web search"""
    problem: str           # problem description
    solution: str          # solution description
    references: List[str]  # supporting reference URLs
    tags: List[str]        # topic tags used for lookup
    learned_at: str        # ISO-8601 timestamp
    confidence: float      # How confident in this solution (0-1)


class WebSearchIntegrator:
    """Integrates web search for context enhancement.

    Maintains a JSON-backed "learning database" of previously solved
    problems under ``cache_dir`` and provides helpers to decide when a web
    search would help, build reference objects, and render them into
    prompt sections.
    """

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize web search integrator

        Args:
            cache_dir: Optional directory for caching search results
                (defaults to /tmp/.luzia-web-cache; created if missing)
        """
        self.cache_dir = cache_dir or Path("/tmp/.luzia-web-cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.learning_db: List[LearningResult] = []
        self.search_history: List[Dict[str, Any]] = []
        self.load_learning_db()

    def load_learning_db(self) -> None:
        """Load learned solutions from cache.

        Best-effort: a missing file is fine; a corrupt or unreadable one
        only logs a warning instead of aborting construction.
        """
        db_file = self.cache_dir / "learning.json"
        if db_file.exists():
            try:
                data = json.loads(db_file.read_text())
                self.learning_db = [LearningResult(**item) for item in data.get("learned", [])]
            # Narrowed from a bare Exception: OSError (read failure),
            # ValueError (malformed JSON), TypeError (schema drift in
            # stored records passed to LearningResult(**item)).
            except (OSError, ValueError, TypeError) as e:
                print(f"[Warning] Failed to load learning DB: {e}")

    def save_learning_db(self) -> None:
        """Save learned solutions to cache as pretty-printed JSON."""
        db_file = self.cache_dir / "learning.json"
        db_file.write_text(json.dumps({
            "learned": [asdict(item) for item in self.learning_db],
            "timestamp": datetime.now().isoformat()
        }, indent=2))

    def should_search(self, task: str, error: Optional[str] = None) -> Tuple[bool, str]:
        """Determine if web search would be helpful

        Args:
            task: Task description
            error: Optional error message

        Returns:
            Tuple of (should_search, search_query); query is "" when no
            trigger pattern matches.
        """
        # (pattern, category) pairs; category is informational only for
        # now — kept so future callers can branch on trigger type.
        search_triggers = [
            # Error investigation
            (r"error|exception|failed|problem", "error_investigation"),
            # How-to tasks
            (r"how\s+to|guide|tutorial|learn", "how_to"),
            # Library/tool questions
            (r"npm|pip|cargo|ruby", "package_mgmt"),
            # Framework questions
            (r"react|vue|angular|django|flask", "framework"),
            # Integration/setup
            (r"integrate|setup|configure|install", "setup"),
            # Best practices
            (r"best practice|pattern|architecture", "architecture"),
        ]

        combined = f"{task} {error or ''}".lower()

        for pattern, _category in search_triggers:
            if re.search(pattern, combined):
                if "error" in combined:
                    # For errors, strip any leading "...error...:" prefix
                    # so the query is the message itself. NOTE(review):
                    # the sub is case-sensitive against the original text,
                    # so "TypeError:" is not stripped — confirm intended.
                    search_query = re.sub(r".*error.*?:\s*", "", error or task)[:80]
                else:
                    search_query = task[:100]
                return True, search_query

        return False, ""

    def find_stackoverflow_answer(self, query: str) -> Optional[WebReference]:
        """Find Stack Overflow answer for query

        This is a reference implementation. In production, would use
        Stack Overflow API or web search.

        Args:
            query: Search query

        Returns:
            Best matching reference, or None
        """
        # In actual implementation, would call web search API
        # For now, return structure for documentation
        return WebReference(
            title="Relevant Stack Overflow Answer",
            url="https://stackoverflow.com/search?q=...",
            source="stackoverflow",
            snippet="[Search result snippet would appear here]",
            relevance=0.8,
            topic=query,
            found_at=datetime.now().isoformat()
        )

    def fetch_documentation(self, library: str, topic: str) -> Optional[WebReference]:
        """Fetch documentation for a library/topic

        Args:
            library: Library name (npm package, python module, etc);
                matched case-insensitively against the known doc sites
            topic: Specific topic within library (appended to the base URL)

        Returns:
            Reference to documentation, or None for unknown libraries
        """
        # Common documentation URLs
        doc_patterns = {
            "react": "https://react.dev/reference/",
            "nodejs": "https://nodejs.org/api/",
            "python": "https://docs.python.org/3/",
            "typescript": "https://www.typescriptlang.org/docs/",
            "rust": "https://doc.rust-lang.org/",
            "django": "https://docs.djangoproject.com/",
            "flask": "https://flask.palletsprojects.com/",
        }

        base_url = doc_patterns.get(library.lower())
        if not base_url:
            return None

        return WebReference(
            title=f"{library} Documentation - {topic}",
            url=f"{base_url}{topic}/",
            source="official_docs",
            snippet=f"Official documentation for {library} {topic}",
            relevance=0.95,
            topic=topic,
            found_at=datetime.now().isoformat()
        )

    def detect_tech_stack(self, task: str) -> List[str]:
        """Detect technology stack from task description

        Args:
            task: Task description

        Returns:
            List of detected technologies, in the (fixed) order of the
            pattern table below
        """
        tech_patterns = {
            "React": r"react|jsx",
            "TypeScript": r"typescript|\.ts",
            "Node.js": r"node|npm|javascript",
            "Python": r"python|pip|py",
            "Rust": r"rust|cargo",
            "Docker": r"docker|container",
            "PostgreSQL": r"postgres|sql",
            "MongoDB": r"mongo|mongodb",
            "Redis": r"redis",
            "Kubernetes": r"k8s|kubernetes",
            "GraphQL": r"graphql|apollo",
            "REST": r"rest|api",
            "WebSocket": r"websocket|ws",
        }

        task_lower = task.lower()
        # Dict preserves insertion order, so output order is stable.
        return [tech for tech, pattern in tech_patterns.items()
                if re.search(pattern, task_lower)]

    def generate_context_section(self, references: List[WebReference]) -> str:
        """Generate a context section with web references

        Args:
            references: List of web references

        Returns:
            Markdown section to add to prompt ("" when no references)
        """
        if not references:
            return ""

        sections = ["# Web References and Context\n"]

        for ref in references:
            sections.append(f"\n## {ref.title}")
            sections.append(f"**Source:** {ref.source}")
            sections.append(f"**URL:** {ref.url}")
            sections.append(f"**Relevance:** {ref.relevance:.1%}")
            sections.append(f"\n{ref.snippet}\n")

        return "\n".join(sections)

    def learn_solution(self, problem: str, solution: str,
                       references: List[str], tags: List[str],
                       confidence: float = 0.8) -> None:
        """Record a learned solution for future reference

        Persists the updated database to disk immediately.

        Args:
            problem: Problem description
            solution: Solution description
            references: List of reference URLs
            tags: Topic tags
            confidence: Confidence in this solution (0-1)
        """
        learning = LearningResult(
            problem=problem,
            solution=solution,
            references=references,
            tags=tags,
            learned_at=datetime.now().isoformat(),
            confidence=confidence
        )
        self.learning_db.append(learning)
        self.save_learning_db()

    def search_learned_solutions(self, query: str) -> List[LearningResult]:
        """Search previously learned solutions

        Args:
            query: Search query (case-insensitive substring match)

        Returns:
            Matching solutions, sorted by confidence then recency (desc)
        """
        query_lower = query.lower()

        # Substring match in problem, solution, and tags.
        matches = [
            result for result in self.learning_db
            if (query_lower in result.problem.lower() or
                query_lower in result.solution.lower() or
                any(query_lower in tag.lower() for tag in result.tags))
        ]

        # Sort by confidence and recency
        matches.sort(
            key=lambda r: (r.confidence, datetime.fromisoformat(r.learned_at)),
            reverse=True
        )

        return matches

    def get_reference_for_technology(self, tech: str) -> Optional[WebReference]:
        """Get reference documentation for a technology

        Args:
            tech: Technology name (key into the table below)

        Returns:
            Reference to documentation, or None for unknown technologies
        """
        # NOTE: all entries are built eagerly on each call; cheap since
        # these are pure in-memory constructions.
        refs = {
            "React": self.fetch_documentation("react", "introduction"),
            "TypeScript": self.fetch_documentation("typescript", "handbook"),
            "Node.js": self.fetch_documentation("nodejs", "api"),
            "Python": self.fetch_documentation("python", "tutorial"),
            "Docker": WebReference(
                title="Docker Documentation",
                url="https://docs.docker.com/",
                source="official_docs",
                snippet="Official Docker documentation",
                relevance=1.0,
                topic="Docker",
                found_at=datetime.now().isoformat()
            ),
        }
        return refs.get(tech)

    def generate_research_prompt(self, task: str, tech_stack: List[str],
                                 error: Optional[str] = None) -> str:
        """Generate a prompt for web research

        Args:
            task: Task description
            tech_stack: List of technologies involved
            error: Optional error message

        Returns:
            Research prompt (markdown)
        """
        sections = [
            "# Research Task\n",
            f"**Task:** {task}\n",
        ]

        if error:
            sections.append(f"**Error:** {error}\n")

        if tech_stack:
            sections.append(f"**Technologies:** {', '.join(tech_stack)}\n")

        # Surface up to three previously learned solutions for this task.
        learned = self.search_learned_solutions(task)
        if learned:
            sections.append("\n## Previously Learned Solutions\n")
            for i, result in enumerate(learned[:3], 1):
                sections.append(f"{i}. **{result.problem}**")
                sections.append(f"   - Solution: {result.solution}")
                sections.append(f"   - Tags: {', '.join(result.tags)}")
                sections.append(f"   - Confidence: {result.confidence:.0%}\n")

        sections.append("\n## Research Approach\n")
        sections.append("1. Check previously learned solutions")
        sections.append("2. Search Stack Overflow for similar issues")
        sections.append("3. Check official documentation")
        sections.append("4. Look for blog posts or tutorials")
        sections.append("5. Synthesize findings into solution")

        return "\n".join(sections)

    def export_learning_data(self, output_path: Path) -> None:
        """Export learning database for analysis

        Writes a JSON document with totals, per-tag counts, average
        confidence, and the full solution list.

        Args:
            output_path: Path to write export to (parents created)
        """
        export_data = {
            "total_learned": len(self.learning_db),
            "by_topic": {},
            "average_confidence": 0,
            "solutions": [asdict(item) for item in self.learning_db]
        }

        # Calculate statistics
        if self.learning_db:
            export_data["average_confidence"] = (
                sum(r.confidence for r in self.learning_db) / len(self.learning_db)
            )

            # Group by tags
            by_topic: Dict[str, int] = {}
            for result in self.learning_db:
                for tag in result.tags:
                    by_topic[tag] = by_topic.get(tag, 0) + 1
            export_data["by_topic"] = by_topic

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(export_data, indent=2))

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics about web search usage

        Returns:
            Statistics dict; "topics" is sorted so output is
            deterministic (previously it was built from a set, giving an
            arbitrary order per run).
        """
        if not self.learning_db:
            return {
                "total_learned": 0,
                "average_confidence": 0,
                "searches_performed": len(self.search_history)
            }

        avg_confidence = sum(r.confidence for r in self.learning_db) / len(self.learning_db)

        return {
            "total_learned": len(self.learning_db),
            "average_confidence": avg_confidence,
            "searches_performed": len(self.search_history),
            "topics": sorted(set(
                tag for result in self.learning_db
                for tag in result.tags
            ))
        }
|
||||
Reference in New Issue
Block a user