Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController, this refactor:

- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
admin
2026-01-14 10:42:16 -03:00
commit ec33ac1936
265 changed files with 92011 additions and 0 deletions

481
lib/doc_sync.py Normal file
View File

@@ -0,0 +1,481 @@
#!/usr/bin/env python3
"""
Documentation Sync - Migrate .md files to Knowledge Graphs
Parses markdown files and creates KG entities:
- Headers become entity names
- Content becomes entity content
- Links become relations
- Code blocks stored in metadata
Archives original .md files after migration.
"""
import json
import re
import shutil
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from datetime import datetime
import sys
sys.path.insert(0, str(Path(__file__).parent))
from knowledge_graph import KnowledgeGraph, ENTITY_TYPES
# Source directories and archive locations used by the migration.
DOCS_DIR = Path("/opt/server-agents/docs")  # general sysadmin docs to migrate
ARCHIVE_DIR = Path("/opt/server-agents/archive/docs-migrated")  # destination for migrated originals
PROJECT_HOMES = Path("/home")  # scanned for */CLAUDE.md project docs
class MarkdownParser:
    """Parse a markdown file into KG-ready entities and relations.

    The whole document becomes a single entity; H2/H3 headers and fenced
    code blocks are captured into its metadata, and internal ``.md`` links
    become ``(source, target, "references")`` relations.
    """
    def __init__(self, filepath: Path):
        self.filepath = filepath
        # A missing file parses to an empty result instead of raising.
        self.content = filepath.read_text() if filepath.exists() else ""
        self.entities: List[Dict] = []
        self.relations: List[Tuple[str, str, str]] = []
    def parse(self) -> Dict:
        """Parse the markdown file.

        Returns:
            ``{"entities": [...], "relations": [...]}`` — both lists empty
            when the file was missing or empty.
        """
        if not self.content:
            return {"entities": [], "relations": []}
        # Title comes from the first H1, falling back to the filename stem.
        title_match = re.search(r'^#\s+(.+)$', self.content, re.MULTILINE)
        title = title_match.group(1) if title_match else self.filepath.stem
        # One main entity carries the full document text; structural detail
        # (sections, code blocks) is preserved in metadata.
        main_entity = {
            "name": self._sanitize_name(title),
            "type": self._infer_type(title, self.content),
            "content": self.content,
            "metadata": {
                "source_file": str(self.filepath),
                "title": title,
                "sections": self._extract_sections(),
                "code_blocks": self._extract_code_blocks(),
            }
        }
        self.entities.append(main_entity)
        # Internal markdown links become relations off the main entity.
        self._extract_links(main_entity["name"])
        return {
            "entities": self.entities,
            "relations": self.relations,
        }
    def _sanitize_name(self, name: str) -> str:
        """Convert name to KG-safe format.

        Strips special characters, collapses whitespace to underscores,
        lowercases, and truncates to 100 characters.
        """
        name = re.sub(r'[^\w\s-]', '', name)
        name = re.sub(r'\s+', '_', name)
        return name.lower()[:100]
    def _infer_type(self, title: str, content: str) -> str:
        """Infer the entity type from title keywords.

        First matching keyword group wins; unmatched titles default to
        "procedure".  The *content* parameter is kept for interface
        stability: the original content-based check was dead code (both
        its branches returned "procedure"), so classification depends on
        the title alone.
        """
        title_lower = title.lower()
        # Ordered keyword checks: earlier groups take precedence.
        if any(x in title_lower for x in ["command", "cli", "usage"]):
            return "command"
        if any(x in title_lower for x in ["service", "daemon"]):
            return "service"
        if any(x in title_lower for x in ["config", "settings", "setup"]):
            return "config"
        if any(x in title_lower for x in ["troubleshoot", "debug", "fix"]):
            return "troubleshooting"
        if any(x in title_lower for x in ["architecture", "design", "system"]):
            return "architecture"
        if any(x in title_lower for x in ["guide", "how", "tutorial"]):
            return "procedure"
        if any(x in title_lower for x in ["user", "account", "permission"]):
            return "guide"
        # No keyword matched: default type.
        return "procedure"
    def _extract_sections(self) -> List[Dict]:
        """Extract sections (H2, H3 headers) with their level and offset."""
        sections = []
        pattern = r'^(#{2,3})\s+(.+)$'
        for match in re.finditer(pattern, self.content, re.MULTILINE):
            level = len(match.group(1))  # 2 for H2, 3 for H3
            title = match.group(2)
            sections.append({
                "level": level,
                "title": title,
                "position": match.start(),
            })
        return sections
    def _extract_code_blocks(self) -> List[Dict]:
        """Extract fenced code blocks with their language tag."""
        blocks = []
        pattern = r'```(\w*)\n(.*?)```'
        for match in re.finditer(pattern, self.content, re.DOTALL):
            lang = match.group(1) or "text"  # bare ``` fences default to "text"
            code = match.group(2).strip()
            blocks.append({
                "language": lang,
                "code": code[:500],  # Truncate long blocks
                "position": match.start(),
            })
        return blocks
    def _extract_links(self, source_name: str):
        """Extract markdown links as relations onto self.relations."""
        # [text](url) pattern
        pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        for match in re.finditer(pattern, self.content):
            text = match.group(1)
            url = match.group(2)
            # Only internal .md links (not http(s) URLs) become relations.
            if url.endswith('.md') and not url.startswith('http'):
                target = self._sanitize_name(Path(url).stem)
                self.relations.append((source_name, target, "references"))
class DocSync:
    """Sync documentation files to knowledge graphs.

    Each ``migrate_*`` method handles one source location / KG domain.
    All of them accumulate into the shared ``self.stats`` dict, so use a
    fresh DocSync instance per migration run for independent counts.
    """
    def __init__(self):
        # "errors" collects per-file failure messages so one bad file
        # does not abort a whole migration run.
        self.stats = {
            "files_processed": 0,
            "entities_created": 0,
            "relations_created": 0,
            "errors": [],
        }
    def migrate_docs_dir(self, domain: str = "sysadmin", dry_run: bool = True) -> Dict:
        """Migrate /opt/server-agents/docs/*.md to KG.

        Returns the stats dict on success, or ``{"error": ...}`` when the
        docs directory or the knowledge graph is unavailable.
        """
        if not DOCS_DIR.exists():
            return {"error": f"Docs directory not found: {DOCS_DIR}"}
        try:
            kg = KnowledgeGraph(domain)
        except Exception as e:
            return {"error": f"Could not open KG: {e}"}
        md_files = list(DOCS_DIR.glob("*.md"))
        self.stats["files_processed"] = len(md_files)
        for md_file in md_files:
            try:
                self._process_md_file(md_file, kg, domain, dry_run)
            except Exception as e:
                # Record and continue with the remaining files.
                self.stats["errors"].append(f"{md_file.name}: {e}")
        # Archive only when this was a real run and every file succeeded.
        if not dry_run and not self.stats["errors"]:
            self._archive_files(md_files)
        return self.stats
    def migrate_project_docs(self, dry_run: bool = True) -> Dict:
        """Migrate /home/*/CLAUDE.md to projects KG.

        Files are left in place (no archiving step in this method).
        """
        try:
            kg = KnowledgeGraph("projects")
        except Exception as e:
            return {"error": f"Could not open KG: {e}"}
        claude_files = list(PROJECT_HOMES.glob("*/CLAUDE.md"))
        self.stats["files_processed"] = len(claude_files)
        for claude_file in claude_files:
            try:
                # The home directory name doubles as the project name.
                project = claude_file.parent.name
                self._process_claude_md(claude_file, project, kg, dry_run)
            except Exception as e:
                self.stats["errors"].append(f"{claude_file}: {e}")
        return self.stats
    def migrate_research_dir(self, research_dir: str = "/home/admin/research",
                             archive: bool = False, dry_run: bool = True) -> Dict:
        """Migrate research .md files to research KG.

        Args:
            research_dir: Directory containing research .md files
            archive: If True, move files to archive after migration
            dry_run: If True, preview without making changes
        """
        research_path = Path(research_dir)
        if not research_path.exists():
            return {"error": f"Research directory not found: {research_dir}"}
        try:
            kg = KnowledgeGraph("research")
        except Exception as e:
            return {"error": f"Could not open research KG: {e}"}
        md_files = list(research_path.glob("*.md"))
        self.stats["files_processed"] = len(md_files)
        for md_file in md_files:
            try:
                self._process_research_md(md_file, kg, dry_run)
            except Exception as e:
                self.stats["errors"].append(f"{md_file.name}: {e}")
        # Archive if requested and not dry run (and only on a clean run).
        if archive and not dry_run and not self.stats["errors"]:
            archive_dir = research_path / "archived"
            archive_dir.mkdir(exist_ok=True)
            # Timestamp prefix keeps archived copies from colliding across runs.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            for f in md_files:
                dest = archive_dir / f"{timestamp}_{f.name}"
                shutil.move(str(f), str(dest))
        return self.stats
    def _process_research_md(self, filepath: Path, kg: KnowledgeGraph, dry_run: bool) -> None:
        """Process a research .md file into KG entities.

        Creates one "synthesis" entity for the document plus one "finding"
        entity per extracted key finding, linked via "contains" relations.
        """
        content = filepath.read_text()
        # Extract title from first H1; fall back to the filename stem.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        title = title_match.group(1) if title_match else filepath.stem
        # Extract session ID if present; fall back to the filename stem.
        session_match = re.search(r'Session\s+([a-f0-9-]+)', content)
        session_id = session_match.group(1) if session_match else filepath.stem
        # Extract key findings from a "Key Findings" / "Executive Summary"
        # section, stopping at the next "##" header or end of file.
        findings = []
        findings_section = re.search(r'(?:Key Findings|Executive Summary)(.*?)(?=##|\Z)',
                                     content, re.DOTALL | re.IGNORECASE)
        if findings_section:
            # Extract numbered items of the form "1. **Title**: detail...".
            for match in re.finditer(r'\d+\.\s+\*\*([^*]+)\*\*[:\s]*(.+?)(?=\d+\.\s+\*\*|\Z)',
                                     findings_section.group(1), re.DOTALL):
                findings.append({
                    "title": match.group(1).strip(),
                    "detail": match.group(2).strip()[:500]  # cap stored detail size
                })
        # Create main research entity
        entity_name = self._sanitize_name(title)
        if not dry_run:
            # Add main research document entity (use 'synthesis' as the valid type)
            kg.add_entity(
                name=entity_name,
                entity_type="synthesis",
                content=content,
                metadata={
                    "source_file": str(filepath),
                    "session_id": session_id,
                    "title": title,
                    "findings_count": len(findings),
                    "word_count": len(content.split()),
                },
                source=str(filepath)
            )
            # Add findings as separate entities with relations
            for i, finding in enumerate(findings):
                finding_name = self._sanitize_name(f"{session_id}_finding_{i+1}")
                kg.add_entity(
                    name=finding_name,
                    entity_type="finding",
                    content=f"**{finding['title']}**\n\n{finding['detail']}",
                    metadata={"research_session": session_id, "index": i+1},
                    source=str(filepath)
                )
                kg.add_relation(entity_name, finding_name, "contains")
        # Counters are updated even on dry runs so previews report totals.
        self.stats["entities_created"] += 1 + len(findings)
        self.stats["relations_created"] += len(findings)
    def _sanitize_name(self, name: str) -> str:
        """Convert name to KG-safe format.

        NOTE(review): duplicates MarkdownParser._sanitize_name — keep in sync.
        """
        name = re.sub(r'[^\w\s-]', '', name)
        name = re.sub(r'\s+', '_', name)
        return name.lower()[:100]
    def _process_md_file(self, filepath: Path, kg: KnowledgeGraph, domain: str, dry_run: bool) -> None:
        """Process a single .md file into entities/relations for *domain*."""
        parser = MarkdownParser(filepath)
        data = parser.parse()
        for entity in data["entities"]:
            # Validate entity type for domain; coerce unknown types to the
            # domain's first valid type (or "procedure" as a last resort).
            valid_types = ENTITY_TYPES.get(domain, [])
            if entity["type"] not in valid_types:
                entity["type"] = valid_types[0] if valid_types else "procedure"
            if not dry_run:
                kg.add_entity(
                    name=entity["name"],
                    entity_type=entity["type"],
                    content=entity["content"],
                    metadata=entity["metadata"],
                    source=str(filepath)
                )
            self.stats["entities_created"] += 1
        for source, target, relation in data["relations"]:
            if not dry_run:
                kg.add_relation(source, target, relation)
            self.stats["relations_created"] += 1
    def _process_claude_md(self, filepath: Path, project: str, kg: KnowledgeGraph, dry_run: bool) -> None:
        """Process a project CLAUDE.md file into a single "project" entity."""
        content = filepath.read_text()
        # Split content into sections keyed by H2 header; text before the
        # first "## " header lands under "overview".
        sections = {}
        current_section = "overview"
        current_content = []
        for line in content.split("\n"):
            if line.startswith("## "):
                if current_content:
                    sections[current_section] = "\n".join(current_content)
                current_section = line[3:].strip().lower().replace(" ", "_")
                current_content = []
            else:
                current_content.append(line)
        if current_content:
            sections[current_section] = "\n".join(current_content)
        # Create/update project entity
        if not dry_run:
            kg.add_entity(
                name=project,
                entity_type="project",
                content=content,
                metadata={
                    "source_file": str(filepath),
                    "sections": list(sections.keys()),
                    # Rough capability hints from keyword presence.
                    "has_build_commands": "build" in content.lower(),
                    "has_test_commands": "test" in content.lower(),
                },
                source=str(filepath)
            )
        self.stats["entities_created"] += 1
    def _archive_files(self, files: List[Path]) -> None:
        """Archive migrated files under a timestamped subdirectory."""
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_subdir = ARCHIVE_DIR / timestamp
        archive_subdir.mkdir(exist_ok=True)
        for f in files:
            shutil.move(str(f), str(archive_subdir / f.name))
    def categorize_md_file(self, filepath: Path) -> str:
        """Determine which KG domain a file belongs to.

        Filename patterns are checked first, then content patterns;
        unmatched files default to "sysadmin".
        """
        content = filepath.read_text().lower()
        name = filepath.stem.lower()
        # Check filename patterns
        if any(x in name for x in ["user", "account", "permission", "webuser"]):
            return "users"
        if any(x in name for x in ["research", "finding", "synthesis"]):
            return "research"
        if any(x in name for x in ["project", "overbits", "musica", "dss"]):
            return "projects"
        # Check content patterns
        if "user management" in content or "create user" in content:
            return "users"
        if "research" in content and "methodology" in content:
            return "research"
        # Default to sysadmin
        return "sysadmin"
def run_migration(dry_run: bool = True, verbose: bool = False) -> int:
    """Run the full documentation migration and print a summary.

    Args:
        dry_run: When True, only preview; nothing is written or archived.
        verbose: When True, list up to five sample filenames per category.

    Returns:
        0, used as the process exit code by the CLI.
    """
    print(f"\n=== Documentation Migration {'(DRY RUN)' if dry_run else ''} ===\n")
    doc_sync = DocSync()
    # Preview how files would be routed across KG domains before migrating.
    if DOCS_DIR.exists():
        grouped = {}
        for md_path in DOCS_DIR.glob("*.md"):
            grouped.setdefault(doc_sync.categorize_md_file(md_path), []).append(md_path.name)
        print("File categorization:")
        for domain, files in grouped.items():
            print(f" {domain}: {len(files)} files")
            if verbose:
                for name in files[:5]:
                    print(f" - {name}")
                if len(files) > 5:
                    print(f" ... and {len(files) - 5} more")
    # Migrate the shared sysadmin docs directory.
    print("\nMigrating /opt/server-agents/docs/...")
    result = doc_sync.migrate_docs_dir("sysadmin", dry_run)
    if "error" in result:
        print(f" Error: {result['error']}")
    else:
        print(f" Files: {result['files_processed']}")
        print(f" Entities: {result['entities_created']}")
        print(f" Relations: {result['relations_created']}")
        if result["errors"]:
            print(f" Errors: {len(result['errors'])}")
    # A fresh DocSync keeps the project-docs counters independent.
    project_sync = DocSync()
    print("\nMigrating project CLAUDE.md files...")
    result2 = project_sync.migrate_project_docs(dry_run)
    if "error" in result2:
        print(f" Error: {result2['error']}")
    else:
        print(f" Files: {result2['files_processed']}")
        print(f" Entities: {result2['entities_created']}")
    if dry_run:
        print("\n[DRY RUN] No changes made. Run with --execute to apply.")
    return 0
# --- CLI ---
if __name__ == "__main__":
    import argparse
    cli = argparse.ArgumentParser(description="Documentation Migration")
    cli.add_argument("--execute", action="store_true", help="Actually perform migration")
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    cli.add_argument("--categorize", action="store_true", help="Only show file categorization")
    opts = cli.parse_args()
    if not opts.categorize:
        # Default mode: full migration (dry run unless --execute given).
        exit(run_migration(dry_run=not opts.execute, verbose=opts.verbose))
    elif DOCS_DIR.exists():
        # Categorize-only mode: print domain routing without migrating.
        categorizer = DocSync()
        for md_path in sorted(DOCS_DIR.glob("*.md")):
            print(f" {categorizer.categorize_md_file(md_path):12} {md_path.name}")