Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController, this refactor:

- Adds DockerTmuxController class for robust tmux session management (a rough interface sketch follows this list)
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection
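
The DockerTmuxController class itself is added elsewhere in this commit, so the following is only a rough sketch of the surface described above; the constructor arguments, defaults, and the `docker exec` plumbing are assumptions, not the committed implementation.

import hashlib
import subprocess
import time

class DockerTmuxController:
    """Sketch: drive a tmux session inside a Docker container via `docker exec`."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Every call is run as: docker exec <container> tmux <args...>
        cmd = ["docker", "exec", self.container, "tmux", *args]
        return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pause, then press Enter
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        return self._tmux("capture-pane", "-t", self.session, "-p")

    def wait_for_prompt(self, pattern: str, timeout: float = 30.0) -> bool:
        # Pattern-based completion detection: poll the pane until the pattern appears
        deadline = time.time() + timeout
        while time.time() < deadline:
            if pattern in self.capture_pane():
                return True
            time.sleep(0.5)
        return False

    def wait_for_idle(self, quiet_seconds: float = 2.0, timeout: float = 60.0) -> bool:
        # Content-hash-based idle detection: idle once the pane hash stops changing
        deadline = time.time() + timeout
        last_hash, last_change = None, time.time()
        while time.time() < deadline:
            current = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if current != last_hash:
                last_hash, last_change = current, time.time()
            elif time.time() - last_change >= quiet_seconds:
                return True
            time.sleep(0.5)
        return False

    def wait_for_shell_prompt(self, timeout: float = 30.0) -> bool:
        # Naive shell prompt detection; the real class is presumably smarter
        return self.wait_for_prompt("$ ", timeout=timeout)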

Also includes workflow improvements:
- Pre-task git snapshot before agent execution (see the sketch after this list)
- Post-task commit protocol in agent guidelines
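
The snapshot code itself is not shown in this excerpt; a minimal sketch of the pre-task snapshot idea, with a hypothetical helper name and assuming a plain git worktree, could look like:

import subprocess

def snapshot_worktree(repo_dir: str, label: str = "pre-task snapshot") -> str:
    """Commit any pending changes so the agent's edits diff cleanly afterwards."""
    subprocess.run(["git", "-C", repo_dir, "add", "-A"], check=True)
    # --allow-empty keeps a snapshot marker even when the tree is already clean
    subprocess.run(["git", "-C", repo_dir, "commit", "--allow-empty", "-m", label], check=True)
    rev = subprocess.run(
        ["git", "-C", repo_dir, "rev-parse", "HEAD"],
        capture_output=True, text=True, check=True,
    )
    return rev.stdout.strip()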

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit ec33ac1936
Author: admin
Date: 2026-01-14 10:42:16 -03:00
265 changed files with 92011 additions and 0 deletions


@@ -0,0 +1,346 @@
#!/usr/bin/env python3
"""
Responsive Dispatcher - Non-blocking Task Dispatch with Live Status Updates
Implements:
- Immediate job_id return on task dispatch
- Background job status monitoring without blocking
- Live progress feedback system
- Concurrent task management
- Status caching for fast retrieval
Key Features:
1. Dispatch returns immediately with job_id
2. Background monitor updates job status files
3. CLI can poll status without blocking
4. Multiple concurrent tasks tracked independently
5. Status persisted to disk for resilience
"""
import json
import os
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, Tuple, Any
import threading
import queue
class ResponsiveDispatcher:
"""Non-blocking task dispatcher with background monitoring"""
def __init__(self, jobs_dir: Path = None):
self.jobs_dir = jobs_dir or Path("/var/lib/luzia/jobs")
self.jobs_dir.mkdir(parents=True, exist_ok=True)
self.monitoring_queue = queue.Queue()
self.status_cache = {} # Local cache for fast retrieval
self.cache_update_time = {} # Track cache freshness
def dispatch_task(self, project: str, task: str, priority: int = 5) -> Tuple[str, Dict]:
"""
Dispatch a task immediately, returning job_id and initial status.
Returns:
(job_id, status_dict)
"""
job_id = datetime.now().strftime("%H%M%S") + "-" + hex(hash(task) & 0xffff)[2:]
job_dir = self.jobs_dir / job_id
# Create job directory atomically
job_dir.mkdir(parents=True, exist_ok=True)
# Write initial status
initial_status = {
"id": job_id,
"project": project,
"task": task[:200], # Truncate long tasks
"status": "dispatched",
"priority": priority,
"dispatched_at": datetime.now().isoformat(),
"progress": 0,
"message": "Task queued for execution",
}
status_file = job_dir / "status.json"
self._write_status(status_file, initial_status)
# Queue for background monitoring
self.monitoring_queue.put({
"job_id": job_id,
"project": project,
"task": task,
"job_dir": str(job_dir),
"priority": priority,
})
# Update local cache
self.status_cache[job_id] = initial_status
self.cache_update_time[job_id] = time.time()
return job_id, initial_status
def get_status(self, job_id: str, use_cache: bool = True) -> Optional[Dict]:
"""
Get current status of a job.
Args:
job_id: Job ID to query
use_cache: Use cached status if fresh (< 1 second old)
Returns:
Status dict or None if job not found
"""
# Check cache first
if use_cache and job_id in self.status_cache:
cache_age = time.time() - self.cache_update_time.get(job_id, 0)
if cache_age < 1.0: # Cache valid for 1 second
return self.status_cache[job_id]
# Read from disk
status_file = self.jobs_dir / job_id / "status.json"
if not status_file.exists():
return None
try:
status = json.loads(status_file.read_text())
self.status_cache[job_id] = status
self.cache_update_time[job_id] = time.time()
return status
except (json.JSONDecodeError, IOError):
return None
def update_status(
self,
job_id: str,
status: str,
progress: int = None,
message: str = None,
exit_code: int = None,
) -> bool:
"""
Update job status. Used by background monitor.
Returns:
True if update successful, False otherwise
"""
status_file = self.jobs_dir / job_id / "status.json"
if not status_file.exists():
return False
try:
current = json.loads(status_file.read_text())
except (json.JSONDecodeError, IOError):
return False
# Update fields
current["status"] = status
if progress is not None:
current["progress"] = min(100, max(0, progress))
if message:
current["message"] = message
if exit_code is not None:
current["exit_code"] = exit_code
current["updated_at"] = datetime.now().isoformat()
self._write_status(status_file, current)
# Update cache
self.status_cache[job_id] = current
self.cache_update_time[job_id] = time.time()
return True
def list_jobs(self, project: str = None, status_filter: str = None) -> list:
"""
List jobs, optionally filtered by project and status.
Returns:
List of job status dicts
"""
jobs = []
for job_dir in sorted(self.jobs_dir.iterdir(), reverse=True):
if not job_dir.is_dir():
continue
status = self.get_status(job_dir.name)
if not status:
continue
# Apply filters
if project and status.get("project") != project:
continue
if status_filter and status.get("status") != status_filter:
continue
jobs.append(status)
return jobs[:50] # Return last 50 jobs
def wait_for_job(self, job_id: str, timeout: int = None, poll_interval: float = 0.5):
"""
Wait for job to complete (blocking).
Useful for critical operations that need synchronization.
Args:
job_id: Job ID to wait for
timeout: Max seconds to wait (None = wait forever)
poll_interval: Seconds between status checks
Returns:
Final status dict or None if timeout
"""
start_time = time.time()
while True:
status = self.get_status(job_id, use_cache=False)
if not status:
return None
if status.get("status") in ["completed", "failed", "killed"]:
return status
if timeout:
elapsed = time.time() - start_time
if elapsed > timeout:
return None
time.sleep(poll_interval)
def stream_status(self, job_id: str, interval: float = 0.5) -> None:
"""
Stream status updates to stdout without blocking main loop.
Useful for long-running tasks.
Args:
job_id: Job ID to stream
interval: Seconds between updates
"""
last_msg = None
while True:
status = self.get_status(job_id, use_cache=False)
if not status:
print(f"Job {job_id} not found")
return
# Print new messages
msg = status.get("message")
if msg and msg != last_msg:
progress = status.get("progress", 0)
print(f" [{progress}%] {msg}")
last_msg = msg
# Check if done
if status.get("status") in ["completed", "failed", "killed"]:
exit_code = status.get("exit_code", -1)
print(f" [100%] {status['status'].title()} (exit {exit_code})")
return
time.sleep(interval)
def start_background_monitor(self) -> threading.Thread:
"""
Start background monitor thread that processes queued jobs.
Returns:
Monitor thread (started, daemon=True)
"""
monitor = threading.Thread(target=self._monitor_loop, daemon=True)
monitor.start()
return monitor
def _monitor_loop(self):
"""Background monitor loop - processes jobs from queue"""
while True:
try:
# Get next job from queue with short timeout
job_info = self.monitoring_queue.get(timeout=1.0)
self._monitor_job(job_info)
except queue.Empty:
continue
except Exception as e:
print(f"[Monitor error] {e}", flush=True)
def _monitor_job(self, job_info: Dict):
"""Monitor a single job's execution"""
job_id = job_info["job_id"]
project = job_info["project"]
job_dir = Path(job_info["job_dir"])
# Update status: starting
self.update_status(job_id, "starting", progress=5, message="Agent initialization")
# Wait for agent to start (check for meta.json or output)
        max_wait = 30  # seconds
        for _ in range(max_wait * 2):  # poll every 0.5s, for up to max_wait seconds
output_file = job_dir / "output.log"
meta_file = job_dir / "meta.json"
if output_file.exists() or meta_file.exists():
self.update_status(job_id, "running", progress=10, message="Agent running")
break
time.sleep(0.5)
else:
# Timeout waiting for agent to start
self.update_status(job_id, "failed", progress=0, message="Agent failed to start")
return
# Monitor execution
output_file = job_dir / "output.log"
last_size = 0
stalled_count = 0
while True:
# Check if completed
if output_file.exists():
content = output_file.read_text()
if "exit:" in content:
# Parse exit code
lines = content.strip().split("\n")
for line in reversed(lines):
if line.startswith("exit:"):
exit_code = int(line.split(":")[1])
status = "completed" if exit_code == 0 else "failed"
self.update_status(
job_id,
status,
progress=100,
message=f"Agent {status}",
exit_code=exit_code,
)
return
# Update progress based on output size
current_size = len(content)
if current_size > last_size:
progress = min(95, 10 + (current_size // 1000)) # Rough progress indicator
self.update_status(job_id, "running", progress=progress)
last_size = current_size
stalled_count = 0
else:
stalled_count += 1
if stalled_count > 30: # 30 * 1 second = 30 seconds with no output
self.update_status(
job_id, "stalled", progress=50, message="No output for 30 seconds"
)
time.sleep(1.0)
@staticmethod
def _write_status(path: Path, data: Dict) -> None:
"""Write status atomically"""
tmp_path = path.with_suffix(".json.tmp")
with open(tmp_path, "w") as f:
json.dump(data, f, indent=2)
f.flush()
os.fsync(f.fileno())
os.rename(tmp_path, path)
# Helper function for quick dispatch
def quick_dispatch(project: str, task: str) -> Dict:
    """Quick dispatch helper - returns the initial status dict (includes the job_id)"""
    dispatcher = ResponsiveDispatcher()
    _, status = dispatcher.dispatch_task(project, task)
    return status
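
A minimal usage sketch of the dispatcher, not part of the committed file; the module name, jobs directory, project, and task text below are placeholders:

# Usage sketch; `responsive_dispatcher` is a guessed module name (the file path is not shown above)
from pathlib import Path
from responsive_dispatcher import ResponsiveDispatcher

dispatcher = ResponsiveDispatcher(jobs_dir=Path("/tmp/luzia-jobs"))
dispatcher.start_background_monitor()

# dispatch_task returns immediately with a job_id; nothing blocks here
job_id, status = dispatcher.dispatch_task("cockpit", "run the smoke tests")
print(job_id, status["status"])

# poll cheaply (cached for up to 1 second), or block when synchronization matters
print(dispatcher.get_status(job_id))
print(dispatcher.wait_for_job(job_id, timeout=60))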