Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:
- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
379
lib/docker_bridge.py
Normal file
379
lib/docker_bridge.py
Normal file
@@ -0,0 +1,379 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
DockerBridge - Manages lazy-loaded Docker containers for Project Agents.
|
||||
|
||||
Executes tools inside containers while preserving user ownership.
|
||||
Containers spin up on-demand and auto-stop after idle timeout.
|
||||
"""
|
||||
|
||||
import json
import logging
import os
import shlex
import subprocess
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Optional
|
||||
|
||||
# Logger shared by everything in this module.
logger = logging.getLogger("luzia-docker")

# Defaults: the image DockerBridge starts when none is given, and how many
# minutes a container may sit idle before cleanup_idle_containers() stops it.
DEFAULT_IMAGE = "luzia-sandbox:latest"
IDLE_TIMEOUT_MINUTES = 10

# Global registry of active containers and their last activity
# (container name -> datetime of the most recent use).
_container_activity: Dict[str, datetime] = {}
|
||||
|
||||
|
||||
class DockerBridge:
    """
    Manages a lazy-loaded Docker container for one Project Agent.

    The container is named ``luzia-<project>``, mounts ``host_path`` at
    ``/workspace`` and runs as the project user's UID:GID. It is started on
    first use by ensure_running(); each completed operation records a
    timestamp in the module-level activity registry used for idle tracking.
    """

    def __init__(
        self,
        project: str,
        host_path: str,
        image: str = DEFAULT_IMAGE,
        timeout_seconds: int = 300,
        extra_mounts: Optional[list] = None
    ):
        """
        Args:
            project: Project name; also the system user whose UID/GID is
                looked up and the suffix of the container name.
            host_path: Host directory bind-mounted at /workspace.
            image: Docker image to run.
            timeout_seconds: Default timeout for execute().
            extra_mounts: Additional ``-v`` mount specs, passed verbatim.
        """
        self.project = project
        self.host_path = host_path
        self.container_name = f"luzia-{project}"
        self.image = image
        self.timeout_seconds = timeout_seconds
        # Copy so later changes to the caller's list do not leak in here.
        self.extra_mounts = list(extra_mounts) if extra_mounts else []
        self._uid = self._get_uid()
        self._gid = self._get_gid()

    def _lookup_id(self, flag: str, label: str) -> str:
        """Run ``id <flag> <project>``; fall back to "1000" on any failure."""
        try:
            result = subprocess.run(
                ["id", flag, "--", self.project],
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            # Unknown user, or the `id` binary itself is unavailable.
            logger.warning(f"Could not get {label} for {self.project}, using 1000")
            return "1000"
        return result.stdout.strip()

    def _get_uid(self) -> str:
        """Get UID for the project user to ensure correct file ownership"""
        return self._lookup_id("-u", "UID")

    def _get_gid(self) -> str:
        """Get GID for the project user"""
        return self._lookup_id("-g", "GID")

    def _is_running(self) -> bool:
        """Check if the container is currently running"""
        result = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Running}}", self.container_name],
            capture_output=True,
            text=True
        )
        return result.returncode == 0 and result.stdout.strip().lower() == "true"

    def _update_activity(self):
        """Update last activity timestamp for idle tracking"""
        _container_activity[self.container_name] = datetime.now()

    def ensure_running(self) -> bool:
        """
        Start container if not running (Lazy Loading).

        Returns:
            True if the container was started by this call, False if it was
            already running.

        Raises:
            RuntimeError: if ``docker run`` exits with a non-zero status.
        """
        if self._is_running():
            self._update_activity()
            return False  # Already running

        logger.info(f"Starting container {self.container_name} for {self.project}")

        # Remove if exists but stopped; failure (no such container) is fine.
        subprocess.run(
            ["docker", "rm", "-f", self.container_name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )

        # Build run command
        cmd = [
            "docker", "run", "-d",
            "--name", self.container_name,
            "--user", f"{self._uid}:{self._gid}",
            "-e", "HOME=/workspace",
            "-e", "npm_config_cache=/workspace/.npm",
            # Use user-specific temp dir to avoid /tmp collisions
            "-e", "TMPDIR=/workspace/.tmp",
            "-e", "TEMP=/workspace/.tmp",
            "-e", "TMP=/workspace/.tmp",
            "-v", f"{self.host_path}:/workspace",
            "-w", "/workspace",
            "--network", "host",  # Allow access to local services
            "--restart", "unless-stopped",
            # Resource limits
            "--memory", "2g",
            "--cpus", "2",
            # Labels for management
            "--label", "luzia.project=" + self.project,
            "--label", "luzia.created=" + datetime.now().isoformat(),
        ]

        # Add extra mounts (e.g., /opt/dss for DSS project)
        for mount in self.extra_mounts:
            cmd.extend(["-v", mount])

        cmd.extend([self.image, "tail", "-f", "/dev/null"])  # Keep alive

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            logger.error(f"Failed to start container: {result.stderr}")
            raise RuntimeError(f"Failed to start container: {result.stderr}")

        # Give it a moment to stabilize
        time.sleep(0.5)

        # Ensure user-specific temp directory exists inside container.
        # Best effort: a failure is logged but does not abort the start.
        mkdir = subprocess.run(
            ["docker", "exec", self.container_name, "mkdir", "-p", "/workspace/.tmp"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True
        )
        if mkdir.returncode != 0:
            logger.warning(f"Could not create /workspace/.tmp: {mkdir.stderr}")

        self._update_activity()
        return True

    def execute(self, command: str, timeout: Optional[int] = None) -> Dict[str, Any]:
        """
        Run a bash command inside the container.

        Args:
            command: Passed verbatim to ``bash -c``; callers are responsible
                for quoting anything they interpolate into it.
            timeout: Seconds to wait; None (or 0) uses ``timeout_seconds``.

        Returns dict with:
        - success: bool
        - output: str (stdout)
        - error: str (stderr if any)
        - exit_code: int (-1 when the command timed out)
        """
        self.ensure_running()

        cmd = ["docker", "exec", self.container_name, "bash", "-c", command]
        timeout = timeout or self.timeout_seconds

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout
            )
        except subprocess.TimeoutExpired:
            # NOTE(review): the timeout kills the local `docker exec` client;
            # the process inside the container may keep running - confirm.
            self._update_activity()
            return {
                "success": False,
                "output": "",
                "error": f"Command timed out after {timeout}s",
                "exit_code": -1
            }

        self._update_activity()
        return {
            "success": result.returncode == 0,
            "output": result.stdout,
            "error": result.stderr,
            "exit_code": result.returncode
        }

    def write_file(self, path: str, content: str) -> Dict[str, Any]:
        """
        Write file inside container using 'tee'.
        File is owned by the container user (project user).

        Args:
            path: Relative path from /workspace (project home)
            content: File content to write

        Returns:
            On success: ``success``, ``message`` and ``bytes_written``.
            On failure or timeout: ``success`` (False) and ``message``.
        """
        self.ensure_running()

        # Ensure parent directory exists; quote it so spaces or quotes in the
        # path cannot alter the shell command.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            made = self.execute(f"mkdir -p -- {shlex.quote(parent_dir)}")
            if not made["success"]:
                return {
                    "success": False,
                    "message": f"Failed to write file: {made['error']}"
                }

        payload = content.encode('utf-8')
        # "--" keeps a path that starts with "-" from being read as an option.
        cmd = ["docker", "exec", "-i", self.container_name, "tee", "--", path]

        try:
            # tee echoes its input on stdout; discard it instead of buffering
            # a second copy of the whole file.
            result = subprocess.run(
                cmd,
                input=payload,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                timeout=30
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "message": "Write operation timed out"
            }

        self._update_activity()

        if result.returncode != 0:
            detail = result.stderr.decode(errors="replace")
            return {
                "success": False,
                "message": f"Failed to write file: {detail}"
            }
        return {
            "success": True,
            "message": f"Successfully wrote to {path}",
            "bytes_written": len(payload)
        }

    def read_file(self, path: str) -> Dict[str, Any]:
        """
        Read file from container.

        Returns ``success`` plus ``content`` on success, or ``success``
        (False) plus ``error`` when ``cat`` fails.
        """
        result = self.execute(f"cat -- {shlex.quote(path)}")
        if result["success"]:
            return {
                "success": True,
                "content": result["output"]
            }
        return {
            "success": False,
            "error": result["error"] or "File not found or not readable"
        }

    def list_files(self, path: str = ".", pattern: str = "*") -> Dict[str, Any]:
        """
        List up to 100 regular files under ``path`` whose name matches
        the ``find -name`` glob ``pattern``.

        The pipeline's exit status is that of ``head`` and find's stderr is
        discarded, so a missing path yields an empty list, not an error.
        """
        result = self.execute(
            f"find {shlex.quote(path)} -name {shlex.quote(pattern)} "
            "-type f 2>/dev/null | head -n 100"
        )
        if result["success"]:
            files = [name for name in result["output"].split("\n") if name]
            return {"success": True, "files": files}
        return {"success": False, "error": result["error"]}

    def grep(self, pattern: str, path: str = ".") -> Dict[str, Any]:
        """
        Search for pattern in files (recursive, with line numbers).

        Returns:
            ``success``, ``matches`` (at most 50 output lines, as one string)
            and ``truncated`` (True when more than 50 lines matched). When the
            command could not be run, ``success`` is False and ``error`` is
            included.
        """
        limit = 50
        # Ask for one line more than we return, so "exactly 50 matches" can be
        # told apart from "more than 50". "-e" protects patterns that begin
        # with "-"; shlex.quote protects against quotes in either argument.
        result = self.execute(
            f"grep -rn -e {shlex.quote(pattern)} -- {shlex.quote(path)} "
            f"2>/dev/null | head -n {limit + 1}"
        )
        if not result["success"]:
            return {
                "success": False,
                "matches": "",
                "truncated": False,
                "error": result["error"]
            }

        output = result["output"]
        lines = output.split("\n")
        if lines and lines[-1] == "":
            lines.pop()  # artifact of the trailing newline, not a match

        truncated = len(lines) > limit
        if truncated:
            output = "".join(line + "\n" for line in lines[:limit])
        return {
            "success": True,
            "matches": output,
            "truncated": truncated
        }

    def stop(self):
        """Stop the container"""
        logger.info(f"Stopping container {self.container_name}")
        subprocess.run(["docker", "stop", self.container_name], capture_output=True)
        _container_activity.pop(self.container_name, None)

    def remove(self):
        """Stop and remove the container"""
        logger.info(f"Removing container {self.container_name}")
        subprocess.run(["docker", "rm", "-f", self.container_name], capture_output=True)
        _container_activity.pop(self.container_name, None)

    def status(self) -> Dict[str, Any]:
        """
        Get container status.

        Returns ``{"running": False}`` (plus ``error`` if inspect failed) when
        the container is not running; otherwise its short id, start time,
        user, image and last recorded activity (now, if none is recorded).
        """
        if not self._is_running():
            return {"running": False}

        # Get container info
        result = subprocess.run(
            ["docker", "inspect", self.container_name],
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            return {"running": False, "error": result.stderr}

        info = json.loads(result.stdout)[0]

        return {
            "running": True,
            "container_id": info["Id"][:12],
            "started_at": info["State"]["StartedAt"],
            "user": f"{self._uid}:{self._gid}",
            "image": self.image,
            "last_activity": _container_activity.get(
                self.container_name,
                datetime.now()
            ).isoformat()
        }
|
||||
|
||||
|
||||
def _parse_docker_timestamp(raw: str) -> datetime:
    """
    Convert a Docker UTC timestamp (e.g. ``2024-01-15T10:30:00.123456789Z``)
    into a naive local-time datetime comparable with ``datetime.now()``.

    Raises ValueError if the text is not in that shape.
    """
    text = raw.strip()
    for suffix in ("Z", "+00:00"):
        if text.endswith(suffix):
            text = text[:-len(suffix)]
            break

    seconds_part, _, fraction = text.partition(".")
    moment = datetime.strptime(seconds_part, "%Y-%m-%dT%H:%M:%S")
    if fraction:
        if not fraction.isdigit():
            raise ValueError(f"Unsupported timestamp: {raw!r}")
        # The fraction can have any number of digits (up to nanoseconds);
        # normalise it to exactly six for microseconds.
        moment = moment.replace(microsecond=int(fraction[:6].ljust(6, "0")))

    # The value is UTC; shift it to local time and drop tzinfo so it can be
    # subtracted from the naive local timestamps stored in the registry.
    return moment.replace(tzinfo=timezone.utc).astimezone().replace(tzinfo=None)


def cleanup_idle_containers(timeout_minutes: int = IDLE_TIMEOUT_MINUTES):
    """
    Stop containers that have been idle for too long.

    A container's idle time is measured from its entry in the activity
    registry; when it has none, its Docker start time is used (and cached in
    the registry). Containers whose start time cannot be determined are
    skipped.

    Args:
        timeout_minutes: Idle minutes after which a container is stopped.
    """
    now = datetime.now()
    timeout = timedelta(minutes=timeout_minutes)

    # Get all luzia containers
    result = subprocess.run(
        ["docker", "ps", "--filter", "name=luzia-", "--format", "{{.Names}}"],
        capture_output=True,
        text=True
    )

    if result.returncode != 0:
        return

    names = (line.strip() for line in result.stdout.split("\n"))
    for container_name in names:
        # Docker's name filter is a substring match; only act on containers
        # that really carry the "luzia-" prefix.
        if not container_name.startswith("luzia-"):
            continue

        last_activity = _container_activity.get(container_name)

        if last_activity is None:
            # No activity tracked, check container start time
            inspect = subprocess.run(
                ["docker", "inspect", "-f", "{{.State.StartedAt}}", container_name],
                capture_output=True,
                text=True
            )
            if inspect.returncode != 0:
                continue
            try:
                last_activity = _parse_docker_timestamp(inspect.stdout)
            except (ValueError, OverflowError, OSError):
                logger.warning(
                    f"Could not parse start time for {container_name}: "
                    f"{inspect.stdout.strip()!r}"
                )
                continue
            _container_activity[container_name] = last_activity

        if (now - last_activity) > timeout:
            logger.info(f"Stopping idle container: {container_name}")
            subprocess.run(["docker", "stop", container_name], capture_output=True)
            _container_activity.pop(container_name, None)
|
||||
|
||||
|
||||
def list_project_containers() -> list:
    """
    List all luzia project containers, running or stopped.

    Returns:
        A list of dicts with ``name``, ``status`` and ``created`` (the string
        "unknown" when Docker did not report it). The list is empty when the
        docker command cannot be run or fails.
    """
    try:
        result = subprocess.run(
            ["docker", "ps", "-a", "--filter", "name=luzia-",
             "--format", "{{.Names}}\t{{.Status}}\t{{.CreatedAt}}"],
            capture_output=True,
            text=True
        )
    except OSError:
        # docker binary missing or not executable: same as any other failure.
        return []

    if result.returncode != 0:
        return []

    containers = []
    for line in result.stdout.strip().split("\n"):
        if not line:
            continue
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        # Docker's name filter is a substring match; keep only containers
        # that actually start with the "luzia-" prefix.
        if not parts[0].startswith("luzia-"):
            continue
        containers.append({
            "name": parts[0],
            "status": parts[1],
            "created": parts[2] if len(parts) > 2 else "unknown"
        })

    return containers
|
||||
Reference in New Issue
Block a user