Based on claude-code-tools TmuxCLIController, this refactor:
- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
361 lines
11 KiB
Python
361 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Per-User Queue Manager - Ensures only one task per user at a time
|
|
|
|
Implements:
|
|
- Per-user queue isolation using file-based locks
|
|
- Atomic locking mechanism to prevent concurrent task execution
|
|
- Fair scheduling across users while maintaining user isolation
|
|
- Queue monitoring and status reporting per user
|
|
|
|
Features:
|
|
1. File-based per-user locks at /var/lib/luzia/locks/user_{username}.lock
|
|
2. Task serialization per user - only one running task at a time
|
|
3. Atomic lock acquire/release with timeout handling
|
|
4. Fallback to lock file cleanup on stale locks
|
|
5. Integration with existing QueueController capacity tracking
|
|
"""
|
|
|
|
import fcntl
|
|
import json
|
|
import os
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple, Any
|
|
import logging
|
|
|
|
# Module-level logger, named after this module so handlers and levels can be
# configured per module by the application.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PerUserQueueManager:
    """Manage per-user task execution locks backed by files.

    Every user maps to two files under ``LOCK_BASE``:

    * ``user_<name>.lock`` - created with ``O_CREAT | O_EXCL``. Its existence
      is the lock; it holds a JSON record written by the holder.
    * ``user_<name>.json`` - the same record plus ``lock_file`` and
      ``expires_at``, written afterwards for monitoring.

    The lock file is the source of truth for staleness. The metadata file is
    only trusted when its ``lock_id`` matches the lock file's, because it is
    written after the lock is taken and may be missing or left over from a
    previous holder.
    """

    LOCK_BASE = Path("/var/lib/luzia/locks")
    LOCK_TIMEOUT_SECONDS = 3600  # 1 hour timeout for stale locks
    ACQUIRE_POLL_SECONDS = 0.5  # Delay between acquisition attempts

    def __init__(self, lock_base: Optional[Path] = None):
        """Initialize per-user queue manager.

        Args:
            lock_base: Optional directory for lock files. When omitted the
                class-level ``LOCK_BASE`` is used.
        """
        if lock_base is not None:
            self.LOCK_BASE = Path(lock_base)
        self.LOCK_BASE.mkdir(parents=True, exist_ok=True)

    def _get_lock_path(self, user: str) -> Path:
        """Get lock file path for a user.

        Characters other than alphanumerics, ``_`` and ``-`` are dropped to
        prevent path traversal. A name that ends up empty or longer than 32
        characters maps to the shared ``default`` lock.
        """
        safe_user = "".join(c for c in user if c.isalnum() or c in "_-")
        if not safe_user or len(safe_user) > 32:
            safe_user = "default"
        return self.LOCK_BASE / f"user_{safe_user}.lock"

    def _get_lock_meta_path(self, user: str) -> Path:
        """Get lock metadata file path for a user (same stem, ``.json``)."""
        return self._get_lock_path(user).with_suffix(".json")

    @staticmethod
    def _read_record(path: Path) -> Optional[Dict[str, Any]]:
        """Return the JSON object stored at ``path``, or None if unusable."""
        try:
            data = json.loads(path.read_text())
        except (OSError, ValueError):
            # Missing, unreadable, empty (mid-write) or corrupt file.
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _unlink_if_present(path: Path) -> None:
        """Remove ``path``, ignoring the case where it is already gone."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def _remove_lock_files(self, lock_path: Path, meta_path: Path) -> None:
        """Remove the metadata file, then the lock file.

        The metadata goes first: once the lock file is gone another process
        may acquire the lock and write fresh metadata, which must not be
        deleted by this call.
        """
        self._unlink_if_present(meta_path)
        self._unlink_if_present(lock_path)

    def _record_expiry(self, record: Dict[str, Any]) -> Optional[datetime]:
        """Return the expiry time described by a lock record, if any.

        Uses ``expires_at`` when present, otherwise ``acquired_at`` plus
        ``LOCK_TIMEOUT_SECONDS``. Returns None when neither is usable.
        """
        try:
            if record.get("expires_at"):
                return datetime.fromisoformat(record["expires_at"])
            if record.get("acquired_at"):
                acquired = datetime.fromisoformat(record["acquired_at"])
                return acquired + timedelta(seconds=self.LOCK_TIMEOUT_SECONDS)
        except (TypeError, ValueError):
            return None
        return None

    def _files_are_stale(self, lock_path: Path, meta_path: Path) -> bool:
        """Decide whether the lock represented by the two files has expired."""
        try:
            lock_mtime = lock_path.stat().st_mtime
        except FileNotFoundError:
            return True

        lock_record = self._read_record(lock_path)
        meta_record = self._read_record(meta_path)

        expiry = None
        # Only trust metadata that belongs to the current holder.
        if (
            lock_record is not None
            and meta_record is not None
            and meta_record.get("lock_id") == lock_record.get("lock_id")
        ):
            expiry = self._record_expiry(meta_record)
        if expiry is None and lock_record is not None:
            expiry = self._record_expiry(lock_record)
        if expiry is None:
            # The holder may not have written its record yet (the file is
            # created before it is filled), so age the lock by its mtime
            # instead of declaring it stale immediately.
            expiry = datetime.fromtimestamp(lock_mtime) + timedelta(
                seconds=self.LOCK_TIMEOUT_SECONDS
            )
        return datetime.now(expiry.tzinfo) > expiry

    def _write_lock_records(
        self,
        fd: int,
        lock_path: Path,
        lock_meta_path: Path,
        user: str,
        task_id: str,
        lock_id: str,
    ) -> None:
        """Write the holder record to the open lock fd and the metadata file.

        The descriptor is always closed, even when writing fails.
        """
        now = datetime.now()
        record = {
            "lock_id": lock_id,
            "user": user,
            "task_id": task_id,
            "acquired_at": now.isoformat(),
            "acquired_by_pid": os.getpid(),
        }
        try:
            os.write(fd, json.dumps(record).encode())
        finally:
            os.close(fd)

        # Also write metadata file for monitoring
        record["lock_file"] = str(lock_path)
        record["expires_at"] = (
            now + timedelta(seconds=self.LOCK_TIMEOUT_SECONDS)
        ).isoformat()
        lock_meta_path.write_text(json.dumps(record, indent=2))

    def acquire_lock(self, user: str, task_id: str, timeout: int = 30) -> Tuple[bool, Optional[str]]:
        """
        Acquire exclusive lock for a user's task execution.

        Args:
            user: Username
            task_id: Task ID for this execution
            timeout: Lock acquisition timeout in seconds

        Returns:
            Tuple of (acquired: bool, lock_id: str or None)
            lock_id identifies this acquisition and must be passed to
            release_lock. (False, None) is returned on timeout or error.
        """
        lock_path = self._get_lock_path(user)
        lock_meta_path = self._get_lock_meta_path(user)

        # Check for stale locks first
        self._cleanup_stale_locks(user)

        lock_id = f"{task_id}_{int(time.time())}"
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + timeout

        while True:
            try:
                # O_EXCL | O_CREAT makes creation atomic: exactly one caller
                # succeeds while the file exists.
                fd = os.open(
                    str(lock_path),
                    os.O_CREAT | os.O_EXCL | os.O_WRONLY,
                    0o644,
                )
            except FileExistsError:
                if self._is_lock_stale(user):
                    # NOTE(review): two waiters can both observe the same
                    # stale lock, and the slower one may remove a lock the
                    # faster one has just re-created. Closing that window
                    # needs a different primitive (e.g. flock).
                    try:
                        self._remove_lock_files(lock_path, lock_meta_path)
                    except OSError as e:
                        logger.error("Error acquiring lock for user %s: %s", user, e)
                        return False, None
                    continue

                # Lock is active
                if time.monotonic() >= deadline:
                    logger.warning(
                        "Failed to acquire lock for user %s within %ss. "
                        "Another task may be running.",
                        user,
                        timeout,
                    )
                    return False, None

                # Wait and retry
                time.sleep(self.ACQUIRE_POLL_SECONDS)
                continue
            except Exception as e:
                logger.error("Error acquiring lock for user %s: %s", user, e)
                return False, None

            try:
                self._write_lock_records(
                    fd, lock_path, lock_meta_path, user, task_id, lock_id
                )
            except Exception as e:
                # Roll back so a failed acquisition does not leave the user
                # locked until the timeout expires.
                try:
                    self._remove_lock_files(lock_path, lock_meta_path)
                except OSError:
                    pass
                logger.error("Error acquiring lock for user %s: %s", user, e)
                return False, None

            logger.info(
                "Acquired lock for user %s, task %s, lock_id %s",
                user,
                task_id,
                lock_id,
            )
            return True, lock_id

    def release_lock(self, user: str, lock_id: str) -> bool:
        """
        Release lock for a user.

        Args:
            user: Username
            lock_id: Lock ID from acquire_lock

        Returns:
            True if the lock files were removed or no lock file existed;
            False if the lock belongs to a different lock_id or an error
            occurred (including an unreadable lock file).
        """
        lock_path = self._get_lock_path(user)
        lock_meta_path = self._get_lock_meta_path(user)

        try:
            try:
                raw = lock_path.read_text()
            except FileNotFoundError:
                raw = None

            # Verify lock_id matches before releasing
            if raw is not None:
                meta = json.loads(raw)
                if meta.get("lock_id") != lock_id:
                    logger.warning(
                        "Lock ID mismatch for user %s. Expected %s, got %s",
                        user,
                        lock_id,
                        meta.get("lock_id"),
                    )
                    return False

            self._remove_lock_files(lock_path, lock_meta_path)

            logger.info("Released lock for user %s, lock_id %s", user, lock_id)
            return True

        except Exception as e:
            logger.error("Error releasing lock for user %s: %s", user, e)
            return False

    def is_user_locked(self, user: str) -> bool:
        """
        Check if a user has an active lock (non-stale).

        Args:
            user: Username

        Returns:
            True if user has an active lock
        """
        # A missing lock file is reported as stale, so one check covers both.
        return not self._is_lock_stale(user)

    def get_lock_info(self, user: str) -> Optional[Dict[str, Any]]:
        """
        Get the contents of a user's lock metadata file.

        Args:
            user: Username

        Returns:
            Lock metadata dict, or None if the file is missing or unreadable
        """
        lock_meta_path = self._get_lock_meta_path(user)

        try:
            return json.loads(lock_meta_path.read_text())
        except FileNotFoundError:
            return None
        except Exception as e:
            logger.error("Error reading lock info for user %s: %s", user, e)
            return None

    def get_all_locks(self) -> List[Dict[str, Any]]:
        """
        Get information about all active locks.

        Returns:
            List of lock metadata dicts whose lock is not stale
        """
        locks = []

        for meta_file in sorted(self.LOCK_BASE.glob("user_*.json")):
            try:
                meta = json.loads(meta_file.read_text())
                # Pair files by name rather than by the "user" field, which
                # may be absent from the metadata.
                if not self._files_are_stale(meta_file.with_suffix(".lock"), meta_file):
                    locks.append(meta)
            except Exception as e:
                logger.error("Error reading lock file %s: %s", meta_file, e)

        return locks

    def _is_lock_stale(self, user: str) -> bool:
        """
        Check if a user's lock has expired.

        Args:
            user: Username

        Returns:
            True if the lock file is missing or the lock has expired.
            Expiry comes from the metadata file when it belongs to the
            current holder, otherwise from the lock file's own record,
            otherwise from the lock file's modification time.
        """
        return self._files_are_stale(
            self._get_lock_path(user), self._get_lock_meta_path(user)
        )

    def _cleanup_stale_locks(self, user: str) -> None:
        """
        Clean up stale locks for a user.

        Args:
            user: Username
        """
        lock_path = self._get_lock_path(user)
        lock_meta_path = self._get_lock_meta_path(user)

        if not lock_path.exists():
            return

        if self._is_lock_stale(user):
            try:
                self._remove_lock_files(lock_path, lock_meta_path)
                logger.info("Cleaned up stale lock for user %s", user)
            except Exception as e:
                logger.error("Error cleaning up stale lock for user %s: %s", user, e)

    def cleanup_all_stale_locks(self) -> int:
        """
        Clean up all stale locks.

        Both metadata files and lock files are scanned, so an orphan of
        either kind is removed.

        Returns:
            Count of locks cleaned up
        """
        count = 0

        stems = sorted(
            {
                entry.stem
                for pattern in ("user_*.json", "user_*.lock")
                for entry in self.LOCK_BASE.glob(pattern)
            }
        )

        for stem in stems:
            lock_path = self.LOCK_BASE / f"{stem}.lock"
            meta_file = self.LOCK_BASE / f"{stem}.json"
            try:
                if not self._files_are_stale(lock_path, meta_file):
                    continue

                # Read the user name for the log line before deleting.
                meta = self._read_record(meta_file) or {}
                username = meta.get("user") or stem[len("user_"):]

                self._remove_lock_files(lock_path, meta_file)

                logger.info("Cleaned up stale lock for user %s", username)
                count += 1

            except Exception as e:
                logger.error("Error processing lock file %s: %s", meta_file, e)

        return count

    def wait_for_lock_release(
        self, user: str, max_wait_seconds: int = 300, poll_interval: float = 1
    ) -> bool:
        """
        Wait for a user's lock to be released.

        Args:
            user: Username
            max_wait_seconds: Maximum time to wait (default 5 minutes)
            poll_interval: Seconds to sleep between checks (default 1)

        Returns:
            True if lock was released, False if timeout
        """
        deadline = time.monotonic() + max_wait_seconds

        while True:
            if not self.is_user_locked(user):
                return True

            if time.monotonic() >= deadline:
                logger.warning("Timeout waiting for lock release for user %s", user)
                return False

            time.sleep(poll_interval)
|
|
|
|
|
|
# Module exports
|
|
# Public API of this module: only the manager class is exported.
__all__ = ["PerUserQueueManager"]
|