Refactor cockpit to use DockerTmuxController pattern
Based on the claude-code-tools TmuxCLIController, this refactor:
- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with a configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
458
lib/task_completion.py
Normal file
458
lib/task_completion.py
Normal file
@@ -0,0 +1,458 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Task Completion Callback - Notify queue when task completes

Called by agents when they finish to:
1. Release per-user lock
2. Update capacity counters
3. Move conductor files to completed/failed
4. Unblock project queue if it was awaiting_human

Usage:
    # From agent code:
    from task_completion import complete_task, fail_task
    from task_completion import set_awaiting_human, resume_from_human

    complete_task(task_id, result_data)
    fail_task(task_id, error_message)
    set_awaiting_human(task_id, question)
    resume_from_human(task_id, answer)

    # CLI:
    python3 task_completion.py complete <task_id> [summary]
    python3 task_completion.py fail <task_id> <error> [--exit-code N]
    python3 task_completion.py await <task_id> <question> [--project NAME]
    python3 task_completion.py resume <task_id> <answer> [--project NAME]
"""
|
||||
|
||||
import fcntl
import json
import logging
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
|
||||
|
||||
|
||||
class TaskCompletion:
    """Handle task completion callbacks.

    Every public method looks up ``ACTIVE_DIR/<task_id>``, updates the
    ``meta.json`` stored there and returns a status dict.  Failures are
    reported as ``{'success': False, 'error': ...}`` instead of being raised.
    """

    CONDUCTOR_BASE = Path.home() / "conductor"
    ACTIVE_DIR = CONDUCTOR_BASE / "active"
    COMPLETED_DIR = CONDUCTOR_BASE / "completed"
    FAILED_DIR = CONDUCTOR_BASE / "failed"

    QUEUE_BASE = Path("/var/lib/luzia/queue")
    LOCKS_BASE = Path("/var/lib/luzia/locks")
    CAPACITY_FILE = QUEUE_BASE / "capacity.json"

    COCKPIT_STATE_DIR = Path("/var/lib/luz-orchestrator/cockpits")

    # Best-effort steps report problems here instead of failing silently.
    _log = logging.getLogger(__name__)

    def __init__(self):
        """Initialize completion handler."""
        self._ensure_dirs()

    def _ensure_dirs(self):
        """Ensure the completed/failed destination directories exist."""
        for d in (self.COMPLETED_DIR, self.FAILED_DIR):
            d.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _is_safe_name(name) -> bool:
        """Return True if *name* is a single, non-special path component.

        Rejects '', '.', '..', anything containing a path separator and
        embedded NULs, so a name can never resolve outside (or to) the
        directory it is joined onto.
        """
        return (
            isinstance(name, str)
            and name not in ('', '.', '..')
            and '\0' not in name
            and Path(name).name == name
        )

    def _active_task_dir(self, task_id) -> Optional[Path]:
        """Return the active directory for *task_id*, or None if unusable."""
        if not self._is_safe_name(task_id):
            return None
        task_dir = self.ACTIVE_DIR / task_id
        return task_dir if task_dir.exists() else None

    @staticmethod
    def _read_meta(meta_file: Path) -> Dict:
        """Load ``meta.json``; a missing file yields an empty dict."""
        if not meta_file.exists():
            return {}
        return json.loads(meta_file.read_text())

    @staticmethod
    def _write_json(path: Path, data: Dict) -> None:
        """Write *data* to *path* as indented JSON."""
        with open(path, 'w') as f:
            json.dump(data, f, indent=2)

    def _finalize(self, task_dir: Path, meta: Dict, dest_root: Path) -> None:
        """Release the user lock, free a capacity slot and archive the task."""
        user = meta.get('user') or meta.get('enqueued_by')
        lock_id = meta.get('lock_id')
        if user and lock_id:
            self._release_lock(user, lock_id)

        self._increment_capacity()

        # An older archive with the same id is replaced by the new one.
        dest = dest_root / task_dir.name
        if dest.exists():
            shutil.rmtree(dest)
        shutil.move(str(task_dir), str(dest))

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def complete_task(
        self,
        task_id: str,
        result: Optional[Dict] = None,
        summary: Optional[str] = None
    ) -> Dict:
        """
        Mark task as completed successfully.

        Args:
            task_id: The task ID (a single directory name under ACTIVE_DIR)
            result: Optional result data
            summary: Optional summary of what was accomplished

        Returns:
            Status dict with success flag
        """
        task_dir = self._active_task_dir(task_id)
        if task_dir is None:
            return {'success': False, 'error': f'Task {task_id} not found in active'}

        try:
            meta_file = task_dir / "meta.json"
            meta = self._read_meta(meta_file)

            meta['status'] = 'completed'
            meta['completed_at'] = datetime.now().isoformat()
            if result:
                meta['result'] = result
            if summary:
                meta['summary'] = summary

            # Duration is informational only; an unparsable timestamp must
            # not prevent the task from completing.
            if 'created_at' in meta:
                try:
                    start = datetime.fromisoformat(meta['created_at'])
                    # Use a clock with the same awareness as the stored
                    # timestamp so naive and aware values both work.
                    elapsed = datetime.now(start.tzinfo) - start
                    meta['duration_seconds'] = elapsed.total_seconds()
                except (TypeError, ValueError):
                    pass

            self._write_json(meta_file, meta)
            self._finalize(task_dir, meta, self.COMPLETED_DIR)

            return {
                'success': True,
                'task_id': task_id,
                'status': 'completed',
                'completed_at': meta['completed_at']
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def fail_task(
        self,
        task_id: str,
        error: str,
        exit_code: int = 1,
        recoverable: bool = True
    ) -> Dict:
        """
        Mark task as failed.

        Args:
            task_id: The task ID (a single directory name under ACTIVE_DIR)
            error: Error message
            exit_code: Process exit code
            recoverable: Whether task can be retried

        Returns:
            Status dict
        """
        task_dir = self._active_task_dir(task_id)
        if task_dir is None:
            return {'success': False, 'error': f'Task {task_id} not found in active'}

        try:
            meta_file = task_dir / "meta.json"
            meta = self._read_meta(meta_file)

            meta['status'] = 'failed'
            meta['failed_at'] = datetime.now().isoformat()
            meta['error'] = error
            meta['exit_code'] = exit_code
            meta['recoverable'] = recoverable

            # Keeps an existing retry_count and defaults it to 0; the count
            # is not incremented here.
            meta['retry_count'] = meta.get('retry_count', 0)

            self._write_json(meta_file, meta)
            self._finalize(task_dir, meta, self.FAILED_DIR)

            return {
                'success': True,
                'task_id': task_id,
                'status': 'failed',
                'failed_at': meta['failed_at'],
                'recoverable': recoverable
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def set_awaiting_human(
        self,
        task_id: str,
        question: str,
        project: Optional[str] = None
    ) -> Dict:
        """
        Mark task as awaiting human response.
        This blocks the project queue AND sends question to Telegram.

        Args:
            task_id: The task ID
            question: The question for the human
            project: Optional project name (for cockpit integration);
                falls back to the ``project`` stored in the task meta

        Returns:
            Status dict (``telegram_request_id`` is None when the question
            could not be handed to the Telegram bridge)
        """
        task_dir = self._active_task_dir(task_id)
        if task_dir is None:
            return {'success': False, 'error': f'Task {task_id} not found'}

        try:
            meta_file = task_dir / "meta.json"
            meta = self._read_meta(meta_file)

            meta['status'] = 'awaiting_human'
            meta['awaiting_since'] = datetime.now().isoformat()
            meta['awaiting_question'] = question
            self._write_json(meta_file, meta)

            # If project specified, also update cockpit state
            project = project or meta.get('project')
            if project:
                self._update_cockpit_awaiting(project, question)

            # Send question to Bruno via Telegram (optional integration)
            telegram_request_id = None
            try:
                from telegram_bridge import ask_bruno
                context = f"Task: {task_id}\nProject: {project or 'unknown'}"
                telegram_request_id, sent = ask_bruno(
                    question=question,
                    project=project or "luzia",
                    context=context
                )
                if sent:
                    meta['telegram_request_id'] = telegram_request_id
                    self._write_json(meta_file, meta)
            except ImportError as exc:
                # Bridge not installed: telegram is optional.
                self._log.debug("Telegram bridge unavailable: %s", exc)
            except Exception as exc:
                # Log but don't fail - telegram is optional
                self._log.warning(
                    "Telegram notification for task %s failed: %s", task_id, exc
                )

            return {
                'success': True,
                'task_id': task_id,
                'status': 'awaiting_human',
                'question': question,
                'telegram_request_id': telegram_request_id
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def resume_from_human(
        self,
        task_id: str,
        answer: str,
        project: Optional[str] = None
    ) -> Dict:
        """
        Resume task after human provides answer.

        Args:
            task_id: The task ID
            answer: Human's response
            project: Optional project name; falls back to the ``project``
                stored in the task meta

        Returns:
            Status dict
        """
        task_dir = self._active_task_dir(task_id)
        if task_dir is None:
            return {'success': False, 'error': f'Task {task_id} not found'}

        try:
            meta_file = task_dir / "meta.json"
            meta = self._read_meta(meta_file)

            meta['status'] = 'running'
            meta['resumed_at'] = datetime.now().isoformat()
            meta['human_answer'] = answer
            self._write_json(meta_file, meta)

            # Clear cockpit awaiting state
            project = project or meta.get('project')
            if project:
                self._clear_cockpit_awaiting(project)

            return {
                'success': True,
                'task_id': task_id,
                'status': 'running',
                'resumed_at': meta['resumed_at']
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    # ------------------------------------------------------------------
    # Best-effort side effects
    # ------------------------------------------------------------------

    def _release_lock(self, user: str, lock_id: str) -> bool:
        """Release a per-user lock.

        Returns True when the lock files were removed (or already absent),
        False when the recorded lock id does not match *lock_id*, the user
        name is not a safe file-name component, or an error occurred.
        """
        if not self._is_safe_name(str(user)):
            return False

        lock_file = self.LOCKS_BASE / f"user_{user}.lock"
        meta_file = self.LOCKS_BASE / f"user_{user}.json"

        try:
            # Verify lock ID matches so another task's lock is never removed
            if meta_file.exists():
                meta = json.loads(meta_file.read_text())
                if meta.get('lock_id') != lock_id:
                    return False

            # Remove lock files; one vanishing in the meantime is fine
            for path in (lock_file, meta_file):
                try:
                    path.unlink()
                except FileNotFoundError:
                    pass

            return True
        except Exception:
            self._log.warning(
                "Could not release lock for user %s", user, exc_info=True
            )
            return False

    def _increment_capacity(self) -> bool:
        """Increment available capacity slots (capped at ``slots.max``).

        Returns False when the capacity file is missing or cannot be updated.
        """
        if not self.CAPACITY_FILE.exists():
            return False

        try:
            with open(self.CAPACITY_FILE, 'r+') as f:
                fcntl.flock(f, fcntl.LOCK_EX)
                try:
                    capacity = json.load(f)
                    slots = capacity.setdefault('slots', {})
                    current = slots.get('available', 0)
                    max_slots = slots.get('max', 4)
                    slots['available'] = min(current + 1, max_slots)
                    capacity['last_updated'] = datetime.now().isoformat()

                    # Serialise before truncating so a serialisation error
                    # cannot leave an empty file behind.
                    payload = json.dumps(capacity, indent=2)
                    f.seek(0)
                    f.truncate()
                    f.write(payload)
                    # Flush while the lock is still held; otherwise another
                    # process could read the truncated file before close().
                    f.flush()
                finally:
                    fcntl.flock(f, fcntl.LOCK_UN)
            return True
        except Exception:
            self._log.warning(
                "Could not update capacity file %s", self.CAPACITY_FILE,
                exc_info=True
            )
            return False

    def _update_cockpit_awaiting(self, project: str, question: str):
        """Update cockpit state to show awaiting human (best effort)."""
        if not self._is_safe_name(project):
            return
        state_file = self.COCKPIT_STATE_DIR / f"{project}.json"

        try:
            state = {}
            if state_file.exists():
                state = json.loads(state_file.read_text())

            state['awaiting_response'] = True
            state['last_question'] = question
            state['awaiting_since'] = datetime.now().isoformat()

            self._write_json(state_file, state)
        except Exception:
            self._log.warning(
                "Could not update cockpit state %s", state_file, exc_info=True
            )

    def _clear_cockpit_awaiting(self, project: str):
        """Clear cockpit awaiting state (best effort; no-op if no state file)."""
        if not self._is_safe_name(project):
            return
        state_file = self.COCKPIT_STATE_DIR / f"{project}.json"

        try:
            if not state_file.exists():
                return

            state = json.loads(state_file.read_text())
            state['awaiting_response'] = False
            state['last_question'] = None

            self._write_json(state_file, state)
        except Exception:
            self._log.warning(
                "Could not clear cockpit state %s", state_file, exc_info=True
            )
|
||||
|
||||
|
||||
# Convenience functions for direct import.
# A single TaskCompletion instance is created on first use and then reused.
_handler = None


def _get_handler():
    """Return the module-wide TaskCompletion, creating it on the first call."""
    global _handler
    if _handler is not None:
        return _handler
    _handler = TaskCompletion()
    return _handler
|
||||
|
||||
def complete_task(task_id: str, result: Dict = None, summary: str = None) -> Dict:
    """Mark a task as completed through the shared module-level handler.

    Arguments are forwarded unchanged to ``TaskCompletion.complete_task``
    and its status dict is returned.
    """
    handler = _get_handler()
    return handler.complete_task(task_id, result=result, summary=summary)
|
||||
|
||||
def fail_task(task_id: str, error: str, exit_code: int = 1, recoverable: bool = True) -> Dict:
    """Mark a task as failed through the shared module-level handler.

    Arguments are forwarded unchanged to ``TaskCompletion.fail_task`` and
    its status dict is returned.
    """
    handler = _get_handler()
    return handler.fail_task(
        task_id,
        error,
        exit_code=exit_code,
        recoverable=recoverable,
    )
|
||||
|
||||
def set_awaiting_human(task_id: str, question: str, project: str = None) -> Dict:
    """Flag a task as waiting for a human answer via the shared handler.

    Arguments are forwarded unchanged to
    ``TaskCompletion.set_awaiting_human`` and its status dict is returned.
    """
    handler = _get_handler()
    return handler.set_awaiting_human(task_id, question, project=project)
|
||||
|
||||
def resume_from_human(task_id: str, answer: str, project: str = None) -> Dict:
    """Put a task back to running with the human's answer via the shared handler.

    Arguments are forwarded unchanged to
    ``TaskCompletion.resume_from_human`` and its status dict is returned.
    """
    handler = _get_handler()
    return handler.resume_from_human(task_id, answer, project=project)
|
||||
|
||||
|
||||
def main():
    """CLI entry point.

    Parses ``<command> <task_id> [message]`` plus ``--project`` and
    ``--exit-code``, runs the matching TaskCompletion method on a fresh
    handler and prints the resulting status dict as indented JSON.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Task Completion Callback')
    cli.add_argument(
        'command',
        choices=['complete', 'fail', 'await', 'resume'],
        help='Command to run',
    )
    cli.add_argument('task_id', help='Task ID')
    cli.add_argument(
        'message',
        nargs='?',
        default='',
        help='Result/error/question/answer',
    )
    cli.add_argument('--project', help='Project name')
    cli.add_argument('--exit-code', type=int, default=1, help='Exit code for failures')
    opts = cli.parse_args()

    tc = TaskCompletion()

    # One entry per allowed command; argparse's ``choices`` guarantees that
    # opts.command is always a key of this table.
    actions = {
        'complete': lambda: tc.complete_task(opts.task_id, summary=opts.message),
        'fail': lambda: tc.fail_task(opts.task_id, opts.message, opts.exit_code),
        'await': lambda: tc.set_awaiting_human(opts.task_id, opts.message, opts.project),
        'resume': lambda: tc.resume_from_human(opts.task_id, opts.message, opts.project),
    }
    outcome = actions[opts.command]()

    print(json.dumps(outcome, indent=2))


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user