# Commit note (kept for reference; was pasted as bare prose above the shebang,
# which made the file unparseable):
#
# Based on claude-code-tools TmuxCLIController, this refactor:
# - Added DockerTmuxController class for robust tmux session management
# - Implements send_keys() with configurable delay_enter
# - Implements capture_pane() for output retrieval
# - Implements wait_for_prompt() for pattern-based completion detection
# - Implements wait_for_idle() for content-hash-based idle detection
# - Implements wait_for_shell_prompt() for shell prompt detection
# Also includes workflow improvements:
# - Pre-task git snapshot before agent execution
# - Post-task commit protocol in agent guidelines
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
#
# NOTE(review): the shebang on the next line is only effective when it is the
# first line of the file — confirm whether this script is executed directly.
#!/usr/bin/env python3
|
|
"""
|
|
Luzia QA Improvements - Preflight Validation for Task Dispatch
|
|
|
|
This module implements 5 QA improvements to reduce job failure rate from 25% to <5%:
|
|
1. TimeoutValidator - Detect operations needing >5 min
|
|
2. PrivilegeChecker - Detect sudo/privileged commands in restricted containers
|
|
3. ServiceHealthChecker - Pre-validate service dependencies
|
|
4. ContainerCapabilityChecker - Verify container requirements
|
|
5. DurationLearner - Adapt timeouts from historical data
|
|
|
|
Usage:
|
|
from qa_improvements import run_preflight_checks
|
|
|
|
approved, report = run_preflight_checks({
|
|
'id': 'task-123',
|
|
'title': 'Start DSS API',
|
|
'description': 'Start the DSS API service on port 5000'
|
|
})
|
|
|
|
if not approved:
|
|
print(f"Task blocked: {report['errors']}")
|
|
"""
|
|
|
|
import re
|
|
import os
|
|
import sys
|
|
import json
|
|
import sqlite3
|
|
import subprocess
|
|
import time
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, List, Tuple, Any, Optional
|
|
from datetime import datetime
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger('qa_improvements')
|
|
logger.setLevel(logging.INFO)
|
|
|
|
# Default paths
|
|
TASK_QUEUE_DB = Path('/opt/server-agents/state/task_queue.db')
|
|
CONFIG_PATH = Path('/opt/server-agents/orchestrator/config.json')
|
|
|
|
|
|
class TimeoutValidator:
    """
    IMPROVEMENT 1: Timeout Validation

    Pre-analyze a task's title/description for keywords that historically
    correlate with long-running work, so the dispatcher can raise the
    timeout instead of killing the task at the 300s default.
    """

    def __init__(self):
        # Regex pattern -> minimum timeout in seconds. Patterns are matched
        # against the lowercased title+description text.
        self.timeout_rules = {
            r'start.*service': 600,  # 10 min
            r'restart.*service': 600,
            r'restart': 600,
            r'reload.*nginx': 300,  # 5 min (graceful)
            r'healthcheck|health.*check': 180,  # 3 min
            r'api.*create|create.*repo': 300,  # 5 min
            r'database|db|postgres': 300,
            r'wait.*for|wait.*until': 600,  # 10 min
            r'npm\s+install': 300,  # 5 min for npm install
            r'npm\s+run\s+build': 600,  # 10 min for builds
            r'docker\s+build': 900,  # 15 min for docker builds
            r'migration|migrate': 600,  # 10 min for migrations
            r'backup|restore': 600,  # 10 min for backup ops
            r'sync|synchronize': 600,  # 10 min for sync ops
            r'clone|git\s+clone': 300,  # 5 min for clones
            r'test|tests|npm\s+test': 600,  # 10 min for test runs
        }
        self.default_timeout = 300
        self.critical_threshold = 250  # warn if remaining < 50s

    def validate_timeout(self, task_title: str, task_description: str = '') -> Dict[str, Any]:
        """
        Analyze a task for timeout requirements.

        Args:
            task_title: Short task title.
            task_description: Optional longer description.

        Returns:
            {
                'recommended_timeout': int,
                'matched_patterns': list of (pattern, timeout) tuples,
                'category': 'short'|'long'|'async',
                'warning': str or None,
                'confidence': 'high'|'medium'|'low',
                'action': 'warn'|'allow'
            }
        """
        haystack = f"{task_title} {task_description}".lower()

        # Collect every rule that fires; the recommendation is the largest
        # matched timeout (never below the default).
        hits = [(pattern, seconds)
                for pattern, seconds in self.timeout_rules.items()
                if re.search(pattern, haystack)]
        recommended = max([self.default_timeout] + [seconds for _, seconds in hits])

        if recommended > self.default_timeout:
            # <=10 min is merely "long"; beyond that the task should probably
            # run asynchronously.
            category = 'long' if recommended <= 600 else 'async'
            warning = (f"Task likely requires {recommended}s but default "
                       f"timeout is {self.default_timeout}s")
        else:
            category, warning = 'short', None

        # Multiple independent matches give a stronger signal than one.
        if len(hits) > 1:
            confidence = 'high'
        elif hits:
            confidence = 'medium'
        else:
            confidence = 'low'

        return {
            'recommended_timeout': recommended,
            'matched_patterns': hits,
            'category': category,
            'warning': warning,
            'confidence': confidence,
            'action': 'warn' if warning else 'allow',
        }
|
|
|
|
|
|
class PrivilegeChecker:
    """
    IMPROVEMENT 2: Privilege Checker

    Detect privileged commands (sudo, systemctl, etc.) that would fail
    in containers running with the no-new-privileges security option.
    """

    def __init__(self):
        # (regex, human-readable description) pairs for commands that need
        # elevated privileges.
        self.privileged_patterns = [
            (r'\bsudo\b', 'sudo command'),
            (r'systemctl\s+(?:start|stop|restart|enable|disable)', 'systemctl control'),
            (r'apt-get\s+install', 'apt-get install'),
            (r'apt\s+install', 'apt install'),
            (r'yum\s+install', 'yum install'),
            (r'dnf\s+install', 'dnf install'),
            (r'\bchown\b', 'chown'),
            (r'chmod\s+[0-7]{3,4}', 'chmod with octal'),
            (r'setfacl', 'setfacl'),
            (r'usermod|useradd|userdel', 'user modification'),
            (r'mount\s+', 'mount command'),
            (r'iptables|ip6tables', 'iptables'),
            (r'setcap|getcap', 'capability manipulation'),
        ]

        # Suggested non-privileged replacements, keyed by a substring of the
        # pattern description.
        self.alternatives = {
            'sudo': 'Remove sudo - container runs with user permissions',
            'systemctl': 'Use service scripts or direct process management',
            'apt-get install': 'Install dependencies in Dockerfile or use pre-built image',
            'apt install': 'Install dependencies in Dockerfile or use pre-built image',
            'chown': 'Files are already owned by container user',
            'chmod': 'Set permissions during build or via container entrypoint',
        }

    def check_container_capabilities(self) -> Dict[str, Any]:
        """Inspect the runtime for privilege restrictions (best-effort)."""
        caps = {
            'no_new_privileges': False,
            'can_sudo': True,
            'can_setuid': True,
            'detected_from': 'default',
        }

        # Kernel-level flag: NoNewPrivs=1 means privilege escalation is
        # impossible for this process tree.
        try:
            status_text = Path('/proc/self/status').read_text()
            if 'NoNewPrivs:\t1' in status_text:
                caps.update(
                    no_new_privileges=True,
                    can_sudo=False,
                    can_setuid=False,
                    detected_from='/proc/self/status',
                )
        except Exception:
            pass

        # Fallback: look for the option in the Docker daemon config, if the
        # file is visible from here.
        try:
            probe = subprocess.run(
                ['grep', '-l', 'no-new-privileges', '/etc/docker/daemon.json'],
                capture_output=True, text=True, timeout=5
            )
            if probe.returncode == 0:
                caps.update(
                    no_new_privileges=True,
                    can_sudo=False,
                    detected_from='/etc/docker/daemon.json',
                )
        except Exception:
            pass

        return caps

    def check_privilege_requirements(self, task_title: str, task_description: str = '') -> Dict[str, Any]:
        """
        Analyze a task for privilege requirements.

        Returns:
            {
                'needs_privileges': bool,
                'problematic_commands': list of (pattern, description),
                'container_can_sudo': bool,
                'action': 'allow'|'block',
                'suggestion': str or None
            }
        """
        haystack = f"{task_title} {task_description}".lower()
        caps = self.check_container_capabilities()

        flagged = [(pattern, description)
                   for pattern, description in self.privileged_patterns
                   if re.search(pattern, haystack, re.IGNORECASE)]

        result = {
            'needs_privileges': bool(flagged),
            'problematic_commands': flagged,
            'container_can_sudo': caps['can_sudo'],
            'action': 'allow',
            'suggestion': None,
        }

        # Only block when privileged commands were found AND the container
        # cannot escalate; otherwise the defaults above already apply.
        if flagged and not caps['can_sudo']:
            tips = []
            for _pattern, description in flagged[:3]:
                key = next((k for k in self.alternatives if k in description.lower()), None)
                if key:
                    tips.append(self.alternatives[key])
                else:
                    tips.append(f"Find non-privileged alternative for {description}")
            result['action'] = 'block'
            result['suggestion'] = (
                f"Container has no-new-privileges. Alternatives: {'; '.join(tips)}"
            )

        return result
|
|
|
|
|
|
class ServiceHealthChecker:
    """
    IMPROVEMENT 3: Service Health Check

    Pre-check if target services are running and responsive before
    dispatching tasks that depend on them.
    """

    def __init__(self):
        # Service name -> health check configuration. 'type' selects the
        # probe: 'http' (GET health_path), 'tcp' (port connect),
        # 'systemd'/'command' (run cmd, success = exit 0).
        self.service_checks = {
            'dss': {'port': 5000, 'health_path': '/health', 'type': 'http'},
            'musica': {'port': 3000, 'health_path': '/health', 'type': 'http'},
            'librechat': {'port': 3200, 'health_path': '/health', 'type': 'http'},
            'overbits': {'port': 3001, 'health_path': '/health', 'type': 'http'},
            'nginx': {'cmd': 'systemctl is-active nginx', 'type': 'systemd'},
            'postgres': {'port': 5432, 'type': 'tcp'},
            'postgresql': {'port': 5432, 'type': 'tcp'},
            'redis': {'port': 6379, 'type': 'tcp'},
            'mysql': {'port': 3306, 'type': 'tcp'},
            'mongodb': {'port': 27017, 'type': 'tcp'},
            'docker': {'cmd': 'docker info', 'type': 'command'},
        }

    def _check_tcp_port(self, port: int, host: str = '127.0.0.1', timeout: float = 2.0) -> bool:
        """Return True if a TCP connection to host:port succeeds within timeout."""
        import socket
        try:
            # Context manager guarantees the socket is closed even if
            # connect_ex raises (the previous version could leak it).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(timeout)
                return sock.connect_ex((host, port)) == 0
        except Exception:
            return False

    def _check_http_health(self, port: int, path: str, host: str = '127.0.0.1',
                           timeout: float = 5.0) -> Tuple[bool, Optional[int], Optional[float]]:
        """Probe an HTTP health endpoint.

        Returns:
            (responsive, status_code, latency_ms); on any failure
            (unreachable, refused, timed out) -> (False, None, None).
        """
        try:
            import urllib.request
            url = f"http://{host}:{port}{path}"
            start = time.time()

            req = urllib.request.Request(url, method='GET')
            with urllib.request.urlopen(req, timeout=timeout) as response:
                latency = (time.time() - start) * 1000
                return True, response.status, latency
        except Exception:
            return False, None, None

    def _check_command(self, cmd: str, timeout: float = 5.0) -> bool:
        """Return True if `cmd` (whitespace-split, no shell) exits with 0."""
        try:
            result = subprocess.run(
                cmd.split(),
                capture_output=True,
                timeout=timeout
            )
            return result.returncode == 0
        except Exception:
            return False

    def check_service_health(self, service_name: str) -> Dict[str, Any]:
        """
        Check health of a specific service.

        Returns:
            {
                'service': str,
                'running': bool or None (unknown),
                'responsive': bool or None,
                'http_status': int or None,   # http checks only
                'latency_ms': float or None,  # http checks only
                'issue': str or None
            }
        """
        service_lower = service_name.lower()

        if service_lower not in self.service_checks:
            return {
                'service': service_name,
                'running': None,
                'responsive': None,
                'issue': f'Unknown service: {service_name}'
            }

        config = self.service_checks[service_lower]
        check_type = config.get('type', 'tcp')

        if check_type == 'http':
            port = config['port']
            path = config.get('health_path', '/health')
            responsive, status, latency = self._check_http_health(port, path)

            return {
                'service': service_name,
                # TCP fallback distinguishes "process up but HTTP broken"
                # from "nothing listening at all".
                'running': responsive or self._check_tcp_port(port),
                'responsive': responsive and status == 200,
                'http_status': status,
                # BUGFIX: a genuine 0.0 ms latency was previously reported
                # as None because of a truthiness check.
                'latency_ms': round(latency, 2) if latency is not None else None,
                'issue': None if (responsive and status == 200) else f"HTTP {status or 'unreachable'}"
            }

        elif check_type == 'tcp':
            port = config['port']
            running = self._check_tcp_port(port)

            return {
                'service': service_name,
                'running': running,
                'responsive': running,
                'issue': None if running else f'Port {port} not listening'
            }

        elif check_type in ('systemd', 'command'):
            cmd = config['cmd']
            running = self._check_command(cmd)

            return {
                'service': service_name,
                'running': running,
                'responsive': running,
                'issue': None if running else f'Command failed: {cmd}'
            }

        # Unknown check type: include 'responsive' so the return shape
        # matches the documented contract (it was missing before).
        return {
            'service': service_name,
            'running': None,
            'responsive': None,
            'issue': 'Unknown check type'
        }

    def validate_task_services(self, task_description: str) -> Dict[str, Dict[str, Any]]:
        """
        Extract service names mentioned in the task text and probe each one.

        Returns dict mapping service name -> health check result.
        """
        # Whole-word match against every known service name.
        service_pattern = r'\b(' + '|'.join(self.service_checks.keys()) + r')\b'
        services = re.findall(service_pattern, task_description.lower())

        checks = {}
        for service in set(services):
            checks[service] = self.check_service_health(service)

        return checks
|
|
|
|
|
|
class ContainerCapabilityChecker:
    """
    IMPROVEMENT 4: Container Capability Check

    Verify the container has the capabilities a task needs; many are
    stripped for security.
    """

    def __init__(self):
        # Requirement group -> Linux capabilities it implies.
        self.required_capabilities = {
            'sudo': ['CAP_SYS_ADMIN', 'CAP_SYS_RESOURCE', 'CAP_SETUID', 'CAP_SETGID'],
            'network_config': ['CAP_NET_ADMIN', 'CAP_NET_RAW'],
            'disk_ops': ['CAP_SYS_ADMIN', 'CAP_DAC_OVERRIDE'],
            'process_management': ['CAP_SYS_RESOURCE', 'CAP_SYS_PTRACE'],
            'file_ownership': ['CAP_CHOWN', 'CAP_FOWNER'],
        }

        # Task-text regex -> requirement group.
        self.task_to_requirement = {
            r'sudo|privilege|root': 'sudo',
            r'network|nginx|port|iptable|firewall': 'network_config',
            r'disk|mount|unmount|partition': 'disk_ops',
            r'kill|signal|ptrace|strace': 'process_management',
            r'chown|ownership': 'file_ownership',
        }

    def get_container_security_config(self) -> Dict[str, Any]:
        """Collect security-relevant facts about the current runtime (best-effort)."""
        profile = {
            'no_new_privileges': False,
            'capabilities': [],
            'read_only_root': False,
            'user': os.getuid(),
            # Docker drops this marker file at the container root.
            'in_container': Path('/.dockerenv').exists(),
        }

        try:
            status = Path('/proc/self/status').read_text()
            if 'NoNewPrivs:\t1' in status:
                profile['no_new_privileges'] = True
            # CapEff is the effective capability bitmask, hex-encoded.
            cap_match = re.search(r'CapEff:\t([0-9a-f]+)', status)
            if cap_match:
                profile['cap_effective_hex'] = cap_match.group(1)
        except Exception:
            pass

        # Detect a read-only root filesystem from the mount table.
        try:
            for mount_line in Path('/proc/mounts').read_text().splitlines():
                if ' / ' in mount_line and 'ro,' in mount_line:
                    profile['read_only_root'] = True
                    break
        except Exception:
            pass

        return profile

    def check_requirements(self, task_description: str) -> List[str]:
        """Return the requirement groups the task text appears to need (ordered, deduped)."""
        lowered = task_description.lower()
        found: List[str] = []
        for pattern, group in self.task_to_requirement.items():
            if group not in found and re.search(pattern, lowered):
                found.append(group)
        return found

    def validate_capabilities(self, task_description: str) -> Dict[str, Any]:
        """
        Validate container capabilities against task requirements.

        Returns:
            {
                'task_requirements': [str],
                'container_config': dict,
                'capability_gaps': [str],
                'action': 'allow'|'warn'|'block'
            }
        """
        cfg = self.get_container_security_config()
        needs = self.check_requirements(task_description)

        gaps = []
        if cfg['no_new_privileges'] and 'sudo' in needs:
            gaps.append('no-new-privileges blocks sudo/privilege escalation')
        if cfg['read_only_root'] and 'disk_ops' in needs:
            gaps.append('read-only root filesystem blocks disk operations')
        # Running as a non-root user restricts several operations even when
        # the capability set would allow them.
        if cfg['user'] != 0:
            if 'network_config' in needs:
                gaps.append('non-root user cannot modify network config')
            if 'file_ownership' in needs:
                gaps.append('non-root user has limited chown abilities')

        if gaps:
            verdict = 'block'
        elif needs:
            # Requirements detected but nothing provably missing.
            verdict = 'warn'
        else:
            verdict = 'allow'

        return {
            'task_requirements': needs,
            'container_config': cfg,
            'capability_gaps': gaps,
            'action': verdict,
        }
|
|
|
|
|
|
class DurationLearner:
    """
    IMPROVEMENT 5: Historical Duration Learning

    Learn from historical task durations to provide better timeout
    recommendations instead of hardcoded defaults.
    """

    def __init__(self, db_path: Optional[Path] = None):
        # Resolved lazily: referencing TASK_QUEUE_DB only when no explicit
        # path is given keeps the class importable/testable in isolation.
        self.db_path = db_path if db_path is not None else TASK_QUEUE_DB
        # BUGFIX: the cache previously shared ONE timestamp (self.cache_time)
        # across all keys, so storing any entry made every stale entry look
        # fresh again. Each entry now carries its own timestamp:
        # key -> (result, stored_at).
        self.cache: Dict[str, Tuple[Dict[str, Any], float]] = {}
        self.cache_ttl = 300  # 5 min cache
        self.cache_time = 0  # kept for backward compatibility; no longer authoritative

    def _get_connection(self) -> Optional[sqlite3.Connection]:
        """Open the task-queue DB, or return None if missing/unopenable."""
        if not self.db_path.exists():
            return None
        try:
            return sqlite3.connect(self.db_path, timeout=5)
        except Exception:
            return None

    def get_historical_duration(self, task_title: str) -> Dict[str, Any]:
        """
        Query completed tasks for average duration of similar tasks.

        Returns:
            {
                'avg_duration': float or None,   # successful runs only
                'max_duration': float or None,
                'sample_count': int,
                'success_count': int,            # present when samples exist
                'success_rate': float or None,
                'by_exit_code': list of (exit_code, avg_duration, count)
            }
            (minimal {'avg_duration': None, 'sample_count': 0} when no DB/data)
        """
        conn = self._get_connection()
        if not conn:
            return {'avg_duration': None, 'sample_count': 0}

        try:
            cursor = conn.cursor()

            # Fuzzy title match; one row per exit code.
            search_term = f"%{task_title}%"
            cursor.execute("""
                SELECT
                    AVG(CASE WHEN exit_code = 0 THEN completed_at - started_at ELSE NULL END) as avg_success_duration,
                    MAX(completed_at - started_at) as max_duration,
                    COUNT(*) as total_count,
                    SUM(CASE WHEN exit_code = 0 THEN 1 ELSE 0 END) as success_count,
                    exit_code
                FROM tasks
                WHERE title LIKE ?
                  AND completed_at IS NOT NULL
                  AND started_at IS NOT NULL
                  AND completed_at > started_at
                GROUP BY exit_code
            """, (search_term,))
            results = cursor.fetchall()
        except Exception as e:
            logger.warning(f"Error querying historical duration: {e}")
            return {'avg_duration': None, 'sample_count': 0, 'error': str(e)}
        finally:
            # Always release the connection, on success and failure alike.
            conn.close()

        if not results:
            return {'avg_duration': None, 'sample_count': 0}

        # Aggregate the per-exit-code rows.
        total_count = sum(r[2] for r in results)
        success_count = sum(r[3] for r in results)
        max_duration = max((r[1] for r in results if r[1]), default=None)

        # Average duration of the successful (exit_code == 0) group.
        avg_success = next((r[0] for r in results if r[4] == 0 and r[0]), None)

        return {
            'avg_duration': avg_success,
            'max_duration': max_duration,
            'sample_count': total_count,
            'success_count': success_count,
            'success_rate': success_count / total_count if total_count > 0 else None,
            'by_exit_code': [(r[4], r[0], r[2]) for r in results]
        }

    def recommend_timeout(self, task_title: str, task_description: str = '') -> Dict[str, Any]:
        """
        Recommend a timeout based on historical data (cached for cache_ttl).

        Returns:
            {
                'recommended': int,          # seconds, clamped to [60, 1800]
                'based_on_history': bool,
                'historical_avg': float or None,   # history-based only
                'historical_max': float or None,   # history-based only
                'sample_count': int,
                'success_rate': float or None,     # history-based only
                'confidence': 'high'|'medium'|'low'|'none'
            }
        """
        cache_key = task_title.lower()[:50]
        now = time.time()

        cached = self.cache.get(cache_key)
        if cached is not None and (now - cached[1]) < self.cache_ttl:
            return cached[0]

        history = self.get_historical_duration(task_title)

        # Need at least 2 samples before trusting the history.
        if history.get('avg_duration') and history.get('sample_count', 0) >= 2:
            avg = history['avg_duration']
            # Guard against a None max_duration as well as a missing key.
            max_dur = history.get('max_duration') or avg

            # Safety buffer: the larger of avg*1.5 and max*1.1,
            # clamped to 1 min - 30 min.
            recommended = int(max(avg * 1.5, max_dur * 1.1))
            recommended = max(60, min(recommended, 1800))

            sample_count = history['sample_count']
            if sample_count >= 10:
                confidence = 'high'
            elif sample_count >= 5:
                confidence = 'medium'
            else:
                confidence = 'low'

            result = {
                'recommended': recommended,
                'based_on_history': True,
                'historical_avg': round(avg, 1) if avg else None,
                'historical_max': round(max_dur, 1) if max_dur else None,
                'sample_count': sample_count,
                'confidence': confidence,
                'success_rate': history.get('success_rate')
            }
        else:
            # No usable history - fall back to the 5 min default.
            result = {
                'recommended': 300,
                'based_on_history': False,
                'sample_count': history.get('sample_count', 0),
                'confidence': 'none'
            }

        # Per-entry timestamp (see __init__); cache_time kept for old readers.
        self.cache[cache_key] = (result, now)
        self.cache_time = now

        return result
|
|
|
|
|
|
def run_preflight_checks(task: Dict[str, Any],
                         config: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
    """
    Run all 5 QA preflight checks before dispatching a task.

    Each check is individually wrapped in try/except so a crashing checker
    degrades to an 'error' entry in the report instead of blocking dispatch.
    Only entries appended to report['errors'] veto approval; warnings are
    informational.

    Args:
        task: Task dict with 'id', 'title', 'description' keys
        config: Optional QA config with enabled checks, timeouts, etc.
            (currently unused beyond defaulting to {})

    Returns:
        (approved: bool, report: dict) tuple

        report structure:
        {
            'task_id': str,
            'checks': {
                'timeout': {...},
                'privileges': {...},
                'services': {...},
                'capabilities': {...},
                'duration': {...}
            },
            'warnings': [str],
            'errors': [str],
            'approved': bool,
            'recommended_timeout': int,
            'timestamp': str
        }
    """
    config = config or {}

    report = {
        'task_id': task.get('id', 'unknown'),
        'checks': {},
        'warnings': [],
        'errors': [],
        'approved': False,
        'recommended_timeout': 300,  # baseline; only ever raised below
        'timestamp': datetime.now().isoformat()
    }

    title = task.get('title', '')
    description = task.get('description', '')

    # Check 1: Timeout validation (pattern-based heuristics on title+description)
    try:
        timeout_validator = TimeoutValidator()
        timeout_check = timeout_validator.validate_timeout(title, description)
        report['checks']['timeout'] = timeout_check

        if timeout_check.get('warning'):
            report['warnings'].append(f"TIMEOUT: {timeout_check['warning']}")

        # Timeouts are monotonic: take the max across all checks.
        report['recommended_timeout'] = max(
            report['recommended_timeout'],
            timeout_check.get('recommended_timeout', 300)
        )
    except Exception as e:
        logger.error(f"Timeout check failed: {e}")
        report['checks']['timeout'] = {'error': str(e)}

    # Check 2: Privilege requirements ('block' action here vetoes approval)
    try:
        priv_checker = PrivilegeChecker()
        priv_check = priv_checker.check_privilege_requirements(title, description)
        report['checks']['privileges'] = priv_check

        if priv_check['action'] == 'block':
            report['errors'].append(f"PRIVILEGE: {priv_check['suggestion']}")
        elif priv_check['action'] == 'warn' and priv_check.get('suggestion'):
            report['warnings'].append(f"PRIVILEGE: {priv_check['suggestion']}")
    except Exception as e:
        logger.error(f"Privilege check failed: {e}")
        report['checks']['privileges'] = {'error': str(e)}

    # Check 3: Service health. NOTE(review): "not running" is only a warning
    # (the task itself may be the thing that starts the service), while
    # "running but unresponsive" is treated as a blocking error.
    try:
        service_checker = ServiceHealthChecker()
        service_checks = service_checker.validate_task_services(description)
        report['checks']['services'] = service_checks

        for service, status in service_checks.items():
            if status.get('running') is False:
                report['warnings'].append(f"SERVICE: {service} is not running")
            elif status.get('running') is True and not status.get('responsive'):
                report['errors'].append(f"SERVICE: {service} is running but not responding")
    except Exception as e:
        logger.error(f"Service check failed: {e}")
        report['checks']['services'] = {'error': str(e)}

    # Check 4: Container capabilities (each detected gap becomes an error)
    try:
        cap_checker = ContainerCapabilityChecker()
        cap_check = cap_checker.validate_capabilities(description)
        report['checks']['capabilities'] = cap_check

        if cap_check['action'] == 'block':
            for gap in cap_check.get('capability_gaps', []):
                report['errors'].append(f"CAPABILITY: {gap}")
    except Exception as e:
        logger.error(f"Capability check failed: {e}")
        report['checks']['capabilities'] = {'error': str(e)}

    # Check 5: Duration learning (historical data can only RAISE the timeout,
    # and only when confidence is at least 'medium')
    try:
        learner = DurationLearner()
        duration_check = learner.recommend_timeout(title, description)
        report['checks']['duration'] = duration_check

        if duration_check.get('based_on_history'):
            if duration_check.get('confidence') in ('high', 'medium'):
                report['recommended_timeout'] = max(
                    report['recommended_timeout'],
                    duration_check['recommended']
                )

            logger.info(
                f"HISTORY: Similar tasks avg {duration_check.get('historical_avg')}s, "
                f"recommending {duration_check['recommended']}s "
                f"(confidence: {duration_check.get('confidence')})"
            )
    except Exception as e:
        logger.error(f"Duration learning failed: {e}")
        report['checks']['duration'] = {'error': str(e)}

    # Final decision: any accumulated error blocks the task.
    report['approved'] = len(report['errors']) == 0

    return report['approved'], report
|
|
|
|
|
|
def format_preflight_report(report: Dict[str, Any], verbose: bool = False) -> str:
    """Render a preflight report as human-readable multi-line text.

    Args:
        report: Report dict as produced by run_preflight_checks.
        verbose: When True, append per-check key/value details
            ('error' keys are suppressed).

    Returns:
        The formatted report as a single newline-joined string.
    """
    header = "[OK]" if report['approved'] else "[BLOCKED]"
    out = [
        f"\n=== Preflight Check {header} ===",
        f"Task: {report['task_id']}",
        f"Recommended timeout: {report['recommended_timeout']}s",
    ]

    if report['errors']:
        out.append("\nBlocking Issues:")
        out.extend(f"  [!] {item}" for item in report['errors'])

    if report['warnings']:
        out.append("\nWarnings:")
        out.extend(f"  [?] {item}" for item in report['warnings'])

    if verbose:
        out.append("\nDetailed Checks:")
        for check_name, details in report['checks'].items():
            out.append(f"  {check_name}:")
            if isinstance(details, dict):
                out.extend(f"    {key}: {value}"
                           for key, value in details.items() if key != 'error')

    return '\n'.join(out)
|
|
|
|
|
|
# CLI interface for testing
if __name__ == "__main__":
    import argparse

    # Same flags/behavior as before: run the preflight checks on a synthetic
    # task built from the CLI arguments.
    cli = argparse.ArgumentParser(description="QA Preflight Validator")
    cli.add_argument("--title", "-t", required=True, help="Task title")
    cli.add_argument("--description", "-d", default="", help="Task description")
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    opts = cli.parse_args()

    approved, report = run_preflight_checks({
        'id': 'cli-test',
        'title': opts.title,
        'description': opts.description,
    })

    # JSON output for machine consumption, text otherwise; default=str keeps
    # non-serializable values (e.g. Paths) from crashing the dump.
    if opts.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_preflight_report(report, verbose=opts.verbose))

    # Exit status mirrors the approval decision: 0 = approved, 1 = blocked.
    sys.exit(0 if approved else 1)
|