# Refactor notes (from commit message): based on claude-code-tools
# TmuxCLIController — added DockerTmuxController for robust tmux session
# management: send_keys() with configurable delay_enter, capture_pane()
# for output retrieval, wait_for_prompt() for pattern-based completion
# detection, wait_for_idle() for content-hash-based idle detection, and
# wait_for_shell_prompt() for shell prompt detection.
# Workflow improvements: pre-task git snapshot before agent execution;
# post-task commit protocol in agent guidelines.
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
#!/usr/bin/env python3
|
|
"""
|
|
Time Metrics Module for Luzia Task Measurement & Reporting
|
|
|
|
Provides:
|
|
- UTC timestamp generation for task dispatch/completion
|
|
- Duration calculations
|
|
- System context snapshots (CPU, memory, disk)
|
|
- Timezone conversion support
|
|
- Causality tracking between tasks
|
|
- Performance baseline and anomaly detection
|
|
|
|
Uses Time MCP for accurate timezone-aware timestamps.
|
|
"""
|
|
|
|
import json
import os
import sqlite3
import subprocess
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
# Default timezone for display
# IANA zone name used by the display helpers whenever a caller does not
# pass an explicit `local_tz`.
DEFAULT_TIMEZONE = "America/Montevideo"
|
|
|
|
|
|
# =============================================================================
|
|
# TIMESTAMP FUNCTIONS
|
|
# =============================================================================
|
|
|
|
def get_utc_now() -> str:
    """Get current UTC timestamp in ISO 8601 format.

    Returns format: 2026-01-11T03:31:57Z
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # datetime instead. The formatted output is byte-identical.
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
def get_utc_now_with_offset() -> str:
    """Get current UTC timestamp with +00:00 offset.

    Returns format: 2026-01-11T03:31:57+00:00
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # datetime instead. The formatted output is byte-identical.
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
|
|
|
|
|
def parse_iso_timestamp(ts: str) -> Optional[datetime]:
    """Parse ISO 8601 timestamp to datetime object.

    Handles formats:
    - 2026-01-11T03:31:57Z        (naive result, treated as UTC)
    - 2026-01-11T03:31:57+00:00   (aware result)
    - 2026-01-11T03:31:57-03:00   (aware result)
    - 2026-01-11T03:31:57         (naive result, assumes UTC)

    Returns None for empty or unparseable input.
    """
    if not ts:
        return None

    try:
        # Remove Z suffix and treat as UTC (fromisoformat < 3.11 rejects Z).
        if ts.endswith('Z'):
            ts = ts[:-1]

        # fromisoformat handles both the offset and the bare form; the
        # original branched on offset detection but called the identical
        # function in both branches (dead branch removed).
        return datetime.fromisoformat(ts)
    except (ValueError, TypeError):
        return None
|
|
|
|
|
|
def calculate_duration_seconds(start_ts: str, end_ts: str) -> Optional[float]:
    """Return the number of seconds between two ISO timestamps, or None
    when either endpoint cannot be parsed."""
    begin = parse_iso_timestamp(start_ts)
    finish = parse_iso_timestamp(end_ts)

    if begin is None or finish is None:
        return None

    delta = finish - begin
    return delta.total_seconds()
|
|
|
|
|
|
def format_duration(seconds: Optional[float]) -> str:
    """Render a duration as zero-padded HH:MM:SS.

    Examples:
    - 3661 -> "01:01:01"
    - 45   -> "00:00:45"
    - 3700 -> "01:01:40"

    Unknown or negative durations render as "--:--:--".
    """
    if seconds is None or seconds < 0:
        return "--:--:--"

    whole = int(seconds)
    hours = whole // 3600
    minutes, secs = divmod(whole % 3600, 60)

    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
|
|
|
|
|
def format_duration_human(seconds: Optional[float]) -> str:
    """Render a duration compactly for humans.

    Examples:
    - 45    -> "45s"
    - 125   -> "2m 5s"
    - 3661  -> "1h 1m 1s"
    - 90061 -> "1d 1h 1m"

    Unknown or negative durations render as "unknown".
    """
    if seconds is None or seconds < 0:
        return "unknown"

    total = int(seconds)
    if total < 60:
        return f"{total}s"
    if total < 3600:
        return f"{total // 60}m {total % 60}s"
    if total < 86400:
        return f"{total // 3600}h {(total % 3600) // 60}m {total % 60}s"
    # Days and above drop the seconds component.
    return f"{total // 86400}d {(total % 86400) // 3600}h {(total % 3600) // 60}m"
|
|
|
|
|
|
def elapsed_since(start_ts: str) -> str:
    """Human-readable time elapsed between *start_ts* and the current UTC time."""
    return format_duration_human(
        calculate_duration_seconds(start_ts, get_utc_now())
    )
|
|
|
|
|
|
# =============================================================================
|
|
# TIMEZONE CONVERSION
|
|
# =============================================================================
|
|
|
|
def convert_to_local_time(utc_ts: str, local_tz: str = DEFAULT_TIMEZONE) -> str:
    """Convert UTC timestamp to local timezone display string.

    Returns: "HH:MM:SS" in local timezone.

    Resolution order: stdlib zoneinfo (Python 3.9+, DST-correct) ->
    pytz if installed -> hardcoded -03:00 for America/Montevideo ->
    plain UTC display. Unparseable input is returned unchanged.
    """
    dt = parse_iso_timestamp(utc_ts)
    if dt is None:
        return utc_ts

    # Match the original's behavior: treat the wall-clock fields as UTC,
    # dropping any offset the parsed value carried.
    naive_utc = dt.replace(tzinfo=None) if dt.tzinfo else dt

    # Preferred: stdlib zoneinfo -- no third-party dependency and handles
    # DST transitions, unlike the fixed-offset fallback below.
    try:
        from zoneinfo import ZoneInfo
        aware = naive_utc.replace(tzinfo=timezone.utc)
        return aware.astimezone(ZoneInfo(local_tz)).strftime("%H:%M:%S")
    except Exception:
        pass  # zoneinfo missing (<3.9) or tz database lacks local_tz

    try:
        import pytz
        dt_utc = pytz.UTC.localize(naive_utc)
        return dt_utc.astimezone(pytz.timezone(local_tz)).strftime("%H:%M:%S")
    except ImportError:
        # Last resort: fixed offset for the default zone, else label as UTC.
        if local_tz == "America/Montevideo":
            return (dt - timedelta(hours=3)).strftime("%H:%M:%S")
        return dt.strftime("%H:%M:%S") + " UTC"
|
|
|
|
|
|
def format_timestamp_with_local(utc_ts: str, local_tz: str = DEFAULT_TIMEZONE) -> str:
    """Format timestamp showing both UTC and local time.

    Returns: "2026-01-11T03:31:57Z (America/Montevideo: 00:31:57)"
    """
    return "{} ({}: {})".format(
        utc_ts, local_tz, convert_to_local_time(utc_ts, local_tz)
    )
|
|
|
|
|
|
# =============================================================================
|
|
# SYSTEM CONTEXT CAPTURE
|
|
# =============================================================================
|
|
|
|
def get_system_load() -> Tuple[float, float, float]:
    """Get system load averages (1, 5, 15 minute).

    Returns (0.0, 0.0, 0.0) when the value is unobtainable: os.getloadavg
    raises OSError on failure and does not exist at all on some platforms
    (AttributeError, e.g. Windows) -- the original only caught OSError.
    """
    try:
        return os.getloadavg()
    except (OSError, AttributeError):
        return (0.0, 0.0, 0.0)
|
|
|
|
|
|
def get_memory_usage() -> Dict[str, Any]:
    """Summarise memory use (MB and percent) from /proc/meminfo.

    Returns zeroed fields on any failure, e.g. on non-Linux hosts where
    /proc/meminfo does not exist.
    """
    try:
        info: Dict[str, int] = {}
        with open("/proc/meminfo") as fh:
            for raw in fh:
                fields = raw.split()
                if len(fields) >= 2:
                    # Values in /proc/meminfo are reported in kB.
                    info[fields[0].rstrip(":")] = int(fields[1])

        total_mb = info.get("MemTotal", 0) // 1024
        # MemAvailable is the better estimate; fall back to MemFree on
        # kernels that do not report it.
        available_mb = info.get("MemAvailable", info.get("MemFree", 0)) // 1024
        used_mb = total_mb - available_mb

        return {
            "total_mb": total_mb,
            "available_mb": available_mb,
            "used_mb": used_mb,
            "used_percent": int(100 * used_mb / total_mb) if total_mb else 0,
        }
    except Exception:
        return {"total_mb": 0, "available_mb": 0, "used_mb": 0, "used_percent": 0}
|
|
|
|
|
|
def get_disk_usage(path: str = "/") -> Dict[str, Any]:
    """Report total/free/used space (GB) and used percent for *path*.

    Returns zeroed fields on any failure (missing path, no statvfs, ...).
    """
    try:
        vfs = os.statvfs(path)
        frsize = vfs.f_frsize
        total_bytes = vfs.f_blocks * frsize
        # f_bavail counts blocks available to unprivileged users.
        free_bytes = vfs.f_bavail * frsize
        used_bytes = total_bytes - free_bytes

        gib = 1024 ** 3
        return {
            "total_gb": round(total_bytes / gib, 1),
            "free_gb": round(free_bytes / gib, 1),
            "used_gb": round(used_bytes / gib, 1),
            "used_percent": int(100 * used_bytes / total_bytes) if total_bytes else 0,
        }
    except Exception:
        return {"total_gb": 0, "free_gb": 0, "used_gb": 0, "used_percent": 0}
|
|
|
|
|
|
def capture_system_context() -> Dict[str, Any]:
    """Snapshot load, memory and disk statistics for task metadata.

    Returns a dict shaped for storage under a task's metadata.
    """
    memory = get_memory_usage()
    disk = get_disk_usage("/")

    return {
        "timestamp": get_utc_now(),
        "system_load": [round(value, 2) for value in get_system_load()],
        "memory": {
            "used_percent": memory["used_percent"],
            "available_mb": memory["available_mb"],
        },
        "disk": {
            "used_percent": disk["used_percent"],
            "free_gb": disk["free_gb"],
        },
    }
|
|
|
|
|
|
# =============================================================================
|
|
# TASK TIME METADATA
|
|
# =============================================================================
|
|
|
|
class TaskTimeTracker:
    """Track time metrics for a task throughout its lifecycle.

    The ``mark_*`` methods stamp the current UTC time, capture a system
    snapshot where relevant, and return dict fragments shaped for merging
    into the job's metadata.
    """

    def __init__(self, task_id: str, project: str):
        self.task_id = task_id
        self.project = project
        # Lifecycle timestamps (ISO 8601 UTC strings), set by mark_*().
        self.dispatch_time: Optional[str] = None
        self.start_time: Optional[str] = None
        self.completion_time: Optional[str] = None
        # System snapshots captured at dispatch / completion.
        self.dispatch_context: Optional[Dict] = None
        self.completion_context: Optional[Dict] = None
        self.agent_timezone: str = DEFAULT_TIMEZONE

    def mark_dispatched(self) -> Dict[str, Any]:
        """Stamp dispatch time and a system snapshot; return the fragment."""
        self.dispatch_time = get_utc_now()
        ctx = capture_system_context()
        self.dispatch_context = ctx

        return {
            "dispatch": {
                "utc_time": self.dispatch_time,
                "agent_timezone": self.agent_timezone,
                "system_load": ctx["system_load"],
                "memory_percent": ctx["memory"]["used_percent"],
                "disk_percent": ctx["disk"]["used_percent"],
            }
        }

    def mark_started(self) -> Dict[str, Any]:
        """Stamp the moment the agent actually begins execution."""
        self.start_time = get_utc_now()
        return {"start_time": self.start_time}

    def mark_completed(self, exit_code: int = 0) -> Dict[str, Any]:
        """Stamp completion, snapshot the system, and compute the duration."""
        self.completion_time = get_utc_now()
        ctx = capture_system_context()
        self.completion_context = ctx

        # Duration origin preference: dispatch, then start; if neither was
        # recorded the duration degenerates to zero (completion - completion).
        origin = self.dispatch_time or self.start_time or self.completion_time
        duration = calculate_duration_seconds(origin, self.completion_time)

        return {
            "completion": {
                "utc_time": self.completion_time,
                "duration_seconds": duration,
                "duration_formatted": format_duration(duration),
                "exit_code": exit_code,
                "system_load": ctx["system_load"],
                "memory_percent": ctx["memory"]["used_percent"],
                "disk_percent": ctx["disk"]["used_percent"],
            }
        }

    def get_full_metrics(self) -> Dict[str, Any]:
        """Assemble the complete time-metrics dict for this task."""
        metrics: Dict[str, Any] = {
            "task_id": self.task_id,
            "project": self.project,
            "agent_timezone": self.agent_timezone,
        }

        if self.dispatch_time:
            metrics["dispatch"] = {
                "utc_time": self.dispatch_time,
                "local_time": convert_to_local_time(self.dispatch_time, self.agent_timezone),
                "context": self.dispatch_context,
            }

        if self.start_time:
            metrics["start_time"] = self.start_time

        if self.completion_time:
            duration = calculate_duration_seconds(
                self.dispatch_time or self.start_time,
                self.completion_time,
            )
            metrics["completion"] = {
                "utc_time": self.completion_time,
                "local_time": convert_to_local_time(self.completion_time, self.agent_timezone),
                "duration_seconds": duration,
                "duration_formatted": format_duration(duration),
                "context": self.completion_context,
            }

        # Status: "running" while dispatched but unfinished, "completed"
        # once a completion stamp exists, otherwise "pending".
        if self.dispatch_time and not self.completion_time:
            metrics["elapsed"] = elapsed_since(self.dispatch_time)
            metrics["status"] = "running"
        elif self.completion_time:
            metrics["status"] = "completed"
        else:
            metrics["status"] = "pending"

        return metrics
|
|
|
|
|
|
# =============================================================================
|
|
# CAUSALITY TRACKING
|
|
# =============================================================================
|
|
|
|
def get_recent_completions(limit: int = 10) -> List[Dict[str, Any]]:
    """Scan job metadata for completed tasks, newest first.

    Returns up to *limit* entries of the form
    {task_id, project, completion_time, duration_seconds}.
    Unreadable or incomplete meta.json files are silently skipped.
    """
    jobs_dir = Path("/var/log/luz-orchestrator/jobs")
    if not jobs_dir.exists():
        return []

    finished: List[Dict[str, Any]] = []
    for entry in jobs_dir.iterdir():
        meta_path = entry / "meta.json"
        if not entry.is_dir() or not meta_path.exists():
            continue

        try:
            with open(meta_path) as fh:
                meta = json.load(fh)

            completion = meta.get("time_metrics", {}).get("completion", {})
            if completion.get("utc_time"):
                finished.append({
                    "task_id": meta.get("id", entry.name),
                    "project": meta.get("project", "unknown"),
                    "completion_time": completion["utc_time"],
                    "duration_seconds": completion.get("duration_seconds"),
                })
        except Exception:
            continue  # best-effort scan; skip corrupt entries

    # ISO timestamps sort lexicographically == chronologically.
    finished.sort(key=lambda item: item["completion_time"], reverse=True)
    return finished[:limit]
|
|
|
|
|
|
def find_prior_task(dispatch_time: str, project: str = None) -> Optional[Dict[str, Any]]:
    """Return the most recent task that completed before *dispatch_time*.

    Useful for causality tracking: "This task started 5min after Task X
    completed". Optionally restricted to one *project*. The returned dict
    extends the completion entry with gap_seconds / gap_formatted.
    """
    for candidate in get_recent_completions(limit=20):
        # Skip when filtering by project and this entry doesn't match.
        if project and candidate["project"] != project:
            continue
        # Only completions strictly before our dispatch count.
        if candidate["completion_time"] >= dispatch_time:
            continue

        gap = calculate_duration_seconds(candidate["completion_time"], dispatch_time)
        return {
            **candidate,
            "gap_seconds": gap,
            "gap_formatted": format_duration_human(gap),
        }

    return None
|
|
|
|
|
|
# =============================================================================
|
|
# PERFORMANCE BASELINE & ANOMALY DETECTION
|
|
# =============================================================================
|
|
|
|
# Performance baseline database
# SQLite file holding raw per-task duration samples plus the per-project
# aggregate baselines computed from them.
BASELINE_DB = Path("/var/lib/luzia/metrics/baselines.db")
|
|
|
|
|
|
def init_baseline_db():
    """Open (creating if necessary) the baseline DB and ensure its schema.

    Returns an open sqlite3 connection; the caller is responsible for
    closing it.
    """
    BASELINE_DB.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(BASELINE_DB)
    cur = conn.cursor()

    # Raw samples: one row per finished task.
    cur.execute('''CREATE TABLE IF NOT EXISTS task_durations (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 project TEXT NOT NULL,
                 task_hash TEXT,
                 duration_seconds REAL NOT NULL,
                 exit_code INTEGER,
                 system_load_avg REAL,
                 memory_percent INTEGER,
                 recorded_at TEXT NOT NULL
                 )''')

    # Pre-computed per-project aggregates (see calculate_baseline).
    cur.execute('''CREATE TABLE IF NOT EXISTS baselines (
                 project TEXT PRIMARY KEY,
                 avg_duration REAL,
                 median_duration REAL,
                 stddev_duration REAL,
                 p95_duration REAL,
                 sample_count INTEGER,
                 updated_at TEXT
                 )''')

    cur.execute('CREATE INDEX IF NOT EXISTS idx_durations_project ON task_durations(project)')
    cur.execute('CREATE INDEX IF NOT EXISTS idx_durations_recorded ON task_durations(recorded_at)')

    conn.commit()
    return conn
|
|
|
|
|
|
def record_task_duration(project: str, duration_seconds: float,
                         exit_code: int = 0, task_hash: str = None,
                         system_load: float = None, memory_percent: int = None):
    """Record a task duration sample for baseline calculation.

    Args:
        project: project the task belonged to.
        duration_seconds: measured wall-clock duration.
        exit_code: task exit status (0 = success).
        task_hash: optional identifier for the task definition.
        system_load: optional 1-minute load average at completion.
        memory_percent: optional memory-used percentage at completion.
    """
    conn = init_baseline_db()
    try:
        # Original leaked the connection if the INSERT raised; close it
        # unconditionally now.
        conn.execute('''INSERT INTO task_durations
                     (project, task_hash, duration_seconds, exit_code,
                     system_load_avg, memory_percent, recorded_at)
                     VALUES (?, ?, ?, ?, ?, ?, ?)''',
                     (project, task_hash, duration_seconds, exit_code,
                      system_load, memory_percent, get_utc_now()))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
|
def calculate_baseline(project: str) -> Dict[str, Any]:
    """Calculate performance baseline for a project.

    Uses last 30 days of successful tasks.

    Computes avg / median / population-stddev / p95 over successful
    (exit_code 0) runs, upserts the result into the ``baselines`` table,
    and returns it. Returns a stub with ``sample_count: 0`` and an
    ``error`` key when no samples exist in the window.
    """
    conn = init_baseline_db()
    c = conn.cursor()

    # Get durations from last 30 days, exit_code 0 only.
    # recorded_at is stored in the same ISO format, so lexicographic
    # comparison in SQL matches chronological order.
    cutoff = (datetime.utcnow() - timedelta(days=30)).strftime("%Y-%m-%dT%H:%M:%SZ")

    # ORDER BY duration_seconds: the sorted order is what makes the
    # median and p95 index arithmetic below valid.
    c.execute('''SELECT duration_seconds FROM task_durations
                 WHERE project = ? AND exit_code = 0 AND recorded_at > ?
                 ORDER BY duration_seconds''',
              (project, cutoff))

    durations = [row[0] for row in c.fetchall()]

    if not durations:
        conn.close()
        return {
            "project": project,
            "sample_count": 0,
            "error": "No data available"
        }

    # Calculate statistics
    n = len(durations)
    avg = sum(durations) / n

    # Median (relies on the sorted order from the query above)
    mid = n // 2
    median = durations[mid] if n % 2 else (durations[mid-1] + durations[mid]) / 2

    # Standard deviation (population form: divides by n, not n-1)
    variance = sum((d - avg) ** 2 for d in durations) / n
    stddev = variance ** 0.5

    # 95th percentile (index clamped for small samples)
    p95_idx = int(n * 0.95)
    p95 = durations[min(p95_idx, n - 1)]

    baseline = {
        "project": project,
        "avg_duration": round(avg, 2),
        "median_duration": round(median, 2),
        "stddev_duration": round(stddev, 2),
        "p95_duration": round(p95, 2),
        "sample_count": n,
        "updated_at": get_utc_now()
    }

    # Store baseline (upsert keyed on project)
    c.execute('''INSERT OR REPLACE INTO baselines
                 (project, avg_duration, median_duration, stddev_duration,
                 p95_duration, sample_count, updated_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?)''',
              (project, baseline["avg_duration"], baseline["median_duration"],
               baseline["stddev_duration"], baseline["p95_duration"],
               baseline["sample_count"], baseline["updated_at"]))

    conn.commit()
    conn.close()

    return baseline
|
|
|
|
|
|
def check_anomaly(project: str, duration_seconds: float) -> Dict[str, Any]:
    """Classify a task duration against the project's stored baseline.

    Returns:
        {
            "is_anomaly": bool,
            "severity": "normal"|"slow"|"very_slow"|"extreme" (or "unknown"),
            "ratio": float (duration / avg),
            "message": str
        }
    plus baseline_avg / baseline_p95 when a baseline exists. Requires at
    least 5 recorded samples to classify.
    """
    conn = init_baseline_db()
    row = conn.execute(
        '''SELECT avg_duration, stddev_duration, p95_duration, sample_count
           FROM baselines WHERE project = ?''',
        (project,),
    ).fetchone()
    conn.close()

    def _unknown(reason: str) -> Dict[str, Any]:
        # Shared shape for the two "cannot classify" exits.
        return {"is_anomaly": False, "severity": "unknown", "ratio": 0, "message": reason}

    if not row or row[3] < 5:  # Need at least 5 samples
        return _unknown("Insufficient baseline data")

    avg, stddev, p95, _sample_count = row
    if avg <= 0:
        return _unknown("Invalid baseline")

    ratio = duration_seconds / avg

    # Severity ladder: within p95 is normal, then 2-sigma / 3-sigma bands.
    if duration_seconds <= p95:
        is_anomaly, severity = False, "normal"
        message = "Within normal range"
    elif duration_seconds <= avg + 2 * stddev:
        is_anomaly, severity = True, "slow"
        message = f"Slower than average ({ratio:.1f}x normal)"
    elif duration_seconds <= avg + 3 * stddev:
        is_anomaly, severity = True, "very_slow"
        message = f"Much slower than average ({ratio:.1f}x normal)"
    else:
        is_anomaly, severity = True, "extreme"
        message = f"Extremely slow ({ratio:.1f}x normal)"

    return {
        "is_anomaly": is_anomaly,
        "severity": severity,
        "ratio": round(ratio, 2),
        "message": message,
        "baseline_avg": round(avg, 2),
        "baseline_p95": round(p95, 2),
    }
|
|
|
|
|
|
# =============================================================================
|
|
# AGGREGATE METRICS
|
|
# =============================================================================
|
|
|
|
def get_project_metrics(project: str, days: int = 7) -> Dict[str, Any]:
    """Get aggregate metrics for a project over a time period.

    Args:
        project: project name to aggregate.
        days: look-back window in days (default 7).

    Returns a dict of totals, averages and success rate, or a stub with
    an "error" key when no rows fall inside the window.
    """
    conn = init_baseline_db()
    c = conn.cursor()

    # ISO-formatted cutoff; recorded_at uses the same format, so the
    # lexicographic comparison in SQL matches chronological order.
    cutoff = (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

    c.execute('''SELECT
                 COUNT(*) as total_tasks,
                 SUM(duration_seconds) as total_time,
                 AVG(duration_seconds) as avg_duration,
                 MIN(duration_seconds) as min_duration,
                 MAX(duration_seconds) as max_duration,
                 SUM(CASE WHEN exit_code = 0 THEN 1 ELSE 0 END) as success_count,
                 AVG(system_load_avg) as avg_load,
                 AVG(memory_percent) as avg_memory
                 FROM task_durations
                 WHERE project = ? AND recorded_at > ?''',
              (project, cutoff))

    row = c.fetchone()
    conn.close()

    if not row or row[0] == 0:
        return {
            "project": project,
            "period_days": days,
            "total_tasks": 0,
            "error": "No data in period"
        }

    total, total_time, avg, min_dur, max_dur, success, avg_load, avg_mem = row

    # SQL aggregates over empty/NULL columns yield None; the `or 0`
    # guards below coerce those to zero before rounding.
    return {
        "project": project,
        "period_days": days,
        "total_tasks": total,
        "total_time_seconds": total_time or 0,
        "total_time_formatted": format_duration(total_time),
        "avg_duration_seconds": round(avg or 0, 2),
        "avg_duration_formatted": format_duration(avg),
        "min_duration_seconds": round(min_dur or 0, 2),
        "max_duration_seconds": round(max_dur or 0, 2),
        "success_rate": round(100 * success / total, 1) if total else 0,
        "avg_system_load": round(avg_load or 0, 2),
        "avg_memory_percent": round(avg_mem or 0, 1)
    }
|
|
|
|
|
|
def get_all_projects_metrics(days: int = 7) -> Dict[str, Any]:
    """Get aggregate metrics for all projects.

    Args:
        days: look-back window in days (default 7).

    Returns overall totals, a per-project breakdown (ordered by total
    time spent), and the ten longest-running individual tasks.
    """
    conn = init_baseline_db()
    c = conn.cursor()

    # ISO cutoff: lexicographic comparison matches chronological order.
    cutoff = (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Get per-project stats
    c.execute('''SELECT
                 project,
                 COUNT(*) as total_tasks,
                 SUM(duration_seconds) as total_time,
                 AVG(duration_seconds) as avg_duration,
                 SUM(CASE WHEN exit_code = 0 THEN 1 ELSE 0 END) as success_count
                 FROM task_durations
                 WHERE recorded_at > ?
                 GROUP BY project
                 ORDER BY total_time DESC''',
              (cutoff,))

    by_project = {}
    total_tasks = 0
    total_time = 0

    for row in c.fetchall():
        project, tasks, time_sec, avg, success = row
        # time_sec/avg may be NULL (None) if all durations were NULL.
        by_project[project] = {
            "total_tasks": tasks,
            "total_time_seconds": time_sec or 0,
            "total_time_formatted": format_duration(time_sec),
            "avg_duration_seconds": round(avg or 0, 2),
            "success_rate": round(100 * success / tasks, 1) if tasks else 0
        }
        total_tasks += tasks
        total_time += (time_sec or 0)

    # Get longest running tasks
    c.execute('''SELECT project, task_hash, duration_seconds, recorded_at
                 FROM task_durations
                 WHERE recorded_at > ?
                 ORDER BY duration_seconds DESC
                 LIMIT 10''',
              (cutoff,))

    longest_tasks = []
    for row in c.fetchall():
        longest_tasks.append({
            "project": row[0],
            "task_hash": row[1],
            "duration_seconds": row[2],
            "duration_formatted": format_duration(row[2]),
            "recorded_at": row[3]
        })

    conn.close()

    return {
        "period_days": days,
        "total_tasks": total_tasks,
        "total_time_seconds": total_time,
        "total_time_formatted": format_duration(total_time),
        "by_project": by_project,
        "longest_tasks": longest_tasks
    }
|
|
|
|
|
|
# =============================================================================
|
|
# TIME-BUCKETED SUCCESS RATES
|
|
# =============================================================================
|
|
|
|
def get_success_by_duration_bucket(project: str = None) -> Dict[str, Any]:
    """Get task success rate by execution time bucket.

    Buckets: <1min, 1-5min, 5-15min, 15-30min, 30-60min, >60min

    Args:
        project: optional project filter; all projects when None.

    Returns {bucket_name: {"total": int, "success": int, "success_rate": float}}.
    """
    conn = init_baseline_db()
    cur = conn.cursor()

    # (name, inclusive lower bound, exclusive upper bound) in seconds.
    buckets = [
        ("under_1m", 0, 60),
        ("1_to_5m", 60, 300),
        ("5_to_15m", 300, 900),
        ("15_to_30m", 900, 1800),
        ("30_to_60m", 1800, 3600),
        ("over_60m", 3600, float('inf'))
    ]

    # One parameterized query with an optional project clause; the
    # original duplicated the whole SQL statement for the two cases.
    base_query = '''SELECT
                 COUNT(*) as total,
                 SUM(CASE WHEN exit_code = 0 THEN 1 ELSE 0 END) as success
                 FROM task_durations
                 WHERE duration_seconds >= ? AND duration_seconds < ?'''

    results = {}
    for bucket_name, min_dur, max_dur in buckets:
        # Clamp the open-ended bucket to a large finite sentinel, as the
        # original did, rather than binding +inf.
        upper = max_dur if max_dur != float('inf') else 1e10
        params = [min_dur, upper]

        query = base_query
        if project:
            query += ' AND project = ?'
            params.append(project)

        cur.execute(query, params)
        row = cur.fetchone()
        total, success = row[0] or 0, row[1] or 0

        results[bucket_name] = {
            "total": total,
            "success": success,
            "success_rate": round(100 * success / total, 1) if total else 0
        }

    conn.close()
    return results
|
|
|
|
|
|
# =============================================================================
|
|
# DISPLAY FORMATTERS FOR LUZIA
|
|
# =============================================================================
|
|
|
|
def format_job_with_timing(job: Dict[str, Any]) -> str:
    """One-line summary for 'luzia jobs' output with timing info.

    Returns: "agent:dss:002746 | dss | in_progress | 14:30:00 UTC | 00:45:30 | CPU: 0.52"
    """
    job_id = job.get("id", "unknown")
    project = job.get("project", "unknown")
    status = job.get("status", "unknown")

    metrics = job.get("time_metrics", {})
    dispatch = metrics.get("dispatch", {})
    completion = metrics.get("completion", {})

    # Dispatch wall-clock: the HH:MM:SS slice of the ISO timestamp.
    dispatch_time = dispatch.get("utc_time", job.get("started", ""))
    dispatch_display = dispatch_time[11:19] if dispatch_time else "--:--:--"

    # Duration: prefer the recorded total, else live elapsed time.
    if completion.get("duration_formatted"):
        duration = completion["duration_formatted"]
    elif dispatch_time:
        duration = elapsed_since(dispatch_time)
    else:
        duration = "--:--:--"

    # 1-minute load average captured at dispatch.
    load = dispatch.get("system_load", [0, 0, 0])
    if isinstance(load, list) and len(load) > 0:
        load_display = f"{load[0]:.2f}"
    else:
        load_display = "-.--"

    return f"{job_id} | {project} | {status} | {dispatch_display} UTC | {duration} | CPU: {load_display}"
|
|
|
|
|
|
def format_logs_header(job: Dict[str, Any], local_tz: str = DEFAULT_TIMEZONE) -> str:
    """Multi-line banner for 'luzia logs' output with timing info.

    Returns a formatted header block with timing and system info.
    """
    job_id = job.get("id", "unknown")
    project = job.get("project", "unknown")
    status = job.get("status", "unknown")

    metrics = job.get("time_metrics", {})
    dispatch = metrics.get("dispatch", {})
    completion = metrics.get("completion", {})

    bar = "═" * 65
    lines = [bar, f"Job: agent:{project}:{job_id}", f"Agent: {project}"]

    # Dispatch time with its local-timezone rendering.
    dispatch_time = dispatch.get("utc_time", job.get("started", ""))
    if dispatch_time:
        local_time = convert_to_local_time(dispatch_time, local_tz)
        lines.append(f"Dispatched: {dispatch_time} ({local_tz}: {local_time})")

    # Status line: finished jobs show total duration, dispatched-but-
    # unfinished ones show live elapsed time.
    if completion.get("utc_time"):
        duration = completion.get("duration_formatted", "--:--:--")
        lines.append(f"Status: completed (took {duration})")
    elif dispatch_time:
        lines.append(f"Status: {status} (running for {elapsed_since(dispatch_time)})")
    else:
        lines.append(f"Status: {status}")

    # System snapshot (dispatch-time context preferred over completion).
    ctx = dispatch if dispatch else completion
    if ctx:
        load = ctx.get("system_load", [0, 0, 0])
        mem = ctx.get("memory_percent", 0)
        disk = ctx.get("disk_percent", 0)
        if isinstance(load, list) and len(load) > 0:
            lines.append(f"System: CPU {load[0]:.2f}, Memory {mem}%, Disk {disk}%")

    lines.append(bar)
    return "\n".join(lines)
|
|
|
|
|
|
# =============================================================================
|
|
# CONVENIENCE FUNCTION FOR LUZIA INTEGRATION
|
|
# =============================================================================
|
|
|
|
def create_task_time_metadata(task_id: str, project: str) -> Dict[str, Any]:
    """Build the initial time-tracking metadata for a new task dispatch.

    Call this from spawn_claude_agent() to add time tracking. The
    "time_tracker_data" fragment carries just enough state to rebuild the
    tracker later (see update_task_completion_metadata).
    """
    tracker = TaskTimeTracker(task_id, project)
    dispatch_fragment = tracker.mark_dispatched()

    return {
        "time_metrics": dispatch_fragment,
        "time_tracker_data": {
            "task_id": task_id,
            "project": project,
            "dispatch_time": tracker.dispatch_time,
            "agent_timezone": tracker.agent_timezone,
        },
    }
|
|
|
|
|
|
def update_task_completion_metadata(meta: Dict[str, Any], exit_code: int = 0) -> Dict[str, Any]:
    """Finalize time tracking when a job completes.

    Rebuilds a tracker from meta["time_tracker_data"], stamps completion,
    merges the resulting fragment into the job's time_metrics, and
    best-effort records the duration sample for baseline statistics.
    """
    saved = meta.get("time_tracker_data", {})

    tracker = TaskTimeTracker(
        saved.get("task_id", "unknown"),
        saved.get("project", "unknown")
    )
    tracker.dispatch_time = saved.get("dispatch_time")
    tracker.agent_timezone = saved.get("agent_timezone", DEFAULT_TIMEZONE)

    completion_data = tracker.mark_completed(exit_code)

    # Merge into whatever time_metrics the job already carried.
    time_metrics = meta.get("time_metrics", {})
    time_metrics.update(completion_data)

    # Best-effort baseline sample; metrics recording must never fail the job.
    duration = completion_data.get("completion", {}).get("duration_seconds")
    if duration:
        try:
            record_task_duration(
                tracker.project,
                duration,
                exit_code,
                system_load=tracker.completion_context.get("system_load", [0])[0],
                memory_percent=tracker.completion_context.get("memory", {}).get("used_percent")
            )
        except Exception:
            pass  # Don't fail on baseline recording errors

    return {"time_metrics": time_metrics}
|
|
|
|
|
|
if __name__ == "__main__":
    # Quick test / smoke check: prints the current timestamp, a system
    # context snapshot, and a full dispatch -> completion tracker cycle.
    print("Time Metrics Module Test")
    print("=" * 40)
    print(f"Current UTC: {get_utc_now()}")
    print(f"System Context: {json.dumps(capture_system_context(), indent=2)}")

    # Test tracker
    tracker = TaskTimeTracker("test-001", "admin")
    print(f"\nDispatch: {json.dumps(tracker.mark_dispatched(), indent=2)}")

    # Sleep so the demo shows a non-zero duration.
    import time as t
    t.sleep(1)

    print(f"\nCompletion: {json.dumps(tracker.mark_completed(0), indent=2)}")
    print(f"\nFull Metrics: {json.dumps(tracker.get_full_metrics(), indent=2)}")
|