Refactor cockpit to use DockerTmuxController pattern

Based on claude-code-tools TmuxCLIController (interface sketched after the list below), this refactor:

- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection
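
A minimal sketch of the controller interface described above, assuming docker-exec plus tmux plumbing; method names come from the bullets, while parameter names, defaults, and bodies are illustrative assumptions, not the committed implementation:

import hashlib
import re
import subprocess
import time

class DockerTmuxController:
    """Drive a tmux session running inside a Docker container."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux command inside the container via `docker exec`.
        result = subprocess.run(
            ["docker", "exec", self.container, "tmux", *args],
            capture_output=True, text=True, check=True,
        )
        return result.stdout

    def send_keys(self, text: str, delay_enter: float = 0.0) -> None:
        # Type text into the pane, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, text)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Return the current pane contents for output retrieval.
        return self._tmux("capture-pane", "-t", self.session, "-p")

    def wait_for_prompt(self, pattern: str, timeout: float = 60.0, poll: float = 1.0) -> bool:
        # Pattern-based completion detection: poll until the pane matches the regex.
        deadline = time.time() + timeout
        while time.time() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(poll)
        return False

    def wait_for_idle(self, window: float = 5.0, timeout: float = 300.0) -> bool:
        # Content-hash-based idle detection: idle once the pane hash stops changing for `window` seconds.
        deadline = time.time() + timeout
        last_hash, stable_since = None, time.time()
        while time.time() < deadline:
            current = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if current != last_hash:
                last_hash, stable_since = current, time.time()
            elif time.time() - stable_since >= window:
                return True
            time.sleep(1.0)
        return False

    def wait_for_shell_prompt(self, timeout: float = 60.0) -> bool:
        # Shell prompt detection: a trailing "$" or "#" in the captured pane.
        return self.wait_for_prompt(r"[$#]\s*$", timeout=timeout)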

Also includes workflow improvements (sketched below):
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines
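
A minimal sketch of that workflow, assuming plain git in the project working tree; the function names, commit messages, and the empty-snapshot-commit approach are illustrative assumptions:

import subprocess

def git(*args: str, cwd: str) -> None:
    subprocess.run(["git", *args], cwd=cwd, check=True)

def pre_task_snapshot(repo: str, task_id: str) -> None:
    # Record the working tree before the agent runs so its changes can be diffed or rolled back.
    git("add", "-A", cwd=repo)
    git("commit", "--allow-empty", "-m", f"snapshot: before task {task_id}", cwd=repo)

def post_task_commit(repo: str, task_id: str, summary: str) -> None:
    # Per the agent guidelines, commit the agent's changes once the task finishes.
    git("add", "-A", cwd=repo)
    git("commit", "-m", f"task {task_id}: {summary}", cwd=repo)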

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit ec33ac1936
admin, 2026-01-14 10:42:16 -03:00
265 changed files with 92011 additions and 0 deletions

tests/test_integrations.py (new file, 511 lines)

@@ -0,0 +1,511 @@
#!/usr/bin/env python3
"""
Integration tests for Luzia orchestrator components
"""
import sys
import json
import os
import tempfile
import shutil
from pathlib import Path
from datetime import datetime
# Add lib to path
sys.path.insert(0, '/opt/server-agents/orchestrator/lib')
# Test results tracking
RESULTS = {'passed': 0, 'failed': 0, 'errors': []}
def test(name):
"""Decorator for test functions"""
def decorator(func):
def wrapper():
try:
func()
RESULTS['passed'] += 1
print(f"{name}")
return True
except AssertionError as e:
RESULTS['failed'] += 1
RESULTS['errors'].append(f"{name}: {e}")
print(f"{name}: {e}")
return False
except Exception as e:
RESULTS['failed'] += 1
RESULTS['errors'].append(f"{name}: {type(e).__name__}: {e}")
print(f"{name}: {type(e).__name__}: {e}")
return False
wrapper.__name__ = func.__name__
return wrapper
return decorator
# =============================================================================
# Chat Memory Lookup Tests
# =============================================================================
print("\n### Chat Memory Lookup Tests ###")
@test("ChatMemoryLookup imports")
def test_chat_memory_import():
from chat_memory_lookup import ChatMemoryLookup
assert ChatMemoryLookup is not None
@test("ChatMemoryLookup initializes")
def test_chat_memory_init():
from chat_memory_lookup import ChatMemoryLookup
lookup = ChatMemoryLookup(timeout_ms=150)
assert lookup.timeout_ms == 150
@test("ChatMemoryLookup.memory_statistics returns data")
def test_chat_memory_stats():
from chat_memory_lookup import ChatMemoryLookup
lookup = ChatMemoryLookup()
stats = lookup.memory_statistics()
assert 'available' in stats
assert stats['available'] == True
assert 'entities' in stats
assert stats['entities'] > 0
@test("ChatMemoryLookup.list_all_projects returns projects")
def test_chat_memory_projects():
from chat_memory_lookup import ChatMemoryLookup
lookup = ChatMemoryLookup()
result = lookup.list_all_projects()
assert 'projects' in result
assert 'count' in result
assert result['count'] > 0
assert len(result['projects']) > 0
@test("ChatMemoryLookup.search_entities works")
def test_chat_memory_search():
from chat_memory_lookup import ChatMemoryLookup
lookup = ChatMemoryLookup()
result = lookup.search_entities('admin', limit=5)
assert 'entities' in result
assert 'count' in result
test_chat_memory_import()
test_chat_memory_init()
test_chat_memory_stats()
test_chat_memory_projects()
test_chat_memory_search()
# =============================================================================
# Chat Intent Parser Tests
# =============================================================================
print("\n### Chat Intent Parser Tests ###")
@test("ChatIntentParser imports")
def test_intent_import():
from chat_intent_parser import ChatIntentParser
assert ChatIntentParser is not None
@test("ChatIntentParser.parse returns intent structure")
def test_intent_parse():
from chat_intent_parser import ChatIntentParser
parser = ChatIntentParser()
result = parser.parse("list projects")
assert 'intent' in result
assert 'keywords' in result
assert 'scope' in result
@test("ChatIntentParser detects project_info intent")
def test_intent_project():
from chat_intent_parser import ChatIntentParser
parser = ChatIntentParser()
result = parser.parse("list projects")
assert result['intent'] == 'project_info'
assert 'projects' in result['keywords']
@test("ChatIntentParser detects system_status intent")
def test_intent_status():
from chat_intent_parser import ChatIntentParser
parser = ChatIntentParser()
result = parser.parse("system status")
assert result['intent'] == 'system_status'
@test("ChatIntentParser.extract_search_term works")
def test_intent_search_term():
from chat_intent_parser import ChatIntentParser
parser = ChatIntentParser()
term = parser.extract_search_term("search for authentication")
assert term is not None
assert len(term) > 0
test_intent_import()
test_intent_parse()
test_intent_project()
test_intent_status()
test_intent_search_term()
# =============================================================================
# Chat Orchestrator Tests
# =============================================================================
print("\n### Chat Orchestrator Tests ###")
@test("ChatOrchestrator imports")
def test_orchestrator_import():
from chat_orchestrator import ChatOrchestrator
assert ChatOrchestrator is not None
@test("ChatOrchestrator initializes")
def test_orchestrator_init():
from chat_orchestrator import ChatOrchestrator
orch = ChatOrchestrator(timeout_ms=500)
assert orch.timeout_ms == 500
@test("ChatOrchestrator.process_query returns response")
def test_orchestrator_query():
from chat_orchestrator import ChatOrchestrator
orch = ChatOrchestrator()
result = orch.process_query("help")
assert 'response' in result
assert 'status' in result
assert result['status'] == 'success'
@test("ChatOrchestrator handles system status query")
def test_orchestrator_status():
from chat_orchestrator import ChatOrchestrator
orch = ChatOrchestrator()
result = orch.process_query("system status")
assert 'response' in result
assert 'execution_time_ms' in result
@test("ChatOrchestrator handles project query")
def test_orchestrator_projects():
from chat_orchestrator import ChatOrchestrator
orch = ChatOrchestrator()
result = orch.process_query("list projects")
assert 'response' in result
assert 'Projects' in result['response'] or 'project' in result['response'].lower()
test_orchestrator_import()
test_orchestrator_init()
test_orchestrator_query()
test_orchestrator_status()
test_orchestrator_projects()
# =============================================================================
# Chat Response Formatter Tests
# =============================================================================
print("\n### Chat Response Formatter Tests ###")
@test("ChatResponseFormatter imports")
def test_formatter_import():
from chat_response_formatter import ChatResponseFormatter
assert ChatResponseFormatter is not None
@test("ChatResponseFormatter.format_help returns markdown")
def test_formatter_help():
from chat_response_formatter import ChatResponseFormatter
formatter = ChatResponseFormatter()
help_text = formatter.format_help()
assert '# ' in help_text # Has markdown header
assert len(help_text) > 100
@test("ChatResponseFormatter.format_response_time works")
def test_formatter_time():
from chat_response_formatter import ChatResponseFormatter
formatter = ChatResponseFormatter()
instant = formatter.format_response_time(5)
assert 'instant' in instant
fast = formatter.format_response_time(150)
assert 'fast' in fast.lower() or 'ms' in fast
@test("ChatResponseFormatter.format_project_list works")
def test_formatter_projects():
from chat_response_formatter import ChatResponseFormatter
formatter = ChatResponseFormatter()
data = {'projects': [{'name': 'test', 'type': 'project'}], 'count': 1}
result = formatter.format_project_list(data)
assert 'test' in result
assert 'Project' in result or '1' in result
test_formatter_import()
test_formatter_help()
test_formatter_time()
test_formatter_projects()
# =============================================================================
# Chat Bash Executor Tests
# =============================================================================
print("\n### Chat Bash Executor Tests ###")
@test("ChatBashExecutor imports")
def test_bash_import():
from chat_bash_executor import ChatBashExecutor
assert ChatBashExecutor is not None
@test("ChatBashExecutor.execute runs uptime")
def test_bash_uptime():
from chat_bash_executor import ChatBashExecutor
executor = ChatBashExecutor()
result = executor.execute('uptime')
assert 'success' in result
assert result['success'] == True
assert 'output' in result
@test("ChatBashExecutor.execute runs disk")
def test_bash_disk():
from chat_bash_executor import ChatBashExecutor
executor = ChatBashExecutor()
result = executor.execute('disk')
assert result['success'] == True
@test("ChatBashExecutor rejects unknown commands")
def test_bash_reject():
from chat_bash_executor import ChatBashExecutor
executor = ChatBashExecutor()
result = executor.execute('unknown_dangerous_cmd')
# Unknown commands return error without success key
assert 'error' in result
assert 'not allowed' in result['error'].lower()
test_bash_import()
test_bash_uptime()
test_bash_disk()
test_bash_reject()
# =============================================================================
# Task Watchdog Tests
# =============================================================================
print("\n### Task Watchdog Tests ###")
@test("TaskWatchdog imports")
def test_watchdog_import():
from task_watchdog import TaskWatchdog
assert TaskWatchdog is not None
@test("TaskWatchdog initializes")
def test_watchdog_init():
from task_watchdog import TaskWatchdog
watchdog = TaskWatchdog()
assert watchdog.HEARTBEAT_TIMEOUT_SECONDS == 300
assert watchdog.LOCK_TIMEOUT_SECONDS == 3600
@test("TaskWatchdog.check_heartbeats runs")
def test_watchdog_heartbeats():
from task_watchdog import TaskWatchdog
watchdog = TaskWatchdog()
stuck = watchdog.check_heartbeats()
assert isinstance(stuck, list)
@test("TaskWatchdog.get_project_queue_status returns dict")
def test_watchdog_queue_status():
from task_watchdog import TaskWatchdog
watchdog = TaskWatchdog()
status = watchdog.get_project_queue_status()
assert isinstance(status, dict)
@test("TaskWatchdog.is_project_blocked returns tuple")
def test_watchdog_blocked():
from task_watchdog import TaskWatchdog
watchdog = TaskWatchdog()
blocked, reason = watchdog.is_project_blocked('test_nonexistent')
assert isinstance(blocked, bool)
@test("TaskWatchdog.run_check returns summary")
def test_watchdog_check():
from task_watchdog import TaskWatchdog
watchdog = TaskWatchdog()
summary = watchdog.run_check()
assert 'timestamp' in summary
assert 'stuck_tasks' in summary
assert 'project_status' in summary
test_watchdog_import()
test_watchdog_init()
test_watchdog_heartbeats()
test_watchdog_queue_status()
test_watchdog_blocked()
test_watchdog_check()
# =============================================================================
# Task Completion Tests
# =============================================================================
print("\n### Task Completion Tests ###")
@test("TaskCompletion imports")
def test_completion_import():
from task_completion import TaskCompletion, complete_task, fail_task
assert TaskCompletion is not None
assert complete_task is not None
assert fail_task is not None
@test("TaskCompletion initializes")
def test_completion_init():
from task_completion import TaskCompletion
handler = TaskCompletion()
assert handler.COMPLETED_DIR is not None  # directory itself may not exist on disk yet
@test("TaskCompletion.complete_task handles missing task")
def test_completion_missing():
from task_completion import TaskCompletion
handler = TaskCompletion()
result = handler.complete_task('nonexistent-task-12345')
assert result['success'] == False
assert 'not found' in result.get('error', '').lower()
@test("TaskCompletion.fail_task handles missing task")
def test_fail_missing():
from task_completion import TaskCompletion
handler = TaskCompletion()
result = handler.fail_task('nonexistent-task-12345', 'test error')
assert result['success'] == False
@test("TaskCompletion.set_awaiting_human handles missing task")
def test_awaiting_missing():
from task_completion import TaskCompletion
handler = TaskCompletion()
result = handler.set_awaiting_human('nonexistent-task-12345', 'question?')
assert result['success'] == False
test_completion_import()
test_completion_init()
test_completion_missing()
test_fail_missing()
test_awaiting_missing()
# =============================================================================
# Cockpit Tests
# =============================================================================
print("\n### Cockpit Tests ###")
@test("Cockpit module imports")
def test_cockpit_import():
from cockpit import cockpit_status, cockpit_start, cockpit_stop, cockpit_send
assert cockpit_status is not None
assert cockpit_start is not None
assert cockpit_stop is not None
@test("cockpit_status returns state")
def test_cockpit_status():
from cockpit import cockpit_status
result = cockpit_status('admin')
assert isinstance(result, dict)
# Should have status info even if not running
@test("container_exists helper works")
def test_cockpit_container_exists():
from cockpit import container_exists
# Test with a non-existent container
result = container_exists('nonexistent-container-12345')
assert isinstance(result, bool)
assert result == False
@test("get_container_name generates correct name")
def test_cockpit_container_name():
from cockpit import get_container_name
name = get_container_name('testproject')
assert 'testproject' in name
assert 'cockpit' in name.lower()
test_cockpit_import()
test_cockpit_status()
test_cockpit_container_exists()
test_cockpit_container_name()
# =============================================================================
# KG Lookup Tests
# =============================================================================
print("\n### KG Lookup Tests ###")
@test("ChatKGLookup imports")
def test_kg_import():
from chat_kg_lookup import ChatKGLookup
assert ChatKGLookup is not None
@test("ChatKGLookup.get_kg_statistics returns stats")
def test_kg_stats():
from chat_kg_lookup import ChatKGLookup
lookup = ChatKGLookup()
stats = lookup.get_kg_statistics()
assert isinstance(stats, dict)
@test("ChatKGLookup.search_all_domains works")
def test_kg_search():
from chat_kg_lookup import ChatKGLookup
lookup = ChatKGLookup()
results = lookup.search_all_domains('admin', limit=5)
assert isinstance(results, dict)
test_kg_import()
test_kg_stats()
test_kg_search()
# =============================================================================
# CLI Integration Tests
# =============================================================================
print("\n### CLI Integration Tests ###")
import subprocess
@test("luzia --help works")
def test_cli_help():
result = subprocess.run(['luzia', '--help'], capture_output=True, text=True, timeout=10)
assert result.returncode == 0
assert 'luzia' in result.stdout.lower() or 'usage' in result.stdout.lower()
@test("luzia chat help works")
def test_cli_chat_help():
result = subprocess.run(['luzia', 'chat', 'help'], capture_output=True, text=True, timeout=10)
assert result.returncode == 0
assert 'Chat' in result.stdout or 'chat' in result.stdout.lower()
@test("luzia watchdog status works")
def test_cli_watchdog():
result = subprocess.run(['luzia', 'watchdog', 'status'], capture_output=True, text=True, timeout=10)
assert result.returncode == 0
assert 'PROJECT' in result.stdout or 'Queue' in result.stdout
@test("luzia cockpit status works")
def test_cli_cockpit():
result = subprocess.run(['luzia', 'cockpit', 'status'], capture_output=True, text=True, timeout=10)
assert result.returncode == 0
@test("luzia list works")
def test_cli_list():
result = subprocess.run(['luzia', 'list'], capture_output=True, text=True, timeout=10)
assert result.returncode == 0
test_cli_help()
test_cli_chat_help()
test_cli_watchdog()
test_cli_cockpit()
test_cli_list()
# =============================================================================
# Summary
# =============================================================================
print("\n" + "=" * 60)
print(f"RESULTS: {RESULTS['passed']} passed, {RESULTS['failed']} failed")
print("=" * 60)
if RESULTS['errors']:
print("\nFailed tests:")
for err in RESULTS['errors']:
print(f" - {err}")
sys.exit(0 if RESULTS['failed'] == 0 else 1)

@@ -0,0 +1,287 @@
#!/usr/bin/env python3
"""
Test Per-User Queue System
Tests:
1. Per-user lock acquisition and release
2. Lock timeout and cleanup
3. Queue controller with per-user serialization
4. Fair scheduling respects per-user locks
5. Conductor lock cleanup
"""
import sys
import json
import time
from pathlib import Path
from datetime import datetime, timedelta
# Add lib to path
lib_path = Path(__file__).parent.parent / "lib"
sys.path.insert(0, str(lib_path))
from per_user_queue_manager import PerUserQueueManager
from queue_controller_v2 import QueueControllerV2
from conductor_lock_cleanup import ConductorLockCleanup
def test_per_user_lock_basic():
"""Test basic lock acquire and release."""
print("\n=== Test: Basic Lock Acquire/Release ===")
manager = PerUserQueueManager()
user = "testuser"
task_id = "task_123"
# Acquire lock
acquired, lock_id = manager.acquire_lock(user, task_id)
assert acquired, f"Failed to acquire lock for {user}"
assert lock_id, "Lock ID should not be None"
print(f"✓ Acquired lock: user={user}, lock_id={lock_id}")
# Check lock is active
assert manager.is_user_locked(user), "User should be locked"
print(f"✓ User is locked")
# Get lock info
lock_info = manager.get_lock_info(user)
assert lock_info, "Should return lock info"
assert lock_info["user"] == user
print(f"✓ Lock info retrieved: {lock_info['lock_id']}")
# Release lock
released = manager.release_lock(user, lock_id)
assert released, "Failed to release lock"
print(f"✓ Released lock")
# Check lock is gone
assert not manager.is_user_locked(user), "User should not be locked"
print(f"✓ Lock released successfully")
def test_concurrent_lock_contention():
"""Test that only one lock per user can be held."""
print("\n=== Test: Concurrent Lock Contention ===")
manager = PerUserQueueManager()
user = "contentionuser"
# Acquire first lock
acquired1, lock_id1 = manager.acquire_lock(user, "task_1", timeout=1)
assert acquired1, "First lock should succeed"
print(f"✓ First lock acquired: {lock_id1}")
# Try to acquire second lock (should timeout)
acquired2, lock_id2 = manager.acquire_lock(user, "task_2", timeout=1)
assert not acquired2, "Second lock should fail due to contention"
assert lock_id2 is None
print(f"✓ Second lock correctly rejected (contention)")
# Release first lock
manager.release_lock(user, lock_id1)
print(f"✓ First lock released")
# Now second should succeed
acquired3, lock_id3 = manager.acquire_lock(user, "task_2", timeout=1)
assert acquired3, "Third lock should succeed after release"
print(f"✓ Third lock acquired after release: {lock_id3}")
manager.release_lock(user, lock_id3)
def test_stale_lock_cleanup():
"""Test stale lock detection and cleanup."""
print("\n=== Test: Stale Lock Cleanup ===")
manager = PerUserQueueManager()
user = "staleuser"
# Acquire lock with custom timeout
acquired, lock_id = manager.acquire_lock(user, "task_stale")
assert acquired
print(f"✓ Lock acquired: {lock_id}")
# Manually set lock as expired
lock_meta_path = manager._get_lock_meta_path(user)
meta = json.loads(lock_meta_path.read_text())
meta["expires_at"] = (datetime.now() - timedelta(hours=1)).isoformat()
lock_meta_path.write_text(json.dumps(meta))
print(f"✓ Lock manually set as stale")
# Should be detected as stale
assert manager._is_lock_stale(user), "Lock should be detected as stale"
print(f"✓ Stale lock detected")
# Cleanup should remove it
manager._cleanup_stale_locks(user)
assert not manager.is_user_locked(user), "Stale lock should be cleaned up"
print(f"✓ Stale lock cleaned up")
def test_multiple_users():
"""Test that different users have independent locks."""
print("\n=== Test: Multiple Users Independence ===")
manager = PerUserQueueManager()
# Acquire locks for different users
acquired1, lock_id1 = manager.acquire_lock("user_a", "task_a")
acquired2, lock_id2 = manager.acquire_lock("user_b", "task_b")
assert acquired1 and acquired2, "Both locks should succeed"
print(f"✓ Acquired locks for user_a and user_b")
# Both should be locked
assert manager.is_user_locked("user_a"), "user_a should be locked"
assert manager.is_user_locked("user_b"), "user_b should be locked"
print(f"✓ Both users are locked")
# Release user_a's lock
manager.release_lock("user_a", lock_id1)
assert not manager.is_user_locked("user_a"), "user_a should be unlocked"
assert manager.is_user_locked("user_b"), "user_b should still be locked"
print(f"✓ user_a released, user_b still locked")
manager.release_lock("user_b", lock_id2)
def test_queue_controller_v2():
"""Test QueueControllerV2 with per-user serialization."""
print("\n=== Test: QueueControllerV2 Integration ===")
qc = QueueControllerV2()
# Ensure per-user serialization is in config and enabled for testing
if "per_user_serialization" not in qc.config:
qc.config["per_user_serialization"] = {"enabled": True, "lock_timeout_seconds": 3600}
qc.config["per_user_serialization"]["enabled"] = True
# Enqueue tasks for different projects (users)
task_id_1, pos_1 = qc.enqueue("project_a", "Task 1 for project A")
task_id_2, pos_2 = qc.enqueue("project_b", "Task 1 for project B")
task_id_3, pos_3 = qc.enqueue("project_a", "Task 2 for project A")
print(f"✓ Enqueued 3 tasks")
print(f" - project_a: {task_id_1} (pos {pos_1}), {task_id_3} (pos {pos_3})")
print(f" - project_b: {task_id_2} (pos {pos_2})")
# Get queue status
status = qc.get_queue_status()
initial_pending = status["pending"]["total"]
assert initial_pending >= 3, f"Should have at least 3 pending tasks, have {initial_pending}"
print(f"✓ Queue status: {initial_pending} total pending tasks (at least 3 new ones)")
# Check that per-user locks are respected
user_a = qc.extract_user_from_project("project_a")
user_b = qc.extract_user_from_project("project_b")
can_exec_a = qc.can_user_execute_task(user_a)
can_exec_b = qc.can_user_execute_task(user_b)
assert can_exec_a and can_exec_b, "Both users should be able to execute"
print(f"✓ Both users can execute tasks")
# Acquire locks
acq_a, lock_a = qc.acquire_user_lock(user_a, task_id_1)
assert acq_a and lock_a, "Should acquire lock for user_a"
print(f"✓ Acquired lock for user_a: {lock_a}")
# Now user_a cannot execute another task
can_exec_a2 = qc.can_user_execute_task(user_a)
assert not can_exec_a2, "user_a should not be able to execute while locked"
print(f"✓ user_a locked, cannot execute new tasks")
# But user_b can
can_exec_b2 = qc.can_user_execute_task(user_b)
assert can_exec_b2, "user_b should still be able to execute"
print(f"✓ user_b can still execute")
# Release user_a's lock
qc.release_user_lock(user_a, lock_a)
can_exec_a3 = qc.can_user_execute_task(user_a)
assert can_exec_a3, "user_a should be able to execute again"
print(f"✓ Released user_a lock, can execute again")
def test_fair_scheduling_with_locks():
"""Test that fair scheduling respects per-user locks."""
print("\n=== Test: Fair Scheduling with Per-User Locks ===")
qc = QueueControllerV2()
# Ensure per-user serialization is in config and enabled for testing
if "per_user_serialization" not in qc.config:
qc.config["per_user_serialization"] = {"enabled": True, "lock_timeout_seconds": 3600}
qc.config["per_user_serialization"]["enabled"] = True
# Enqueue multiple tasks
task_id_1, _ = qc.enqueue("proj_a", "Task A1", priority=5)
task_id_2, _ = qc.enqueue("proj_b", "Task B1", priority=5)
task_id_3, _ = qc.enqueue("proj_a", "Task A2", priority=5)
# Get pending tasks
capacity = qc._read_capacity()
task = qc._select_next_task(capacity)
assert task, "Should select a task"
print(f"✓ Selected task: {task['id']} for {task['project']}")
# Acquire lock for this task's user
user = task.get("user") or qc.extract_user_from_project(task["project"])
acq, lock_id = qc.acquire_user_lock(user, task["id"])
assert acq, "Should acquire user lock"
# Now selecting next task should skip tasks for this user
# and select from another user
task2 = qc._select_next_task(capacity)
if task2:
user2 = task2.get("user") or qc.extract_user_from_project(task2["project"])
# Task should be from a different user or None
assert user2 != user, f"Should select different user, got {user2}"
print(f"✓ Fair scheduling respects user lock: skipped {user}, selected {user2}")
else:
print(f"✓ Fair scheduling: no available task (all from locked user)")
qc.release_user_lock(user, lock_id)
def run_all_tests():
"""Run all tests."""
print("=" * 60)
print("Per-User Queue System Tests")
print("=" * 60)
tests = [
test_per_user_lock_basic,
test_concurrent_lock_contention,
test_stale_lock_cleanup,
test_multiple_users,
test_queue_controller_v2,
test_fair_scheduling_with_locks,
]
passed = 0
failed = 0
for test_func in tests:
try:
test_func()
passed += 1
except AssertionError as e:
print(f"✗ FAILED: {e}")
failed += 1
except Exception as e:
print(f"✗ ERROR: {e}")
failed += 1
print("\n" + "=" * 60)
print(f"Results: {passed} passed, {failed} failed")
print("=" * 60)
return failed == 0
if __name__ == "__main__":
success = run_all_tests()
sys.exit(0 if success else 1)

tests/test_plugin_system.py (new file, 470 lines)

@@ -0,0 +1,470 @@
#!/usr/bin/env python3
"""
Test Suite - Plugin Marketplace System
Tests for:
1. Plugin marketplace registry and loading
2. Plugin skill generation and matching
3. Dispatcher integration with plugins
4. Knowledge graph exports
5. Plugin-aware task dispatch
"""
import json
import sys
import tempfile
from pathlib import Path
from typing import Dict, List, Any
# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
from plugin_marketplace import (
PluginMarketplaceRegistry,
PluginCapabilityMatcher,
get_marketplace_registry
)
from plugin_skill_loader import (
PluginSkillLoader,
get_plugin_skill_loader
)
from dispatcher_plugin_integration import (
DispatcherPluginBridge,
PluginAwareTaskDispatcher
)
from plugin_kg_integration import (
PluginKnowledgeGraphExporter,
export_plugins_to_kg
)
class TestResults:
def __init__(self):
self.tests: List[Dict[str, Any]] = []
self.passed = 0
self.failed = 0
def add_test(self, name: str, passed: bool, details: str = ""):
status = "PASS" if passed else "FAIL"
self.tests.append({
'name': name,
'status': status,
'details': details
})
if passed:
self.passed += 1
else:
self.failed += 1
print(f"[{status}] {name}" + (f": {details}" if details else ""))
def summary(self) -> str:
return f"\nTest Summary: {self.passed} passed, {self.failed} failed out of {self.passed + self.failed}"
def test_plugin_registry() -> TestResults:
"""Test plugin marketplace registry"""
results = TestResults()
print("\n=== Testing Plugin Marketplace Registry ===\n")
# Test 1: Registry initialization
try:
registry = get_marketplace_registry()
results.add_test(
"Registry initialization",
len(registry.plugins) > 0,
f"Loaded {len(registry.plugins)} plugins"
)
except Exception as e:
results.add_test("Registry initialization", False, str(e))
return results
# Test 2: Plugin retrieval
try:
plugin = registry.get_plugin('code-simplifier')
results.add_test(
"Plugin retrieval",
plugin is not None and plugin.name == 'Code Simplifier',
f"Retrieved: {plugin.name if plugin else 'None'}"
)
except Exception as e:
results.add_test("Plugin retrieval", False, str(e))
# Test 3: List plugins by category
try:
code_analysis_plugins = registry.list_plugins('code-analysis')
results.add_test(
"Filter plugins by category",
len(code_analysis_plugins) > 0,
f"Found {len(code_analysis_plugins)} code-analysis plugins"
)
except Exception as e:
results.add_test("Filter plugins by category", False, str(e))
# Test 4: Find plugins for task
try:
task = "Review my code for security vulnerabilities"
matches = registry.find_plugins_for_task(task, ['security', 'review', 'code'])
results.add_test(
"Find plugins for task",
len(matches) > 0,
f"Found {len(matches)} matching plugins"
)
except Exception as e:
results.add_test("Find plugins for task", False, str(e))
# Test 5: Export plugin data
try:
export_data = registry.export_for_knowledge_graph()
results.add_test(
"Export for knowledge graph",
'plugins' in export_data and 'categories' in export_data,
f"Exported {len(export_data.get('plugins', {}))} plugins"
)
except Exception as e:
results.add_test("Export for knowledge graph", False, str(e))
return results
def test_plugin_skills() -> TestResults:
"""Test plugin skill generation and matching"""
results = TestResults()
print("\n=== Testing Plugin Skill System ===\n")
# Test 1: Skill loader initialization
try:
loader = get_plugin_skill_loader()
results.add_test(
"Skill loader initialization",
loader is not None,
"Initialized successfully"
)
except Exception as e:
results.add_test("Skill loader initialization", False, str(e))
return results
# Test 2: Generate skills from plugins
try:
skills = loader.generate_skills_from_plugins()
results.add_test(
"Generate skills from plugins",
len(skills) > 0,
f"Generated {len(skills)} skills"
)
except Exception as e:
results.add_test("Generate skills from plugins", False, str(e))
# Test 3: List all skills
try:
all_skills = loader.list_skills()
results.add_test(
"List all skills",
len(all_skills) > 0,
f"Listed {len(all_skills)} skills"
)
except Exception as e:
results.add_test("List all skills", False, str(e))
# Test 4: Filter skills by category
try:
code_skills = loader.list_skills(category='code-analysis')
results.add_test(
"Filter skills by category",
len(code_skills) > 0,
f"Found {len(code_skills)} code-analysis skills"
)
except Exception as e:
results.add_test("Filter skills by category", False, str(e))
# Test 5: Find skills for task
try:
task = "Simplify and optimize this Python function"
matched = loader.find_skills_for_task(task, min_relevance=0.3)
results.add_test(
"Find skills for task",
len(matched) > 0,
f"Found {len(matched)} matching skills"
)
except Exception as e:
results.add_test("Find skills for task", False, str(e))
# Test 6: Export for dispatcher
try:
dispatch_export = loader.export_for_dispatcher()
results.add_test(
"Export for dispatcher",
'skill_count' in dispatch_export and dispatch_export['skill_count'] > 0,
f"Exported {dispatch_export.get('skill_count', 0)} skills"
)
except Exception as e:
results.add_test("Export for dispatcher", False, str(e))
# Test 7: Export for knowledge graph
try:
kg_export = loader.export_for_knowledge_graph()
results.add_test(
"Export for knowledge graph",
'total_skills' in kg_export and kg_export['total_skills'] > 0,
f"Exported {kg_export.get('total_skills', 0)} skills"
)
except Exception as e:
results.add_test("Export for knowledge graph", False, str(e))
return results
def test_dispatcher_integration() -> TestResults:
"""Test dispatcher-plugin integration"""
results = TestResults()
print("\n=== Testing Dispatcher Integration ===\n")
# Test 1: Bridge initialization
try:
bridge = DispatcherPluginBridge()
results.add_test(
"Bridge initialization",
bridge is not None and len(bridge.skill_loader.skills) > 0,
f"Loaded {len(bridge.skill_loader.skills)} skills"
)
except Exception as e:
results.add_test("Bridge initialization", False, str(e))
return results
# Test 2: Enhance task context
try:
task = "Review this code for security issues and performance"
context = bridge.enhance_task_context(task, "test-project", "job-123")
results.add_test(
"Enhance task context",
'plugin_analysis' in context and 'matched_skills' in context['plugin_analysis'],
f"Found {len(context['plugin_analysis'].get('matched_skills', []))} skills"
)
except Exception as e:
results.add_test("Enhance task context", False, str(e))
# Test 3: Generate recommendations
try:
task = "Simplify and refactor this code"
context = bridge.enhance_task_context(task, "test-project", "job-456")
recommendations = context.get('recommended_plugins', {})
results.add_test(
"Generate recommendations",
'primary_skill' in recommendations,
f"Primary skill: {recommendations.get('primary_skill', {}).get('name', 'None')}"
)
except Exception as e:
results.add_test("Generate recommendations", False, str(e))
# Test 4: Plugin-aware task dispatcher
try:
dispatcher = PluginAwareTaskDispatcher(bridge)
dispatch_result = dispatcher.dispatch_with_plugin_context(
"Review code quality",
"test-project",
"job-789"
)
results.add_test(
"Plugin-aware dispatch",
dispatch_result['plugin_enhanced'] and 'plugin_context' in dispatch_result,
"Dispatch successful with plugin context"
)
except Exception as e:
results.add_test("Plugin-aware dispatch", False, str(e))
# Test 5: Get dispatch recommendations
try:
dispatcher = PluginAwareTaskDispatcher(bridge)
dispatcher.dispatch_with_plugin_context(
"Analyze code performance",
"test-project",
"job-999"
)
recommendations = dispatcher.get_dispatch_recommendations("job-999")
results.add_test(
"Get dispatch recommendations",
recommendations is not None and 'primary_skill' in recommendations,
"Retrieved recommendations successfully"
)
except Exception as e:
results.add_test("Get dispatch recommendations", False, str(e))
return results
def test_capability_matching() -> TestResults:
"""Test plugin capability matching"""
results = TestResults()
print("\n=== Testing Capability Matching ===\n")
# Test 1: Matcher initialization
try:
registry = get_marketplace_registry()
matcher = PluginCapabilityMatcher(registry)
results.add_test(
"Matcher initialization",
matcher is not None,
"Initialized successfully"
)
except Exception as e:
results.add_test("Matcher initialization", False, str(e))
return results
# Test 2: Extract keywords
try:
task = "Find security vulnerabilities in this code"
keywords = matcher.extract_task_keywords(task)
results.add_test(
"Extract keywords",
len(keywords) > 0 and 'security' in keywords,
f"Extracted keywords: {keywords}"
)
except Exception as e:
results.add_test("Extract keywords", False, str(e))
# Test 3: Match plugins to task
try:
task = "Review code for performance issues"
matches = matcher.match_plugins(task, min_relevance=0.3)
results.add_test(
"Match plugins to task",
len(matches) > 0,
f"Matched {len(matches)} plugins"
)
except Exception as e:
results.add_test("Match plugins to task", False, str(e))
# Test 4: Relevance scoring
try:
task1 = "Review code for security"
task2 = "Deploy application"
matches1 = matcher.match_plugins(task1)
matches2 = matcher.match_plugins(task2)
results.add_test(
"Relevance scoring",
len(matches1) > 0 and (len(matches2) == 0 or len(matches1) >= len(matches2)),
"Security task has more relevant plugins than deploy task"
)
except Exception as e:
results.add_test("Relevance scoring", False, str(e))
return results
def test_knowledge_graph_export() -> TestResults:
"""Test knowledge graph exports"""
results = TestResults()
print("\n=== Testing Knowledge Graph Export ===\n")
# Test 1: Exporter initialization
try:
exporter = PluginKnowledgeGraphExporter()
results.add_test(
"Exporter initialization",
exporter is not None,
"Initialized successfully"
)
except Exception as e:
results.add_test("Exporter initialization", False, str(e))
return results
# Test 2: Export plugins as entities
try:
entities = exporter.export_plugins_as_entities()
results.add_test(
"Export plugins as entities",
'entities' in entities and len(entities['entities']) > 0,
f"Exported {len(entities['entities'])} plugin entities"
)
except Exception as e:
results.add_test("Export plugins as entities", False, str(e))
# Test 3: Export skills as entities
try:
entities = exporter.export_plugin_skills_as_entities()
results.add_test(
"Export skills as entities",
'entities' in entities and len(entities['entities']) > 0,
f"Exported {len(entities['entities'])} skill entities"
)
except Exception as e:
results.add_test("Export skills as entities", False, str(e))
# Test 4: Export relationships
try:
relations = exporter.export_plugin_relationships()
results.add_test(
"Export relationships",
'relations' in relations and len(relations['relations']) > 0,
f"Exported {len(relations['relations'])} relationships"
)
except Exception as e:
results.add_test("Export relationships", False, str(e))
# Test 5: Complete export
try:
complete = exporter.export_for_shared_kg()
results.add_test(
"Complete KG export",
'plugins' in complete and 'skills' in complete and 'categories' in complete,
f"Plugins: {len(complete['plugins'])}, Skills: {len(complete['skills'])}"
)
except Exception as e:
results.add_test("Complete KG export", False, str(e))
# Test 6: Save exports
try:
with tempfile.TemporaryDirectory() as tmpdir:
export_dir = Path(tmpdir)
saved = exporter.save_exports()
results.add_test(
"Save exports to files",
len(saved) >= 3,
f"Saved {len(saved)} export files"
)
except Exception as e:
results.add_test("Save exports to files", False, str(e))
return results
def run_all_tests() -> None:
"""Run all test suites"""
print("=" * 60)
print("PLUGIN SYSTEM TEST SUITE")
print("=" * 60)
all_results = []
# Run test suites
all_results.append(test_plugin_registry())
all_results.append(test_plugin_skills())
all_results.append(test_capability_matching())
all_results.append(test_dispatcher_integration())
all_results.append(test_knowledge_graph_export())
# Print overall summary
print("\n" + "=" * 60)
print("OVERALL TEST SUMMARY")
print("=" * 60)
total_passed = sum(r.passed for r in all_results)
total_failed = sum(r.failed for r in all_results)
total_tests = total_passed + total_failed
print(f"\nTotal: {total_passed}/{total_tests} tests passed")
if total_failed > 0:
print(f"\n{total_failed} tests failed:")
for result_set in all_results:
for test in result_set.tests:
if test['status'] == 'FAIL':
print(f" - {test['name']}: {test['details']}")
print("\n" + "=" * 60)
exit_code = 0 if total_failed == 0 else 1
print(f"Exit code: {exit_code}")
sys.exit(exit_code)
if __name__ == '__main__':
run_all_tests()

@@ -0,0 +1,285 @@
#!/usr/bin/env python3
"""
Test Suite for Responsive Dispatcher
Tests:
1. Immediate job dispatch with job_id return
2. Non-blocking task spawning
3. Background status monitoring
4. Concurrent task handling
5. Status polling and updates
6. CLI feedback rendering
"""
import json
import sys
import time
import tempfile
import threading
from pathlib import Path
# Add lib to path
lib_path = Path(__file__).parent.parent / "lib"
sys.path.insert(0, str(lib_path))
from responsive_dispatcher import ResponseiveDispatcher
from cli_feedback import CLIFeedback, Colors, ProgressBar
from dispatcher_enhancements import EnhancedDispatcher, get_enhanced_dispatcher
class TestResponsiveDispatcher:
"""Test responsive dispatcher functionality"""
def __init__(self):
self.test_dir = Path(tempfile.mkdtemp(prefix="luzia_test_"))
self.dispatcher = ResponseiveDispatcher(self.test_dir)
self.feedback = CLIFeedback()
self.passed = 0
self.failed = 0
def run_all_tests(self):
"""Run all tests"""
print(f"\n{Colors.BOLD}=== Responsive Dispatcher Test Suite ==={Colors.RESET}\n")
tests = [
self.test_immediate_dispatch,
self.test_job_status_retrieval,
self.test_status_updates,
self.test_concurrent_jobs,
self.test_cache_behavior,
self.test_cli_feedback,
self.test_progress_bar,
self.test_background_monitoring,
]
for test in tests:
try:
print(f" Running {test.__name__}...", end=" ", flush=True)
test()
self.passed += 1
print(f"{Colors.GREEN}{Colors.RESET}")
except AssertionError as e:
self.failed += 1
print(f"{Colors.RED}{Colors.RESET}")
print(f" Error: {e}")
except Exception as e:
self.failed += 1
print(f"{Colors.RED}{Colors.RESET}")
print(f" Unexpected error: {e}")
# Summary
print(f"\n{Colors.BOLD}=== Test Summary ==={Colors.RESET}")
print(f" {Colors.GREEN}Passed:{Colors.RESET} {self.passed}")
print(f" {Colors.RED}Failed:{Colors.RESET} {self.failed}")
print(f" {Colors.BLUE}Total:{Colors.RESET} {self.passed + self.failed}\n")
return self.failed == 0
def test_immediate_dispatch(self):
"""Test that dispatch returns immediately with job_id"""
start_time = time.time()
job_id, status = self.dispatcher.dispatch_task("test_project", "echo hello")
elapsed = time.time() - start_time
assert job_id, "Job ID should be returned"
assert isinstance(status, dict), "Status should be dict"
assert status["status"] == "dispatched", "Initial status should be 'dispatched'"
assert status["project"] == "test_project", "Project should match"
assert elapsed < 0.5, f"Dispatch should be instant (took {elapsed}s)"
def test_job_status_retrieval(self):
"""Test retrieving job status"""
job_id, initial_status = self.dispatcher.dispatch_task("proj1", "task1")
# Retrieve status
retrieved = self.dispatcher.get_status(job_id)
assert retrieved is not None, "Status should be retrievable"
assert retrieved["id"] == job_id, "Job ID should match"
assert retrieved["status"] == "dispatched", "Status should be dispatched"
def test_status_updates(self):
"""Test updating job status"""
job_id, _ = self.dispatcher.dispatch_task("proj1", "task1")
# Update status
self.dispatcher.update_status(job_id, "running", progress=25, message="Processing...")
status = self.dispatcher.get_status(job_id, use_cache=False)
assert status["status"] == "running", "Status should be updated"
assert status["progress"] == 25, "Progress should be updated"
assert status["message"] == "Processing...", "Message should be updated"
def test_concurrent_jobs(self):
"""Test handling multiple concurrent jobs"""
jobs = []
for i in range(5):
job_id, status = self.dispatcher.dispatch_task(f"proj{i}", f"task{i}")
jobs.append(job_id)
# Verify all jobs exist
for job_id in jobs:
status = self.dispatcher.get_status(job_id)
assert status is not None, f"Job {job_id} should exist"
# Verify list shows all jobs
all_jobs = self.dispatcher.list_jobs()
assert len(all_jobs) >= 5, "Should have at least 5 jobs"
def test_cache_behavior(self):
"""Test cache behavior"""
job_id, _ = self.dispatcher.dispatch_task("proj1", "task1")
# First read should cache
status1 = self.dispatcher.get_status(job_id, use_cache=True)
# Update the status (update_status writes to disk and refreshes the cache)
self.dispatcher.update_status(job_id, "running", progress=50)
# Cached read should reflect the write
status2 = self.dispatcher.get_status(job_id, use_cache=True)
assert status2["progress"] == 50, "Cache should be updated on write"
# Non-cached read should be fresh
time.sleep(1.1) # Wait for cache to expire
status3 = self.dispatcher.get_status(job_id, use_cache=False)
assert status3["progress"] == 50, "Fresh read should show updated status"
def test_cli_feedback(self):
"""Test CLI feedback rendering"""
status = {
"id": "test-job-id",
"project": "test_proj",
"status": "running",
"progress": 45,
"message": "Processing files...",
}
# Should not raise exception
self.feedback.show_status(status)
self.feedback.show_status_line(status)
self.feedback.job_dispatched("test-id", "proj", "task")
def test_progress_bar(self):
"""Test progress bar rendering"""
bar = ProgressBar.render(0)
assert "[" in bar and "]" in bar, "Progress bar should have brackets"
bar50 = ProgressBar.render(50)
bar100 = ProgressBar.render(100)
# assumes ProgressBar uses "█" as its fill character
assert bar50.count("█") > bar.count("█"), "50% should have more filled blocks"
assert bar100.count("█") > bar50.count("█"), "100% should have all filled blocks"
def test_background_monitoring(self):
"""Test background monitoring queue"""
job_id, _ = self.dispatcher.dispatch_task("proj1", "test task")
# Monitoring queue should have the job
assert not self.dispatcher.monitoring_queue.empty(), "Queue should have job"
# Get item from queue (with retry in case timing issues)
try:
job_info = self.dispatcher.monitoring_queue.get(timeout=1)
assert job_info["job_id"] == job_id, "Queue should contain correct job_id"
except Exception:
# Queue might have been processed already - verify job exists instead
status = self.dispatcher.get_status(job_id)
assert status is not None, "Job should exist in dispatcher"
class TestEnhancedDispatcher:
"""Test enhanced dispatcher with integrated features"""
def __init__(self):
self.test_dir = Path(tempfile.mkdtemp(prefix="luzia_enh_test_"))
self.enhanced = EnhancedDispatcher(self.test_dir)
self.passed = 0
self.failed = 0
def run_all_tests(self):
"""Run all tests"""
print(f"\n{Colors.BOLD}=== Enhanced Dispatcher Test Suite ==={Colors.RESET}\n")
tests = [
self.test_dispatch_and_report,
self.test_status_display,
self.test_jobs_summary,
]
for test in tests:
try:
print(f" Running {test.__name__}...", end=" ", flush=True)
test()
self.passed += 1
print(f"{Colors.GREEN}{Colors.RESET}")
except AssertionError as e:
self.failed += 1
print(f"{Colors.RED}{Colors.RESET}")
print(f" Error: {e}")
except Exception as e:
self.failed += 1
print(f"{Colors.RED}{Colors.RESET}")
print(f" Unexpected error: {e}")
print(f"\n{Colors.BOLD}=== Test Summary ==={Colors.RESET}")
print(f" {Colors.GREEN}Passed:{Colors.RESET} {self.passed}")
print(f" {Colors.RED}Failed:{Colors.RESET} {self.failed}")
print(f" {Colors.BLUE}Total:{Colors.RESET} {self.passed + self.failed}\n")
return self.failed == 0
def test_dispatch_and_report(self):
"""Test dispatch with feedback"""
job_id, status = self.enhanced.dispatch_and_report(
"test_proj", "test task", show_feedback=False
)
assert job_id, "Should return job_id"
assert status["status"] == "dispatched", "Should be dispatched"
def test_status_display(self):
"""Test status display"""
job_id, _ = self.enhanced.dispatch_and_report(
"proj", "task", show_feedback=False
)
status = self.enhanced.get_status_and_display(job_id, show_full=False)
assert status is not None, "Should retrieve status"
def test_jobs_summary(self):
"""Test jobs summary display"""
for i in range(3):
self.enhanced.dispatch_and_report(f"proj{i}", f"task{i}", show_feedback=False)
# Should not raise exception
self.enhanced.show_jobs_summary()
self.enhanced.show_concurrent_summary()
def main():
"""Run all test suites"""
print(f"\n{Colors.BOLD}{Colors.CYAN}Luzia Responsive Dispatcher Tests{Colors.RESET}")
print(f"{Colors.GRAY}Testing non-blocking dispatch and status tracking{Colors.RESET}")
# Test responsive dispatcher
dispatcher_tests = TestResponsiveDispatcher()
dispatcher_ok = dispatcher_tests.run_all_tests()
# Test enhanced dispatcher
enhanced_tests = TestEnhancedDispatcher()
enhanced_ok = enhanced_tests.run_all_tests()
# Summary
all_passed = dispatcher_ok and enhanced_ok
if all_passed:
print(
f"{Colors.GREEN}{Colors.BOLD}✓ All tests passed!{Colors.RESET}\n"
)
return 0
else:
print(
f"{Colors.RED}{Colors.BOLD}✗ Some tests failed{Colors.RESET}\n"
)
return 1
if __name__ == "__main__":
sys.exit(main())

@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Tests for skill learning system.
Tests the complete pipeline:
1. Task execution analysis
2. Skill extraction
3. Learning storage in KG
4. Skill recommendations
5. QA integration
"""
import pytest
import json
import sys
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch
# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
from skill_learning_engine import (
TaskAnalyzer, SkillExtractor, LearningEngine,
SkillRecommender, SkillLearningSystem,
TaskExecution, ExtractedSkill
)
class TestTaskAnalyzer:
"""Test task analysis and pattern extraction."""
def test_analyze_valid_task(self):
"""Test analyzing a valid task execution."""
analyzer = TaskAnalyzer()
task_data = {
"task_id": "test_001",
"prompt": "Refactor database schema",
"project": "overbits",
"status": "success",
"tools_used": ["Bash", "Read", "Edit"],
"duration": 45.2,
"result_summary": "Successfully refactored",
"qa_passed": True,
"timestamp": datetime.now().isoformat()
}
execution = analyzer.analyze_task(task_data)
assert execution is not None
assert execution.task_id == "test_001"
assert execution.project == "overbits"
assert execution.status == "success"
assert len(execution.tools_used) == 3
def test_extract_patterns(self):
"""Test pattern extraction from multiple tasks."""
analyzer = TaskAnalyzer()
# Add multiple tasks
executions = []
for i in range(3):
task_data = {
"task_id": f"task_{i}",
"prompt": "Test task",
"project": "overbits",
"status": "success" if i < 2 else "failed",
"tools_used": ["Bash", "Read"],
"duration": 30.0 + i,
"result_summary": "Test",
"qa_passed": i < 2,
"timestamp": datetime.now().isoformat()
}
exec = analyzer.analyze_task(task_data)
if exec:
executions.append(exec)
patterns = analyzer.extract_patterns(executions)
assert "success_rate" in patterns
assert "average_duration" in patterns
assert "common_tools" in patterns
assert patterns["success_rate"] == 2/3
class TestSkillExtractor:
"""Test skill extraction from tasks and QA results."""
def test_extract_from_task(self):
"""Test skill extraction from task execution."""
extractor = SkillExtractor()
execution = TaskExecution(
task_id="test_001",
prompt="Debug authentication flow for users",
project="overbits",
status="success",
tools_used=["Read", "Bash", "Edit"],
duration=30.0,
result_summary="Fixed login issue",
qa_passed=True,
timestamp=datetime.now()
)
skills = extractor.extract_from_task(execution)
assert len(skills) > 0
# Should have tool skills
tool_skills = [s for s in skills if s.category == "tool_usage"]
assert len(tool_skills) >= 3
# Should have decision patterns
decision_skills = [s for s in skills if s.category == "decision"]
assert len(decision_skills) > 0
def test_extract_from_qa_results(self):
"""Test skill extraction from QA results."""
extractor = SkillExtractor()
qa_results = {
"passed": True,
"results": {
"syntax": True,
"routes": True,
"command_docs": True,
},
"task_id": "test_001"
}
skills = extractor.extract_from_qa_results(qa_results)
assert len(skills) == 3
assert all(s.category == "pattern" for s in skills)
assert all(s.confidence == 0.9 for s in skills)
def test_extract_decision_patterns(self):
"""Test decision pattern extraction."""
extractor = SkillExtractor()
test_cases = [
("Optimize database query", "optimization"),
("Debug authentication issue", "debugging"),
("Write documentation for API", "documentation"),
("Test new feature", "testing"),
("Refactor old code", "refactoring"),
]
for prompt, expected_pattern in test_cases:
skills = extractor._extract_decision_patterns(prompt)
pattern_names = [s.name for s in skills]
assert any(expected_pattern in name for name in pattern_names)
def test_aggregate_skills(self):
"""Test skill aggregation."""
extractor = SkillExtractor()
skills = [
ExtractedSkill(
name="tool_read",
category="tool_usage",
confidence=0.8,
context={"tool": "Read"},
source_task_id="task_1",
evidence="Used Read tool"
),
ExtractedSkill(
name="tool_read",
category="tool_usage",
confidence=0.85,
context={"tool": "Read"},
source_task_id="task_2",
evidence="Used Read tool again"
),
]
aggregated = extractor.aggregate_skills(skills)
assert "tool_read" in aggregated
assert aggregated["tool_read"]["occurrences"] == 2
assert aggregated["tool_read"]["average_confidence"] == 0.825
class TestLearningEngine:
"""Test learning extraction and storage."""
@patch('skill_learning_engine.KnowledgeGraph')
def test_extract_learning(self, mock_kg):
"""Test learning extraction."""
engine = LearningEngine()
execution = TaskExecution(
task_id="test_001",
prompt="Refactor database schema for performance",
project="overbits",
status="success",
tools_used=["Bash", "Read", "Edit"],
duration=45.0,
result_summary="Schema refactored successfully",
qa_passed=True,
timestamp=datetime.now()
)
skills = [
ExtractedSkill(
name="tool_bash",
category="tool_usage",
confidence=0.8,
context={"tool": "Bash"},
source_task_id="test_001",
evidence="Used Bash"
),
]
qa_results = {
"passed": True,
"results": {"syntax": True},
"summary": {"errors": 0}
}
learning = engine.extract_learning(execution, skills, qa_results)
assert learning is not None
assert len(learning.skill_names) > 0
assert learning.confidence > 0
assert "overbits" in learning.applicability
@patch('skill_learning_engine.KnowledgeGraph')
def test_extract_learning_failed_qa(self, mock_kg):
"""Test that learning is not extracted if QA fails."""
engine = LearningEngine()
execution = TaskExecution(
task_id="test_001",
prompt="Test task",
project="test",
status="success",
tools_used=["Read"],
duration=10.0,
result_summary="Test",
qa_passed=False,
timestamp=datetime.now()
)
skills = []
qa_results = {
"passed": False,
"results": {"syntax": False},
}
learning = engine.extract_learning(execution, skills, qa_results)
assert learning is None
class TestSkillRecommender:
"""Test skill recommendation system."""
@patch('skill_learning_engine.KnowledgeGraph')
def test_recommend_for_task(self, mock_kg):
"""Test getting recommendations for a task."""
recommender = SkillRecommender()
# Mock KG search to return test learnings
mock_kg.return_value.search.return_value = [
{
"name": "learning_001",
"type": "finding",
"metadata": {
"skills": ["tool_bash", "pattern_optimization"],
"confidence": 0.85,
"applicability": ["overbits", "general"],
}
},
]
recommendations = recommender.recommend_for_task(
"Optimize database performance",
project="overbits"
)
assert len(recommendations) > 0
assert recommendations[0]["confidence"] > 0
@patch('skill_learning_engine.KnowledgeGraph')
def test_get_skill_profile(self, mock_kg):
"""Test getting skill profile."""
recommender = SkillRecommender()
mock_kg.return_value.list_entities.return_value = [
{
"name": "skill_001",
"type": "finding",
"metadata": {
"category": "tool_usage",
"skills": ["tool_bash", "tool_read"],
}
},
]
profile = recommender.get_skill_profile()
assert "total_learnings" in profile
assert "by_category" in profile
assert "top_skills" in profile
class TestSkillLearningSystem:
"""Test integrated skill learning system."""
@patch('skill_learning_engine.KnowledgeGraph')
def test_process_task_completion(self, mock_kg):
"""Test full task completion processing."""
system = SkillLearningSystem()
task_data = {
"task_id": "test_001",
"prompt": "Refactor authentication module",
"project": "overbits",
"status": "success",
"tools_used": ["Read", "Edit", "Bash"],
"duration": 60.0,
"result_summary": "Successfully refactored",
"qa_passed": True,
"timestamp": datetime.now().isoformat()
}
qa_results = {
"passed": True,
"results": {
"syntax": True,
"routes": True,
},
"summary": {"errors": 0, "warnings": 0, "info": 2}
}
result = system.process_task_completion(task_data, qa_results)
assert result["success"]
assert result["skills_extracted"] > 0
assert result["learning_created"]
@patch('skill_learning_engine.KnowledgeGraph')
def test_get_recommendations(self, mock_kg):
"""Test getting recommendations from system."""
system = SkillLearningSystem()
# Mock recommender
mock_kg.return_value.search.return_value = []
recommendations = system.get_recommendations(
"Debug authentication issue",
project="overbits"
)
assert isinstance(recommendations, list)
class TestIntegration:
"""Integration tests for complete workflows."""
@patch('skill_learning_engine.KnowledgeGraph')
def test_complete_learning_pipeline(self, mock_kg):
"""Test complete pipeline from task to recommendation."""
system = SkillLearningSystem()
# Process a task
task_data = {
"task_id": "pipeline_test",
"prompt": "Optimize API endpoint performance",
"project": "overbits",
"status": "success",
"tools_used": ["Bash", "Read"],
"duration": 30.0,
"result_summary": "30% performance improvement",
"qa_passed": True,
"timestamp": datetime.now().isoformat()
}
qa_results = {
"passed": True,
"results": {"syntax": True, "routes": True},
"summary": {"errors": 0}
}
# Process task
result = system.process_task_completion(task_data, qa_results)
assert result["success"]
# Get recommendations
recommendations = system.get_recommendations(
"Improve API performance",
project="overbits"
)
# Should be able to get recommendations
assert isinstance(recommendations, list)
@patch('skill_learning_engine.KnowledgeGraph')
def test_skill_profile_evolution(self, mock_kg):
"""Test how skill profile evolves with multiple tasks."""
system = SkillLearningSystem()
# Process multiple tasks
for i in range(3):
task_data = {
"task_id": f"task_{i}",
"prompt": f"Test task {i}",
"project": "overbits",
"status": "success",
"tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
"duration": 20.0 + i,
"result_summary": f"Task {i} completed",
"qa_passed": True,
"timestamp": datetime.now().isoformat()
}
qa_results = {
"passed": True,
"results": {"syntax": True},
"summary": {"errors": 0}
}
system.process_task_completion(task_data, qa_results)
# Get profile
profile = system.get_learning_summary()
assert profile["total_learnings"] >= 0
if __name__ == "__main__":
pytest.main([__file__, "-v"])

@@ -0,0 +1,490 @@
#!/usr/bin/env python3
"""
Tests for Sub-Agent Context Management
Verifies:
1. Sub-agent context creation and retrieval
2. Phase progression tracking
3. Sibling agent discovery and coordination
4. Context persistence
5. Flow integration
"""
import pytest
import tempfile
from pathlib import Path
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))
from sub_agent_context import (
SubAgentContext,
SubAgentContextManager,
FlowPhase,
)
from sub_agent_flow_integration import SubAgentFlowIntegrator
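# Hedged sketch (not a test): the core SubAgentContextManager calls exercised by
# the classes below. Method names and arguments mirror these tests rather than
# separate documentation for the module.
def _example_sub_agent_workflow(base_dir: Path) -> list:
    """Illustrative only: create a context, complete its first phase, list siblings."""
    manager = SubAgentContextManager(base_dir)
    ctx = manager.create_sub_agent_context(
        parent_task_id="example-parent",
        parent_project="admin",
        parent_description="Illustrative parent task",
    )
    manager.update_phase(ctx.sub_agent_id, "CONTEXT_PREP", "completed", output="prepared")
    return manager.get_sibling_agents(ctx.sub_agent_id)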
class TestSubAgentContextCreation:
"""Test sub-agent context creation"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.manager = SubAgentContextManager(Path(self.temp_dir.name))
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_create_sub_agent_context(self):
"""Test creating a new sub-agent context"""
context = self.manager.create_sub_agent_context(
parent_task_id="task-123",
parent_project="admin",
parent_description="Test parent task",
parent_context={"key": "value"},
parent_tags=["important", "research"],
)
assert context.sub_agent_id is not None
assert context.parent_task_id == "task-123"
assert context.parent_project == "admin"
assert context.parent_description == "Test parent task"
assert len(context.phase_progression) == 9
assert context.phase_progression[0].phase_name == "CONTEXT_PREP"
def test_phase_progression_initialization(self):
"""Test that all 9 phases are initialized"""
context = self.manager.create_sub_agent_context(
parent_task_id="task-456",
parent_project="test",
parent_description="Phase test",
)
phase_names = [p.phase_name for p in context.phase_progression]
expected_phases = [
"CONTEXT_PREP",
"RECEIVED",
"PREDICTING",
"ANALYZING",
"CONSENSUS_CHECK",
"AWAITING_APPROVAL",
"STRATEGIZING",
"EXECUTING",
"LEARNING",
]
assert phase_names == expected_phases
def test_retrieve_sub_agent_context(self):
"""Test retrieving sub-agent context"""
created = self.manager.create_sub_agent_context(
parent_task_id="task-789",
parent_project="admin",
parent_description="Retrieve test",
)
retrieved = self.manager.get_sub_agent_context(created.sub_agent_id)
assert retrieved is not None
assert retrieved.sub_agent_id == created.sub_agent_id
assert retrieved.parent_task_id == "task-789"
class TestSiblingDiscovery:
"""Test sibling agent discovery and awareness"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.manager = SubAgentContextManager(Path(self.temp_dir.name))
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_single_sub_agent_no_siblings(self):
"""Test first sub-agent has no siblings"""
context = self.manager.create_sub_agent_context(
parent_task_id="parent-1",
parent_project="admin",
parent_description="First agent",
)
assert len(context.sibling_agents) == 0
def test_multiple_sub_agents_discover_siblings(self):
"""Test multiple sub-agents discover each other as siblings"""
# Create first sub-agent
agent1 = self.manager.create_sub_agent_context(
parent_task_id="parent-2",
parent_project="admin",
parent_description="Agent 1",
)
# Create second sub-agent for same parent
agent2 = self.manager.create_sub_agent_context(
parent_task_id="parent-2",
parent_project="admin",
parent_description="Agent 2",
)
# Create third sub-agent for same parent
agent3 = self.manager.create_sub_agent_context(
parent_task_id="parent-2",
parent_project="admin",
parent_description="Agent 3",
)
# Verify sibling relationships
assert agent2.sub_agent_id in self.manager.get_sibling_agents(agent1.sub_agent_id)
assert agent3.sub_agent_id in self.manager.get_sibling_agents(agent1.sub_agent_id)
assert len(self.manager.get_sibling_agents(agent1.sub_agent_id)) == 2
assert agent1.sub_agent_id in self.manager.get_sibling_agents(agent2.sub_agent_id)
assert agent3.sub_agent_id in self.manager.get_sibling_agents(agent2.sub_agent_id)
assert len(self.manager.get_sibling_agents(agent2.sub_agent_id)) == 2
def test_agents_from_different_parents_not_siblings(self):
"""Test agents from different parents are not siblings"""
agent1 = self.manager.create_sub_agent_context(
parent_task_id="parent-a",
parent_project="admin",
parent_description="Agent 1",
)
agent2 = self.manager.create_sub_agent_context(
parent_task_id="parent-b",
parent_project="admin",
parent_description="Agent 2",
)
assert agent2.sub_agent_id not in self.manager.get_sibling_agents(agent1.sub_agent_id)
assert agent1.sub_agent_id not in self.manager.get_sibling_agents(agent2.sub_agent_id)
class TestPhaseProgression:
"""Test phase progression tracking"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.manager = SubAgentContextManager(Path(self.temp_dir.name))
self.context = self.manager.create_sub_agent_context(
parent_task_id="task-phase",
parent_project="admin",
parent_description="Phase test",
)
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_update_phase_status(self):
"""Test updating phase status"""
success = self.manager.update_phase(
self.context.sub_agent_id,
"CONTEXT_PREP",
"completed",
output="Context prepared",
)
assert success is True
updated = self.manager.get_sub_agent_context(self.context.sub_agent_id)
phase = updated.phase_progression[0]
assert phase.status == "completed"
assert phase.output == "Context prepared"
def test_get_current_phase(self):
"""Test getting current active phase"""
# Initially should be first pending phase
current = self.manager.get_current_phase(self.context.sub_agent_id)
assert current == "CONTEXT_PREP"
# Mark first phase as complete
self.manager.update_phase(
self.context.sub_agent_id,
"CONTEXT_PREP",
"completed",
)
# Now should be next pending phase
current = self.manager.get_current_phase(self.context.sub_agent_id)
assert current == "RECEIVED"
def test_phase_duration_calculation(self):
"""Test duration calculation for completed phases"""
# Mark phase as in progress
self.manager.update_phase(
self.context.sub_agent_id,
"CONTEXT_PREP",
"in_progress",
)
# Mark as completed
self.manager.update_phase(
self.context.sub_agent_id,
"CONTEXT_PREP",
"completed",
output="Done",
)
updated = self.manager.get_sub_agent_context(self.context.sub_agent_id)
phase = updated.phase_progression[0]
assert phase.duration_seconds is not None
assert phase.duration_seconds >= 0
def test_phase_progression_sequence(self):
"""Test progressing through all phases"""
sub_agent_id = self.context.sub_agent_id
phases = [p.phase_name for p in self.context.phase_progression]
for phase_name in phases:
self.manager.update_phase(
sub_agent_id,
phase_name,
"completed",
output=f"Completed {phase_name}",
)
updated = self.manager.get_sub_agent_context(sub_agent_id)
all_completed = all(p.status == "completed" for p in updated.phase_progression)
assert all_completed is True
class TestCoordination:
"""Test sub-agent coordination and messaging"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.manager = SubAgentContextManager(Path(self.temp_dir.name))
# Create two sibling agents
self.agent1 = self.manager.create_sub_agent_context(
parent_task_id="parent-coord",
parent_project="admin",
parent_description="Agent 1",
)
self.agent2 = self.manager.create_sub_agent_context(
parent_task_id="parent-coord",
parent_project="admin",
parent_description="Agent 2",
)
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_send_message_to_sibling(self):
"""Test sending coordination message to sibling"""
success = self.manager.send_message_to_sibling(
self.agent1.sub_agent_id,
self.agent2.sub_agent_id,
"request",
{"type": "need_data", "data_type": "context"},
)
assert success is True
def test_message_appears_in_both_agents(self):
"""Test message is visible to both sender and receiver"""
self.manager.send_message_to_sibling(
self.agent1.sub_agent_id,
self.agent2.sub_agent_id,
"update",
{"status": "ready"},
)
agent1_updated = self.manager.get_sub_agent_context(self.agent1.sub_agent_id)
agent2_updated = self.manager.get_sub_agent_context(self.agent2.sub_agent_id)
assert len(agent1_updated.coordination_messages) == 1
assert len(agent2_updated.coordination_messages) == 1
assert agent1_updated.coordination_messages[0]["type"] == "update"
assert agent2_updated.coordination_messages[0]["type"] == "update"
def test_cannot_message_non_sibling(self):
"""Test cannot send message to non-sibling agent"""
# Create agent with different parent
agent3 = self.manager.create_sub_agent_context(
parent_task_id="parent-other",
parent_project="admin",
parent_description="Agent 3",
)
# Try to send message across parent boundary
success = self.manager.send_message_to_sibling(
self.agent1.sub_agent_id,
agent3.sub_agent_id,
"request",
{"data": "test"},
)
assert success is False
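# Hedged sketch (not a test): sibling messaging as covered by TestCoordination.
# Contexts sharing a parent_task_id may exchange messages; the manager is
# expected to return False when the target is not a sibling.
def _example_sibling_message(base_dir: Path) -> bool:
    """Illustrative only: create two siblings and send one coordination message."""
    manager = SubAgentContextManager(base_dir)
    sender = manager.create_sub_agent_context(
        parent_task_id="example-parent",
        parent_project="admin",
        parent_description="Sender",
    )
    receiver = manager.create_sub_agent_context(
        parent_task_id="example-parent",
        parent_project="admin",
        parent_description="Receiver",
    )
    return manager.send_message_to_sibling(
        sender.sub_agent_id, receiver.sub_agent_id, "update", {"status": "ready"}
    )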
class TestContextPersistence:
"""Test context persistence to disk"""
def test_context_saved_and_loaded(self):
"""Test contexts are saved to disk and reloaded"""
with tempfile.TemporaryDirectory() as temp_dir:
manager1 = SubAgentContextManager(Path(temp_dir))
# Create context in first manager
context1 = manager1.create_sub_agent_context(
parent_task_id="task-persist",
parent_project="admin",
parent_description="Persistence test",
)
sub_agent_id = context1.sub_agent_id
# Create new manager pointing to same directory
manager2 = SubAgentContextManager(Path(temp_dir))
# Should be able to retrieve context from new manager
context2 = manager2.get_sub_agent_context(sub_agent_id)
assert context2 is not None
assert context2.parent_task_id == "task-persist"
assert context2.sub_agent_id == sub_agent_id
class TestFlowIntegration:
"""Test flow integration with sub-agent context"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.context_manager = SubAgentContextManager(Path(self.temp_dir.name))
self.integrator = SubAgentFlowIntegrator(self.context_manager)
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_execute_sub_agent_flow(self):
"""Test executing full sub-agent flow"""
results = self.integrator.execute_sub_agent_flow(
parent_task_id="task-flow",
parent_project="admin",
parent_description="Flow test",
parent_context={"key": "value"},
)
assert results["sub_agent_id"] is not None
assert "phases" in results
# Should have results for all 9 phases
assert len(results["phases"]) == 9
def test_execute_single_phase(self):
"""Test executing a single phase"""
context = self.context_manager.create_sub_agent_context(
parent_task_id="task-single",
parent_project="admin",
parent_description="Single phase test",
)
result = self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")
assert result["status"] == "completed"
assert "output" in result
def test_get_sub_agent_progress(self):
"""Test getting progress report"""
context = self.context_manager.create_sub_agent_context(
parent_task_id="task-progress",
parent_project="admin",
parent_description="Progress test",
)
# Execute a phase
self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")
self.integrator.execute_phase(context.sub_agent_id, "RECEIVED")
progress = self.integrator.get_sub_agent_progress(context.sub_agent_id)
assert progress["completed_phases"] == 2
assert progress["in_progress_phases"] == 0
assert progress["total_phases"] == 9
def test_coordinate_sequential_sub_agents(self):
"""Test sequential coordination of sub-agents"""
# Create multiple sub-agents for same parent
for i in range(3):
self.context_manager.create_sub_agent_context(
parent_task_id="task-coord",
parent_project="admin",
parent_description=f"Agent {i+1}",
)
coordination = self.integrator.coordinate_sub_agents(
parent_task_id="task-coord",
coordination_strategy="sequential",
)
assert len(coordination["sub_agents"]) == 3
assert coordination["strategy"] == "sequential"
def test_collect_sub_agent_results(self):
"""Test collecting results from multiple sub-agents"""
# Create and execute multiple sub-agents
for i in range(2):
context = self.context_manager.create_sub_agent_context(
parent_task_id="task-collect",
parent_project="admin",
parent_description=f"Agent {i+1}",
)
self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")
results = self.integrator.collect_sub_agent_results("task-collect")
assert results["sub_agents_total"] == 2
assert len(results["sub_agents"]) == 2
assert all("progress" in s for s in results["sub_agents"])
class TestContextSummary:
"""Test context summary generation"""
def setup_method(self):
"""Setup test fixtures"""
self.temp_dir = tempfile.TemporaryDirectory()
self.manager = SubAgentContextManager(Path(self.temp_dir.name))
def teardown_method(self):
"""Cleanup"""
self.temp_dir.cleanup()
def test_get_context_summary(self):
"""Test getting human-readable summary"""
context = self.manager.create_sub_agent_context(
parent_task_id="task-summary",
parent_project="admin",
parent_description="Summary test",
parent_tags=["important", "urgent"],
)
# Create a sibling
self.manager.create_sub_agent_context(
parent_task_id="task-summary",
parent_project="admin",
parent_description="Sibling agent",
)
summary = self.manager.get_context_summary(context.sub_agent_id)
assert summary is not None
assert summary["sub_agent_id"] == context.sub_agent_id
assert summary["parent_task_id"] == "task-summary"
assert summary["sibling_count"] == 1
assert summary["parent_tags"] == ["important", "urgent"]
if __name__ == "__main__":
pytest.main([__file__, "-v"])

436
tests/test_time_metrics.py Normal file
View File

@@ -0,0 +1,436 @@
#!/usr/bin/env python3
"""
Test cases for time_metrics module.
Run with: pytest /opt/server-agents/orchestrator/tests/test_time_metrics.py -v
"""
import json
import os
import sys
import tempfile
import time
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
import pytest
# Import module under test
from time_metrics import (
get_utc_now,
get_utc_now_with_offset,
parse_iso_timestamp,
calculate_duration_seconds,
format_duration,
format_duration_human,
elapsed_since,
convert_to_local_time,
format_timestamp_with_local,
get_system_load,
get_memory_usage,
get_disk_usage,
capture_system_context,
TaskTimeTracker,
create_task_time_metadata,
update_task_completion_metadata,
format_job_with_timing,
format_logs_header,
DEFAULT_TIMEZONE
)
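# Hedged sketch (not a test): the TaskTimeTracker lifecycle exercised below.
# Names and call signatures are inferred from these tests, not from the module
# documentation, and the duration recorded here will be near zero.
def _example_task_timing(task_id: str = "example-001", project: str = "admin") -> dict:
    """Illustrative only: dispatch, complete, and read back full metrics."""
    tracker = TaskTimeTracker(task_id, project)
    tracker.mark_dispatched()            # records dispatch time plus system context
    tracker.mark_completed(exit_code=0)  # records completion time and duration
    metrics = tracker.get_full_metrics()
    metrics["duration_display"] = format_duration(
        metrics["completion"].get("duration_seconds")
    )
    return metrics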
class TestTimestampFunctions:
"""Test timestamp generation and parsing."""
def test_get_utc_now_format(self):
"""UTC timestamp should be in ISO 8601 format with Z suffix."""
ts = get_utc_now()
assert ts.endswith("Z")
# Should be parseable
dt = datetime.fromisoformat(ts[:-1])
assert dt is not None
def test_get_utc_now_with_offset_format(self):
"""UTC timestamp with offset should end with +00:00."""
ts = get_utc_now_with_offset()
assert "+00:00" in ts
def test_parse_iso_timestamp_z_suffix(self):
"""Parse timestamp with Z suffix."""
ts = "2026-01-11T03:31:57Z"
dt = parse_iso_timestamp(ts)
assert dt is not None
assert dt.year == 2026
assert dt.month == 1
assert dt.day == 11
assert dt.hour == 3
assert dt.minute == 31
assert dt.second == 57
def test_parse_iso_timestamp_with_offset(self):
"""Parse timestamp with timezone offset."""
ts = "2026-01-11T00:31:57-03:00"
dt = parse_iso_timestamp(ts)
assert dt is not None
assert dt.hour == 0 # Local hour
def test_parse_iso_timestamp_no_tz(self):
"""Parse timestamp without timezone (assume UTC)."""
ts = "2026-01-11T03:31:57"
dt = parse_iso_timestamp(ts)
assert dt is not None
assert dt.hour == 3
def test_parse_iso_timestamp_none(self):
"""None input should return None."""
assert parse_iso_timestamp(None) is None
assert parse_iso_timestamp("") is None
def test_parse_iso_timestamp_invalid(self):
"""Invalid timestamp should return None."""
assert parse_iso_timestamp("not-a-timestamp") is None
assert parse_iso_timestamp("2026-99-99T99:99:99Z") is None
class TestDurationCalculations:
"""Test duration calculation and formatting."""
def test_calculate_duration_seconds(self):
"""Calculate duration between two timestamps."""
start = "2026-01-11T10:00:00Z"
end = "2026-01-11T10:01:00Z"
duration = calculate_duration_seconds(start, end)
assert duration == 60.0
def test_calculate_duration_hours(self):
"""Calculate duration spanning hours."""
start = "2026-01-11T10:00:00Z"
end = "2026-01-11T12:30:00Z"
duration = calculate_duration_seconds(start, end)
assert duration == 2.5 * 3600 # 2.5 hours
def test_calculate_duration_negative(self):
"""Duration can be negative if end is before start."""
start = "2026-01-11T12:00:00Z"
end = "2026-01-11T10:00:00Z"
duration = calculate_duration_seconds(start, end)
assert duration < 0
def test_calculate_duration_none(self):
"""Invalid inputs should return None."""
assert calculate_duration_seconds(None, "2026-01-11T10:00:00Z") is None
assert calculate_duration_seconds("2026-01-11T10:00:00Z", None) is None
def test_format_duration_seconds(self):
"""Format durations under a minute."""
assert format_duration(0) == "00:00:00"
assert format_duration(45) == "00:00:45"
assert format_duration(59) == "00:00:59"
def test_format_duration_minutes(self):
"""Format durations in minutes."""
assert format_duration(60) == "00:01:00"
assert format_duration(125) == "00:02:05"
assert format_duration(3599) == "00:59:59"
def test_format_duration_hours(self):
"""Format durations in hours."""
assert format_duration(3600) == "01:00:00"
assert format_duration(3661) == "01:01:01"
assert format_duration(7200) == "02:00:00"
def test_format_duration_none(self):
"""None or negative should return placeholder."""
assert format_duration(None) == "--:--:--"
assert format_duration(-1) == "--:--:--"
def test_format_duration_human_seconds(self):
"""Human-readable format for seconds."""
assert format_duration_human(0) == "0s"
assert format_duration_human(45) == "45s"
assert format_duration_human(59) == "59s"
def test_format_duration_human_minutes(self):
"""Human-readable format for minutes."""
assert format_duration_human(60) == "1m 0s"
assert format_duration_human(125) == "2m 5s"
assert format_duration_human(3599) == "59m 59s"
def test_format_duration_human_hours(self):
"""Human-readable format for hours."""
assert format_duration_human(3600) == "1h 0m 0s"
assert format_duration_human(3661) == "1h 1m 1s"
assert format_duration_human(7200) == "2h 0m 0s"
def test_format_duration_human_days(self):
"""Human-readable format for days."""
assert format_duration_human(86400) == "1d 0h 0m"
assert format_duration_human(90061) == "1d 1h 1m"
def test_format_duration_human_none(self):
"""None or negative should return 'unknown'."""
assert format_duration_human(None) == "unknown"
assert format_duration_human(-1) == "unknown"
class TestTimezoneConversion:
"""Test timezone conversion functions."""
def test_convert_to_local_time_montevideo(self):
"""Convert UTC to Montevideo time (UTC-3)."""
utc_ts = "2026-01-11T03:31:57Z"
local = convert_to_local_time(utc_ts, "America/Montevideo")
# Should be 00:31:57 local (UTC-3)
assert "00:31:57" in local
def test_convert_to_local_time_invalid(self):
"""Invalid timestamp should return original."""
result = convert_to_local_time("invalid", "America/Montevideo")
assert "invalid" in result
def test_format_timestamp_with_local(self):
"""Format timestamp showing both UTC and local."""
utc_ts = "2026-01-11T03:31:57Z"
result = format_timestamp_with_local(utc_ts, "America/Montevideo")
assert "2026-01-11T03:31:57Z" in result
assert "America/Montevideo" in result
class TestSystemContext:
"""Test system context capture functions."""
def test_get_system_load_returns_tuple(self):
"""System load should return 3-element tuple."""
load = get_system_load()
assert isinstance(load, tuple)
assert len(load) == 3
assert all(isinstance(l, (int, float)) for l in load)
def test_get_memory_usage_keys(self):
"""Memory usage should have expected keys."""
mem = get_memory_usage()
assert "total_mb" in mem
assert "available_mb" in mem
assert "used_mb" in mem
assert "used_percent" in mem
assert 0 <= mem["used_percent"] <= 100
def test_get_disk_usage_keys(self):
"""Disk usage should have expected keys."""
disk = get_disk_usage("/")
assert "total_gb" in disk
assert "free_gb" in disk
assert "used_gb" in disk
assert "used_percent" in disk
assert 0 <= disk["used_percent"] <= 100
def test_capture_system_context_structure(self):
"""System context should have complete structure."""
ctx = capture_system_context()
assert "timestamp" in ctx
assert "system_load" in ctx
assert "memory" in ctx
assert "disk" in ctx
assert isinstance(ctx["system_load"], list)
assert len(ctx["system_load"]) == 3
assert "used_percent" in ctx["memory"]
assert "available_mb" in ctx["memory"]
assert "used_percent" in ctx["disk"]
assert "free_gb" in ctx["disk"]
class TestTaskTimeTracker:
"""Test TaskTimeTracker class."""
def test_tracker_initialization(self):
"""Tracker should initialize with task_id and project."""
tracker = TaskTimeTracker("test-001", "admin")
assert tracker.task_id == "test-001"
assert tracker.project == "admin"
assert tracker.dispatch_time is None
assert tracker.completion_time is None
def test_mark_dispatched(self):
"""mark_dispatched should record dispatch time and context."""
tracker = TaskTimeTracker("test-001", "admin")
result = tracker.mark_dispatched()
assert tracker.dispatch_time is not None
assert tracker.dispatch_context is not None
assert "dispatch" in result
assert "utc_time" in result["dispatch"]
assert "system_load" in result["dispatch"]
assert "memory_percent" in result["dispatch"]
def test_mark_started(self):
"""mark_started should record start time."""
tracker = TaskTimeTracker("test-001", "admin")
tracker.mark_dispatched()
result = tracker.mark_started()
assert tracker.start_time is not None
assert "start_time" in result
def test_mark_completed(self):
"""mark_completed should calculate duration."""
tracker = TaskTimeTracker("test-001", "admin")
tracker.mark_dispatched()
time.sleep(1.1) # Delay for measurable duration (must be > 1 sec for second resolution)
result = tracker.mark_completed(exit_code=0)
assert tracker.completion_time is not None
assert "completion" in result
assert "utc_time" in result["completion"]
assert "duration_seconds" in result["completion"]
# Duration should be at least 1 second
assert result["completion"]["duration_seconds"] >= 1.0
assert "exit_code" in result["completion"]
assert result["completion"]["exit_code"] == 0
def test_get_full_metrics_running(self):
"""get_full_metrics for running task should show elapsed."""
tracker = TaskTimeTracker("test-001", "admin")
tracker.mark_dispatched()
metrics = tracker.get_full_metrics()
assert metrics["status"] == "running"
assert "elapsed" in metrics
assert "dispatch" in metrics
def test_get_full_metrics_completed(self):
"""get_full_metrics for completed task should show duration."""
tracker = TaskTimeTracker("test-001", "admin")
tracker.mark_dispatched()
tracker.mark_completed(0)
metrics = tracker.get_full_metrics()
assert metrics["status"] == "completed"
assert "completion" in metrics
assert "duration_seconds" in metrics["completion"]
class TestMetadataFunctions:
"""Test metadata helper functions."""
def test_create_task_time_metadata(self):
"""create_task_time_metadata should return dispatch info."""
meta = create_task_time_metadata("test-001", "admin")
assert "time_metrics" in meta
assert "time_tracker_data" in meta
assert "dispatch" in meta["time_metrics"]
assert meta["time_tracker_data"]["task_id"] == "test-001"
assert meta["time_tracker_data"]["project"] == "admin"
def test_update_task_completion_metadata(self):
"""update_task_completion_metadata should add completion info."""
# Create initial metadata
meta = create_task_time_metadata("test-001", "admin")
time.sleep(0.1)
# Update with completion
updated = update_task_completion_metadata(meta, exit_code=0)
assert "time_metrics" in updated
assert "completion" in updated["time_metrics"]
assert updated["time_metrics"]["completion"]["exit_code"] == 0
class TestOutputFormatters:
"""Test output formatting functions."""
def test_format_job_with_timing_complete(self):
"""Format job with timing info should include all fields."""
job = {
"id": "123456-abcd",
"project": "admin",
"status": "completed",
"time_metrics": {
"dispatch": {
"utc_time": "2026-01-11T10:00:00Z",
"system_load": [0.5, 0.6, 0.7]
},
"completion": {
"duration_formatted": "00:05:30"
}
}
}
result = format_job_with_timing(job)
assert "123456-abcd" in result
assert "admin" in result
assert "completed" in result
assert "10:00:00" in result
assert "00:05:30" in result
def test_format_job_with_timing_running(self):
"""Format running job should show elapsed time."""
job = {
"id": "123456-abcd",
"project": "admin",
"status": "running",
"time_metrics": {
"dispatch": {
"utc_time": "2026-01-11T10:00:00Z",
"system_load": [0.5, 0.6, 0.7]
}
}
}
result = format_job_with_timing(job)
assert "123456-abcd" in result
assert "running" in result
def test_format_logs_header_structure(self):
"""Logs header should contain timing sections."""
job = {
"id": "123456-abcd",
"project": "admin",
"status": "completed",
"time_metrics": {
"dispatch": {
"utc_time": "2026-01-11T10:00:00Z",
"system_load": [0.5, 0.6, 0.7],
"memory_percent": 65,
"disk_percent": 45
},
"completion": {
"utc_time": "2026-01-11T10:05:30Z",
"duration_formatted": "00:05:30"
}
}
}
header = format_logs_header(job)
assert "" in header # Box drawing
assert "Job:" in header
assert "Agent: admin" in header
assert "Dispatched:" in header
assert "Status:" in header
assert "System:" in header
class TestElapsedSince:
"""Test elapsed_since function."""
def test_elapsed_since_recent(self):
"""elapsed_since should calculate time from now."""
# Use a timestamp 5 seconds ago
past = datetime.utcnow() - timedelta(seconds=5)
past_ts = past.strftime("%Y-%m-%dT%H:%M:%SZ")
elapsed = elapsed_since(past_ts)
# Should be around 5s (allow some tolerance)
assert "s" in elapsed # Should have seconds format
if __name__ == "__main__":
pytest.main([__file__, "-v"])