Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor: - Added DockerTmuxController class for robust tmux session management - Implements send_keys() with configurable delay_enter - Implements capture_pane() for output retrieval - Implements wait_for_prompt() for pattern-based completion detection - Implements wait_for_idle() for content-hash-based idle detection - Implements wait_for_shell_prompt() for shell prompt detection Also includes workflow improvements: - Pre-task git snapshot before agent execution - Post-task commit protocol in agent guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
BIN
tests/__pycache__/test_skill_learning.cpython-310.pyc
Normal file
BIN
tests/__pycache__/test_skill_learning.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/__pycache__/test_time_metrics.cpython-310-pytest-9.0.2.pyc
Normal file
BIN
tests/__pycache__/test_time_metrics.cpython-310-pytest-9.0.2.pyc
Normal file
Binary file not shown.
511
tests/test_integrations.py
Normal file
511
tests/test_integrations.py
Normal file
@@ -0,0 +1,511 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for Luzia orchestrator components
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Add lib to path
|
||||
sys.path.insert(0, '/opt/server-agents/orchestrator/lib')
|
||||
|
||||
# Test results tracking
|
||||
RESULTS = {'passed': 0, 'failed': 0, 'errors': []}
|
||||
|
||||
def test(name):
|
||||
"""Decorator for test functions"""
|
||||
def decorator(func):
|
||||
def wrapper():
|
||||
try:
|
||||
func()
|
||||
RESULTS['passed'] += 1
|
||||
print(f" ✓ {name}")
|
||||
return True
|
||||
except AssertionError as e:
|
||||
RESULTS['failed'] += 1
|
||||
RESULTS['errors'].append(f"{name}: {e}")
|
||||
print(f" ✗ {name}: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
RESULTS['failed'] += 1
|
||||
RESULTS['errors'].append(f"{name}: {type(e).__name__}: {e}")
|
||||
print(f" ✗ {name}: {type(e).__name__}: {e}")
|
||||
return False
|
||||
wrapper.__name__ = func.__name__
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat Memory Lookup Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Chat Memory Lookup Tests ###")
|
||||
|
||||
@test("ChatMemoryLookup imports")
|
||||
def test_chat_memory_import():
|
||||
from chat_memory_lookup import ChatMemoryLookup
|
||||
assert ChatMemoryLookup is not None
|
||||
|
||||
@test("ChatMemoryLookup initializes")
|
||||
def test_chat_memory_init():
|
||||
from chat_memory_lookup import ChatMemoryLookup
|
||||
lookup = ChatMemoryLookup(timeout_ms=150)
|
||||
assert lookup.timeout_ms == 150
|
||||
|
||||
@test("ChatMemoryLookup.memory_statistics returns data")
|
||||
def test_chat_memory_stats():
|
||||
from chat_memory_lookup import ChatMemoryLookup
|
||||
lookup = ChatMemoryLookup()
|
||||
stats = lookup.memory_statistics()
|
||||
assert 'available' in stats
|
||||
assert stats['available'] == True
|
||||
assert 'entities' in stats
|
||||
assert stats['entities'] > 0
|
||||
|
||||
@test("ChatMemoryLookup.list_all_projects returns projects")
|
||||
def test_chat_memory_projects():
|
||||
from chat_memory_lookup import ChatMemoryLookup
|
||||
lookup = ChatMemoryLookup()
|
||||
result = lookup.list_all_projects()
|
||||
assert 'projects' in result
|
||||
assert 'count' in result
|
||||
assert result['count'] > 0
|
||||
assert len(result['projects']) > 0
|
||||
|
||||
@test("ChatMemoryLookup.search_entities works")
|
||||
def test_chat_memory_search():
|
||||
from chat_memory_lookup import ChatMemoryLookup
|
||||
lookup = ChatMemoryLookup()
|
||||
result = lookup.search_entities('admin', limit=5)
|
||||
assert 'entities' in result
|
||||
assert 'count' in result
|
||||
|
||||
test_chat_memory_import()
|
||||
test_chat_memory_init()
|
||||
test_chat_memory_stats()
|
||||
test_chat_memory_projects()
|
||||
test_chat_memory_search()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat Intent Parser Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Chat Intent Parser Tests ###")
|
||||
|
||||
@test("ChatIntentParser imports")
|
||||
def test_intent_import():
|
||||
from chat_intent_parser import ChatIntentParser
|
||||
assert ChatIntentParser is not None
|
||||
|
||||
@test("ChatIntentParser.parse returns intent structure")
|
||||
def test_intent_parse():
|
||||
from chat_intent_parser import ChatIntentParser
|
||||
parser = ChatIntentParser()
|
||||
result = parser.parse("list projects")
|
||||
assert 'intent' in result
|
||||
assert 'keywords' in result
|
||||
assert 'scope' in result
|
||||
|
||||
@test("ChatIntentParser detects project_info intent")
|
||||
def test_intent_project():
|
||||
from chat_intent_parser import ChatIntentParser
|
||||
parser = ChatIntentParser()
|
||||
result = parser.parse("list projects")
|
||||
assert result['intent'] == 'project_info'
|
||||
assert 'projects' in result['keywords']
|
||||
|
||||
@test("ChatIntentParser detects system_status intent")
|
||||
def test_intent_status():
|
||||
from chat_intent_parser import ChatIntentParser
|
||||
parser = ChatIntentParser()
|
||||
result = parser.parse("system status")
|
||||
assert result['intent'] == 'system_status'
|
||||
|
||||
@test("ChatIntentParser.extract_search_term works")
|
||||
def test_intent_search_term():
|
||||
from chat_intent_parser import ChatIntentParser
|
||||
parser = ChatIntentParser()
|
||||
term = parser.extract_search_term("search for authentication")
|
||||
assert term is not None
|
||||
assert len(term) > 0
|
||||
|
||||
test_intent_import()
|
||||
test_intent_parse()
|
||||
test_intent_project()
|
||||
test_intent_status()
|
||||
test_intent_search_term()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat Orchestrator Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Chat Orchestrator Tests ###")
|
||||
|
||||
@test("ChatOrchestrator imports")
|
||||
def test_orchestrator_import():
|
||||
from chat_orchestrator import ChatOrchestrator
|
||||
assert ChatOrchestrator is not None
|
||||
|
||||
@test("ChatOrchestrator initializes")
|
||||
def test_orchestrator_init():
|
||||
from chat_orchestrator import ChatOrchestrator
|
||||
orch = ChatOrchestrator(timeout_ms=500)
|
||||
assert orch.timeout_ms == 500
|
||||
|
||||
@test("ChatOrchestrator.process_query returns response")
|
||||
def test_orchestrator_query():
|
||||
from chat_orchestrator import ChatOrchestrator
|
||||
orch = ChatOrchestrator()
|
||||
result = orch.process_query("help")
|
||||
assert 'response' in result
|
||||
assert 'status' in result
|
||||
assert result['status'] == 'success'
|
||||
|
||||
@test("ChatOrchestrator handles system status query")
|
||||
def test_orchestrator_status():
|
||||
from chat_orchestrator import ChatOrchestrator
|
||||
orch = ChatOrchestrator()
|
||||
result = orch.process_query("system status")
|
||||
assert 'response' in result
|
||||
assert 'execution_time_ms' in result
|
||||
|
||||
@test("ChatOrchestrator handles project query")
|
||||
def test_orchestrator_projects():
|
||||
from chat_orchestrator import ChatOrchestrator
|
||||
orch = ChatOrchestrator()
|
||||
result = orch.process_query("list projects")
|
||||
assert 'response' in result
|
||||
assert 'Projects' in result['response'] or 'project' in result['response'].lower()
|
||||
|
||||
test_orchestrator_import()
|
||||
test_orchestrator_init()
|
||||
test_orchestrator_query()
|
||||
test_orchestrator_status()
|
||||
test_orchestrator_projects()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat Response Formatter Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Chat Response Formatter Tests ###")
|
||||
|
||||
@test("ChatResponseFormatter imports")
|
||||
def test_formatter_import():
|
||||
from chat_response_formatter import ChatResponseFormatter
|
||||
assert ChatResponseFormatter is not None
|
||||
|
||||
@test("ChatResponseFormatter.format_help returns markdown")
|
||||
def test_formatter_help():
|
||||
from chat_response_formatter import ChatResponseFormatter
|
||||
formatter = ChatResponseFormatter()
|
||||
help_text = formatter.format_help()
|
||||
assert '# ' in help_text # Has markdown header
|
||||
assert len(help_text) > 100
|
||||
|
||||
@test("ChatResponseFormatter.format_response_time works")
|
||||
def test_formatter_time():
|
||||
from chat_response_formatter import ChatResponseFormatter
|
||||
formatter = ChatResponseFormatter()
|
||||
instant = formatter.format_response_time(5)
|
||||
assert 'instant' in instant
|
||||
fast = formatter.format_response_time(150)
|
||||
assert 'fast' in fast.lower() or 'ms' in fast
|
||||
|
||||
@test("ChatResponseFormatter.format_project_list works")
|
||||
def test_formatter_projects():
|
||||
from chat_response_formatter import ChatResponseFormatter
|
||||
formatter = ChatResponseFormatter()
|
||||
data = {'projects': [{'name': 'test', 'type': 'project'}], 'count': 1}
|
||||
result = formatter.format_project_list(data)
|
||||
assert 'test' in result
|
||||
assert 'Project' in result or '1' in result
|
||||
|
||||
test_formatter_import()
|
||||
test_formatter_help()
|
||||
test_formatter_time()
|
||||
test_formatter_projects()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat Bash Executor Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Chat Bash Executor Tests ###")
|
||||
|
||||
@test("ChatBashExecutor imports")
|
||||
def test_bash_import():
|
||||
from chat_bash_executor import ChatBashExecutor
|
||||
assert ChatBashExecutor is not None
|
||||
|
||||
@test("ChatBashExecutor.execute runs uptime")
|
||||
def test_bash_uptime():
|
||||
from chat_bash_executor import ChatBashExecutor
|
||||
executor = ChatBashExecutor()
|
||||
result = executor.execute('uptime')
|
||||
assert 'success' in result
|
||||
assert result['success'] == True
|
||||
assert 'output' in result
|
||||
|
||||
@test("ChatBashExecutor.execute runs disk")
|
||||
def test_bash_disk():
|
||||
from chat_bash_executor import ChatBashExecutor
|
||||
executor = ChatBashExecutor()
|
||||
result = executor.execute('disk')
|
||||
assert result['success'] == True
|
||||
|
||||
@test("ChatBashExecutor rejects unknown commands")
|
||||
def test_bash_reject():
|
||||
from chat_bash_executor import ChatBashExecutor
|
||||
executor = ChatBashExecutor()
|
||||
result = executor.execute('unknown_dangerous_cmd')
|
||||
# Unknown commands return error without success key
|
||||
assert 'error' in result
|
||||
assert 'not allowed' in result['error'].lower()
|
||||
|
||||
test_bash_import()
|
||||
test_bash_uptime()
|
||||
test_bash_disk()
|
||||
test_bash_reject()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Task Watchdog Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Task Watchdog Tests ###")
|
||||
|
||||
@test("TaskWatchdog imports")
|
||||
def test_watchdog_import():
|
||||
from task_watchdog import TaskWatchdog
|
||||
assert TaskWatchdog is not None
|
||||
|
||||
@test("TaskWatchdog initializes")
|
||||
def test_watchdog_init():
|
||||
from task_watchdog import TaskWatchdog
|
||||
watchdog = TaskWatchdog()
|
||||
assert watchdog.HEARTBEAT_TIMEOUT_SECONDS == 300
|
||||
assert watchdog.LOCK_TIMEOUT_SECONDS == 3600
|
||||
|
||||
@test("TaskWatchdog.check_heartbeats runs")
|
||||
def test_watchdog_heartbeats():
|
||||
from task_watchdog import TaskWatchdog
|
||||
watchdog = TaskWatchdog()
|
||||
stuck = watchdog.check_heartbeats()
|
||||
assert isinstance(stuck, list)
|
||||
|
||||
@test("TaskWatchdog.get_project_queue_status returns dict")
|
||||
def test_watchdog_queue_status():
|
||||
from task_watchdog import TaskWatchdog
|
||||
watchdog = TaskWatchdog()
|
||||
status = watchdog.get_project_queue_status()
|
||||
assert isinstance(status, dict)
|
||||
|
||||
@test("TaskWatchdog.is_project_blocked returns tuple")
|
||||
def test_watchdog_blocked():
|
||||
from task_watchdog import TaskWatchdog
|
||||
watchdog = TaskWatchdog()
|
||||
blocked, reason = watchdog.is_project_blocked('test_nonexistent')
|
||||
assert isinstance(blocked, bool)
|
||||
|
||||
@test("TaskWatchdog.run_check returns summary")
|
||||
def test_watchdog_check():
|
||||
from task_watchdog import TaskWatchdog
|
||||
watchdog = TaskWatchdog()
|
||||
summary = watchdog.run_check()
|
||||
assert 'timestamp' in summary
|
||||
assert 'stuck_tasks' in summary
|
||||
assert 'project_status' in summary
|
||||
|
||||
test_watchdog_import()
|
||||
test_watchdog_init()
|
||||
test_watchdog_heartbeats()
|
||||
test_watchdog_queue_status()
|
||||
test_watchdog_blocked()
|
||||
test_watchdog_check()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Task Completion Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Task Completion Tests ###")
|
||||
|
||||
@test("TaskCompletion imports")
|
||||
def test_completion_import():
|
||||
from task_completion import TaskCompletion, complete_task, fail_task
|
||||
assert TaskCompletion is not None
|
||||
assert complete_task is not None
|
||||
assert fail_task is not None
|
||||
|
||||
@test("TaskCompletion initializes")
|
||||
def test_completion_init():
|
||||
from task_completion import TaskCompletion
|
||||
handler = TaskCompletion()
|
||||
assert handler.COMPLETED_DIR.exists() or True # May not exist yet
|
||||
|
||||
@test("TaskCompletion.complete_task handles missing task")
|
||||
def test_completion_missing():
|
||||
from task_completion import TaskCompletion
|
||||
handler = TaskCompletion()
|
||||
result = handler.complete_task('nonexistent-task-12345')
|
||||
assert result['success'] == False
|
||||
assert 'not found' in result.get('error', '').lower()
|
||||
|
||||
@test("TaskCompletion.fail_task handles missing task")
|
||||
def test_fail_missing():
|
||||
from task_completion import TaskCompletion
|
||||
handler = TaskCompletion()
|
||||
result = handler.fail_task('nonexistent-task-12345', 'test error')
|
||||
assert result['success'] == False
|
||||
|
||||
@test("TaskCompletion.set_awaiting_human handles missing task")
|
||||
def test_awaiting_missing():
|
||||
from task_completion import TaskCompletion
|
||||
handler = TaskCompletion()
|
||||
result = handler.set_awaiting_human('nonexistent-task-12345', 'question?')
|
||||
assert result['success'] == False
|
||||
|
||||
test_completion_import()
|
||||
test_completion_init()
|
||||
test_completion_missing()
|
||||
test_fail_missing()
|
||||
test_awaiting_missing()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Cockpit Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### Cockpit Tests ###")
|
||||
|
||||
@test("Cockpit module imports")
|
||||
def test_cockpit_import():
|
||||
from cockpit import cockpit_status, cockpit_start, cockpit_stop, cockpit_send
|
||||
assert cockpit_status is not None
|
||||
assert cockpit_start is not None
|
||||
assert cockpit_stop is not None
|
||||
|
||||
@test("cockpit_status returns state")
|
||||
def test_cockpit_status():
|
||||
from cockpit import cockpit_status
|
||||
result = cockpit_status('admin')
|
||||
assert isinstance(result, dict)
|
||||
# Should have status info even if not running
|
||||
|
||||
@test("container_exists helper works")
|
||||
def test_cockpit_container_exists():
|
||||
from cockpit import container_exists
|
||||
# Test with a non-existent container
|
||||
result = container_exists('nonexistent-container-12345')
|
||||
assert isinstance(result, bool)
|
||||
assert result == False
|
||||
|
||||
@test("get_container_name generates correct name")
|
||||
def test_cockpit_container_name():
|
||||
from cockpit import get_container_name
|
||||
name = get_container_name('testproject')
|
||||
assert 'testproject' in name
|
||||
assert 'cockpit' in name.lower()
|
||||
|
||||
test_cockpit_import()
|
||||
test_cockpit_status()
|
||||
test_cockpit_container_exists()
|
||||
test_cockpit_container_name()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# KG Lookup Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### KG Lookup Tests ###")
|
||||
|
||||
@test("ChatKGLookup imports")
|
||||
def test_kg_import():
|
||||
from chat_kg_lookup import ChatKGLookup
|
||||
assert ChatKGLookup is not None
|
||||
|
||||
@test("ChatKGLookup.get_kg_statistics returns stats")
|
||||
def test_kg_stats():
|
||||
from chat_kg_lookup import ChatKGLookup
|
||||
lookup = ChatKGLookup()
|
||||
stats = lookup.get_kg_statistics()
|
||||
assert isinstance(stats, dict)
|
||||
|
||||
@test("ChatKGLookup.search_all_domains works")
|
||||
def test_kg_search():
|
||||
from chat_kg_lookup import ChatKGLookup
|
||||
lookup = ChatKGLookup()
|
||||
results = lookup.search_all_domains('admin', limit=5)
|
||||
assert isinstance(results, dict)
|
||||
|
||||
test_kg_import()
|
||||
test_kg_stats()
|
||||
test_kg_search()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI Integration Tests
|
||||
# =============================================================================
|
||||
|
||||
print("\n### CLI Integration Tests ###")
|
||||
|
||||
import subprocess
|
||||
|
||||
@test("luzia --help works")
|
||||
def test_cli_help():
|
||||
result = subprocess.run(['luzia', '--help'], capture_output=True, text=True, timeout=10)
|
||||
assert result.returncode == 0
|
||||
assert 'luzia' in result.stdout.lower() or 'usage' in result.stdout.lower()
|
||||
|
||||
@test("luzia chat help works")
|
||||
def test_cli_chat_help():
|
||||
result = subprocess.run(['luzia', 'chat', 'help'], capture_output=True, text=True, timeout=10)
|
||||
assert result.returncode == 0
|
||||
assert 'Chat' in result.stdout or 'chat' in result.stdout.lower()
|
||||
|
||||
@test("luzia watchdog status works")
|
||||
def test_cli_watchdog():
|
||||
result = subprocess.run(['luzia', 'watchdog', 'status'], capture_output=True, text=True, timeout=10)
|
||||
assert result.returncode == 0
|
||||
assert 'PROJECT' in result.stdout or 'Queue' in result.stdout
|
||||
|
||||
@test("luzia cockpit status works")
|
||||
def test_cli_cockpit():
|
||||
result = subprocess.run(['luzia', 'cockpit', 'status'], capture_output=True, text=True, timeout=10)
|
||||
assert result.returncode == 0
|
||||
|
||||
@test("luzia list works")
|
||||
def test_cli_list():
|
||||
result = subprocess.run(['luzia', 'list'], capture_output=True, text=True, timeout=10)
|
||||
assert result.returncode == 0
|
||||
|
||||
test_cli_help()
|
||||
test_cli_chat_help()
|
||||
test_cli_watchdog()
|
||||
test_cli_cockpit()
|
||||
test_cli_list()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Summary
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print(f"RESULTS: {RESULTS['passed']} passed, {RESULTS['failed']} failed")
|
||||
print("=" * 60)
|
||||
|
||||
if RESULTS['errors']:
|
||||
print("\nFailed tests:")
|
||||
for err in RESULTS['errors']:
|
||||
print(f" - {err}")
|
||||
|
||||
sys.exit(0 if RESULTS['failed'] == 0 else 1)
|
||||
287
tests/test_per_user_queue.py
Normal file
287
tests/test_per_user_queue.py
Normal file
@@ -0,0 +1,287 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Per-User Queue System
|
||||
|
||||
Tests:
|
||||
1. Per-user lock acquisition and release
|
||||
2. Lock timeout and cleanup
|
||||
3. Queue controller with per-user serialization
|
||||
4. Fair scheduling respects per-user locks
|
||||
5. Conductor lock cleanup
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Add lib to path
|
||||
lib_path = Path(__file__).parent.parent / "lib"
|
||||
sys.path.insert(0, str(lib_path))
|
||||
|
||||
from per_user_queue_manager import PerUserQueueManager
|
||||
from queue_controller_v2 import QueueControllerV2
|
||||
from conductor_lock_cleanup import ConductorLockCleanup
|
||||
|
||||
|
||||
def test_per_user_lock_basic():
|
||||
"""Test basic lock acquire and release."""
|
||||
print("\n=== Test: Basic Lock Acquire/Release ===")
|
||||
|
||||
manager = PerUserQueueManager()
|
||||
user = "testuser"
|
||||
task_id = "task_123"
|
||||
|
||||
# Acquire lock
|
||||
acquired, lock_id = manager.acquire_lock(user, task_id)
|
||||
assert acquired, f"Failed to acquire lock for {user}"
|
||||
assert lock_id, "Lock ID should not be None"
|
||||
print(f"✓ Acquired lock: user={user}, lock_id={lock_id}")
|
||||
|
||||
# Check lock is active
|
||||
assert manager.is_user_locked(user), "User should be locked"
|
||||
print(f"✓ User is locked")
|
||||
|
||||
# Get lock info
|
||||
lock_info = manager.get_lock_info(user)
|
||||
assert lock_info, "Should return lock info"
|
||||
assert lock_info["user"] == user
|
||||
print(f"✓ Lock info retrieved: {lock_info['lock_id']}")
|
||||
|
||||
# Release lock
|
||||
released = manager.release_lock(user, lock_id)
|
||||
assert released, "Failed to release lock"
|
||||
print(f"✓ Released lock")
|
||||
|
||||
# Check lock is gone
|
||||
assert not manager.is_user_locked(user), "User should not be locked"
|
||||
print(f"✓ Lock released successfully")
|
||||
|
||||
|
||||
def test_concurrent_lock_contention():
|
||||
"""Test that only one lock per user can be held."""
|
||||
print("\n=== Test: Concurrent Lock Contention ===")
|
||||
|
||||
manager = PerUserQueueManager()
|
||||
user = "contentionuser"
|
||||
|
||||
# Acquire first lock
|
||||
acquired1, lock_id1 = manager.acquire_lock(user, "task_1", timeout=1)
|
||||
assert acquired1, "First lock should succeed"
|
||||
print(f"✓ First lock acquired: {lock_id1}")
|
||||
|
||||
# Try to acquire second lock (should timeout)
|
||||
acquired2, lock_id2 = manager.acquire_lock(user, "task_2", timeout=1)
|
||||
assert not acquired2, "Second lock should fail due to contention"
|
||||
assert lock_id2 is None
|
||||
print(f"✓ Second lock correctly rejected (contention)")
|
||||
|
||||
# Release first lock
|
||||
manager.release_lock(user, lock_id1)
|
||||
print(f"✓ First lock released")
|
||||
|
||||
# Now second should succeed
|
||||
acquired3, lock_id3 = manager.acquire_lock(user, "task_2", timeout=1)
|
||||
assert acquired3, "Third lock should succeed after release"
|
||||
print(f"✓ Third lock acquired after release: {lock_id3}")
|
||||
|
||||
manager.release_lock(user, lock_id3)
|
||||
|
||||
|
||||
def test_stale_lock_cleanup():
|
||||
"""Test stale lock detection and cleanup."""
|
||||
print("\n=== Test: Stale Lock Cleanup ===")
|
||||
|
||||
manager = PerUserQueueManager()
|
||||
user = "staleuser"
|
||||
|
||||
# Acquire lock with custom timeout
|
||||
acquired, lock_id = manager.acquire_lock(user, "task_stale")
|
||||
assert acquired
|
||||
print(f"✓ Lock acquired: {lock_id}")
|
||||
|
||||
# Manually set lock as expired
|
||||
lock_meta_path = manager._get_lock_meta_path(user)
|
||||
meta = json.loads(lock_meta_path.read_text())
|
||||
meta["expires_at"] = (datetime.now() - timedelta(hours=1)).isoformat()
|
||||
lock_meta_path.write_text(json.dumps(meta))
|
||||
print(f"✓ Lock manually set as stale")
|
||||
|
||||
# Should be detected as stale
|
||||
assert manager._is_lock_stale(user), "Lock should be detected as stale"
|
||||
print(f"✓ Stale lock detected")
|
||||
|
||||
# Cleanup should remove it
|
||||
manager._cleanup_stale_locks(user)
|
||||
assert not manager.is_user_locked(user), "Stale lock should be cleaned up"
|
||||
print(f"✓ Stale lock cleaned up")
|
||||
|
||||
|
||||
def test_multiple_users():
|
||||
"""Test that different users have independent locks."""
|
||||
print("\n=== Test: Multiple Users Independence ===")
|
||||
|
||||
manager = PerUserQueueManager()
|
||||
|
||||
# Acquire locks for different users
|
||||
acquired1, lock_id1 = manager.acquire_lock("user_a", "task_a")
|
||||
acquired2, lock_id2 = manager.acquire_lock("user_b", "task_b")
|
||||
|
||||
assert acquired1 and acquired2, "Both locks should succeed"
|
||||
print(f"✓ Acquired locks for user_a and user_b")
|
||||
|
||||
# Both should be locked
|
||||
assert manager.is_user_locked("user_a"), "user_a should be locked"
|
||||
assert manager.is_user_locked("user_b"), "user_b should be locked"
|
||||
print(f"✓ Both users are locked")
|
||||
|
||||
# Release user_a's lock
|
||||
manager.release_lock("user_a", lock_id1)
|
||||
assert not manager.is_user_locked("user_a"), "user_a should be unlocked"
|
||||
assert manager.is_user_locked("user_b"), "user_b should still be locked"
|
||||
print(f"✓ user_a released, user_b still locked")
|
||||
|
||||
manager.release_lock("user_b", lock_id2)
|
||||
|
||||
|
||||
def test_queue_controller_v2():
|
||||
"""Test QueueControllerV2 with per-user serialization."""
|
||||
print("\n=== Test: QueueControllerV2 Integration ===")
|
||||
|
||||
qc = QueueControllerV2()
|
||||
|
||||
# Ensure per-user serialization is in config and enabled for testing
|
||||
if "per_user_serialization" not in qc.config:
|
||||
qc.config["per_user_serialization"] = {"enabled": True, "lock_timeout_seconds": 3600}
|
||||
qc.config["per_user_serialization"]["enabled"] = True
|
||||
|
||||
# Enqueue tasks for different projects (users)
|
||||
task_id_1, pos_1 = qc.enqueue("project_a", "Task 1 for project A")
|
||||
task_id_2, pos_2 = qc.enqueue("project_b", "Task 1 for project B")
|
||||
task_id_3, pos_3 = qc.enqueue("project_a", "Task 2 for project A")
|
||||
|
||||
print(f"✓ Enqueued 3 tasks")
|
||||
print(f" - project_a: {task_id_1} (pos {pos_1}), {task_id_3} (pos {pos_3})")
|
||||
print(f" - project_b: {task_id_2} (pos {pos_2})")
|
||||
|
||||
# Get queue status
|
||||
status = qc.get_queue_status()
|
||||
initial_pending = status["pending"]["total"]
|
||||
assert initial_pending >= 3, f"Should have at least 3 pending tasks, have {initial_pending}"
|
||||
print(f"✓ Queue status: {initial_pending} total pending tasks (at least 3 new ones)")
|
||||
|
||||
# Check that per-user locks are respected
|
||||
user_a = qc.extract_user_from_project("project_a")
|
||||
user_b = qc.extract_user_from_project("project_b")
|
||||
|
||||
can_exec_a = qc.can_user_execute_task(user_a)
|
||||
can_exec_b = qc.can_user_execute_task(user_b)
|
||||
|
||||
assert can_exec_a and can_exec_b, "Both users should be able to execute"
|
||||
print(f"✓ Both users can execute tasks")
|
||||
|
||||
# Acquire locks
|
||||
acq_a, lock_a = qc.acquire_user_lock(user_a, task_id_1)
|
||||
assert acq_a and lock_a, "Should acquire lock for user_a"
|
||||
print(f"✓ Acquired lock for user_a: {lock_a}")
|
||||
|
||||
# Now user_a cannot execute another task
|
||||
can_exec_a2 = qc.can_user_execute_task(user_a)
|
||||
assert not can_exec_a2, "user_a should not be able to execute while locked"
|
||||
print(f"✓ user_a locked, cannot execute new tasks")
|
||||
|
||||
# But user_b can
|
||||
can_exec_b2 = qc.can_user_execute_task(user_b)
|
||||
assert can_exec_b2, "user_b should still be able to execute"
|
||||
print(f"✓ user_b can still execute")
|
||||
|
||||
# Release user_a's lock
|
||||
qc.release_user_lock(user_a, lock_a)
|
||||
can_exec_a3 = qc.can_user_execute_task(user_a)
|
||||
assert can_exec_a3, "user_a should be able to execute again"
|
||||
print(f"✓ Released user_a lock, can execute again")
|
||||
|
||||
|
||||
def test_fair_scheduling_with_locks():
|
||||
"""Test that fair scheduling respects per-user locks."""
|
||||
print("\n=== Test: Fair Scheduling with Per-User Locks ===")
|
||||
|
||||
qc = QueueControllerV2()
|
||||
|
||||
# Ensure per-user serialization is in config and enabled for testing
|
||||
if "per_user_serialization" not in qc.config:
|
||||
qc.config["per_user_serialization"] = {"enabled": True, "lock_timeout_seconds": 3600}
|
||||
qc.config["per_user_serialization"]["enabled"] = True
|
||||
|
||||
# Enqueue multiple tasks
|
||||
task_id_1, _ = qc.enqueue("proj_a", "Task A1", priority=5)
|
||||
task_id_2, _ = qc.enqueue("proj_b", "Task B1", priority=5)
|
||||
task_id_3, _ = qc.enqueue("proj_a", "Task A2", priority=5)
|
||||
|
||||
# Get pending tasks
|
||||
capacity = qc._read_capacity()
|
||||
task = qc._select_next_task(capacity)
|
||||
|
||||
assert task, "Should select a task"
|
||||
print(f"✓ Selected task: {task['id']} for {task['project']}")
|
||||
|
||||
# Acquire lock for this task's user
|
||||
user = task.get("user") or qc.extract_user_from_project(task["project"])
|
||||
acq, lock_id = qc.acquire_user_lock(user, task["id"])
|
||||
assert acq, "Should acquire user lock"
|
||||
|
||||
# Now selecting next task should skip tasks for this user
|
||||
# and select from another user
|
||||
task2 = qc._select_next_task(capacity)
|
||||
|
||||
if task2:
|
||||
user2 = task2.get("user") or qc.extract_user_from_project(task2["project"])
|
||||
# Task should be from a different user or None
|
||||
assert user2 != user, f"Should select different user, got {user2}"
|
||||
print(f"✓ Fair scheduling respects user lock: skipped {user}, selected {user2}")
|
||||
else:
|
||||
print(f"✓ Fair scheduling: no available task (all from locked user)")
|
||||
|
||||
qc.release_user_lock(user, lock_id)
|
||||
|
||||
|
||||
def run_all_tests():
|
||||
"""Run all tests."""
|
||||
print("=" * 60)
|
||||
print("Per-User Queue System Tests")
|
||||
print("=" * 60)
|
||||
|
||||
tests = [
|
||||
test_per_user_lock_basic,
|
||||
test_concurrent_lock_contention,
|
||||
test_stale_lock_cleanup,
|
||||
test_multiple_users,
|
||||
test_queue_controller_v2,
|
||||
test_fair_scheduling_with_locks,
|
||||
]
|
||||
|
||||
passed = 0
|
||||
failed = 0
|
||||
|
||||
for test_func in tests:
|
||||
try:
|
||||
test_func()
|
||||
passed += 1
|
||||
except AssertionError as e:
|
||||
print(f"✗ FAILED: {e}")
|
||||
failed += 1
|
||||
except Exception as e:
|
||||
print(f"✗ ERROR: {e}")
|
||||
failed += 1
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print(f"Results: {passed} passed, {failed} failed")
|
||||
print("=" * 60)
|
||||
|
||||
return failed == 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = run_all_tests()
|
||||
sys.exit(0 if success else 1)
|
||||
470
tests/test_plugin_system.py
Normal file
470
tests/test_plugin_system.py
Normal file
@@ -0,0 +1,470 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Suite - Plugin Marketplace System
|
||||
|
||||
Tests for:
|
||||
1. Plugin marketplace registry and loading
|
||||
2. Plugin skill generation and matching
|
||||
3. Dispatcher integration with plugins
|
||||
4. Knowledge graph exports
|
||||
5. Plugin-aware task dispatch
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# Add lib to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))
|
||||
|
||||
from plugin_marketplace import (
|
||||
PluginMarketplaceRegistry,
|
||||
PluginCapabilityMatcher,
|
||||
get_marketplace_registry
|
||||
)
|
||||
from plugin_skill_loader import (
|
||||
PluginSkillLoader,
|
||||
get_plugin_skill_loader
|
||||
)
|
||||
from dispatcher_plugin_integration import (
|
||||
DispatcherPluginBridge,
|
||||
PluginAwareTaskDispatcher
|
||||
)
|
||||
from plugin_kg_integration import (
|
||||
PluginKnowledgeGraphExporter,
|
||||
export_plugins_to_kg
|
||||
)
|
||||
|
||||
|
||||
class TestResults:
|
||||
def __init__(self):
|
||||
self.tests: List[Dict[str, Any]] = []
|
||||
self.passed = 0
|
||||
self.failed = 0
|
||||
|
||||
def add_test(self, name: str, passed: bool, details: str = ""):
|
||||
status = "PASS" if passed else "FAIL"
|
||||
self.tests.append({
|
||||
'name': name,
|
||||
'status': status,
|
||||
'details': details
|
||||
})
|
||||
if passed:
|
||||
self.passed += 1
|
||||
else:
|
||||
self.failed += 1
|
||||
print(f"[{status}] {name}" + (f": {details}" if details else ""))
|
||||
|
||||
def summary(self) -> str:
|
||||
return f"\nTest Summary: {self.passed} passed, {self.failed} failed out of {self.passed + self.failed}"
|
||||
|
||||
|
||||
def test_plugin_registry() -> TestResults:
|
||||
"""Test plugin marketplace registry"""
|
||||
results = TestResults()
|
||||
print("\n=== Testing Plugin Marketplace Registry ===\n")
|
||||
|
||||
# Test 1: Registry initialization
|
||||
try:
|
||||
registry = get_marketplace_registry()
|
||||
results.add_test(
|
||||
"Registry initialization",
|
||||
len(registry.plugins) > 0,
|
||||
f"Loaded {len(registry.plugins)} plugins"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Registry initialization", False, str(e))
|
||||
return results
|
||||
|
||||
# Test 2: Plugin retrieval
|
||||
try:
|
||||
plugin = registry.get_plugin('code-simplifier')
|
||||
results.add_test(
|
||||
"Plugin retrieval",
|
||||
plugin is not None and plugin.name == 'Code Simplifier',
|
||||
f"Retrieved: {plugin.name if plugin else 'None'}"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Plugin retrieval", False, str(e))
|
||||
|
||||
# Test 3: List plugins by category
|
||||
try:
|
||||
code_analysis_plugins = registry.list_plugins('code-analysis')
|
||||
results.add_test(
|
||||
"Filter plugins by category",
|
||||
len(code_analysis_plugins) > 0,
|
||||
f"Found {len(code_analysis_plugins)} code-analysis plugins"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Filter plugins by category", False, str(e))
|
||||
|
||||
# Test 4: Find plugins for task
|
||||
try:
|
||||
task = "Review my code for security vulnerabilities"
|
||||
matches = registry.find_plugins_for_task(task, ['security', 'review', 'code'])
|
||||
results.add_test(
|
||||
"Find plugins for task",
|
||||
len(matches) > 0,
|
||||
f"Found {len(matches)} matching plugins"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Find plugins for task", False, str(e))
|
||||
|
||||
# Test 5: Export plugin data
|
||||
try:
|
||||
export_data = registry.export_for_knowledge_graph()
|
||||
results.add_test(
|
||||
"Export for knowledge graph",
|
||||
'plugins' in export_data and 'categories' in export_data,
|
||||
f"Exported {len(export_data.get('plugins', {}))} plugins"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export for knowledge graph", False, str(e))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_plugin_skills() -> TestResults:
|
||||
"""Test plugin skill generation and matching"""
|
||||
results = TestResults()
|
||||
print("\n=== Testing Plugin Skill System ===\n")
|
||||
|
||||
# Test 1: Skill loader initialization
|
||||
try:
|
||||
loader = get_plugin_skill_loader()
|
||||
results.add_test(
|
||||
"Skill loader initialization",
|
||||
loader is not None,
|
||||
"Initialized successfully"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Skill loader initialization", False, str(e))
|
||||
return results
|
||||
|
||||
# Test 2: Generate skills from plugins
|
||||
try:
|
||||
skills = loader.generate_skills_from_plugins()
|
||||
results.add_test(
|
||||
"Generate skills from plugins",
|
||||
len(skills) > 0,
|
||||
f"Generated {len(skills)} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Generate skills from plugins", False, str(e))
|
||||
|
||||
# Test 3: List all skills
|
||||
try:
|
||||
all_skills = loader.list_skills()
|
||||
results.add_test(
|
||||
"List all skills",
|
||||
len(all_skills) > 0,
|
||||
f"Listed {len(all_skills)} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("List all skills", False, str(e))
|
||||
|
||||
# Test 4: Filter skills by category
|
||||
try:
|
||||
code_skills = loader.list_skills(category='code-analysis')
|
||||
results.add_test(
|
||||
"Filter skills by category",
|
||||
len(code_skills) > 0,
|
||||
f"Found {len(code_skills)} code-analysis skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Filter skills by category", False, str(e))
|
||||
|
||||
# Test 5: Find skills for task
|
||||
try:
|
||||
task = "Simplify and optimize this Python function"
|
||||
matched = loader.find_skills_for_task(task, min_relevance=0.3)
|
||||
results.add_test(
|
||||
"Find skills for task",
|
||||
len(matched) > 0,
|
||||
f"Found {len(matched)} matching skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Find skills for task", False, str(e))
|
||||
|
||||
# Test 6: Export for dispatcher
|
||||
try:
|
||||
dispatch_export = loader.export_for_dispatcher()
|
||||
results.add_test(
|
||||
"Export for dispatcher",
|
||||
'skill_count' in dispatch_export and dispatch_export['skill_count'] > 0,
|
||||
f"Exported {dispatch_export.get('skill_count', 0)} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export for dispatcher", False, str(e))
|
||||
|
||||
# Test 7: Export for knowledge graph
|
||||
try:
|
||||
kg_export = loader.export_for_knowledge_graph()
|
||||
results.add_test(
|
||||
"Export for knowledge graph",
|
||||
'total_skills' in kg_export and kg_export['total_skills'] > 0,
|
||||
f"Exported {kg_export.get('total_skills', 0)} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export for knowledge graph", False, str(e))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_dispatcher_integration() -> TestResults:
|
||||
"""Test dispatcher-plugin integration"""
|
||||
results = TestResults()
|
||||
print("\n=== Testing Dispatcher Integration ===\n")
|
||||
|
||||
# Test 1: Bridge initialization
|
||||
try:
|
||||
bridge = DispatcherPluginBridge()
|
||||
results.add_test(
|
||||
"Bridge initialization",
|
||||
bridge is not None and len(bridge.skill_loader.skills) > 0,
|
||||
f"Loaded {len(bridge.skill_loader.skills)} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Bridge initialization", False, str(e))
|
||||
return results
|
||||
|
||||
# Test 2: Enhance task context
|
||||
try:
|
||||
task = "Review this code for security issues and performance"
|
||||
context = bridge.enhance_task_context(task, "test-project", "job-123")
|
||||
results.add_test(
|
||||
"Enhance task context",
|
||||
'plugin_analysis' in context and 'matched_skills' in context['plugin_analysis'],
|
||||
f"Found {len(context['plugin_analysis'].get('matched_skills', []))} skills"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Enhance task context", False, str(e))
|
||||
|
||||
# Test 3: Generate recommendations
|
||||
try:
|
||||
task = "Simplify and refactor this code"
|
||||
context = bridge.enhance_task_context(task, "test-project", "job-456")
|
||||
recommendations = context.get('recommended_plugins', {})
|
||||
results.add_test(
|
||||
"Generate recommendations",
|
||||
'primary_skill' in recommendations,
|
||||
f"Primary skill: {recommendations.get('primary_skill', {}).get('name', 'None')}"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Generate recommendations", False, str(e))
|
||||
|
||||
# Test 4: Plugin-aware task dispatcher
|
||||
try:
|
||||
dispatcher = PluginAwareTaskDispatcher(bridge)
|
||||
dispatch_result = dispatcher.dispatch_with_plugin_context(
|
||||
"Review code quality",
|
||||
"test-project",
|
||||
"job-789"
|
||||
)
|
||||
results.add_test(
|
||||
"Plugin-aware dispatch",
|
||||
dispatch_result['plugin_enhanced'] and 'plugin_context' in dispatch_result,
|
||||
"Dispatch successful with plugin context"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Plugin-aware dispatch", False, str(e))
|
||||
|
||||
# Test 5: Get dispatch recommendations
|
||||
try:
|
||||
dispatcher = PluginAwareTaskDispatcher(bridge)
|
||||
dispatcher.dispatch_with_plugin_context(
|
||||
"Analyze code performance",
|
||||
"test-project",
|
||||
"job-999"
|
||||
)
|
||||
recommendations = dispatcher.get_dispatch_recommendations("job-999")
|
||||
results.add_test(
|
||||
"Get dispatch recommendations",
|
||||
recommendations is not None and 'primary_skill' in recommendations,
|
||||
"Retrieved recommendations successfully"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Get dispatch recommendations", False, str(e))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_capability_matching() -> TestResults:
|
||||
"""Test plugin capability matching"""
|
||||
results = TestResults()
|
||||
print("\n=== Testing Capability Matching ===\n")
|
||||
|
||||
# Test 1: Matcher initialization
|
||||
try:
|
||||
registry = get_marketplace_registry()
|
||||
matcher = PluginCapabilityMatcher(registry)
|
||||
results.add_test(
|
||||
"Matcher initialization",
|
||||
matcher is not None,
|
||||
"Initialized successfully"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Matcher initialization", False, str(e))
|
||||
return results
|
||||
|
||||
# Test 2: Extract keywords
|
||||
try:
|
||||
task = "Find security vulnerabilities in this code"
|
||||
keywords = matcher.extract_task_keywords(task)
|
||||
results.add_test(
|
||||
"Extract keywords",
|
||||
len(keywords) > 0 and 'security' in keywords,
|
||||
f"Extracted keywords: {keywords}"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Extract keywords", False, str(e))
|
||||
|
||||
# Test 3: Match plugins to task
|
||||
try:
|
||||
task = "Review code for performance issues"
|
||||
matches = matcher.match_plugins(task, min_relevance=0.3)
|
||||
results.add_test(
|
||||
"Match plugins to task",
|
||||
len(matches) > 0,
|
||||
f"Matched {len(matches)} plugins"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Match plugins to task", False, str(e))
|
||||
|
||||
# Test 4: Relevance scoring
|
||||
try:
|
||||
task1 = "Review code for security"
|
||||
task2 = "Deploy application"
|
||||
matches1 = matcher.match_plugins(task1)
|
||||
matches2 = matcher.match_plugins(task2)
|
||||
results.add_test(
|
||||
"Relevance scoring",
|
||||
len(matches1) > 0 and (len(matches2) == 0 or len(matches1) >= len(matches2)),
|
||||
"Security task has more relevant plugins than deploy task"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Relevance scoring", False, str(e))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def test_knowledge_graph_export() -> TestResults:
|
||||
"""Test knowledge graph exports"""
|
||||
results = TestResults()
|
||||
print("\n=== Testing Knowledge Graph Export ===\n")
|
||||
|
||||
# Test 1: Exporter initialization
|
||||
try:
|
||||
exporter = PluginKnowledgeGraphExporter()
|
||||
results.add_test(
|
||||
"Exporter initialization",
|
||||
exporter is not None,
|
||||
"Initialized successfully"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Exporter initialization", False, str(e))
|
||||
return results
|
||||
|
||||
# Test 2: Export plugins as entities
|
||||
try:
|
||||
entities = exporter.export_plugins_as_entities()
|
||||
results.add_test(
|
||||
"Export plugins as entities",
|
||||
'entities' in entities and len(entities['entities']) > 0,
|
||||
f"Exported {len(entities['entities'])} plugin entities"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export plugins as entities", False, str(e))
|
||||
|
||||
# Test 3: Export skills as entities
|
||||
try:
|
||||
entities = exporter.export_plugin_skills_as_entities()
|
||||
results.add_test(
|
||||
"Export skills as entities",
|
||||
'entities' in entities and len(entities['entities']) > 0,
|
||||
f"Exported {len(entities['entities'])} skill entities"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export skills as entities", False, str(e))
|
||||
|
||||
# Test 4: Export relationships
|
||||
try:
|
||||
relations = exporter.export_plugin_relationships()
|
||||
results.add_test(
|
||||
"Export relationships",
|
||||
'relations' in relations and len(relations['relations']) > 0,
|
||||
f"Exported {len(relations['relations'])} relationships"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Export relationships", False, str(e))
|
||||
|
||||
# Test 5: Complete export
|
||||
try:
|
||||
complete = exporter.export_for_shared_kg()
|
||||
results.add_test(
|
||||
"Complete KG export",
|
||||
'plugins' in complete and 'skills' in complete and 'categories' in complete,
|
||||
f"Plugins: {len(complete['plugins'])}, Skills: {len(complete['skills'])}"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Complete KG export", False, str(e))
|
||||
|
||||
# Test 6: Save exports
|
||||
try:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
export_dir = Path(tmpdir)
|
||||
saved = exporter.save_exports()
|
||||
results.add_test(
|
||||
"Save exports to files",
|
||||
len(saved) >= 3,
|
||||
f"Saved {len(saved)} export files"
|
||||
)
|
||||
except Exception as e:
|
||||
results.add_test("Save exports to files", False, str(e))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def run_all_tests() -> None:
|
||||
"""Run all test suites"""
|
||||
print("=" * 60)
|
||||
print("PLUGIN SYSTEM TEST SUITE")
|
||||
print("=" * 60)
|
||||
|
||||
all_results = []
|
||||
|
||||
# Run test suites
|
||||
all_results.append(test_plugin_registry())
|
||||
all_results.append(test_plugin_skills())
|
||||
all_results.append(test_capability_matching())
|
||||
all_results.append(test_dispatcher_integration())
|
||||
all_results.append(test_knowledge_graph_export())
|
||||
|
||||
# Print overall summary
|
||||
print("\n" + "=" * 60)
|
||||
print("OVERALL TEST SUMMARY")
|
||||
print("=" * 60)
|
||||
|
||||
total_passed = sum(r.passed for r in all_results)
|
||||
total_failed = sum(r.failed for r in all_results)
|
||||
total_tests = total_passed + total_failed
|
||||
|
||||
print(f"\nTotal: {total_passed}/{total_tests} tests passed")
|
||||
|
||||
if total_failed > 0:
|
||||
print(f"\n{total_failed} tests failed:")
|
||||
for result_set in all_results:
|
||||
for test in result_set.tests:
|
||||
if test['status'] == 'FAIL':
|
||||
print(f" - {test['name']}: {test['details']}")
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
exit_code = 0 if total_failed == 0 else 1
|
||||
print(f"Exit code: {exit_code}")
|
||||
sys.exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_all_tests()
|
||||
285
tests/test_responsive_dispatcher.py
Normal file
285
tests/test_responsive_dispatcher.py
Normal file
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Suite for Responsive Dispatcher
|
||||
|
||||
Tests:
|
||||
1. Immediate job dispatch with job_id return
|
||||
2. Non-blocking task spawning
|
||||
3. Background status monitoring
|
||||
4. Concurrent task handling
|
||||
5. Status polling and updates
|
||||
6. CLI feedback rendering
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
# Add lib to path
|
||||
lib_path = Path(__file__).parent.parent / "lib"
|
||||
sys.path.insert(0, str(lib_path))
|
||||
|
||||
from responsive_dispatcher import ResponseiveDispatcher
|
||||
from cli_feedback import CLIFeedback, Colors, ProgressBar
|
||||
from dispatcher_enhancements import EnhancedDispatcher, get_enhanced_dispatcher
|
||||
|
||||
|
||||
class TestResponsiveDispatcher:
|
||||
"""Test responsive dispatcher functionality"""
|
||||
|
||||
def __init__(self):
|
||||
self.test_dir = Path(tempfile.mkdtemp(prefix="luzia_test_"))
|
||||
self.dispatcher = ResponseiveDispatcher(self.test_dir)
|
||||
self.feedback = CLIFeedback()
|
||||
self.passed = 0
|
||||
self.failed = 0
|
||||
|
||||
def run_all_tests(self):
|
||||
"""Run all tests"""
|
||||
print(f"\n{Colors.BOLD}=== Responsive Dispatcher Test Suite ==={Colors.RESET}\n")
|
||||
|
||||
tests = [
|
||||
self.test_immediate_dispatch,
|
||||
self.test_job_status_retrieval,
|
||||
self.test_status_updates,
|
||||
self.test_concurrent_jobs,
|
||||
self.test_cache_behavior,
|
||||
self.test_cli_feedback,
|
||||
self.test_progress_bar,
|
||||
self.test_background_monitoring,
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
try:
|
||||
print(f" Running {test.__name__}...", end=" ", flush=True)
|
||||
test()
|
||||
self.passed += 1
|
||||
print(f"{Colors.GREEN}✓{Colors.RESET}")
|
||||
except AssertionError as e:
|
||||
self.failed += 1
|
||||
print(f"{Colors.RED}✗{Colors.RESET}")
|
||||
print(f" Error: {e}")
|
||||
except Exception as e:
|
||||
self.failed += 1
|
||||
print(f"{Colors.RED}✗{Colors.RESET}")
|
||||
print(f" Unexpected error: {e}")
|
||||
|
||||
# Summary
|
||||
print(f"\n{Colors.BOLD}=== Test Summary ==={Colors.RESET}")
|
||||
print(f" {Colors.GREEN}Passed:{Colors.RESET} {self.passed}")
|
||||
print(f" {Colors.RED}Failed:{Colors.RESET} {self.failed}")
|
||||
print(f" {Colors.BLUE}Total:{Colors.RESET} {self.passed + self.failed}\n")
|
||||
|
||||
return self.failed == 0
|
||||
|
||||
def test_immediate_dispatch(self):
|
||||
"""Test that dispatch returns immediately with job_id"""
|
||||
start_time = time.time()
|
||||
job_id, status = self.dispatcher.dispatch_task("test_project", "echo hello")
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
assert job_id, "Job ID should be returned"
|
||||
assert isinstance(status, dict), "Status should be dict"
|
||||
assert status["status"] == "dispatched", "Initial status should be 'dispatched'"
|
||||
assert status["project"] == "test_project", "Project should match"
|
||||
assert elapsed < 0.5, f"Dispatch should be instant (took {elapsed}s)"
|
||||
|
||||
def test_job_status_retrieval(self):
|
||||
"""Test retrieving job status"""
|
||||
job_id, initial_status = self.dispatcher.dispatch_task("proj1", "task1")
|
||||
|
||||
# Retrieve status
|
||||
retrieved = self.dispatcher.get_status(job_id)
|
||||
assert retrieved is not None, "Status should be retrievable"
|
||||
assert retrieved["id"] == job_id, "Job ID should match"
|
||||
assert retrieved["status"] == "dispatched", "Status should be dispatched"
|
||||
|
||||
def test_status_updates(self):
|
||||
"""Test updating job status"""
|
||||
job_id, _ = self.dispatcher.dispatch_task("proj1", "task1")
|
||||
|
||||
# Update status
|
||||
self.dispatcher.update_status(job_id, "running", progress=25, message="Processing...")
|
||||
status = self.dispatcher.get_status(job_id, use_cache=False)
|
||||
|
||||
assert status["status"] == "running", "Status should be updated"
|
||||
assert status["progress"] == 25, "Progress should be updated"
|
||||
assert status["message"] == "Processing...", "Message should be updated"
|
||||
|
||||
def test_concurrent_jobs(self):
|
||||
"""Test handling multiple concurrent jobs"""
|
||||
jobs = []
|
||||
for i in range(5):
|
||||
job_id, status = self.dispatcher.dispatch_task(f"proj{i}", f"task{i}")
|
||||
jobs.append(job_id)
|
||||
|
||||
# Verify all jobs exist
|
||||
for job_id in jobs:
|
||||
status = self.dispatcher.get_status(job_id)
|
||||
assert status is not None, f"Job {job_id} should exist"
|
||||
|
||||
# Verify list shows all jobs
|
||||
all_jobs = self.dispatcher.list_jobs()
|
||||
assert len(all_jobs) >= 5, "Should have at least 5 jobs"
|
||||
|
||||
def test_cache_behavior(self):
|
||||
"""Test cache behavior"""
|
||||
job_id, _ = self.dispatcher.dispatch_task("proj1", "task1")
|
||||
|
||||
# First read should cache
|
||||
status1 = self.dispatcher.get_status(job_id, use_cache=True)
|
||||
|
||||
# Update directly on disk
|
||||
self.dispatcher.update_status(job_id, "running", progress=50)
|
||||
|
||||
# Cached read should be stale
|
||||
status2 = self.dispatcher.get_status(job_id, use_cache=True)
|
||||
assert status2["progress"] == 50, "Cache should be updated on write"
|
||||
|
||||
# Non-cached read should be fresh
|
||||
time.sleep(1.1) # Wait for cache to expire
|
||||
status3 = self.dispatcher.get_status(job_id, use_cache=False)
|
||||
assert status3["progress"] == 50, "Fresh read should show updated status"
|
||||
|
||||
def test_cli_feedback(self):
|
||||
"""Test CLI feedback rendering"""
|
||||
status = {
|
||||
"id": "test-job-id",
|
||||
"project": "test_proj",
|
||||
"status": "running",
|
||||
"progress": 45,
|
||||
"message": "Processing files...",
|
||||
}
|
||||
|
||||
# Should not raise exception
|
||||
self.feedback.show_status(status)
|
||||
self.feedback.show_status_line(status)
|
||||
self.feedback.job_dispatched("test-id", "proj", "task")
|
||||
|
||||
def test_progress_bar(self):
|
        """Test progress bar rendering"""
        bar = ProgressBar.render(0)
        assert "[" in bar and "]" in bar, "Progress bar should have brackets"

        bar50 = ProgressBar.render(50)
        bar100 = ProgressBar.render(100)

        assert bar50.count("█") > bar.count("█"), "50% should have more filled blocks"
        assert bar100.count("█") > bar50.count("█"), "100% should have all filled blocks"

    def test_background_monitoring(self):
        """Test background monitoring queue"""
        job_id, _ = self.dispatcher.dispatch_task("proj1", "test task")

        # Monitoring queue should have the job
        assert not self.dispatcher.monitoring_queue.empty(), "Queue should have job"

        # Get item from queue (with retry in case of timing issues)
        try:
            job_info = self.dispatcher.monitoring_queue.get(timeout=1)
            assert job_info["job_id"] == job_id, "Queue should contain correct job_id"
        except Exception:
            # Queue might have been processed already - verify job exists instead
            status = self.dispatcher.get_status(job_id)
            assert status is not None, "Job should exist in dispatcher"


class TestEnhancedDispatcher:
    """Test enhanced dispatcher with integrated features"""

    def __init__(self):
        self.test_dir = Path(tempfile.mkdtemp(prefix="luzia_enh_test_"))
        self.enhanced = EnhancedDispatcher(self.test_dir)
        self.passed = 0
        self.failed = 0

    def run_all_tests(self):
        """Run all tests"""
        print(f"\n{Colors.BOLD}=== Enhanced Dispatcher Test Suite ==={Colors.RESET}\n")

        tests = [
            self.test_dispatch_and_report,
            self.test_status_display,
            self.test_jobs_summary,
        ]

        for test in tests:
            try:
                print(f" Running {test.__name__}...", end=" ", flush=True)
                test()
                self.passed += 1
                print(f"{Colors.GREEN}✓{Colors.RESET}")
            except AssertionError as e:
                self.failed += 1
                print(f"{Colors.RED}✗{Colors.RESET}")
                print(f" Error: {e}")
            except Exception as e:
                self.failed += 1
                print(f"{Colors.RED}✗{Colors.RESET}")
                print(f" Unexpected error: {e}")

        print(f"\n{Colors.BOLD}=== Test Summary ==={Colors.RESET}")
        print(f" {Colors.GREEN}Passed:{Colors.RESET} {self.passed}")
        print(f" {Colors.RED}Failed:{Colors.RESET} {self.failed}")
        print(f" {Colors.BLUE}Total:{Colors.RESET} {self.passed + self.failed}\n")

        return self.failed == 0

    def test_dispatch_and_report(self):
        """Test dispatch with feedback"""
        job_id, status = self.enhanced.dispatch_and_report(
            "test_proj", "test task", show_feedback=False
        )
        assert job_id, "Should return job_id"
        assert status["status"] == "dispatched", "Should be dispatched"

    def test_status_display(self):
        """Test status display"""
        job_id, _ = self.enhanced.dispatch_and_report(
            "proj", "task", show_feedback=False
        )
        status = self.enhanced.get_status_and_display(job_id, show_full=False)
        assert status is not None, "Should retrieve status"

    def test_jobs_summary(self):
        """Test jobs summary display"""
        for i in range(3):
            self.enhanced.dispatch_and_report(f"proj{i}", f"task{i}", show_feedback=False)

        # Should not raise exception
        self.enhanced.show_jobs_summary()
        self.enhanced.show_concurrent_summary()


def main():
    """Run all test suites"""
    print(f"\n{Colors.BOLD}{Colors.CYAN}Luzia Responsive Dispatcher Tests{Colors.RESET}")
    print(f"{Colors.GRAY}Testing non-blocking dispatch and status tracking{Colors.RESET}")

    # Test responsive dispatcher
    dispatcher_tests = TestResponsiveDispatcher()
    dispatcher_ok = dispatcher_tests.run_all_tests()

    # Test enhanced dispatcher
    enhanced_tests = TestEnhancedDispatcher()
    enhanced_ok = enhanced_tests.run_all_tests()

    # Summary
    all_passed = dispatcher_ok and enhanced_ok
    if all_passed:
        print(f"{Colors.GREEN}{Colors.BOLD}✓ All tests passed!{Colors.RESET}\n")
        return 0
    else:
        print(f"{Colors.RED}{Colors.BOLD}✗ Some tests failed{Colors.RESET}\n")
        return 1


if __name__ == "__main__":
    sys.exit(main())
433
tests/test_skill_learning.py
Normal file
@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Tests for skill learning system.

Tests the complete pipeline:
1. Task execution analysis
2. Skill extraction
3. Learning storage in KG
4. Skill recommendations
5. QA integration
"""

import pytest
import json
import sys
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch

# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))

from skill_learning_engine import (
    TaskAnalyzer, SkillExtractor, LearningEngine,
    SkillRecommender, SkillLearningSystem,
    TaskExecution, ExtractedSkill
)


class TestTaskAnalyzer:
    """Test task analysis and pattern extraction."""

    def test_analyze_valid_task(self):
        """Test analyzing a valid task execution."""
        analyzer = TaskAnalyzer()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor database schema",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read", "Edit"],
            "duration": 45.2,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        execution = analyzer.analyze_task(task_data)

        assert execution is not None
        assert execution.task_id == "test_001"
        assert execution.project == "overbits"
        assert execution.status == "success"
        assert len(execution.tools_used) == 3

    def test_extract_patterns(self):
        """Test pattern extraction from multiple tasks."""
        analyzer = TaskAnalyzer()

        # Add multiple tasks
        executions = []
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": "Test task",
                "project": "overbits",
                "status": "success" if i < 2 else "failed",
                "tools_used": ["Bash", "Read"],
                "duration": 30.0 + i,
                "result_summary": "Test",
                "qa_passed": i < 2,
                "timestamp": datetime.now().isoformat()
            }
            execution = analyzer.analyze_task(task_data)
            if execution:
                executions.append(execution)

        patterns = analyzer.extract_patterns(executions)

        assert "success_rate" in patterns
        assert "average_duration" in patterns
        assert "common_tools" in patterns
        assert patterns["success_rate"] == 2/3


class TestSkillExtractor:
    """Test skill extraction from tasks and QA results."""

    def test_extract_from_task(self):
        """Test skill extraction from task execution."""
        extractor = SkillExtractor()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Debug authentication flow for users",
            project="overbits",
            status="success",
            tools_used=["Read", "Bash", "Edit"],
            duration=30.0,
            result_summary="Fixed login issue",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = extractor.extract_from_task(execution)

        assert len(skills) > 0
        # Should have tool skills
        tool_skills = [s for s in skills if s.category == "tool_usage"]
        assert len(tool_skills) >= 3
        # Should have decision patterns
        decision_skills = [s for s in skills if s.category == "decision"]
        assert len(decision_skills) > 0

    def test_extract_from_qa_results(self):
        """Test skill extraction from QA results."""
        extractor = SkillExtractor()

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
                "command_docs": True,
            },
            "task_id": "test_001"
        }

        skills = extractor.extract_from_qa_results(qa_results)

        assert len(skills) == 3
        assert all(s.category == "pattern" for s in skills)
        assert all(s.confidence == 0.9 for s in skills)

    def test_extract_decision_patterns(self):
        """Test decision pattern extraction."""
        extractor = SkillExtractor()

        test_cases = [
            ("Optimize database query", "optimization"),
            ("Debug authentication issue", "debugging"),
            ("Write documentation for API", "documentation"),
            ("Test new feature", "testing"),
            ("Refactor old code", "refactoring"),
        ]

        for prompt, expected_pattern in test_cases:
            skills = extractor._extract_decision_patterns(prompt)
            pattern_names = [s.name for s in skills]
            assert any(expected_pattern in name for name in pattern_names)

    def test_aggregate_skills(self):
        """Test skill aggregation."""
        extractor = SkillExtractor()

        skills = [
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Read"},
                source_task_id="task_1",
                evidence="Used Read tool"
            ),
            ExtractedSkill(
                name="tool_read",
                category="tool_usage",
                confidence=0.85,
                context={"tool": "Read"},
                source_task_id="task_2",
                evidence="Used Read tool again"
            ),
        ]

        aggregated = extractor.aggregate_skills(skills)

        assert "tool_read" in aggregated
        assert aggregated["tool_read"]["occurrences"] == 2
        assert aggregated["tool_read"]["average_confidence"] == 0.825


class TestLearningEngine:
    """Test learning extraction and storage."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning(self, mock_kg):
        """Test learning extraction."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Refactor database schema for performance",
            project="overbits",
            status="success",
            tools_used=["Bash", "Read", "Edit"],
            duration=45.0,
            result_summary="Schema refactored successfully",
            qa_passed=True,
            timestamp=datetime.now()
        )

        skills = [
            ExtractedSkill(
                name="tool_bash",
                category="tool_usage",
                confidence=0.8,
                context={"tool": "Bash"},
                source_task_id="test_001",
                evidence="Used Bash"
            ),
        ]

        qa_results = {
            "passed": True,
            "results": {"syntax": True},
            "summary": {"errors": 0}
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is not None
        assert len(learning.skill_names) > 0
        assert learning.confidence > 0
        assert "overbits" in learning.applicability

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_extract_learning_failed_qa(self, mock_kg):
        """Test that learning is not extracted if QA fails."""
        engine = LearningEngine()

        execution = TaskExecution(
            task_id="test_001",
            prompt="Test task",
            project="test",
            status="success",
            tools_used=["Read"],
            duration=10.0,
            result_summary="Test",
            qa_passed=False,
            timestamp=datetime.now()
        )

        skills = []

        qa_results = {
            "passed": False,
            "results": {"syntax": False},
        }

        learning = engine.extract_learning(execution, skills, qa_results)

        assert learning is None


class TestSkillRecommender:
    """Test skill recommendation system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_recommend_for_task(self, mock_kg):
        """Test getting recommendations for a task."""
        recommender = SkillRecommender()

        # Mock KG search to return test learnings
        mock_kg.return_value.search.return_value = [
            {
                "name": "learning_001",
                "type": "finding",
                "metadata": {
                    "skills": ["tool_bash", "pattern_optimization"],
                    "confidence": 0.85,
                    "applicability": ["overbits", "general"],
                }
            },
        ]

        recommendations = recommender.recommend_for_task(
            "Optimize database performance",
            project="overbits"
        )

        assert len(recommendations) > 0
        assert recommendations[0]["confidence"] > 0

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_skill_profile(self, mock_kg):
        """Test getting skill profile."""
        recommender = SkillRecommender()

        mock_kg.return_value.list_entities.return_value = [
            {
                "name": "skill_001",
                "type": "finding",
                "metadata": {
                    "category": "tool_usage",
                    "skills": ["tool_bash", "tool_read"],
                }
            },
        ]

        profile = recommender.get_skill_profile()

        assert "total_learnings" in profile
        assert "by_category" in profile
        assert "top_skills" in profile


class TestSkillLearningSystem:
    """Test integrated skill learning system."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_process_task_completion(self, mock_kg):
        """Test full task completion processing."""
        system = SkillLearningSystem()

        task_data = {
            "task_id": "test_001",
            "prompt": "Refactor authentication module",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Read", "Edit", "Bash"],
            "duration": 60.0,
            "result_summary": "Successfully refactored",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {
                "syntax": True,
                "routes": True,
            },
            "summary": {"errors": 0, "warnings": 0, "info": 2}
        }

        result = system.process_task_completion(task_data, qa_results)

        assert result["success"]
        assert result["skills_extracted"] > 0
        assert result["learning_created"]

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_get_recommendations(self, mock_kg):
        """Test getting recommendations from system."""
        system = SkillLearningSystem()

        # Mock recommender
        mock_kg.return_value.search.return_value = []

        recommendations = system.get_recommendations(
            "Debug authentication issue",
            project="overbits"
        )

        assert isinstance(recommendations, list)


class TestIntegration:
    """Integration tests for complete workflows."""

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_complete_learning_pipeline(self, mock_kg):
        """Test complete pipeline from task to recommendation."""
        system = SkillLearningSystem()

        # Process a task
        task_data = {
            "task_id": "pipeline_test",
            "prompt": "Optimize API endpoint performance",
            "project": "overbits",
            "status": "success",
            "tools_used": ["Bash", "Read"],
            "duration": 30.0,
            "result_summary": "30% performance improvement",
            "qa_passed": True,
            "timestamp": datetime.now().isoformat()
        }

        qa_results = {
            "passed": True,
            "results": {"syntax": True, "routes": True},
            "summary": {"errors": 0}
        }

        # Process task
        result = system.process_task_completion(task_data, qa_results)
        assert result["success"]

        # Get recommendations
        recommendations = system.get_recommendations(
            "Improve API performance",
            project="overbits"
        )

        # Should be able to get recommendations
        assert isinstance(recommendations, list)

    @patch('skill_learning_engine.KnowledgeGraph')
    def test_skill_profile_evolution(self, mock_kg):
        """Test how skill profile evolves with multiple tasks."""
        system = SkillLearningSystem()

        # Process multiple tasks
        for i in range(3):
            task_data = {
                "task_id": f"task_{i}",
                "prompt": f"Test task {i}",
                "project": "overbits",
                "status": "success",
                "tools_used": ["Bash", "Read"] if i % 2 == 0 else ["Read", "Edit"],
                "duration": 20.0 + i,
                "result_summary": f"Task {i} completed",
                "qa_passed": True,
                "timestamp": datetime.now().isoformat()
            }

            qa_results = {
                "passed": True,
                "results": {"syntax": True},
                "summary": {"errors": 0}
            }

            system.process_task_completion(task_data, qa_results)

        # Get profile
        profile = system.get_learning_summary()

        assert profile["total_learnings"] >= 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
490
tests/test_sub_agent_context.py
Normal file
@@ -0,0 +1,490 @@
#!/usr/bin/env python3
"""
Tests for Sub-Agent Context Management

Verifies:
1. Sub-agent context creation and retrieval
2. Phase progression tracking
3. Sibling agent discovery and coordination
4. Context persistence
5. Flow integration
"""

import pytest
import tempfile
from pathlib import Path
import sys
import os

# Add parent directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))

from sub_agent_context import (
    SubAgentContext,
    SubAgentContextManager,
    FlowPhase,
)
from sub_agent_flow_integration import SubAgentFlowIntegrator


class TestSubAgentContextCreation:
    """Test sub-agent context creation"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = SubAgentContextManager(Path(self.temp_dir.name))

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_create_sub_agent_context(self):
        """Test creating a new sub-agent context"""
        context = self.manager.create_sub_agent_context(
            parent_task_id="task-123",
            parent_project="admin",
            parent_description="Test parent task",
            parent_context={"key": "value"},
            parent_tags=["important", "research"],
        )

        assert context.sub_agent_id is not None
        assert context.parent_task_id == "task-123"
        assert context.parent_project == "admin"
        assert context.parent_description == "Test parent task"
        assert len(context.phase_progression) == 9
        assert context.phase_progression[0].phase_name == "CONTEXT_PREP"

    def test_phase_progression_initialization(self):
        """Test that all 9 phases are initialized"""
        context = self.manager.create_sub_agent_context(
            parent_task_id="task-456",
            parent_project="test",
            parent_description="Phase test",
        )

        phase_names = [p.phase_name for p in context.phase_progression]
        expected_phases = [
            "CONTEXT_PREP",
            "RECEIVED",
            "PREDICTING",
            "ANALYZING",
            "CONSENSUS_CHECK",
            "AWAITING_APPROVAL",
            "STRATEGIZING",
            "EXECUTING",
            "LEARNING",
        ]

        assert phase_names == expected_phases

    def test_retrieve_sub_agent_context(self):
        """Test retrieving sub-agent context"""
        created = self.manager.create_sub_agent_context(
            parent_task_id="task-789",
            parent_project="admin",
            parent_description="Retrieve test",
        )

        retrieved = self.manager.get_sub_agent_context(created.sub_agent_id)

        assert retrieved is not None
        assert retrieved.sub_agent_id == created.sub_agent_id
        assert retrieved.parent_task_id == "task-789"


class TestSiblingDiscovery:
    """Test sibling agent discovery and awareness"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = SubAgentContextManager(Path(self.temp_dir.name))

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_single_sub_agent_no_siblings(self):
        """Test first sub-agent has no siblings"""
        context = self.manager.create_sub_agent_context(
            parent_task_id="parent-1",
            parent_project="admin",
            parent_description="First agent",
        )

        assert len(context.sibling_agents) == 0

    def test_multiple_sub_agents_discover_siblings(self):
        """Test multiple sub-agents discover each other as siblings"""
        # Create first sub-agent
        agent1 = self.manager.create_sub_agent_context(
            parent_task_id="parent-2",
            parent_project="admin",
            parent_description="Agent 1",
        )

        # Create second sub-agent for same parent
        agent2 = self.manager.create_sub_agent_context(
            parent_task_id="parent-2",
            parent_project="admin",
            parent_description="Agent 2",
        )

        # Create third sub-agent for same parent
        agent3 = self.manager.create_sub_agent_context(
            parent_task_id="parent-2",
            parent_project="admin",
            parent_description="Agent 3",
        )

        # Verify sibling relationships
        assert agent2.sub_agent_id in self.manager.get_sibling_agents(agent1.sub_agent_id)
        assert agent3.sub_agent_id in self.manager.get_sibling_agents(agent1.sub_agent_id)
        assert len(self.manager.get_sibling_agents(agent1.sub_agent_id)) == 2

        assert agent1.sub_agent_id in self.manager.get_sibling_agents(agent2.sub_agent_id)
        assert agent3.sub_agent_id in self.manager.get_sibling_agents(agent2.sub_agent_id)
        assert len(self.manager.get_sibling_agents(agent2.sub_agent_id)) == 2

    def test_agents_from_different_parents_not_siblings(self):
        """Test agents from different parents are not siblings"""
        agent1 = self.manager.create_sub_agent_context(
            parent_task_id="parent-a",
            parent_project="admin",
            parent_description="Agent 1",
        )

        agent2 = self.manager.create_sub_agent_context(
            parent_task_id="parent-b",
            parent_project="admin",
            parent_description="Agent 2",
        )

        assert agent2.sub_agent_id not in self.manager.get_sibling_agents(agent1.sub_agent_id)
        assert agent1.sub_agent_id not in self.manager.get_sibling_agents(agent2.sub_agent_id)


class TestPhaseProgression:
    """Test phase progression tracking"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = SubAgentContextManager(Path(self.temp_dir.name))
        self.context = self.manager.create_sub_agent_context(
            parent_task_id="task-phase",
            parent_project="admin",
            parent_description="Phase test",
        )

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_update_phase_status(self):
        """Test updating phase status"""
        success = self.manager.update_phase(
            self.context.sub_agent_id,
            "CONTEXT_PREP",
            "completed",
            output="Context prepared",
        )

        assert success is True

        updated = self.manager.get_sub_agent_context(self.context.sub_agent_id)
        phase = updated.phase_progression[0]
        assert phase.status == "completed"
        assert phase.output == "Context prepared"

    def test_get_current_phase(self):
        """Test getting current active phase"""
        # Initially should be first pending phase
        current = self.manager.get_current_phase(self.context.sub_agent_id)
        assert current == "CONTEXT_PREP"

        # Mark first phase as complete
        self.manager.update_phase(
            self.context.sub_agent_id,
            "CONTEXT_PREP",
            "completed",
        )

        # Now should be next pending phase
        current = self.manager.get_current_phase(self.context.sub_agent_id)
        assert current == "RECEIVED"

    def test_phase_duration_calculation(self):
        """Test duration calculation for completed phases"""
        # Mark phase as in progress
        self.manager.update_phase(
            self.context.sub_agent_id,
            "CONTEXT_PREP",
            "in_progress",
        )

        # Mark as completed
        self.manager.update_phase(
            self.context.sub_agent_id,
            "CONTEXT_PREP",
            "completed",
            output="Done",
        )

        updated = self.manager.get_sub_agent_context(self.context.sub_agent_id)
        phase = updated.phase_progression[0]
        assert phase.duration_seconds is not None
        assert phase.duration_seconds >= 0

    def test_phase_progression_sequence(self):
        """Test progressing through all phases"""
        sub_agent_id = self.context.sub_agent_id
        phases = [p.phase_name for p in self.context.phase_progression]

        for phase_name in phases:
            self.manager.update_phase(
                sub_agent_id,
                phase_name,
                "completed",
                output=f"Completed {phase_name}",
            )

        updated = self.manager.get_sub_agent_context(sub_agent_id)
        all_completed = all(p.status == "completed" for p in updated.phase_progression)
        assert all_completed is True


class TestCoordination:
    """Test sub-agent coordination and messaging"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = SubAgentContextManager(Path(self.temp_dir.name))

        # Create two sibling agents
        self.agent1 = self.manager.create_sub_agent_context(
            parent_task_id="parent-coord",
            parent_project="admin",
            parent_description="Agent 1",
        )
        self.agent2 = self.manager.create_sub_agent_context(
            parent_task_id="parent-coord",
            parent_project="admin",
            parent_description="Agent 2",
        )

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_send_message_to_sibling(self):
        """Test sending coordination message to sibling"""
        success = self.manager.send_message_to_sibling(
            self.agent1.sub_agent_id,
            self.agent2.sub_agent_id,
            "request",
            {"type": "need_data", "data_type": "context"},
        )

        assert success is True

    def test_message_appears_in_both_agents(self):
        """Test message is visible to both sender and receiver"""
        self.manager.send_message_to_sibling(
            self.agent1.sub_agent_id,
            self.agent2.sub_agent_id,
            "update",
            {"status": "ready"},
        )

        agent1_updated = self.manager.get_sub_agent_context(self.agent1.sub_agent_id)
        agent2_updated = self.manager.get_sub_agent_context(self.agent2.sub_agent_id)

        assert len(agent1_updated.coordination_messages) == 1
        assert len(agent2_updated.coordination_messages) == 1
        assert agent1_updated.coordination_messages[0]["type"] == "update"
        assert agent2_updated.coordination_messages[0]["type"] == "update"

    def test_cannot_message_non_sibling(self):
        """Test cannot send message to non-sibling agent"""
        # Create agent with different parent
        agent3 = self.manager.create_sub_agent_context(
            parent_task_id="parent-other",
            parent_project="admin",
            parent_description="Agent 3",
        )

        # Try to send message across parent boundary
        success = self.manager.send_message_to_sibling(
            self.agent1.sub_agent_id,
            agent3.sub_agent_id,
            "request",
            {"data": "test"},
        )

        assert success is False


class TestContextPersistence:
    """Test context persistence to disk"""

    def test_context_saved_and_loaded(self):
        """Test contexts are saved to disk and reloaded"""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager1 = SubAgentContextManager(Path(temp_dir))

            # Create context in first manager
            context1 = manager1.create_sub_agent_context(
                parent_task_id="task-persist",
                parent_project="admin",
                parent_description="Persistence test",
            )
            sub_agent_id = context1.sub_agent_id

            # Create new manager pointing to same directory
            manager2 = SubAgentContextManager(Path(temp_dir))

            # Should be able to retrieve context from new manager
            context2 = manager2.get_sub_agent_context(sub_agent_id)

            assert context2 is not None
            assert context2.parent_task_id == "task-persist"
            assert context2.sub_agent_id == sub_agent_id


class TestFlowIntegration:
    """Test flow integration with sub-agent context"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.context_manager = SubAgentContextManager(Path(self.temp_dir.name))
        self.integrator = SubAgentFlowIntegrator(self.context_manager)

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_execute_sub_agent_flow(self):
        """Test executing full sub-agent flow"""
        results = self.integrator.execute_sub_agent_flow(
            parent_task_id="task-flow",
            parent_project="admin",
            parent_description="Flow test",
            parent_context={"key": "value"},
        )

        assert results["sub_agent_id"] is not None
        assert "phases" in results
        # Should have results for all 9 phases
        assert len(results["phases"]) == 9

    def test_execute_single_phase(self):
        """Test executing a single phase"""
        context = self.context_manager.create_sub_agent_context(
            parent_task_id="task-single",
            parent_project="admin",
            parent_description="Single phase test",
        )

        result = self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")

        assert result["status"] == "completed"
        assert "output" in result

    def test_get_sub_agent_progress(self):
        """Test getting progress report"""
        context = self.context_manager.create_sub_agent_context(
            parent_task_id="task-progress",
            parent_project="admin",
            parent_description="Progress test",
        )

        # Execute a phase
        self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")
        self.integrator.execute_phase(context.sub_agent_id, "RECEIVED")

        progress = self.integrator.get_sub_agent_progress(context.sub_agent_id)

        assert progress["completed_phases"] == 2
        assert progress["in_progress_phases"] == 0
        assert progress["total_phases"] == 9

    def test_coordinate_sequential_sub_agents(self):
        """Test sequential coordination of sub-agents"""
        # Create multiple sub-agents for same parent
        for i in range(3):
            self.context_manager.create_sub_agent_context(
                parent_task_id="task-coord",
                parent_project="admin",
                parent_description=f"Agent {i+1}",
            )

        coordination = self.integrator.coordinate_sub_agents(
            parent_task_id="task-coord",
            coordination_strategy="sequential",
        )

        assert len(coordination["sub_agents"]) == 3
        assert coordination["strategy"] == "sequential"

    def test_collect_sub_agent_results(self):
        """Test collecting results from multiple sub-agents"""
        # Create and execute multiple sub-agents
        for i in range(2):
            context = self.context_manager.create_sub_agent_context(
                parent_task_id="task-collect",
                parent_project="admin",
                parent_description=f"Agent {i+1}",
            )
            self.integrator.execute_phase(context.sub_agent_id, "CONTEXT_PREP")

        results = self.integrator.collect_sub_agent_results("task-collect")

        assert results["sub_agents_total"] == 2
        assert len(results["sub_agents"]) == 2
        assert all("progress" in s for s in results["sub_agents"])


class TestContextSummary:
    """Test context summary generation"""

    def setup_method(self):
        """Setup test fixtures"""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = SubAgentContextManager(Path(self.temp_dir.name))

    def teardown_method(self):
        """Cleanup"""
        self.temp_dir.cleanup()

    def test_get_context_summary(self):
        """Test getting human-readable summary"""
        context = self.manager.create_sub_agent_context(
            parent_task_id="task-summary",
            parent_project="admin",
            parent_description="Summary test",
            parent_tags=["important", "urgent"],
        )

        # Create a sibling
        self.manager.create_sub_agent_context(
            parent_task_id="task-summary",
            parent_project="admin",
            parent_description="Sibling agent",
        )

        summary = self.manager.get_context_summary(context.sub_agent_id)

        assert summary is not None
        assert summary["sub_agent_id"] == context.sub_agent_id
        assert summary["parent_task_id"] == "task-summary"
        assert summary["sibling_count"] == 1
        assert summary["parent_tags"] == ["important", "urgent"]


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
436
tests/test_time_metrics.py
Normal file
@@ -0,0 +1,436 @@
#!/usr/bin/env python3
"""
Test cases for time_metrics module.

Run with: pytest /opt/server-agents/orchestrator/tests/test_time_metrics.py -v
"""

import json
import os
import sys
import tempfile
import time
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

# Add lib to path
sys.path.insert(0, str(Path(__file__).parent.parent / "lib"))

import pytest

# Import module under test
from time_metrics import (
    get_utc_now,
    get_utc_now_with_offset,
    parse_iso_timestamp,
    calculate_duration_seconds,
    format_duration,
    format_duration_human,
    elapsed_since,
    convert_to_local_time,
    format_timestamp_with_local,
    get_system_load,
    get_memory_usage,
    get_disk_usage,
    capture_system_context,
    TaskTimeTracker,
    create_task_time_metadata,
    update_task_completion_metadata,
    format_job_with_timing,
    format_logs_header,
    DEFAULT_TIMEZONE
)


class TestTimestampFunctions:
    """Test timestamp generation and parsing."""

    def test_get_utc_now_format(self):
        """UTC timestamp should be in ISO 8601 format with Z suffix."""
        ts = get_utc_now()
        assert ts.endswith("Z")
        # Should be parseable
        dt = datetime.fromisoformat(ts[:-1])
        assert dt is not None

    def test_get_utc_now_with_offset_format(self):
        """UTC timestamp with offset should end with +00:00."""
        ts = get_utc_now_with_offset()
        assert "+00:00" in ts

    def test_parse_iso_timestamp_z_suffix(self):
        """Parse timestamp with Z suffix."""
        ts = "2026-01-11T03:31:57Z"
        dt = parse_iso_timestamp(ts)
        assert dt is not None
        assert dt.year == 2026
        assert dt.month == 1
        assert dt.day == 11
        assert dt.hour == 3
        assert dt.minute == 31
        assert dt.second == 57

    def test_parse_iso_timestamp_with_offset(self):
        """Parse timestamp with timezone offset."""
        ts = "2026-01-11T00:31:57-03:00"
        dt = parse_iso_timestamp(ts)
        assert dt is not None
        assert dt.hour == 0  # Local hour

    def test_parse_iso_timestamp_no_tz(self):
        """Parse timestamp without timezone (assume UTC)."""
        ts = "2026-01-11T03:31:57"
        dt = parse_iso_timestamp(ts)
        assert dt is not None
        assert dt.hour == 3

    def test_parse_iso_timestamp_none(self):
        """None input should return None."""
        assert parse_iso_timestamp(None) is None
        assert parse_iso_timestamp("") is None

    def test_parse_iso_timestamp_invalid(self):
        """Invalid timestamp should return None."""
        assert parse_iso_timestamp("not-a-timestamp") is None
        assert parse_iso_timestamp("2026-99-99T99:99:99Z") is None


class TestDurationCalculations:
    """Test duration calculation and formatting."""

    def test_calculate_duration_seconds(self):
        """Calculate duration between two timestamps."""
        start = "2026-01-11T10:00:00Z"
        end = "2026-01-11T10:01:00Z"
        duration = calculate_duration_seconds(start, end)
        assert duration == 60.0

    def test_calculate_duration_hours(self):
        """Calculate duration spanning hours."""
        start = "2026-01-11T10:00:00Z"
        end = "2026-01-11T12:30:00Z"
        duration = calculate_duration_seconds(start, end)
        assert duration == 2.5 * 3600  # 2.5 hours

    def test_calculate_duration_negative(self):
        """Duration can be negative if end is before start."""
        start = "2026-01-11T12:00:00Z"
        end = "2026-01-11T10:00:00Z"
        duration = calculate_duration_seconds(start, end)
        assert duration < 0

    def test_calculate_duration_none(self):
        """Invalid inputs should return None."""
        assert calculate_duration_seconds(None, "2026-01-11T10:00:00Z") is None
        assert calculate_duration_seconds("2026-01-11T10:00:00Z", None) is None

    def test_format_duration_seconds(self):
        """Format durations under a minute."""
        assert format_duration(0) == "00:00:00"
        assert format_duration(45) == "00:00:45"
        assert format_duration(59) == "00:00:59"

    def test_format_duration_minutes(self):
        """Format durations in minutes."""
        assert format_duration(60) == "00:01:00"
        assert format_duration(125) == "00:02:05"
        assert format_duration(3599) == "00:59:59"

    def test_format_duration_hours(self):
        """Format durations in hours."""
        assert format_duration(3600) == "01:00:00"
        assert format_duration(3661) == "01:01:01"
        assert format_duration(7200) == "02:00:00"

    def test_format_duration_none(self):
        """None or negative should return placeholder."""
        assert format_duration(None) == "--:--:--"
        assert format_duration(-1) == "--:--:--"

    def test_format_duration_human_seconds(self):
        """Human-readable format for seconds."""
        assert format_duration_human(0) == "0s"
        assert format_duration_human(45) == "45s"
        assert format_duration_human(59) == "59s"

    def test_format_duration_human_minutes(self):
        """Human-readable format for minutes."""
        assert format_duration_human(60) == "1m 0s"
        assert format_duration_human(125) == "2m 5s"
        assert format_duration_human(3599) == "59m 59s"

    def test_format_duration_human_hours(self):
        """Human-readable format for hours."""
        assert format_duration_human(3600) == "1h 0m 0s"
        assert format_duration_human(3661) == "1h 1m 1s"
        assert format_duration_human(7200) == "2h 0m 0s"

    def test_format_duration_human_days(self):
        """Human-readable format for days."""
        assert format_duration_human(86400) == "1d 0h 0m"
        assert format_duration_human(90061) == "1d 1h 1m"

    def test_format_duration_human_none(self):
        """None or negative should return 'unknown'."""
        assert format_duration_human(None) == "unknown"
        assert format_duration_human(-1) == "unknown"


class TestTimezoneConversion:
    """Test timezone conversion functions."""

    def test_convert_to_local_time_montevideo(self):
        """Convert UTC to Montevideo time (UTC-3)."""
        utc_ts = "2026-01-11T03:31:57Z"
        local = convert_to_local_time(utc_ts, "America/Montevideo")
        # Should be 00:31:57 local (UTC-3)
        assert "00:31:57" in local

    def test_convert_to_local_time_invalid(self):
        """Invalid timestamp should return original."""
        result = convert_to_local_time("invalid", "America/Montevideo")
        assert "invalid" in result

    def test_format_timestamp_with_local(self):
        """Format timestamp showing both UTC and local."""
        utc_ts = "2026-01-11T03:31:57Z"
        result = format_timestamp_with_local(utc_ts, "America/Montevideo")
        assert "2026-01-11T03:31:57Z" in result
        assert "America/Montevideo" in result


class TestSystemContext:
    """Test system context capture functions."""

    def test_get_system_load_returns_tuple(self):
        """System load should return 3-element tuple."""
        load = get_system_load()
        assert isinstance(load, tuple)
        assert len(load) == 3
        assert all(isinstance(l, (int, float)) for l in load)

    def test_get_memory_usage_keys(self):
        """Memory usage should have expected keys."""
        mem = get_memory_usage()
        assert "total_mb" in mem
        assert "available_mb" in mem
        assert "used_mb" in mem
        assert "used_percent" in mem
        assert 0 <= mem["used_percent"] <= 100

    def test_get_disk_usage_keys(self):
        """Disk usage should have expected keys."""
        disk = get_disk_usage("/")
        assert "total_gb" in disk
        assert "free_gb" in disk
        assert "used_gb" in disk
        assert "used_percent" in disk
        assert 0 <= disk["used_percent"] <= 100

    def test_capture_system_context_structure(self):
        """System context should have complete structure."""
        ctx = capture_system_context()
        assert "timestamp" in ctx
        assert "system_load" in ctx
        assert "memory" in ctx
        assert "disk" in ctx

        assert isinstance(ctx["system_load"], list)
        assert len(ctx["system_load"]) == 3

        assert "used_percent" in ctx["memory"]
        assert "available_mb" in ctx["memory"]

        assert "used_percent" in ctx["disk"]
        assert "free_gb" in ctx["disk"]


class TestTaskTimeTracker:
    """Test TaskTimeTracker class."""

    def test_tracker_initialization(self):
        """Tracker should initialize with task_id and project."""
        tracker = TaskTimeTracker("test-001", "admin")
        assert tracker.task_id == "test-001"
        assert tracker.project == "admin"
        assert tracker.dispatch_time is None
        assert tracker.completion_time is None

    def test_mark_dispatched(self):
        """mark_dispatched should record dispatch time and context."""
        tracker = TaskTimeTracker("test-001", "admin")
        result = tracker.mark_dispatched()

        assert tracker.dispatch_time is not None
        assert tracker.dispatch_context is not None

        assert "dispatch" in result
        assert "utc_time" in result["dispatch"]
        assert "system_load" in result["dispatch"]
        assert "memory_percent" in result["dispatch"]

    def test_mark_started(self):
        """mark_started should record start time."""
        tracker = TaskTimeTracker("test-001", "admin")
        tracker.mark_dispatched()
        result = tracker.mark_started()

        assert tracker.start_time is not None
        assert "start_time" in result

    def test_mark_completed(self):
        """mark_completed should calculate duration."""
        tracker = TaskTimeTracker("test-001", "admin")
        tracker.mark_dispatched()
        time.sleep(1.1)  # Delay for measurable duration (must be > 1 sec for second resolution)
        result = tracker.mark_completed(exit_code=0)

        assert tracker.completion_time is not None
        assert "completion" in result
        assert "utc_time" in result["completion"]
        assert "duration_seconds" in result["completion"]
        # Duration should be at least 1 second
        assert result["completion"]["duration_seconds"] >= 1.0
        assert "exit_code" in result["completion"]
        assert result["completion"]["exit_code"] == 0

    def test_get_full_metrics_running(self):
        """get_full_metrics for running task should show elapsed."""
        tracker = TaskTimeTracker("test-001", "admin")
        tracker.mark_dispatched()
        metrics = tracker.get_full_metrics()

        assert metrics["status"] == "running"
        assert "elapsed" in metrics
        assert "dispatch" in metrics

    def test_get_full_metrics_completed(self):
        """get_full_metrics for completed task should show duration."""
        tracker = TaskTimeTracker("test-001", "admin")
        tracker.mark_dispatched()
        tracker.mark_completed(0)
        metrics = tracker.get_full_metrics()

        assert metrics["status"] == "completed"
        assert "completion" in metrics
        assert "duration_seconds" in metrics["completion"]


class TestMetadataFunctions:
    """Test metadata helper functions."""

    def test_create_task_time_metadata(self):
        """create_task_time_metadata should return dispatch info."""
        meta = create_task_time_metadata("test-001", "admin")

        assert "time_metrics" in meta
        assert "time_tracker_data" in meta
        assert "dispatch" in meta["time_metrics"]
        assert meta["time_tracker_data"]["task_id"] == "test-001"
        assert meta["time_tracker_data"]["project"] == "admin"

    def test_update_task_completion_metadata(self):
        """update_task_completion_metadata should add completion info."""
        # Create initial metadata
        meta = create_task_time_metadata("test-001", "admin")
        time.sleep(0.1)

        # Update with completion
        updated = update_task_completion_metadata(meta, exit_code=0)

        assert "time_metrics" in updated
        assert "completion" in updated["time_metrics"]
        assert updated["time_metrics"]["completion"]["exit_code"] == 0


class TestOutputFormatters:
    """Test output formatting functions."""

    def test_format_job_with_timing_complete(self):
        """Format job with timing info should include all fields."""
        job = {
            "id": "123456-abcd",
            "project": "admin",
            "status": "completed",
            "time_metrics": {
                "dispatch": {
                    "utc_time": "2026-01-11T10:00:00Z",
                    "system_load": [0.5, 0.6, 0.7]
                },
                "completion": {
                    "duration_formatted": "00:05:30"
                }
            }
        }

        result = format_job_with_timing(job)
        assert "123456-abcd" in result
        assert "admin" in result
        assert "completed" in result
        assert "10:00:00" in result
        assert "00:05:30" in result

    def test_format_job_with_timing_running(self):
        """Format running job should show elapsed time."""
        job = {
            "id": "123456-abcd",
            "project": "admin",
            "status": "running",
            "time_metrics": {
                "dispatch": {
                    "utc_time": "2026-01-11T10:00:00Z",
                    "system_load": [0.5, 0.6, 0.7]
                }
            }
        }

        result = format_job_with_timing(job)
        assert "123456-abcd" in result
        assert "running" in result

    def test_format_logs_header_structure(self):
        """Logs header should contain timing sections."""
        job = {
            "id": "123456-abcd",
            "project": "admin",
            "status": "completed",
            "time_metrics": {
                "dispatch": {
                    "utc_time": "2026-01-11T10:00:00Z",
                    "system_load": [0.5, 0.6, 0.7],
                    "memory_percent": 65,
                    "disk_percent": 45
                },
                "completion": {
                    "utc_time": "2026-01-11T10:05:30Z",
                    "duration_formatted": "00:05:30"
                }
            }
        }

        header = format_logs_header(job)
        assert "═" in header  # Box drawing
        assert "Job:" in header
        assert "Agent: admin" in header
        assert "Dispatched:" in header
        assert "Status:" in header
        assert "System:" in header


class TestElapsedSince:
    """Test elapsed_since function."""

    def test_elapsed_since_recent(self):
        """elapsed_since should calculate time from now."""
        # Use a timestamp 5 seconds ago
        past = datetime.utcnow() - timedelta(seconds=5)
        past_ts = past.strftime("%Y-%m-%dT%H:%M:%SZ")

        elapsed = elapsed_since(past_ts)
        # Should be around 5s (allow some tolerance)
        assert "s" in elapsed  # Should have seconds format


if __name__ == "__main__":
    pytest.main([__file__, "-v"])