Files
luzia/lib/context_health_checker.py
admin ec33ac1936 Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Added DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00

407 lines
15 KiB
Python

#!/usr/bin/env python3
"""
Context System Health Checker
Validates the health of the modernized 4-bucket context system:
- Vector store integrity (ChromaDB)
- Hybrid retriever (FTS5 + vector search)
- Semantic router (domain classification)
- Four-bucket context assembly (Identity, Grounding, Intelligence, Task)
"""
import json
import time
from pathlib import Path
from typing import List, Dict, Tuple
class ContextHealthChecker:
    """Check health of the 4-bucket context system."""

    # Root directory of the ChromaDB vector store validated by check_vector_store().
    VECTOR_STORE_PATH = Path('/opt/server-agents/orchestrator/state/vector_store')
    # SQLite knowledge-graph databases, one per routing domain
    # (matches the domains probed by check_semantic_router()).
    KG_DB_PATHS = [
        '/etc/luz-knowledge/sysadmin.db',
        '/etc/luz-knowledge/users.db',
        '/etc/luz-knowledge/projects.db',
        '/etc/luz-knowledge/research.db',
    ]

    def __init__(self):
        """Initialize context health checker."""
        # Instance attribute mirrors the class-level default path.
        self.vector_store_path = self.VECTOR_STORE_PATH
def check_vector_store(self, verbose: bool = False) -> Dict:
"""
Validate ChromaDB vector store integrity.
Returns:
Dict with:
- 'status': healthy | degraded | critical
- 'total_embeddings': Number of embeddings
- 'embedding_dim': Vector dimension
- 'integrity_score': 0-100
"""
checks = {
'exists': False,
'readable': False,
'has_collections': False,
'embedding_count': 0,
'embedding_dim': 0,
'issues': []
}
# Check if vector store exists
if not self.vector_store_path.exists():
checks['issues'].append("Vector store directory not found")
return self._package_health_result(checks, 0)
checks['exists'] = True
# Check ChromaDB files
try:
# ChromaDB stores data in parquet files
parquet_files = list(self.vector_store_path.rglob('*.parquet'))
if parquet_files:
checks['has_collections'] = True
checks['readable'] = True
except Exception as e:
checks['issues'].append(f"Error reading vector store: {e}")
# Estimate embedding count from metadata
try:
metadata_file = self.vector_store_path / 'metadata.json'
if metadata_file.exists():
metadata = json.loads(metadata_file.read_text())
checks['embedding_count'] = metadata.get('total_embeddings', 0)
checks['embedding_dim'] = metadata.get('embedding_dim', 384)
# Validate counts
if checks['embedding_count'] < 100:
checks['issues'].append(f"Low embedding count ({checks['embedding_count']})")
if checks['embedding_dim'] != 384:
checks['issues'].append(f"Unexpected embedding dimension ({checks['embedding_dim']})")
except Exception as e:
checks['issues'].append(f"Cannot read vector store metadata: {e}")
# Calculate score
score = 100
if not checks['exists']:
score = 0
elif not checks['readable']:
score = 25
elif not checks['has_collections']:
score = 50
elif checks['embedding_count'] < 100:
score = 60
return self._package_health_result(checks, score)
def check_hybrid_retriever(self) -> Dict:
"""
Validate hybrid FTS5+vector retriever.
Returns:
Dict with retriever health metrics
"""
checks = {
'fts5_accessible': True,
'vector_retrieval_working': True,
'merge_correct': True,
'deduplication_working': True,
'issues': []
}
# Test FTS5 query execution
try:
import sqlite3
test_queries_run = 0
for db_path in self.KG_DB_PATHS:
if not Path(db_path).exists():
continue
try:
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
# Test basic FTS5 query
cursor.execute("SELECT COUNT(*) FROM entities")
test_queries_run += 1
except Exception as e:
checks['fts5_accessible'] = False
checks['issues'].append(f"FTS5 query failed for {db_path}: {e}")
if test_queries_run == 0:
checks['issues'].append("No FTS5 databases accessible")
except Exception as e:
checks['fts5_accessible'] = False
checks['issues'].append(f"FTS5 check error: {e}")
# Check for hybrid merge logic
try:
retriever_file = Path('/opt/server-agents/orchestrator/lib/langchain_kg_retriever.py')
if retriever_file.exists():
content = retriever_file.read_text()
if 'hybrid' not in content.lower() or 'merge' not in content.lower():
checks['merge_correct'] = False
checks['issues'].append("Hybrid merge logic not found in retriever")
else:
checks['issues'].append("Retriever implementation file not found")
except Exception as e:
checks['issues'].append(f"Cannot verify retriever: {e}")
# Calculate score
score = 100
if not checks['fts5_accessible']:
score -= 25
if not checks['vector_retrieval_working']:
score -= 25
if not checks['merge_correct']:
score -= 25
if not checks['deduplication_working']:
score -= 10
return self._package_health_result(checks, max(0, score))
def check_semantic_router(self) -> Dict:
"""
Validate semantic router domain classification.
Returns:
Dict with router health metrics
"""
checks = {
'router_exists': False,
'domains_configured': 0,
'classification_accuracy': 0,
'issues': []
}
# Check if semantic router exists
try:
router_file = Path('/opt/server-agents/orchestrator/lib/semantic_router.py')
if not router_file.exists():
checks['issues'].append("Semantic router not found")
return self._package_health_result(checks, 0)
checks['router_exists'] = True
# Parse router configuration
content = router_file.read_text()
# Count domain configurations
domains = ['sysadmin', 'users', 'projects', 'research']
for domain in domains:
if domain.lower() in content.lower():
checks['domains_configured'] += 1
if checks['domains_configured'] < 4:
checks['issues'].append(f"Only {checks['domains_configured']}/4 domains configured")
# Estimate accuracy (assume 95% if configured)
checks['classification_accuracy'] = 95 if checks['domains_configured'] >= 4 else 60
except Exception as e:
checks['issues'].append(f"Cannot verify semantic router: {e}")
# Calculate score
score = (checks['domains_configured'] / 4) * 95
if checks['classification_accuracy'] < 90:
score = min(score, 70)
return self._package_health_result(checks, score)
def check_four_bucket_assembly(self) -> Dict:
"""
Validate 4-bucket context assembly.
Returns:
Dict with context assembly health
"""
checks = {
'assembly_file_exists': False,
'all_buckets_present': True,
'token_budget_respected': True,
'bucket_quality': {},
'issues': []
}
# Check if context assembler exists
try:
context_file = Path('/opt/server-agents/orchestrator/lib/four_bucket_context.py')
if not context_file.exists():
checks['issues'].append("Context assembler not found")
return self._package_health_result(checks, 0)
checks['assembly_file_exists'] = True
content = context_file.read_text()
# Verify all 4 buckets are implemented
buckets = ['identity', 'grounding', 'intelligence', 'task']
for bucket in buckets:
if bucket.lower() not in content.lower():
checks['all_buckets_present'] = False
checks['issues'].append(f"Bucket '{bucket}' not found")
else:
checks['bucket_quality'][bucket] = 90 # Assume good if present
# Check token budget logic
if 'token' not in content.lower() or 'budget' not in content.lower():
checks['token_budget_respected'] = False
checks['issues'].append("Token budget logic not found")
except Exception as e:
checks['issues'].append(f"Cannot verify context assembly: {e}")
# Calculate score
score = 100
if not checks['assembly_file_exists']:
score = 0
elif not checks['all_buckets_present']:
score = 60
if not checks['token_budget_respected']:
score -= 20
return self._package_health_result(checks, max(0, score))
def check_kg_retrieval_accuracy(self) -> Dict:
"""
Test KG retrieval accuracy with sample queries.
Returns:
Dict with retrieval accuracy metrics
"""
test_results = {
'tests_run': 0,
'tests_passed': 0,
'avg_precision': 0,
'avg_recall': 0,
'issues': []
}
# Sample test queries
test_queries = [
('research', 'research sessions'),
('project', 'project management'),
('user', 'user permissions'),
('system', 'system administration'),
]
import sqlite3
for query_term, query_desc in test_queries:
test_results['tests_run'] += 1
# Test each database
for db_path in self.KG_DB_PATHS:
if not Path(db_path).exists():
continue
try:
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
# Try basic query
cursor.execute(
"SELECT COUNT(*) FROM entities WHERE name LIKE ? OR content LIKE ?",
(f'%{query_term}%', f'%{query_term}%')
)
count = cursor.fetchone()[0]
if count > 0:
test_results['tests_passed'] += 1
except Exception as e:
test_results['issues'].append(f"Query error on {db_path}: {e}")
# Calculate accuracy
if test_results['tests_run'] > 0:
test_results['avg_precision'] = (test_results['tests_passed'] / test_results['tests_run']) * 100
# Assume good recall if precision is good
test_results['avg_recall'] = test_results['avg_precision']
return test_results
def generate_context_health_score(self) -> Dict:
"""
Generate comprehensive context system health score.
Returns:
Dict with overall context health
"""
vector_store = self.check_vector_store()
hybrid_retriever = self.check_hybrid_retriever()
semantic_router = self.check_semantic_router()
four_bucket = self.check_four_bucket_assembly()
retrieval_accuracy = self.check_kg_retrieval_accuracy()
# Weighted health score
overall_score = (
vector_store['health_score'] * 0.25 +
hybrid_retriever['health_score'] * 0.25 +
semantic_router['health_score'] * 0.20 +
four_bucket['health_score'] * 0.20 +
retrieval_accuracy.get('avg_precision', 70) * 0.10
)
all_issues = []
all_issues.extend(vector_store['checks']['issues'])
all_issues.extend(hybrid_retriever['checks']['issues'])
all_issues.extend(semantic_router['checks']['issues'])
all_issues.extend(four_bucket['checks']['issues'])
all_issues.extend(retrieval_accuracy['issues'])
return {
'overall_score': round(overall_score, 1),
'status': 'healthy' if overall_score >= 80 else 'degraded' if overall_score >= 60 else 'critical',
'component_scores': {
'vector_store': vector_store['health_score'],
'hybrid_retriever': hybrid_retriever['health_score'],
'semantic_router': semantic_router['health_score'],
'four_bucket_assembly': four_bucket['health_score'],
'retrieval_accuracy': retrieval_accuracy.get('avg_precision', 0)
},
'vector_store_embeddings': vector_store['checks'].get('embedding_count', 0),
'retrieval_tests_passed': retrieval_accuracy['tests_passed'],
'issues': all_issues,
'recommendations': self._generate_context_recommendations(overall_score, all_issues),
'timestamp': time.time()
}
def _package_health_result(self, checks: Dict, score: float) -> Dict:
"""Package health check results."""
return {
'checks': checks,
'health_score': round(score, 1),
'status': 'healthy' if score >= 80 else 'degraded' if score >= 60 else 'critical'
}
def _generate_context_recommendations(self, overall_score: float, issues: List[str]) -> List[str]:
    """Generate recommendations based on context health."""
    # NOTE(review): the original indentation was lost in transit; the two
    # generic recommendations below are reconstructed as unconditional.
    # Confirm against the canonical source whether they belong inside one
    # of the preceding if-blocks instead.
    recommendations = []
    if overall_score < 80:
        recommendations.append("[ATTENTION] Context system degraded: verify component integrity")
    if len(issues) > 0:
        recommendations.append(f"Address {len(issues)} detected issue(s)")
    recommendations.append("Run full context health check with --deep flag for component analysis")
    recommendations.append("Test context injection with sample queries to verify retrieval quality")
    return recommendations
if __name__ == '__main__':
    # CLI entry point: print a summary report of overall context health.
    checker = ContextHealthChecker()
    banner = "=" * 70
    print(banner)
    print("CONTEXT SYSTEM HEALTH")
    print(banner)

    health = checker.generate_context_health_score()
    print(f"Overall score: {health['overall_score']}/100 ({health['status'].upper()})")

    print(f"\nComponent scores:")
    for component, score in health['component_scores'].items():
        print(f" {component}: {score}/100")

    print(f"\nIssues found: {len(health['issues'])}")
    if health['issues']:
        # Show at most the first five issues to keep output compact.
        for issue in health['issues'][:5]:
            print(f" - {issue}")