#!/usr/bin/env python3
"""
Knowledge Graph Review Marker

Marks incomplete research sessions for manual review without auto-fixing.
Creates review queue entries and annotations in the KG database.
"""

import json
import time
from datetime import datetime
from pathlib import Path
from typing import List, Dict


class IncompleteResearchReviewMarker:
    """Mark incomplete research for manual review."""

    REVIEW_QUEUE_DIR = Path('/home/admin/conductor/review')
    REVIEW_TAG = 'needs_human_review'

    def __init__(self):
        """Initialize the review marker."""
        self.REVIEW_QUEUE_DIR.mkdir(parents=True, exist_ok=True)

    def mark_for_review(self, entity_id: str, entity_name: str, reason: str,
                        severity: str = 'medium', pattern: str = 'unknown') -> Dict:
        """
        Mark a single KG entity for review.

        Args:
            entity_id: KG entity ID
            entity_name: Human-readable entity name
            reason: Why this needs review (pattern type)
            severity: 'high' | 'medium' | 'low'
            pattern: Pattern type that triggered marking

        Returns:
            Dict with review_id, file_path, status
        """
        review_id = f"{entity_id}_review_{int(time.time())}"
        review_file = self.REVIEW_QUEUE_DIR / f"{review_id}.json"

        review_data = {
            'review_id': review_id,
            'entity_id': entity_id,
            'entity_name': entity_name,
            'pattern': pattern,
            'severity': severity,
            'reason': reason,
            'marked_at': datetime.now().isoformat(),
            'marked_by': 'kg_health_checker',
            'status': 'pending_review',
            'action_required': self._get_action_for_pattern(pattern),
            'notes': ''
        }

        review_file.write_text(json.dumps(review_data, indent=2))

        return {
            'review_id': review_id,
            'entity_id': entity_id,
            'file_path': str(review_file),
            'status': 'marked'
        }

    def mark_findings_batch(self, findings: List[Dict]) -> Dict:
        """
        Mark multiple findings for review.

        Args:
            findings: List of finding dicts from KGPatternDetector

        Returns:
            Dict with:
            - 'marked_count': Number of sessions marked
            - 'review_files': List of created files
            - 'summary': Breakdown by severity
        """
        marked = []
        review_files = []
        summary = {'high': 0, 'medium': 0, 'low': 0}

        for finding in findings:
            # Only mark KG database findings (not files)
            if finding['source'] != 'kg_database':
                continue

            severity = finding.get('severity', 'medium')
            result = self.mark_for_review(
                entity_id=finding['id'],
                entity_name=finding.get('name', 'unknown'),
                reason=f"Pattern: {finding['pattern']}",
                severity=severity,
                pattern=finding['pattern']
            )
            marked.append(result)
            review_files.append(result['file_path'])
            # Count by severity; tolerate values outside high/medium/low
            # instead of raising KeyError.
            summary[severity] = summary.get(severity, 0) + 1

        return {
            'marked_count': len(marked),
            'review_files': review_files,
            'summary': summary,
            'review_queue_path': str(self.REVIEW_QUEUE_DIR),
            'timestamp': time.time()
        }

    def create_review_queue(self, findings: List[Dict]) -> Dict:
        """
        Create review queue from findings (alias for mark_findings_batch).

        Args:
            findings: List of findings to create review queue for

        Returns:
            Review queue creation result
        """
        return self.mark_findings_batch(findings)

    def get_pending_reviews(self) -> List[Dict]:
        """Get list of all pending review items."""
        reviews = []

        if not self.REVIEW_QUEUE_DIR.exists():
            return reviews

        for review_file in self.REVIEW_QUEUE_DIR.glob('*_review_*.json'):
            try:
                data = json.loads(review_file.read_text())
                if data.get('status') == 'pending_review':
                    reviews.append(data)
            except Exception:
                # Skip unreadable or malformed review files
                pass

        # High-severity reviews sort first; the relative order of the rest is unchanged
        return sorted(reviews, key=lambda x: x.get('severity') == 'high', reverse=True)

    def mark_review_complete(self, review_id: str, resolution: str = '') -> Dict:
        """
        Mark a review as complete.

        Args:
            review_id: The review ID to complete
            resolution: What was done to resolve the issue

        Returns:
            Updated review data
        """
        review_file = self.REVIEW_QUEUE_DIR / f"{review_id}.json"

        if not review_file.exists():
            return {'status': 'error', 'message': f'Review {review_id} not found'}

        data = json.loads(review_file.read_text())
        data['status'] = 'completed'
        data['completed_at'] = datetime.now().isoformat()
        data['resolution'] = resolution

        review_file.write_text(json.dumps(data, indent=2))

        return {
            'status': 'completed',
            'review_id': review_id,
            'resolution': resolution
        }

    def get_review_stats(self) -> Dict:
        """Get statistics about review queue."""
        if not self.REVIEW_QUEUE_DIR.exists():
            return {
                'pending': 0,
                'completed': 0,
                'by_severity': {},
                'by_pattern': {}
            }

        pending = 0
        completed = 0
        by_severity = {}
        by_pattern = {}

        for review_file in self.REVIEW_QUEUE_DIR.glob('*_review_*.json'):
            try:
                data = json.loads(review_file.read_text())
                severity = data.get('severity', 'unknown')
                pattern = data.get('pattern', 'unknown')

                if data.get('status') == 'pending_review':
                    pending += 1
                elif data.get('status') == 'completed':
                    completed += 1

                by_severity[severity] = by_severity.get(severity, 0) + 1
                by_pattern[pattern] = by_pattern.get(pattern, 0) + 1
            except Exception:
                pass

        return {
            'pending': pending,
            'completed': completed,
            'total': pending + completed,
            'by_severity': by_severity,
            'by_pattern': by_pattern,
            'review_queue_path': str(self.REVIEW_QUEUE_DIR)
        }

    def _get_action_for_pattern(self, pattern: str) -> str:
        """Get recommended action for a pattern."""
        actions = {
            'unresolved_question': 'Resume research session with user feedback; complete final analysis',
            'incomplete_duration': 'Investigate why session ended early; may need deeper research',
            'claude_no_conclusion': 'Add missing conclusion/summary section to analysis',
            'unknown': 'Review and complete research session'
        }
        return actions.get(pattern, actions['unknown'])


if __name__ == '__main__':
    marker = IncompleteResearchReviewMarker()

    # Test: create sample review
    result = marker.mark_for_review(
        entity_id='research_12345',
        entity_name='Structured Repositories & Trusted Data Sources for AI Agents',
        reason='Session ended with unresolved user question',
        severity='high',
        pattern='unresolved_question'
    )
    print(f"Created review: {result['review_id']}")
    print(f"Review file: {result['file_path']}")

    # Show stats
    stats = marker.get_review_stats()
    print("\nReview queue stats:")
    print(f"  Pending: {stats['pending']}")
    print(f"  Completed: {stats['completed']}")
    print(f"  By severity: {stats['by_severity']}")
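
    # Minimal batch-marking sketch. The finding dict below is a hypothetical
    # example shaped to match what mark_findings_batch reads ('source', 'id',
    # 'name', 'pattern', 'severity'); the real KGPatternDetector output may
    # carry additional fields.
    sample_findings = [
        {
            'source': 'kg_database',
            'id': 'research_67890',
            'name': 'Example incomplete research session',
            'pattern': 'claude_no_conclusion',
            'severity': 'medium'
        }
    ]
    batch = marker.mark_findings_batch(sample_findings)
    print(f"\nBatch marked: {batch['marked_count']} (summary: {batch['summary']})")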