#!/usr/bin/env python3
"""
QA Postflight - Post-task validation and learning capture

Runs after each task completes to:
1. Validate task output quality
2. Detect common error patterns
3. Capture learnings for the knowledge graph
4. Generate QA report
"""

import json
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging

# Configure logging
log_dir = Path("/var/log/luz-orchestrator")
log_dir.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Paths
JOBS_DIR = Path("/var/log/luz-orchestrator/jobs")
QA_REPORTS_DIR = Path("/var/log/luz-orchestrator/qa-reports")
LEARNING_LOG = log_dir / "learning-captures.jsonl"
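
# Expected job directory layout (inferred from the readers in
# QAPostflight.validate_task below; the producer lives outside this file):
#   JOBS_DIR/<job_id>/output.log  - raw task output, which may include an "exit:N" marker
#   JOBS_DIR/<job_id>/meta.json   - {"project": "...", "task": "..."}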


class QAPostflight:
    """Post-task QA validation and learning capture."""

    # Error patterns to detect: (regex, error_type, severity)
    ERROR_PATTERNS = [
        (r"error:|Error:|ERROR:", "error_detected", "high"),
        (r"exception:|Exception:|EXCEPTION:", "exception_detected", "high"),
        (r"failed|Failed|FAILED", "failure_detected", "medium"),
        (r"permission denied|Permission denied", "permission_error", "high"),
        (r"not found|Not found|NOT FOUND", "not_found_error", "medium"),
        (r"timeout|Timeout|TIMEOUT", "timeout_error", "high"),
        (r"connection refused|Connection refused", "connection_error", "high"),
        (r"syntax error|SyntaxError", "syntax_error", "high"),
        (r"import error|ImportError|ModuleNotFoundError", "import_error", "high"),
        (r"GOOGLE_KEY not configured|API.*not configured", "config_error", "medium"),
    ]

    # Success patterns: (regex, success_type)
    SUCCESS_PATTERNS = [
        (r"completed successfully|task completed|done", "success_signal"),
        (r"tests? passed|all.*pass", "tests_passed"),
        (r"deployed|deployment.*success", "deployment_success"),
        (r"created|updated|fixed", "action_completed"),
    ]

    # Learning extraction patterns: (regex with one capture group, learning_type)
    LEARNING_PATTERNS = [
        (r"learned?:?\s*(.+?)(?:\n|$)", "explicit_learning"),
        (r"solution:?\s*(.+?)(?:\n|$)", "solution_found"),
        (r"fixed by:?\s*(.+?)(?:\n|$)", "fix_applied"),
        (r"root cause:?\s*(.+?)(?:\n|$)", "root_cause"),
        (r"workaround:?\s*(.+?)(?:\n|$)", "workaround"),
    ]

    def __init__(self):
        QA_REPORTS_DIR.mkdir(parents=True, exist_ok=True)

    def validate_task(self, job_id: str) -> Dict[str, Any]:
        """
        Run full postflight validation on a completed task.

        Returns a validation report with:
        - exit_code analysis
        - error detection
        - success signals
        - quality score
        - extracted learnings
        """
        job_dir = JOBS_DIR / job_id
        report = {
            "job_id": job_id,
            "timestamp": datetime.now().isoformat(),
            "validated": False,
            "exit_code": None,
            "quality_score": 0,
            "errors": [],
            "warnings": [],
            "successes": [],
            "learnings": [],
            "recommendations": [],
        }

        if not job_dir.exists():
            report["errors"].append(f"Job directory not found: {job_dir}")
            return report

        # Read output file
        output_file = job_dir / "output.log"
        output_content = ""
        if output_file.exists():
            try:
                output_content = output_file.read_text(errors='ignore')
            except Exception as e:
                report["warnings"].append(f"Could not read output: {e}")

        # Read metadata
        meta_file = job_dir / "meta.json"
        meta = {}
        if meta_file.exists():
            try:
                meta = json.loads(meta_file.read_text())
            except (OSError, json.JSONDecodeError) as e:
                report["warnings"].append(f"Could not read metadata: {e}")

        report["project"] = meta.get("project", "unknown")
        report["task"] = meta.get("task", "")[:200]

        # Extract exit code
        report["exit_code"] = self._extract_exit_code(output_content)

        # Run validations
        report["errors"] = self._detect_errors(output_content)
        report["successes"] = self._detect_successes(output_content)
        report["learnings"] = self._extract_learnings(output_content)

        # Calculate quality score
        report["quality_score"] = self._calculate_quality_score(report)

        # Generate recommendations
        report["recommendations"] = self._generate_recommendations(report)

        report["validated"] = True

        # Save report
        self._save_report(report)

        # Capture learnings
        if report["learnings"]:
            self._capture_learnings(report)

        return report

    def _extract_exit_code(self, content: str) -> Optional[int]:
        """Extract exit code from output."""
        match = re.search(r'exit:(\d+)', content)
        if match:
            return int(match.group(1))
        return None

    def _detect_errors(self, content: str) -> List[Dict[str, Any]]:
        """Detect error patterns in output."""
        errors = []
        for pattern, error_type, severity in self.ERROR_PATTERNS:
            matches = re.findall(pattern, content, re.IGNORECASE)
            if matches:
                # Get context around first match
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    start = max(0, match.start() - 50)
                    end = min(len(content), match.end() + 100)
                    context = content[start:end].strip()
                    errors.append({
                        "type": error_type,
                        "severity": severity,
                        "count": len(matches),
                        "context": context[:200],
                    })
        return errors

    def _detect_successes(self, content: str) -> List[Dict[str, str]]:
        """Detect success patterns in output."""
        successes = []
        for pattern, success_type in self.SUCCESS_PATTERNS:
            if re.search(pattern, content, re.IGNORECASE):
                successes.append({"type": success_type})
        return successes

    def _extract_learnings(self, content: str) -> List[Dict[str, str]]:
        """Extract learnings from output."""
        learnings = []
        for pattern, learning_type in self.LEARNING_PATTERNS:
            matches = re.findall(pattern, content, re.IGNORECASE)
            for match in matches:
                if len(match.strip()) > 10:  # Filter noise
                    learnings.append({
                        "type": learning_type,
                        "content": match.strip()[:500],
                    })
        return learnings

    def _calculate_quality_score(self, report: Dict) -> int:
        """Calculate quality score 0-100."""
        score = 50  # Base score

        # Exit code impact
        if report["exit_code"] == 0:
            score += 30
        elif report["exit_code"] is not None:
            score -= 20

        # Error impact
        for error in report["errors"]:
            if error["severity"] == "high":
                score -= 15
            elif error["severity"] == "medium":
                score -= 8

        # Success signals boost
        score += len(report["successes"]) * 5

        # Learnings boost (shows reflection)
        score += len(report["learnings"]) * 3

        return max(0, min(100, score))
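
    # Worked scoring example (hypothetical report): exit code 0 (+30), one
    # medium-severity error (-8), two success signals (+10), one learning (+3)
    # gives 50 + 30 - 8 + 10 + 3 = 85, inside the clamped 0-100 range.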

    def _generate_recommendations(self, report: Dict) -> List[str]:
        """Generate actionable recommendations."""
        recs = []

        if report["exit_code"] is not None and report["exit_code"] != 0:
            recs.append("Task failed - review error logs and consider retry")

        for error in report["errors"]:
            if error["type"] == "config_error":
                recs.append("Configuration error detected - check environment variables")
            elif error["type"] == "permission_error":
                recs.append("Permission issue - verify file ownership and access rights")
            elif error["type"] == "timeout_error":
                recs.append("Timeout occurred - consider increasing timeout or optimizing task")
            elif error["type"] == "import_error":
                recs.append("Import error - check dependencies are installed")

        if report["quality_score"] < 50:
            recs.append("Low quality score - task may need review or retry")

        if not report["learnings"]:
            recs.append("No learnings captured - consider documenting key insights")

        return recs

    def _save_report(self, report: Dict):
        """Save QA report to file."""
        report_file = QA_REPORTS_DIR / f"{report['job_id']}.json"
        try:
            with open(report_file, 'w') as f:
                json.dump(report, f, indent=2, default=str)
            logger.info(f"QA report saved: {report_file}")
        except Exception as e:
            logger.error(f"Failed to save QA report: {e}")

    def _capture_learnings(self, report: Dict):
        """Capture learnings to learning log."""
        try:
            with open(LEARNING_LOG, 'a') as f:
                for learning in report["learnings"]:
                    entry = {
                        "timestamp": report["timestamp"],
                        "job_id": report["job_id"],
                        "project": report["project"],
                        "type": learning["type"],
                        "content": learning["content"],
                        "quality_score": report["quality_score"],
                    }
                    f.write(json.dumps(entry) + "\n")
            logger.info(f"Captured {len(report['learnings'])} learnings from {report['job_id']}")
        except Exception as e:
            logger.error(f"Failed to capture learnings: {e}")
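
# Minimal usage sketch (hypothetical job id; assumes a job directory with
# output.log and meta.json already exists under JOBS_DIR):
#
#   qa = QAPostflight()
#   report = qa.validate_task("3f2a9c1d")
#   print(report["quality_score"], report["recommendations"])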


class PerTaskLearning:
    """Per-task learning capture and KG integration."""

    def __init__(self):
        self.kg_path = Path("/etc/luz-knowledge/research.db")

    def capture_task_learning(self, job_id: str, report: Dict) -> Dict[str, Any]:
        """
        Capture learnings from task and store in KG.

        Extracts:
        - Solutions found
        - Errors resolved
        - Patterns discovered
        - Tools/commands used
        """
        result = {
            "job_id": job_id,
            "learnings_stored": 0,
            "relations_created": 0,
        }

        if not report.get("learnings"):
            return result

        # Try to store in KG
        try:
            from knowledge_graph import KnowledgeGraph
            kg = KnowledgeGraph("research")

            for learning in report["learnings"]:
                # Create learning entity
                entity_name = f"learning_{job_id}_{learning['type']}"
                content = f"""
Project: {report.get('project', 'unknown')}
Task: {report.get('task', '')[:100]}
Type: {learning['type']}
Learning: {learning['content']}
Quality Score: {report.get('quality_score', 0)}
"""
                kg.add_entity(
                    name=entity_name,
                    entity_type="finding",
                    content=content,
                    metadata={
                        "job_id": job_id,
                        "project": report.get("project"),
                        "learning_type": learning["type"],
                        "quality_score": report.get("quality_score", 0),
                    },
                    source=f"job:{job_id}"
                )
                result["learnings_stored"] += 1

                # Create relation to project if it exists
                project = report.get("project")
                if project:
                    try:
                        kg.add_relation(entity_name, project, "learned_from")
                        result["relations_created"] += 1
                    except Exception:
                        # Relation is best-effort; the project entity may not exist
                        pass

            logger.info(f"Stored {result['learnings_stored']} learnings in KG for {job_id}")

        except ImportError:
            logger.warning("KnowledgeGraph not available - learnings stored to log only")
        except Exception as e:
            logger.error(f"Failed to store learnings in KG: {e}")

        return result


def _send_telegram_notification(report: Dict[str, Any]) -> bool:
    """
    Send telegram notification for important task completions.

    Notifies for:
    - Task failures (exit_code != 0)
    - Low quality score (<50)
    - High severity errors
    """
    try:
        # Import telegram bridge
        import sys
        sys.path.insert(0, str(Path(__file__).parent))
        from telegram_bridge import notify_bruno as send_notification

        job_id = report.get("job_id", "unknown")[:8]
        project = report.get("project", "luzia")
        exit_code = report.get("exit_code")
        quality = report.get("quality_score", 0)

        # Determine if a notification is needed and its severity
        should_notify = False
        severity = "info"
        message = ""

        # Task failure
        if exit_code is not None and exit_code != 0:
            should_notify = True
            severity = "critical" if exit_code in [126, 137, 254] else "warning"
            message = f"Task `{job_id}` failed (exit {exit_code})"

        # Low quality score
        elif quality < 50:
            should_notify = True
            severity = "warning"
            message = f"Task `{job_id}` low quality ({quality}/100)"

        # High severity errors detected
        elif any(e.get("severity") == "high" for e in report.get("errors", [])):
            should_notify = True
            severity = "warning"
            high_errors = [e["type"] for e in report.get("errors", [])
                           if e.get("severity") == "high"]
            message = f"Task `{job_id}` errors: {', '.join(high_errors[:3])}"

        # Success with learnings (optional notification)
        elif exit_code == 0 and report.get("learnings"):
            # Only notify on success if there are significant learnings
            if len(report.get("learnings", [])) >= 2:
                should_notify = True
                severity = "info"
                message = f"Task `{job_id}` completed with {len(report['learnings'])} learnings"

        if should_notify:
            send_notification(message, project, job_id, severity)
            logger.info(f"Telegram notification sent for {job_id}")
            return True

    except ImportError:
        logger.debug("Telegram bridge not available - notification skipped")
    except Exception as e:
        logger.warning(f"Failed to send telegram notification: {e}")

    return False
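
# Severity mapping sketch (hypothetical values, following the branches above):
#   exit 137                              -> critical  (in the [126, 137, 254] set)
#   exit 1                                -> warning
#   quality 42/100 (exit 0 or no marker)  -> warning
#   exit 0 with 2+ learnings              -> info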


def run_postflight(job_id: str) -> Dict[str, Any]:
    """
    Main entry point for postflight validation.

    Called after task completion to:
    1. Validate output quality
    2. Extract and store learnings
    3. Generate QA report
    4. Send telegram notification for important events
    """
    logger.info(f"Running postflight for job: {job_id}")

    qa = QAPostflight()
    report = qa.validate_task(job_id)

    # Per-task learning
    learning = PerTaskLearning()
    learning_result = learning.capture_task_learning(job_id, report)
    report["learning_result"] = learning_result

    # Send telegram notification for important events
    report["telegram_notified"] = _send_telegram_notification(report)

    # Log summary
    logger.info(
        f"Postflight complete for {job_id}: "
        f"score={report['quality_score']}, "
        f"errors={len(report['errors'])}, "
        f"learnings={len(report['learnings'])}, "
        f"notified={report.get('telegram_notified', False)}"
    )

    return report


# CLI interface
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: qa_postflight.py <job_id>")
        print("       qa_postflight.py --recent [count]")
        sys.exit(1)

    if sys.argv[1] == "--recent":
        # Run postflight on the most recently modified jobs
        count = int(sys.argv[2]) if len(sys.argv) > 2 else 5
        job_dirs = sorted(
            (p for p in JOBS_DIR.iterdir() if p.is_dir()),
            key=lambda x: x.stat().st_mtime,
            reverse=True,
        )[:count]
        for job_dir in job_dirs:
            job_id = job_dir.name
            print(f"\n=== Postflight: {job_id} ===")
            report = run_postflight(job_id)
            print(f"  Score: {report['quality_score']}/100")
            print(f"  Errors: {len(report['errors'])}")
            print(f"  Learnings: {len(report['learnings'])}")
            if report['recommendations']:
                print("  Recommendations:")
                for rec in report['recommendations'][:3]:
                    print(f"    - {rec}")
    else:
        job_id = sys.argv[1]
        report = run_postflight(job_id)
        print(json.dumps(report, indent=2, default=str))
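
# Example invocations (hypothetical job id):
#   ./qa_postflight.py 3f2a9c1d      # validate one job, print the full JSON report
#   ./qa_postflight.py --recent 10   # summarize the 10 most recently modified jobs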