Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds a DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
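For reference, a minimal sketch of the controller interface this commit describes. Only the method names and their one-line semantics come from the message above; the constructor, signatures, delay_enter semantics, and the docker/tmux plumbing are illustrative assumptions, not the committed implementation:

import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Sketch of the controller described above (illustrative, not the committed code)."""

    def __init__(self, container: str, session: str):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux command inside the container via docker exec.
        cmd = ["docker", "exec", self.container, "tmux", *args]
        return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type keys into the session; optionally pause before pressing Enter
        # (one plausible reading of "configurable delay_enter").
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Retrieve the current pane contents.
        return self._tmux("capture-pane", "-p", "-t", self.session)

    def wait_for_prompt(self, pattern: str, timeout: float = 30.0, interval: float = 0.5) -> bool:
        # Pattern-based completion detection: poll until `pattern` appears.
        # wait_for_shell_prompt() would follow the same polling shape.
        deadline = time.time() + timeout
        while time.time() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(interval)
        return False

    def wait_for_idle(self, interval: float = 1.0, stable_checks: int = 3) -> None:
        # Content-hash-based idle detection: the pane counts as idle once its
        # hash is unchanged for `stable_checks` consecutive polls.
        last, stable = None, 0
        while stable < stable_checks:
            h = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            stable = stable + 1 if h == last else 0
            last = h
            time.sleep(interval)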
lib/prompt_techniques.py (new file, 589 lines)

@@ -0,0 +1,589 @@
#!/usr/bin/env python3
"""
Advanced Prompt Augmentation Techniques

Implements recent research on prompt engineering for different task types:

1. Chain-of-Thought (CoT): Step-by-step reasoning
2. Few-Shot Learning: Examples for task clarification
3. Role-Based Prompting: Set a specific persona/expertise
4. System Prompts: Foundational constraints and guidelines
5. Context Hierarchies: Prioritized context levels
6. Task-Specific Patterns: Domain-optimized prompt structures
7. Dynamic Difficulty: Adaptive prompting based on complexity

References:
- Wei et al. (2022): Chain-of-Thought Prompting Elicits Reasoning in Large Language Models
- Brown et al. (2020): Language Models are Few-Shot Learners
- Kojima et al. (2022): Large Language Models are Zero-Shot Reasoners
- Reynolds & McDonell (2021): Prompt Programming for Large Language Models
- Jiang et al. (2020): How Can We Know What Language Models Know?
"""

import json
from typing import Dict, List, Optional, Any, Tuple
from enum import Enum
from dataclasses import dataclass, asdict
from datetime import datetime


class TaskType(Enum):
    """Enumeration of task types with specific augmentation strategies"""
    ANALYSIS = "analysis"
    DEBUGGING = "debugging"
    IMPLEMENTATION = "implementation"
    RESEARCH = "research"
    REFACTORING = "refactoring"
    PLANNING = "planning"
    REVIEW = "review"
    OPTIMIZATION = "optimization"
    TESTING = "testing"
    DOCUMENTATION = "documentation"
    SECURITY = "security"


class PromptStrategy(Enum):
    """Prompt augmentation strategies"""
    CHAIN_OF_THOUGHT = "chain_of_thought"
    FEW_SHOT = "few_shot"
    ROLE_BASED = "role_based"
    SYSTEM_INSTRUCTION = "system_instruction"
    TREE_OF_THOUGHT = "tree_of_thought"
    SELF_CONSISTENCY = "self_consistency"
    UNCERTAINTY_AWARE = "uncertainty_aware"


@dataclass
class PromptContext:
    """Structured prompt context with priority levels"""
    task_type: TaskType
    primary: Dict[str, str]         # Highest priority - directly relevant
    secondary: Dict[str, str]       # Important context
    tertiary: Dict[str, str]        # Nice to have
    examples: List[Dict[str, str]]  # Few-shot examples
    role: str = "Expert Analyst"
    complexity_level: int = 1       # 1-5 scale
    timestamp: Optional[str] = None

    def __post_init__(self):
        if self.timestamp is None:
            self.timestamp = datetime.now().isoformat()
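

def _example_prompt_context() -> PromptContext:
    # Illustrative construction sketch (hypothetical values, not from the
    # original commit): a debugging context at mid complexity.
    return PromptContext(
        task_type=TaskType.DEBUGGING,
        primary={"error": "TimeoutError in session handler"},
        secondary={"recent_change": "upgraded tmux wrapper"},
        tertiary={},
        examples=[],
        complexity_level=3,
    )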


class ChainOfThoughtEngine:
    """Implements Chain-of-Thought prompting for complex reasoning"""

    @staticmethod
    def generate_cot_prompt(task: str, complexity: int = 1) -> str:
        """
        Generate a Chain-of-Thought prompt encouraging step-by-step reasoning.

        Args:
            task: The task to decompose
            complexity: 1-5 scale for reasoning depth
        """
        depth = max(1, min(complexity, 5))  # Clamp to the documented 1-5 scale
        num_steps = 3 + depth

        prompt = f"""Please solve this step-by-step:

{task}

**Your Reasoning Process:**
Think through this problem systematically. Break it into {num_steps} logical steps:

"""
        for i in range(1, num_steps + 1):
            prompt += f"Step {i}: [What is logical component #{i} of this problem?]\n"

        prompt += """
After completing each step, briefly verify your logic before moving to the next.
Explicitly state any assumptions you're making.
If you encounter conflicting information, address it directly.

**Final Answer:**
Summarize your complete solution, clearly showing how each step led to your conclusion."""

        return prompt

    @staticmethod
    def generate_subquestion_cot(task: str, context: str = "") -> str:
        """
        Generate intermediate question-based Chain-of-Thought.

        Breaks complex problems into subquestions to improve reasoning.
        Reference: Wei et al. (2022), Kojima et al. (2022)
        """
        prompt = f"""Break down and answer this problem systematically:

**Main Question:**
{task}

{f"**Context:**{chr(10)}{context}" if context else ""}

**Approach:**
1. Identify the key subquestions that must be answered to solve the main problem
2. Answer each subquestion with clear reasoning
3. Combine the answers into a comprehensive solution
4. Verify your solution makes sense in context

Please work through this methodically, showing your thinking at each stage."""

        return prompt
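

def _example_chain_of_thought() -> str:
    # Illustrative usage sketch (hypothetical task text): complexity=3 yields
    # a 6-step reasoning scaffold, since num_steps = 3 + depth.
    return ChainOfThoughtEngine.generate_cot_prompt(
        "Why does the login endpoint return 500 under concurrent load?",
        complexity=3,
    )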


class FewShotExampleBuilder:
    """Constructs few-shot examples for better task understanding"""

    @staticmethod
    def build_examples_for_task(task_type: TaskType, num_examples: int = 3) -> List[Dict[str, str]]:
        """Build task-specific few-shot examples"""
        examples_library = {
            TaskType.ANALYSIS: [
                {
                    "input": "Analyze the performance bottleneck in this authentication flow",
                    "approach": "1) Identify critical path, 2) Measure time per step, 3) Locate worst performer, 4) Suggest optimization",
                    "output_structure": "Current bottleneck: [X]. Root cause: [Y]. Recommended fix: [Z]. Expected improvement: [percentage]%"
                },
                {
                    "input": "Analyze security implications of storing user tokens in localStorage",
                    "approach": "1) Enumerate attack vectors, 2) Assess likelihood and impact, 3) Compare to alternatives, 4) Make recommendation",
                    "output_structure": "Risks: [list]. Severity: [high/medium/low]. Better approach: [X]. Implementation effort: [Y]"
                },
                {
                    "input": "Analyze code complexity and maintainability of this module",
                    "approach": "1) Calculate metrics (cyclomatic, cognitive), 2) Identify problematic patterns, 3) Assess testing difficulty, 4) Recommend refactoring",
                    "output_structure": "Complexity score: [X]/10. Hot spots: [list]. Refactoring priority: [high/medium/low]"
                }
            ],
            TaskType.DEBUGGING: [
                {
                    "input": "Fix intermittent race condition in async handler",
                    "approach": "1) Understand race condition mechanics, 2) Create minimal reproducible case, 3) Identify ordering issue, 4) Add synchronization, 5) Test thoroughly",
                    "output_structure": "Root cause: [X]. Fix location: [file:line]. Change: [code diff]. Test strategy: [steps]"
                },
                {
                    "input": "Debug memory leak in event listeners",
                    "approach": "1) Profile memory usage, 2) Identify growth pattern, 3) Find leaked references, 4) Add missing cleanup, 5) Verify fix",
                    "output_structure": "Leak type: [X]. Source: [component]. Fix: [cleanup code]. Verification: [test approach]"
                },
                {
                    "input": "Fix undefined behavior in concurrent map access",
                    "approach": "1) Reproduce concurrency issue, 2) Find synchronization gap, 3) Add proper locking, 4) Test with concurrent load",
                    "output_structure": "Issue: [X]. Cause: [Y]. Fix: [locking mechanism]. Verification: [concurrency test]"
                }
            ],
            TaskType.IMPLEMENTATION: [
                {
                    "input": "Implement rate limiting for API endpoint",
                    "approach": "1) Define strategy (sliding window/token bucket), 2) Choose storage (in-memory/redis), 3) Implement core logic, 4) Add tests",
                    "output_structure": "Strategy: [X]. Storage: [Y]. Key metrics tracked: [list]. Test coverage: [percentage]%"
                },
                {
                    "input": "Add caching layer to database queries",
                    "approach": "1) Identify hot queries, 2) Choose cache (redis/memcached), 3) Set TTL strategy, 4) Handle invalidation, 5) Monitor hit rate",
                    "output_structure": "Cache strategy: [X]. Expected hit rate: [Y]%. Hit cost: [Z]ms. Invalidation: [method]"
                },
                {
                    "input": "Implement graceful shutdown with in-flight request handling",
                    "approach": "1) Define shutdown signal handling, 2) Stop accepting new requests, 3) Wait for in-flight requests, 4) Time out and force quit",
                    "output_structure": "Signal handling: [X]. Timeout: [Y]s. Graceful drain: [code]. Forced quit: [code]"
                }
            ],
            TaskType.REFACTORING: [
                {
                    "input": "Reduce cognitive complexity in 500-line function",
                    "approach": "1) Map control flow, 2) Extract conditional branches, 3) Create helper methods, 4) Test each change, 5) Verify coverage",
                    "output_structure": "Original complexity: [X]. Target: [Y]. Extracted methods: [list]. Final complexity: [Z]"
                }
            ],
            TaskType.TESTING: [
                {
                    "input": "Write comprehensive tests for authentication module",
                    "approach": "1) Identify happy path, 2) List edge cases, 3) Test error conditions, 4) Add integration tests, 5) Measure coverage",
                    "output_structure": "Test count: [X]. Coverage: [Y]%. Critical paths: [Z]. Integration tests: [list]"
                }
            ]
        }

        examples = examples_library.get(task_type, [])
        return examples[:num_examples]

    @staticmethod
    def format_examples_for_prompt(examples: List[Dict[str, str]]) -> str:
        """Format examples into prompt text"""
        if not examples:
            return ""

        formatted = "\n**Examples of this task type:**\n\n"
        for i, example in enumerate(examples, 1):
            formatted += f"Example {i}:\n"
            formatted += f"- Input: {example.get('input', 'N/A')}\n"
            formatted += f"- Approach: {example.get('approach', 'N/A')}\n"
            formatted += f"- Output structure: {example.get('output_structure', 'N/A')}\n\n"

        return formatted
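

def _example_few_shot_block() -> str:
    # Illustrative usage sketch: fetch two debugging examples from the
    # library above and format them for inclusion in a prompt.
    examples = FewShotExampleBuilder.build_examples_for_task(TaskType.DEBUGGING, num_examples=2)
    return FewShotExampleBuilder.format_examples_for_prompt(examples)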


class RoleBasedPrompting:
    """Role-based prompting for expertise-specific responses"""

    ROLES = {
        TaskType.ANALYSIS: {
            "role": "Expert Systems Analyst",
            "expertise": "Systems performance, architecture, and optimization",
            "constraints": "Provide quantifiable metrics and data-driven insights"
        },
        TaskType.DEBUGGING: {
            "role": "Expert Debugger",
            "expertise": "Root cause analysis, system behavior, and edge cases",
            "constraints": "Always consider concurrency, timing, and resource issues"
        },
        TaskType.IMPLEMENTATION: {
            "role": "Senior Software Engineer",
            "expertise": "Production-quality code, maintainability, and scalability",
            "constraints": "Write defensive code with error handling and clear design"
        },
        TaskType.SECURITY: {
            "role": "Security Researcher",
            "expertise": "Threat modeling, vulnerability analysis, and secure design",
            "constraints": "Assume adversarial input and verify all assumptions"
        },
        TaskType.RESEARCH: {
            "role": "Research Scientist",
            "expertise": "Literature review, systematic analysis, and knowledge synthesis",
            "constraints": "Cite sources and distinguish between established facts and speculation"
        },
        TaskType.PLANNING: {
            "role": "Project Architect",
            "expertise": "System design, risk assessment, and strategic planning",
            "constraints": "Consider dependencies, timeline, and team constraints"
        },
        TaskType.REVIEW: {
            "role": "Code Reviewer",
            "expertise": "Code quality, best practices, and maintainability",
            "constraints": "Focus on correctness, readability, and adherence to standards"
        },
        TaskType.OPTIMIZATION: {
            "role": "Performance Engineer",
            "expertise": "Performance bottlenecks, optimization techniques, and profiling",
            "constraints": "Measure before and after, prioritize high-impact improvements"
        }
    }

    @staticmethod
    def get_role_prompt(task_type: TaskType) -> str:
        """Generate role-based system prompt"""
        role_info = RoleBasedPrompting.ROLES.get(
            task_type,
            RoleBasedPrompting.ROLES[TaskType.ANALYSIS]  # Default role
        )

        return f"""You are a {role_info['role']} with expertise in {role_info['expertise']}.

Your responsibilities:
- Provide expert-level analysis and solutions
- Apply industry best practices consistently
- Question assumptions and verify conclusions
- Explain your reasoning clearly

Key constraint: {role_info['constraints']}

Maintain this expertise level throughout your response."""
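

def _example_role_prompt() -> str:
    # Illustrative usage sketch: SECURITY maps to the "Security Researcher"
    # entry in ROLES; unknown task types fall back to the ANALYSIS role.
    return RoleBasedPrompting.get_role_prompt(TaskType.SECURITY)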


class ContextHierarchy:
    """Manages hierarchical context with priority-based injection"""

    def __init__(self):
        self.context_levels = {
            "critical": [],  # Must always include
            "high": [],      # Include unless very constrained
            "medium": [],    # Include if space allows
            "low": [],       # Include only with extra space
        }

    def add_context(self, level: str, context: str) -> None:
        """Add context at specified priority level"""
        if level in self.context_levels:
            self.context_levels[level].append(context)

    def build_hierarchical_context(self, max_tokens: int = 2000) -> str:
        """Build context respecting hierarchy and token budget"""
        context_str = ""
        token_count = 0
        target_tokens = int(max_tokens * 0.8)  # Leave room for the task

        # Always include critical context, regardless of budget
        for item in self.context_levels["critical"]:
            context_str += item + "\n\n"
            token_count += len(item.split())

        # Include lower tiers in priority order while the budget allows
        for level in ("high", "medium", "low"):
            for item in self.context_levels[level]:
                item_tokens = len(item.split())
                if token_count + item_tokens < target_tokens:
                    context_str += item + "\n\n"
                    token_count += item_tokens

        return context_str.strip()
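

def _example_hierarchy() -> str:
    # Illustrative usage sketch (hypothetical context strings): critical
    # context is always included; lower tiers only while the budget allows.
    hierarchy = ContextHierarchy()
    hierarchy.add_context("critical", "Production incident: auth service down.")
    hierarchy.add_context("high", "Deploy 42 rolled out 10 minutes before the outage.")
    hierarchy.add_context("low", "Team prefers rollbacks over hotfixes.")
    return hierarchy.build_hierarchical_context(max_tokens=500)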


class TaskSpecificPatterns:
    """Task-specific prompt patterns optimized for different domains"""

    @staticmethod
    def get_analysis_pattern(topic: str, focus_areas: List[str], depth: str = "comprehensive") -> str:
        """Optimized pattern for analysis tasks"""
        return f"""# Analysis Task: {topic}

## Objective
Provide a {depth} analysis focusing on:
{chr(10).join(f'- {area}' for area in focus_areas)}

## Analysis Framework
1. **Current State**: Describe what exists now
2. **Key Metrics**: Quantify important aspects
3. **Issues/Gaps**: Identify problems and gaps
4. **Root Causes**: Explain why issues exist
5. **Opportunities**: What could improve
6. **Risk Assessment**: Potential downsides
7. **Recommendations**: Specific, actionable next steps

## Output Requirements
- Use concrete data and examples
- Prioritize findings by impact
- Distinguish facts from interpretations
- Provide confidence levels
- Include supporting evidence"""

    @staticmethod
    def get_debugging_pattern(symptom: str, affected_component: str, severity: str = "high") -> str:
        """Optimized pattern for debugging tasks"""
        return f"""# Debugging Task: {affected_component}

## Symptom
{symptom}

## Severity: {severity}

## Systematic Debugging Approach
1. **Understand the Failure**: What goes wrong? When? Under what conditions?
2. **Boundary Testing**: What works? What doesn't? Where's the boundary?
3. **Hypothesis Formation**: What could cause this?
4. **Evidence Gathering**: What would confirm/refute each hypothesis?
5. **Root Cause Identification**: Which hypothesis is correct?
6. **Solution Verification**: Test the fix thoroughly
7. **Prevention**: How to prevent recurrence?

## Investigation Priorities
- Reproducibility: Can we reliably trigger the issue?
- Isolation: What's the minimal failing case?
- Impact Scope: What systems are affected?
- Concurrency: Are timing/ordering factors involved?

## Output Requirements
- Root cause with high confidence
- Minimal reproducible test case
- Proposed fix with rationale
- Verification strategy
- Regression prevention measures"""

    @staticmethod
    def get_implementation_pattern(feature: str, requirements: List[str],
                                   constraints: Optional[List[str]] = None) -> str:
        """Optimized pattern for implementation tasks"""
        constraints = constraints or []
        return f"""# Implementation Task: {feature}

## Requirements
{chr(10).join(f'- {req}' for req in requirements)}

{f"## Constraints{chr(10)}{chr(10).join(f'- {c}' for c in constraints)}" if constraints else ""}

## Implementation Strategy
1. **Design Phase**: Architecture, interfaces, design patterns
2. **Implementation Phase**: Code, error handling, documentation
3. **Testing Phase**: Unit, integration, edge case testing
4. **Integration Phase**: How it fits with existing code
5. **Deployment Phase**: Rollout strategy and monitoring

## Code Quality Requirements
- Error handling for all failure modes
- Clear, self-documenting code
- No external dependencies without justification
- Performance within acceptable bounds
- Security reviewed for input validation

## Testing Requirements
- Unit test coverage for core logic
- Edge case and error path testing
- Integration tests with dependent systems
- Performance/load testing if applicable

## Output Deliverables
1. Detailed implementation plan
2. Complete code implementation
3. Comprehensive test suite
4. Documentation updates
5. Deployment considerations and rollout plan"""

    @staticmethod
    def get_planning_pattern(objective: str, scope: str, constraints: Optional[List[str]] = None) -> str:
        """Optimized pattern for planning tasks"""
        constraints = constraints or []
        return f"""# Planning Task: {objective}

## Scope
{scope}

{f"## Constraints{chr(10)}{chr(10).join(f'- {c}' for c in constraints)}" if constraints else ""}

## Planning Framework
1. **Goal Clarity**: What are we trying to achieve?
2. **Success Criteria**: How will we know we succeeded?
3. **Resource Analysis**: What's needed (people, tools, time)?
4. **Dependency Mapping**: What must happen in order?
5. **Risk Assessment**: What could go wrong?
6. **Contingency Planning**: How to handle risks?
7. **Communication Plan**: How to keep stakeholders informed?

## Output Requirements
- Clear, prioritized action items
- Realistic milestones and dependencies
- Risk assessment with mitigation strategies
- Resource and timeline estimates
- Success metrics and validation approach"""
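

def _example_debugging_pattern() -> str:
    # Illustrative usage sketch (hypothetical symptom/component values):
    return TaskSpecificPatterns.get_debugging_pattern(
        symptom="Session output truncated after 30s of inactivity",
        affected_component="DockerTmuxController.capture_pane",
        severity="medium",
    )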


class PromptEngineer:
    """Main orchestrator for advanced prompt engineering"""

    def __init__(self):
        self.cot_engine = ChainOfThoughtEngine()
        self.few_shot = FewShotExampleBuilder()
        self.role_prompter = RoleBasedPrompting()
        self.patterns = TaskSpecificPatterns()

    def engineer_prompt(self,
                        task: str,
                        task_type: TaskType,
                        strategies: Optional[List[PromptStrategy]] = None,
                        context: Optional[PromptContext] = None,
                        max_prompt_length: int = 3000) -> Tuple[str, Dict[str, Any]]:
        """
        Engineer an optimized prompt combining multiple strategies.

        Returns:
            (engineered_prompt, metadata)
        """
        strategies = strategies or [
            PromptStrategy.SYSTEM_INSTRUCTION,
            PromptStrategy.CHAIN_OF_THOUGHT,
            PromptStrategy.FEW_SHOT,
            PromptStrategy.ROLE_BASED
        ]

        sections = []
        metadata = {
            "task_type": task_type.value,
            "strategies_used": [s.value for s in strategies],
            "estimated_tokens": 0
        }

        # 1. System instruction
        if PromptStrategy.SYSTEM_INSTRUCTION in strategies:
            system_prompt = f"""You are an expert at solving {task_type.value} problems.
Apply best practices, think step-by-step, and provide clear explanations."""
            sections.append(("## System Instructions", system_prompt))

        # 2. Role-based prompt
        if PromptStrategy.ROLE_BASED in strategies:
            role_prompt = self.role_prompter.get_role_prompt(task_type)
            sections.append(("## Your Role & Expertise", role_prompt))

        # 3. Task-specific pattern
        task_pattern = self._get_task_pattern(task, task_type)
        if task_pattern:
            sections.append(("## Task Structure", task_pattern))

        # 4. Few-shot examples
        if PromptStrategy.FEW_SHOT in strategies:
            examples = self.few_shot.build_examples_for_task(task_type, num_examples=2)
            if examples:
                examples_text = self.few_shot.format_examples_for_prompt(examples)
                sections.append(("## Learning from Examples", examples_text))

        # 5. Chain-of-thought prompting
        if PromptStrategy.CHAIN_OF_THOUGHT in strategies:
            complexity = context.complexity_level if context else 1
            cot_prompt = self.cot_engine.generate_cot_prompt(task, complexity)
            sections.append(("## Reasoning Process", cot_prompt))

        # 6. The actual task
        sections.append(("## Your Task", f"Execute: {task}"))

        # Build final prompt
        final_prompt = "\n\n".join(f"{title}\n{content}" for title, content in sections)

        # Calculate metadata (word count as a rough token estimate)
        metadata["estimated_tokens"] = len(final_prompt.split())

        return final_prompt, metadata

    def _get_task_pattern(self, task: str, task_type: TaskType) -> Optional[str]:
        """Get task-specific pattern based on task type"""
        patterns = {
            TaskType.ANALYSIS: lambda: self.patterns.get_analysis_pattern(
                "Analysis", ["Key findings", "Implications", "Recommendations"]
            ),
            TaskType.DEBUGGING: lambda: self.patterns.get_debugging_pattern(
                task, "System", "high"
            ),
            TaskType.IMPLEMENTATION: lambda: self.patterns.get_implementation_pattern(
                task, ["Functional requirements", "Non-functional requirements"]
            ),
            TaskType.PLANNING: lambda: self.patterns.get_planning_pattern(
                task, "Comprehensive planning"
            ),
        }

        pattern_func = patterns.get(task_type)
        return pattern_func() if pattern_func else None

    def suggest_strategies(self, task_type: TaskType, complexity: int = 1) -> List[PromptStrategy]:
        """Suggest strategies based on task type and complexity"""
        base_strategies = [
            PromptStrategy.SYSTEM_INSTRUCTION,
            PromptStrategy.ROLE_BASED,
        ]

        if complexity >= 2:
            base_strategies.append(PromptStrategy.CHAIN_OF_THOUGHT)
        if complexity >= 3:
            base_strategies.append(PromptStrategy.FEW_SHOT)
        if complexity >= 4:
            base_strategies.append(PromptStrategy.TREE_OF_THOUGHT)

        return base_strategies
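

def _example_engineer_prompt() -> Tuple[str, Dict[str, Any]]:
    # Illustrative usage sketch (hypothetical task text): let
    # suggest_strategies() pick strategies for the complexity level,
    # then build the combined prompt.
    engineer = PromptEngineer()
    strategies = engineer.suggest_strategies(TaskType.IMPLEMENTATION, complexity=3)
    return engineer.engineer_prompt(
        "Add wait_for_idle() polling to the tmux controller",
        TaskType.IMPLEMENTATION,
        strategies=strategies,
    )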


# Export for use in other modules
__all__ = [
    'TaskType',
    'PromptStrategy',
    'PromptContext',
    'ChainOfThoughtEngine',
    'FewShotExampleBuilder',
    'RoleBasedPrompting',
    'ContextHierarchy',
    'TaskSpecificPatterns',
    'PromptEngineer',
]
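

if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch): print the engineered prompt
    # and its metadata for the sample task above.
    prompt, meta = _example_engineer_prompt()
    print(prompt)
    print(json.dumps(meta, indent=2))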