Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:

- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
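The DockerTmuxController itself is not part of the diff below. As a rough sketch of the interface the commit message describes — method names come from the message, while the docker/tmux plumbing, parameter names, and defaults are assumptions, not the actual implementation:

# Hypothetical sketch only; not the code added by this commit.
import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Drive a tmux session running inside a Docker container."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux command inside the container and return its stdout.
        cmd = ["docker", "exec", self.container, "tmux", *args]
        return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Return the currently visible pane contents.
        return self._tmux("capture-pane", "-t", self.session, "-p")

    def wait_for_prompt(self, pattern: str, timeout: float = 60.0, poll: float = 1.0) -> bool:
        # Pattern-based completion detection: poll until the pane matches `pattern`.
        deadline = time.time() + timeout
        while time.time() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(poll)
        return False

    def wait_for_idle(self, idle_polls: int = 3, poll: float = 2.0, timeout: float = 300.0) -> bool:
        # Content-hash-based idle detection: the pane counts as idle once its
        # content hash stays unchanged for `idle_polls` consecutive polls.
        deadline = time.time() + timeout
        last, stable = None, 0
        while stable < idle_polls:
            if time.time() > deadline:
                return False
            digest = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            stable = stable + 1 if digest == last else 0
            last = digest
            time.sleep(poll)
        return True

    def wait_for_shell_prompt(self, timeout: float = 60.0) -> bool:
        # Shell prompt detection: assume a trailing `$` or `#` means the shell is back.
        return self.wait_for_prompt(r"[$#]\s*$", timeout=timeout)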
lib/research_security_sanitizer.py (new file, 240 additions)
@@ -0,0 +1,240 @@
#!/usr/bin/env python3
"""
Research Security Sanitizer - Remove sensitive information from research prompts

NEVER expose in research prompts:
- Passwords / API keys / tokens / credentials
- User names / email addresses
- Internal server IPs / hostnames
- Internal routes / endpoints
- Database names / structure
- Infrastructure details
- Personal information
"""

import re
from typing import Optional, Tuple


class ResearchSecuritySanitizer:
    """Sanitize research prompts to remove sensitive information"""

    # Patterns to detect and redact
    PATTERNS = {
        'password': (r'(?:password|pwd|passwd|pass)\s*[:=]\s*[^\s\n]+', '[REDACTED_PASSWORD]'),
        'api_key': (r'(?:api[_-]?key|apikey|api_secret|secret)\s*[:=]\s*[^\s\n]+', '[REDACTED_API_KEY]'),
        'token': (r'(?:token|bearer|auth)\s*[:=]\s*[^\s\n]+', '[REDACTED_TOKEN]'),
        'email': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[REDACTED_EMAIL]'),
        'ip_address': (r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '[REDACTED_IP]'),
        'internal_hostname': (r'(?:localhost|127\.0\.0\.1|admin|server|prod|staging)', '[REDACTED_HOSTNAME]'),
        'database_name': (r'(?:database|db|schema)\s*[:=]\s*[^\s\n]+', '[REDACTED_DATABASE]'),
        'username': (r'(?:user|username|uid)\s*[:=]\s*[^\s\n]+', '[REDACTED_USERNAME]'),
        'path': (r'(?:/home/|/opt/|/var/|/etc/)[^\s\n]+', '[REDACTED_PATH]'),
    }

    # Keywords to flag as sensitive context
    SENSITIVE_KEYWORDS = {
        'password', 'secret', 'credential', 'token', 'key', 'auth',
        'admin', 'root', 'sudo', 'private', 'confidential',
        'database', 'production', 'internal', 'infrastructure',
        'deploy', 'ssh', 'api', 'endpoint',
    }

    @staticmethod
    def sanitize_prompt(prompt: str, task_type: str = 'research') -> Tuple[str, dict]:
        """
        Sanitize research prompt to remove sensitive information.

        Returns: (sanitized_prompt, redaction_report)
        """
        original = prompt
        sanitized = prompt
        redactions = {}

        # Apply all pattern-based redactions
        for pattern_name, (pattern, replacement) in ResearchSecuritySanitizer.PATTERNS.items():
            matches = re.findall(pattern, sanitized, re.IGNORECASE)
            if matches:
                sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
                redactions[pattern_name] = len(matches)

        # Check for sensitive keywords in context
        sensitive_context = ResearchSecuritySanitizer._check_sensitive_context(original)
        if sensitive_context:
            redactions['sensitive_context'] = sensitive_context

        # For web research: additional warnings
        if 'web' in task_type.lower():
            # Warn if trying to search for specific internal info
            internal_markers = ['internal', 'admin', 'production', 'credentials', 'secrets']
            for marker in internal_markers:
                if marker in sanitized.lower():
                    redactions['web_research_warning'] = f"Searching for '{marker}' may expose internal details"

        return sanitized, redactions

    @staticmethod
    def _check_sensitive_context(text: str) -> Optional[str]:
        """Check for sensitive keywords in context"""
        text_lower = text.lower()
        found_keywords = []

        for keyword in ResearchSecuritySanitizer.SENSITIVE_KEYWORDS:
            if keyword in text_lower:
                found_keywords.append(keyword)

        if found_keywords:
            return f"Sensitive keywords detected: {', '.join(set(found_keywords))}"

        return None

    @staticmethod
    def verify_prompt_safe(prompt: str) -> Tuple[bool, str]:
        """
        Verify that a prompt is safe to send to external tools (web, APIs).

        Returns: (is_safe, reason_if_unsafe)
        """
        # List of things that should NEVER be in external prompts
        dangerous_patterns = [
            (r'password\s*[:=]\s*\S+', 'Password detected'),
            (r'api[_-]?key\s*[:=]\s*\S+', 'API key detected'),
            (r'secret\s*[:=]\s*\S+', 'Secret detected'),
            (r'admin.*[:=]\s*\S+', 'Admin credential detected'),
            (r'root.*[:=]\s*\S+', 'Root credential detected'),
        ]

        for pattern, reason in dangerous_patterns:
            if re.search(pattern, prompt, re.IGNORECASE):
                return False, reason

        # Check for specific IP/domain patterns that might be internal
        internal_ips = re.findall(r'(?:10\.|172\.16\.|192\.168\.)', prompt)
        if internal_ips:
            return False, f"Internal IP addresses detected: {internal_ips}"

        internal_domains = re.findall(r'\.local|\.internal|\.private', prompt)
        if internal_domains:
            return False, f"Internal domain patterns detected: {internal_domains}"

        return True, "Safe to send"

    @staticmethod
    def apply_security_context(prompt: str, security_level: str) -> str:
        """
        Apply security context based on task security level.

        CRITICAL/SENSITIVE: More aggressive redaction
        INTERNAL/PUBLIC: Standard redaction
        """
        if security_level in ['critical', 'sensitive']:
            # Aggressive mode: also redact organization names, project names
            redacted = re.sub(r'\b(?:admin|dss|librechat|musica|overbits|luzia)\b', '[REDACTED_PROJECT]', prompt, flags=re.IGNORECASE)
            return redacted

        # Standard redaction
        return prompt

    @staticmethod
    def create_external_safe_prompt(original_prompt: str, task_type: str) -> str:
        """
        Create a safe version of prompt for external tools (web search, APIs).
        Removes ALL potentially sensitive information.
        """
        safe = original_prompt

        # Remove paths
        safe = re.sub(r'(?:/home/|/opt/|/var/|/etc/)[^\s\n]+', '', safe)

        # Remove IPs
        safe = re.sub(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '', safe)

        # Remove credentials
        safe = re.sub(r'(?:password|api[_-]?key|token|secret)\s*[:=]\s*[^\s\n]+', '', safe, flags=re.IGNORECASE)

        # Remove hostnames
        safe = re.sub(r'(?:localhost|127\.0\.0\.1|admin|server|prod|staging)\b', '', safe, flags=re.IGNORECASE)

        # Remove email addresses (unless specifically asking about email)
        if 'email' not in task_type.lower():
            safe = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '', safe)

        # Clean up extra whitespace
        safe = re.sub(r'\s+', ' ', safe).strip()

        return safe


class ResearchPromptBuilder:
    """Build safe research prompts for external tools"""

    @staticmethod
    def build_web_search_prompt(task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for web search.
        Removes sensitive info, keeps research intent.
        """
        sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'web_research')

        # Additional processing for web search
        safe_for_external = ResearchSecuritySanitizer.create_external_safe_prompt(sanitized, 'web_research')

        return safe_for_external, report

    @staticmethod
    def build_deep_research_prompt(task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for deep research (thinkdeep).
        Can be more detailed since it's internal.
        """
        sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'deep_research')

        # Apply security context based on sensitivity
        safe = ResearchSecuritySanitizer.apply_security_context(sanitized, security_level)

        return safe, report

    @staticmethod
    def build_code_analysis_prompt(code_context: str, task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for code analysis.
        Remove sensitive strings from code (passwords, tokens, etc).
        """
        # Sanitize the code first
        code_sanitized = code_context
        code_sanitized = re.sub(r'(?:password|api[_-]?key|secret)\s*=\s*["\']?[^"\';\n]+["\']?', '[REDACTED]', code_sanitized, flags=re.IGNORECASE)
        code_sanitized = re.sub(r'(?:token|auth)\s*=\s*["\']?[^"\';\n]+["\']?', '[REDACTED]', code_sanitized, flags=re.IGNORECASE)

        # Sanitize task description
        task_sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'code_analysis')

        prompt = f"Code Analysis Request:\n\nTask: {task_sanitized}\n\nCode:\n{code_sanitized}"

        return prompt, report


if __name__ == '__main__':
    # Test sanitization
    print("=" * 80)
    print("RESEARCH SECURITY SANITIZER - DEMONSTRATION")
    print("=" * 80)

    test_prompts = [
        "What is the latest OAuth 2.1 specification?",
        "Research password hashing algorithms for admin account with password=secret123",
        "Analyze distributed caching approaches, see implementation at 192.168.1.100:6379",
        "Latest security vulnerabilities in our API at https://api.internal/v1/users",
        "Compare REST vs GraphQL for our production infrastructure at prod.example.local",
    ]

    for prompt in test_prompts:
        print(f"\n📋 Original: {prompt}")

        sanitized, redactions = ResearchSecuritySanitizer.sanitize_prompt(prompt, 'web_research')
        print(f"   Sanitized: {sanitized}")

        if redactions:
            print(f"   Redactions: {redactions}")

        safe, reason = ResearchSecuritySanitizer.verify_prompt_safe(prompt)
        print(f"   Safe for external: {safe} ({reason})")
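The __main__ demo above exercises the sanitizer directly but not the ResearchPromptBuilder helpers added in the same file. A minimal usage sketch, assuming the repository root is on PYTHONPATH and with a made-up task description and security level:

# Illustrative only: the import path assumes the repo root is importable,
# and the task text / security level are invented examples.
from lib.research_security_sanitizer import ResearchPromptBuilder

task = "Investigate rate limiting for the API at 10.0.0.5 (token=abc123)"

web_prompt, web_report = ResearchPromptBuilder.build_web_search_prompt(task, "sensitive")
print(web_prompt)   # IP and token stripped before the prompt leaves the machine
print(web_report)   # which pattern classes were redacted

deep_prompt, deep_report = ResearchPromptBuilder.build_deep_research_prompt(task, "sensitive")
print(deep_prompt)  # redacted with placeholders, keeping more context for internal research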