luzia/lib/research_security_sanitizer.py
admin ec33ac1936 Refactor cockpit to use DockerTmuxController pattern
Based on claude-code-tools TmuxCLIController, this refactor:

- Adds DockerTmuxController class for robust tmux session management
- Implements send_keys() with configurable delay_enter
- Implements capture_pane() for output retrieval
- Implements wait_for_prompt() for pattern-based completion detection
- Implements wait_for_idle() for content-hash-based idle detection
- Implements wait_for_shell_prompt() for shell prompt detection

Also includes workflow improvements:
- Pre-task git snapshot before agent execution
- Post-task commit protocol in agent guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 10:42:16 -03:00
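The DockerTmuxController described in this commit is not part of the file shown below. Purely as an illustrative sketch of the interface the bullet points outline, it might look roughly like this; the container/session handling, the docker-exec transport, and all timeouts are assumptions, not the actual implementation:

# Hypothetical sketch only; the real class and its signatures may differ.
import hashlib
import re
import subprocess
import time


class DockerTmuxController:
    """Drive a tmux session running inside a Docker container."""

    def __init__(self, container: str, session: str = "main"):
        self.container = container
        self.session = session

    def _tmux(self, *args: str) -> str:
        # Run a tmux subcommand inside the container, returning stdout.
        cmd = ["docker", "exec", self.container, "tmux", *args]
        return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout

    def send_keys(self, keys: str, delay_enter: float = 0.0) -> None:
        # Type the keys, optionally pausing before pressing Enter.
        self._tmux("send-keys", "-t", self.session, keys)
        if delay_enter:
            time.sleep(delay_enter)
        self._tmux("send-keys", "-t", self.session, "Enter")

    def capture_pane(self) -> str:
        # Return the visible contents of the target pane.
        return self._tmux("capture-pane", "-p", "-t", self.session)

    def wait_for_prompt(self, pattern: str, timeout: float = 120.0) -> bool:
        # Pattern-based completion detection: poll until the regex appears.
        deadline = time.time() + timeout
        while time.time() < deadline:
            if re.search(pattern, self.capture_pane()):
                return True
            time.sleep(1.0)
        return False

    def wait_for_shell_prompt(self, timeout: float = 120.0) -> bool:
        # Shell prompt detection as a special case of wait_for_prompt.
        return self.wait_for_prompt(r"[$#]\s*$", timeout=timeout)

    def wait_for_idle(self, quiet_seconds: float = 5.0, timeout: float = 600.0) -> bool:
        # Content-hash-based idle detection: done once the pane stops changing.
        deadline = time.time() + timeout
        last_hash, stable_since = None, time.time()
        while time.time() < deadline:
            digest = hashlib.sha256(self.capture_pane().encode()).hexdigest()
            if digest != last_hash:
                last_hash, stable_since = digest, time.time()
            elif time.time() - stable_since >= quiet_seconds:
                return True
            time.sleep(1.0)
        return False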


#!/usr/bin/env python3
"""
Research Security Sanitizer - Remove sensitive information from research prompts

NEVER expose in research prompts:
- Passwords / API keys / tokens / credentials
- User names / email addresses
- Internal server IPs / hostnames
- Internal routes / endpoints
- Database names / structure
- Infrastructure details
- Personal information
"""
import re
from typing import Optional, Tuple


class ResearchSecuritySanitizer:
    """Sanitize research prompts to remove sensitive information"""

    # Patterns to detect and redact
    PATTERNS = {
        'password': (r'(?:password|pwd|passwd|pass)\s*[:=]\s*[^\s\n]+', '[REDACTED_PASSWORD]'),
        'api_key': (r'(?:api[_-]?key|apikey|api_secret|secret)\s*[:=]\s*[^\s\n]+', '[REDACTED_API_KEY]'),
        'token': (r'(?:token|bearer|auth)\s*[:=]\s*[^\s\n]+', '[REDACTED_TOKEN]'),
        'email': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[REDACTED_EMAIL]'),
        'ip_address': (r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '[REDACTED_IP]'),
        'internal_hostname': (r'(?:localhost|127\.0\.0\.1|admin|server|prod|staging)', '[REDACTED_HOSTNAME]'),
        'database_name': (r'(?:database|db|schema)\s*[:=]\s*[^\s\n]+', '[REDACTED_DATABASE]'),
        'username': (r'(?:user|username|uid)\s*[:=]\s*[^\s\n]+', '[REDACTED_USERNAME]'),
        'path': (r'(?:/home/|/opt/|/var/|/etc/)[^\s\n]+', '[REDACTED_PATH]'),
    }

    # Keywords to flag as sensitive context
    SENSITIVE_KEYWORDS = {
        'password', 'secret', 'credential', 'token', 'key', 'auth',
        'admin', 'root', 'sudo', 'private', 'confidential',
        'database', 'production', 'internal', 'infrastructure',
        'deploy', 'ssh', 'api', 'endpoint',
    }

    @staticmethod
    def sanitize_prompt(prompt: str, task_type: str = 'research') -> Tuple[str, dict]:
        """
        Sanitize research prompt to remove sensitive information.

        Returns: (sanitized_prompt, redaction_report)
        """
        original = prompt
        sanitized = prompt
        redactions = {}

        # Apply all pattern-based redactions
        for pattern_name, (pattern, replacement) in ResearchSecuritySanitizer.PATTERNS.items():
            matches = re.findall(pattern, sanitized, re.IGNORECASE)
            if matches:
                sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
                redactions[pattern_name] = len(matches)

        # Check for sensitive keywords in context
        sensitive_context = ResearchSecuritySanitizer._check_sensitive_context(original)
        if sensitive_context:
            redactions['sensitive_context'] = sensitive_context

        # For web research: warn if trying to search for specific internal info
        # (collect one warning per marker found, so later matches do not
        # overwrite earlier ones)
        if 'web' in task_type.lower():
            internal_markers = ['internal', 'admin', 'production', 'credentials', 'secrets']
            for marker in internal_markers:
                if marker in sanitized.lower():
                    redactions.setdefault('web_research_warning', []).append(
                        f"Searching for '{marker}' may expose internal details"
                    )
        return sanitized, redactions

    @staticmethod
    def _check_sensitive_context(text: str) -> Optional[str]:
        """Check for sensitive keywords in context"""
        text_lower = text.lower()
        found_keywords = []
        for keyword in ResearchSecuritySanitizer.SENSITIVE_KEYWORDS:
            if keyword in text_lower:
                found_keywords.append(keyword)
        if found_keywords:
            return f"Sensitive keywords detected: {', '.join(sorted(set(found_keywords)))}"
        return None

    @staticmethod
    def verify_prompt_safe(prompt: str) -> Tuple[bool, str]:
        """
        Verify that a prompt is safe to send to external tools (web, APIs).

        Returns: (is_safe, reason_if_unsafe)
        """
        # List of things that should NEVER be in external prompts
        dangerous_patterns = [
            (r'password\s*[:=]\s*\S+', 'Password detected'),
            (r'api[_-]?key\s*[:=]\s*\S+', 'API key detected'),
            (r'secret\s*[:=]\s*\S+', 'Secret detected'),
            (r'admin.*[:=]\s*\S+', 'Admin credential detected'),
            (r'root.*[:=]\s*\S+', 'Root credential detected'),
        ]
        for pattern, reason in dangerous_patterns:
            if re.search(pattern, prompt, re.IGNORECASE):
                return False, reason

        # Check for specific IP/domain patterns that might be internal
        # (private RFC 1918 prefixes: 10.x, 172.16-31.x, 192.168.x)
        internal_ips = re.findall(r'(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.)', prompt)
        if internal_ips:
            return False, f"Internal IP addresses detected: {internal_ips}"
        internal_domains = re.findall(r'\.local|\.internal|\.private', prompt)
        if internal_domains:
            return False, f"Internal domain patterns detected: {internal_domains}"
        return True, "Safe to send"

    @staticmethod
    def apply_security_context(prompt: str, security_level: str) -> str:
        """
        Apply security context based on task security level.

        CRITICAL/SENSITIVE: More aggressive redaction
        INTERNAL/PUBLIC: Standard redaction
        """
        if security_level in ['critical', 'sensitive']:
            # Aggressive mode: also redact organization names, project names
            redacted = re.sub(r'\b(?:admin|dss|librechat|musica|overbits|luzia)\b', '[REDACTED_PROJECT]', prompt, flags=re.IGNORECASE)
            return redacted
        # Standard redaction
        return prompt

    @staticmethod
    def create_external_safe_prompt(original_prompt: str, task_type: str) -> str:
        """
        Create a safe version of prompt for external tools (web search, APIs).

        Removes ALL potentially sensitive information.
        """
        safe = original_prompt
        # Remove paths
        safe = re.sub(r'(?:/home/|/opt/|/var/|/etc/)[^\s\n]+', '', safe)
        # Remove IPs
        safe = re.sub(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '', safe)
        # Remove credentials
        safe = re.sub(r'(?:password|api[_-]?key|token|secret)\s*[:=]\s*[^\s\n]+', '', safe, flags=re.IGNORECASE)
        # Remove hostnames
        safe = re.sub(r'(?:localhost|127\.0\.0\.1|admin|server|prod|staging)\b', '', safe, flags=re.IGNORECASE)
        # Remove email addresses (unless specifically asking about email)
        if 'email' not in task_type.lower():
            safe = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '', safe)
        # Clean up extra whitespace
        safe = re.sub(r'\s+', ' ', safe).strip()
        return safe


class ResearchPromptBuilder:
    """Build safe research prompts for external tools"""

    @staticmethod
    def build_web_search_prompt(task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for web search.

        Removes sensitive info, keeps research intent.
        """
        sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'web_research')
        # Additional processing for web search
        safe_for_external = ResearchSecuritySanitizer.create_external_safe_prompt(sanitized, 'web_research')
        return safe_for_external, report

    @staticmethod
    def build_deep_research_prompt(task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for deep research (thinkdeep).

        Can be more detailed since it's internal.
        """
        sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'deep_research')
        # Apply security context based on sensitivity
        safe = ResearchSecuritySanitizer.apply_security_context(sanitized, security_level)
        return safe, report

    @staticmethod
    def build_code_analysis_prompt(code_context: str, task_description: str, security_level: str) -> Tuple[str, dict]:
        """
        Build safe prompt for code analysis.

        Remove sensitive strings from code (passwords, tokens, etc).
        """
        # Sanitize the code first
        code_sanitized = code_context
        code_sanitized = re.sub(r'(?:password|api[_-]?key|secret)\s*=\s*["\']?[^"\';\n]+["\']?', '[REDACTED]', code_sanitized, flags=re.IGNORECASE)
        code_sanitized = re.sub(r'(?:token|auth)\s*=\s*["\']?[^"\';\n]+["\']?', '[REDACTED]', code_sanitized, flags=re.IGNORECASE)
        # Sanitize task description
        task_sanitized, report = ResearchSecuritySanitizer.sanitize_prompt(task_description, 'code_analysis')
        prompt = f"Code Analysis Request:\n\nTask: {task_sanitized}\n\nCode:\n{code_sanitized}"
        return prompt, report


if __name__ == '__main__':
    # Test sanitization
    print("=" * 80)
    print("RESEARCH SECURITY SANITIZER - DEMONSTRATION")
    print("=" * 80)
    test_prompts = [
        "What is the latest OAuth 2.1 specification?",
        "Research password hashing algorithms for admin account with password=secret123",
        "Analyze distributed caching approaches, see implementation at 192.168.1.100:6379",
        "Latest security vulnerabilities in our API at https://api.internal/v1/users",
        "Compare REST vs GraphQL for our production infrastructure at prod.example.local",
    ]
    for prompt in test_prompts:
        print(f"\n📋 Original: {prompt}")
        sanitized, redactions = ResearchSecuritySanitizer.sanitize_prompt(prompt, 'web_research')
        print(f"   Sanitized: {sanitized}")
        if redactions:
            print(f"   Redactions: {redactions}")
        safe, reason = ResearchSecuritySanitizer.verify_prompt_safe(prompt)
        print(f"   Safe for external: {safe} ({reason})")