#!/usr/bin/env python3

import json
import os
import re
import tempfile
from typing import Optional, List
from pydantic import BaseModel

try:
    from google import genai
    from google.genai import types
except ImportError:
    genai = None

from .config import (
    GEMINI_MODEL,
    SECURITY_THINKING_BUDGET,
    FILE_ANALYSIS_THINKING_BUDGET,
)


class SeverityBreakdown(BaseModel):
    # Findings grouped by severity label; used as the type of
    # ValidationResponse.severity_breakdown. The validation prompt ties the
    # labels to the verdict: BLOCK items accompany approved=false, while
    # WARN/INFO items may accompany approved=true.
    # Every bucket is optional and defaults to an empty list (pydantic copies
    # field defaults per instance, so the `[]` literals are not shared state).
    BLOCK: Optional[List[str]] = []
    WARN: Optional[List[str]] = []
    INFO: Optional[List[str]] = []


class ValidationResponse(BaseModel):
    # Structured verdict requested from the model: SecurityValidator.validate
    # passes this class as `response_schema` and copies the parsed fields into
    # the dict it returns.
    # Only `approved` and `reason` are required; everything else is optional.
    approved: bool
    reason: str
    suggestions: Optional[List[str]] = []
    detailed_analysis: Optional[str] = None
    thinking_process: Optional[str] = None
    # validate() overwrites this with the caller-supplied context string.
    full_context: Optional[str] = None
    performance_analysis: Optional[str] = None
    code_quality_analysis: Optional[str] = None
    alternative_approaches: Optional[List[str]] = []
    # Serialized with model_dump() by validate() when present.
    severity_breakdown: Optional[SeverityBreakdown] = None


class FileAnalysisResponse(BaseModel):
    # Structured result of the uploaded-file review: passed as
    # `response_schema` by SecurityValidator.analyze_uploaded_file.
    # All four fields are required (no defaults).
    # A non-empty `security_issues` list makes validate() block the operation.
    security_issues: List[str]
    code_quality_concerns: List[str]
    risk_assessment: str
    recommendations: List[str]


class SecurityValidator:
    """
    SecurityValidator handles all security-related validation including:
    - Pattern-based threat detection
    - File content security analysis
    - LLM-powered security analysis
    - File upload and analysis via Gemini
    """

    def __init__(self, api_key: Optional[str] = None):
        """Remember the API key and create a Gemini client when possible.

        Args:
            api_key: Gemini API key. When it is missing/empty, or the
                google-genai import failed, ``self.client`` stays ``None``.
        """
        self.uploaded_files: List[dict] = []
        self.model_name = GEMINI_MODEL
        self.api_key = api_key

        if api_key and genai:
            self.client = genai.Client(api_key=api_key)
        else:
            self.client = None

    def validate(self, tool_name: str, tool_input: dict, context: str) -> dict:
        """
        Main security validation entry point.

        Runs the cheap rule-based checks first. When an API key is configured
        it then analyses large file content via an uploaded file and, if that
        analysis produced a result without security issues, asks the model for
        a full verdict.

        Args:
            tool_name: The Claude tool being executed (Bash, Write, Edit, etc.)
            tool_input: The tool's input parameters
            context: Conversation context from transcript

        Returns:
            ValidationResponse dict with approval status and security analysis.
            "approved" is always present; the remaining keys depend on which
            stage produced the verdict.
        """
        # Stage 1: Fast rule-based validation
        quick_result = self.perform_quick_validation(tool_name, tool_input)
        if not quick_result["approved"]:
            return quick_result

        # Stage 2: Skip LLM analysis if no API key (basic security still works)
        if not self.api_key:
            return quick_result

        # From here on temp files / uploads may exist, so cleanup must run on
        # every exit path (including the early returns), not only after the
        # LLM call.
        try:
            # Stage 3: File analysis for large content
            file_analysis = None
            if tool_name in ["Write", "Edit", "MultiEdit"] and "content" in tool_input:
                file_path = tool_input.get("file_path", "")
                content = tool_input.get("content", "")

                if content and len(content) > 500:
                    uploaded_file = self.upload_file_for_analysis(file_path, content)
                    if uploaded_file:
                        file_analysis = self.analyze_uploaded_file(
                            uploaded_file, file_path
                        )
                        if file_analysis and file_analysis.get("security_issues"):
                            return {
                                "approved": False,
                                "reason": f"File analysis result: {', '.join(file_analysis['security_issues'])}",
                                "suggestions": file_analysis.get("recommendations", []),
                                "file_analysis": file_analysis,
                            }

            # Stage 4: LLM-powered comprehensive analysis if needed.
            # The quick check is known to be approved at this point, so the
            # LLM pass only runs when a file analysis is available to feed it.
            if not file_analysis:
                return quick_result

            try:
                prompt = self.build_security_prompt(
                    tool_name, tool_input, context, file_analysis
                )

                # Type guard for mypy; a missing client is handled by the
                # fail-open branch below like any other service failure.
                assert self.client is not None
                response = self.client.models.generate_content(
                    model=self.model_name,
                    contents=prompt,
                    config=types.GenerateContentConfig(
                        response_mime_type="application/json",
                        response_schema=ValidationResponse,
                        thinking_config=types.ThinkingConfig(
                            thinking_budget=SECURITY_THINKING_BUDGET
                        ),
                        tools=[types.Tool(google_search=types.GoogleSearch())],
                    ),
                )

                # Extract full response details
                raw_response = (
                    response.text if hasattr(response, "text") else str(response)
                )

                # Collect the model's thinking, if any was returned.
                # google-genai flags thought parts with a boolean `thought`
                # and keeps their text in `text`; a string-valued `thought`
                # is still accepted as-is.
                thinking_content = ""
                for candidate in getattr(response, "candidates", None) or []:
                    candidate_content = getattr(candidate, "content", None)
                    for part in getattr(candidate_content, "parts", None) or []:
                        thought = getattr(part, "thought", None)
                        if not thought:
                            continue
                        if isinstance(thought, str):
                            thought_text = thought
                        else:
                            thought_text = getattr(part, "text", None)
                        if thought_text:
                            thinking_content += thought_text + "\n"

                parsed = getattr(response, "parsed", None)
                if parsed:
                    severity = getattr(parsed, "severity_breakdown", None)
                    return {
                        "approved": parsed.approved,
                        "reason": parsed.reason,
                        "suggestions": parsed.suggestions or [],
                        "detailed_analysis": getattr(
                            parsed, "detailed_analysis", None
                        ),
                        "thinking_process": thinking_content
                        or getattr(parsed, "thinking_process", None),
                        "full_context": context,
                        "raw_response": raw_response,
                        "file_analysis": file_analysis,
                        "performance_analysis": getattr(
                            parsed, "performance_analysis", None
                        ),
                        "code_quality_analysis": getattr(
                            parsed, "code_quality_analysis", None
                        ),
                        "alternative_approaches": getattr(
                            parsed, "alternative_approaches", []
                        ),
                        "severity_breakdown": (
                            severity.model_dump() if severity is not None else None
                        ),
                    }

                # Fallback JSON parsing when the SDK did not populate `parsed`
                result = json.loads(response.text)
                required_fields = ["approved", "reason"]
                if isinstance(result, dict) and all(
                    field in result for field in required_fields
                ):
                    result["thinking_process"] = thinking_content
                    result["full_context"] = context
                    result["raw_response"] = raw_response
                    result["file_analysis"] = file_analysis
                    return dict(result)

                return {
                    "approved": False,
                    "reason": "Invalid response structure from validation service",
                    "raw_response": raw_response,
                    "full_context": context,
                }
            except Exception as e:
                # Fail-safe: allow operation if security validation fails
                # (deliberate fail-open; the rule-based checks already passed).
                return {
                    "approved": True,
                    "reason": f"Security validation service unavailable: {str(e)}",
                }
        finally:
            self.cleanup_uploaded_files()

    def perform_quick_validation(self, tool_name: str, tool_input: dict) -> dict:
        """Tier 1 check: route the tool call to its rule-based validator.

        Bash commands and the file-writing tools (Write, Edit, MultiEdit) have
        dedicated validators; any other tool is approved without inspection.
        """
        file_tools = ("Write", "Edit", "MultiEdit")

        if tool_name == "Bash":
            checker = self.validate_bash_command
        elif tool_name in file_tools:
            checker = self.validate_file_operation
        else:
            checker = None

        return checker(tool_input) if checker else {"approved": True}

    def validate_bash_command(self, tool_input: dict) -> dict:
        """Enhanced validation for bash commands.

        Rules are applied in this order, first match wins:
        1. shell redirections that bypass the Write/Edit tools (blocked),
        2. destructive command patterns (blocked),
        3. discouraged tools such as grep / find -name / bare python (blocked),
        4. risky-but-allowed patterns (approved, with a reason attached).

        Args:
            tool_input: Bash tool parameters; only "command" is read. A
                missing or null command is treated as an empty string.

        Returns:
            Dict with "approved" and, when a rule matched, "reason" plus
            optional "risk_level" / "suggestions".
        """
        command = tool_input.get("command") or ""

        critical_patterns = [
            r"rm\s+-rf\s+/",
            r"sudo\s+rm.*/",
            r"mkfs",
            r"dd\s+if=.*of=.*",
            r"curl.*\|\s*bash",
            r"wget.*\|\s*(bash|sh)",
            r"> /etc/",
            r"> /bin/",
            r"> /usr/",
        ]

        # Check for file write operations that bypass Write/Edit tools.
        # The append (">>") rules must come before the overwrite (">") rules:
        # the overwrite patterns also match ">>", so the other order would
        # make the append-specific suggestions unreachable.
        file_write_patterns = [
            (r"cat\s*>>\s*", "Use Edit tool for appending to files"),
            (r"cat\s*>\s*", "Use Write tool for creating files"),
            (r"echo\s+.*>>\s*['\"]?[\w/.-]+", "Use Edit tool for appending to files"),
            (r"echo\s+.*>\s*['\"]?[\w/.-]+", "Use Write tool for creating files"),
            (r"printf\s+.*>\s*['\"]?[\w/.-]+", "Use Write tool for creating files"),
            (r"tee\s+['\"]?[\w/.-]+", "Use Write tool for file operations"),
            (r"sed\s+.*-i", "Use Edit tool for in-place file modifications"),
            (r"awk\s+.*>\s*['\"]?[\w/.-]+", "Use Write tool for file operations"),
        ]

        for pattern, suggestion in file_write_patterns:
            if re.search(pattern, command, re.IGNORECASE):
                return {
                    "approved": False,
                    "reason": f"File write operation detected in bash command. {suggestion} instead.",
                    "risk_level": "high",
                    "suggestions": [suggestion],
                }

        for pattern in critical_patterns:
            if re.search(pattern, command, re.IGNORECASE):
                return {
                    "approved": False,
                    "reason": "Dangerous command pattern detected: potentially destructive operation",
                }

        # Performance/tool enforcement - block inefficient commands.
        # These only match when the tool is the first word of the command.
        tool_enforcement = [
            (
                r"^grep\b|^\s*grep\b",
                "Using deprecated 'grep' command",
                "Use 'rg' (ripgrep) instead of 'grep' for better performance and features",
            ),
            (
                r"^find\s+.*-name\b|^\s*find\s+.*-name\b",
                "Using inefficient 'find -name' pattern",
                "Use 'rg --files -g pattern' or 'rg --files | rg pattern' instead of 'find -name' for better performance",
            ),
            (
                r"^(python|python3)\b|^\s*(python|python3)\b",
                "Using python without virtual environment management",
                "Use 'uv run python' instead of direct python for better dependency management",
            ),
        ]

        for pattern, reason, suggestion in tool_enforcement:
            if re.search(pattern, command, re.IGNORECASE):
                return {
                    "approved": False,
                    "reason": reason,
                    "suggestions": [suggestion],
                }

        # Allowed, but the caller gets a reason explaining the risk.
        warning_patterns = [
            r"sudo",
            r"rm\s+-rf",
            r"git\s+reset\s+--hard",
            r"npm\s+uninstall",
            r"pip\s+uninstall",
        ]

        if any(re.search(p, command, re.IGNORECASE) for p in warning_patterns):
            return {
                "approved": True,
                "reason": "Command requires elevated privileges or has destructive potential",
            }

        return {"approved": True}

    def validate_file_operation(self, tool_input: dict) -> dict:
        """Enhanced validation for file operations.

        Rejects paths that leave the project or target /etc/ or /bin/, text
        that matches a known secret format, and Python files that combine
        ``import os`` with a ``system(`` call.

        Args:
            tool_input: Parameters of a Write/Edit/MultiEdit call. The text to
                scan is taken from "content" and, so that edits are covered as
                well, from "new_string" and each "edits[].new_string"
                (the field names used by Claude Code's Edit/MultiEdit tools).
                Missing or non-string values are ignored.

        Returns:
            ``{"approved": True}`` or a dict with ``approved: False`` and a
            "reason" (plus "suggestions" for secret findings).
        """
        file_path = tool_input.get("file_path")
        if not isinstance(file_path, str):
            file_path = ""

        # Gather every piece of new text the operation would write.
        pieces = [tool_input.get("content"), tool_input.get("new_string")]
        edits = tool_input.get("edits")
        if isinstance(edits, list):
            pieces.extend(
                edit.get("new_string") for edit in edits if isinstance(edit, dict)
            )
        # Joined with newlines: none of the secret patterns can match across
        # a newline, so pieces cannot combine into a false positive.
        content = "\n".join(piece for piece in pieces if isinstance(piece, str))

        if "../" in file_path or file_path.startswith(("/etc/", "/bin/")):
            return {
                "approved": False,
                "reason": "Potentially dangerous file path - outside project boundary or system directory",
            }

        # Basic secret detection patterns (LLM handles sophisticated cases)
        secret_patterns = [
            (r"sk_live_[a-zA-Z0-9]{24,}", "Stripe live secret key detected"),
            (r"sk_test_[a-zA-Z0-9]{24,}", "Stripe test secret key detected"),
            (r"AKIA[0-9A-Z]{16}", "AWS access key ID detected"),
            (r"ghp_[a-zA-Z0-9]{36}", "GitHub personal access token detected"),
            (r"gho_[a-zA-Z0-9]{36}", "GitHub OAuth token detected"),
            (r"ghr_[a-zA-Z0-9]{36}", "GitHub refresh token detected"),
            (
                r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+",
                "JWT token detected",
            ),
            (
                r"xoxb-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}",
                "Slack bot token detected",
            ),
            (r"-----BEGIN [A-Z]+ PRIVATE KEY-----", "Private key detected"),
        ]

        for pattern, message in secret_patterns:
            if re.search(pattern, content):
                return {
                    "approved": False,
                    "reason": f"Security violation: {message}",
                    "suggestions": [
                        "Use environment variables for secrets",
                        "Consider using a secrets manager like AWS Secrets Manager",
                        "Never commit real credentials to version control",
                    ],
                }

        # Coarse heuristic for os.system-style shell calls in Python sources.
        if (
            file_path.endswith(".py")
            and "import os" in content
            and "system(" in content
        ):
            return {
                "approved": False,
                "reason": "Potential shell injection pattern in Python code",
            }

        return {"approved": True}

    def upload_file_for_analysis(
        self, file_path: str, content: str
    ) -> Optional[object]:
        """Upload file content to Gemini for enhanced analysis.

        The content is written to a temporary file (UTF-8, same extension as
        ``file_path``) which is then handed to ``client.files.upload``. On
        success the upload handle and temp path are recorded in
        ``self.uploaded_files`` for later cleanup.

        Args:
            file_path: Target path of the operation; only its extension is used.
            content: Text to upload.

        Returns:
            The object returned by the upload call, or ``None`` when no client
            is configured or writing/uploading failed (best-effort).
        """
        if not self.client:
            return None

        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w",
                suffix=os.path.splitext(file_path)[1],
                delete=False,
                encoding="utf-8",
            ) as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(content)

            uploaded_file = self.client.files.upload(file=temp_file_path)
        except Exception:
            # The temp file is created with delete=False and is not yet
            # tracked in self.uploaded_files, so remove it here or it leaks.
            if temp_file_path is not None:
                try:
                    os.unlink(temp_file_path)
                except OSError:
                    pass
            return None

        self.uploaded_files.append(
            {"file_obj": uploaded_file, "temp_path": temp_file_path}
        )
        return uploaded_file

    def analyze_uploaded_file(
        self, uploaded_file: object, file_path: str
    ) -> Optional[dict]:
        """Ask the model for a structured review of a previously uploaded file.

        Args:
            uploaded_file: Handle returned by the upload step; sent to the
                model alongside the prompt.
            file_path: Path of the file being written; only its basename is
                mentioned in the prompt.

        Returns:
            Dict with "security_issues", "code_quality_concerns",
            "risk_assessment" and "recommendations" when the SDK parsed the
            response; otherwise whatever JSON object the response text holds.
            ``None`` when no client is configured or anything fails.
        """
        if not self.client:
            return None

        report_fields = (
            "security_issues",
            "code_quality_concerns",
            "risk_assessment",
            "recommendations",
        )

        try:
            display_name = os.path.basename(file_path)
            prompt = f"""Perform comprehensive analysis of this file: {display_name}

Analyze for:
-  Security vulnerabilities (injections, exposures, dangerous functions)
- Following SOLID principles
  - Single-Responsibility Principle:
    - A class should have one and only one reason to change, meaning that a class should have only one job.
  - Open-Closed Principle
    - Objects or entities should be open for extension but closed for modification.
  - Liskov Substitution Principle
    - every subclass or derived class should be substitutable for their base or parent class.
  - Interface Segregation Principle
    - A client should never be forced to implement an interface that it doesn’t use, or clients shouldn’t be forced to depend on methods they do not use.
  - Dependency Inversion Principle
    - Entities must depend on abstractions, not on concretions. It states that the high-level module must not depend on the low-level module, but they should depend on abstractions.
- Code quality issues (complexity, readability, zen-like, self-evident, no comments, maintainability, best practices)
- Configuration security (permissions, secrets, access controls)
- Potential attack vectors and exploitation risks (as recommendation)
- Compliance with security standards
- No line comments in code. Code should be pythonic, zen-like, self-evident
  - Beautiful is better than ugly.
  - Explicit is better than implicit.
  - Simple is better than complex.
  - Complex is better than complicated.
  - Flat is better than nested.
  - Sparse is better than dense.
  - Readability counts.
  - Special cases aren't special enough to break the rules.
  - Although practicality beats purity.
  - Errors should never pass silently.
  - Unless explicitly silenced.
  - In the face of ambiguity, refuse the temptation to guess.
  - There should be one-- and preferably only one --obvious way to do it.
  - Although that way may not be obvious at first unless you're Dutch.
  - Now is better than never.
  - Although never is often better than *right* now.
  - If the implementation is hard to explain, it's a bad idea.
  - If the implementation is easy to explain, it may be a good idea.
  - Namespaces are one honking great idea -- let's do more of those!

Google Search for latest information and practices before responding.

Provide structured assessment and actionable recommendations."""

            # NOTE(review): some Gemini models may reject Google Search
            # grounding combined with a JSON response schema; confirm the
            # configured model accepts both. A rejection surfaces here as None.
            request_config = types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=FileAnalysisResponse,
                thinking_config=types.ThinkingConfig(
                    thinking_budget=FILE_ANALYSIS_THINKING_BUDGET
                ),
                tools=[types.Tool(google_search=types.GoogleSearch())],
            )
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=[prompt, uploaded_file],
                config=request_config,
            )

            if not (hasattr(response, "parsed") and response.parsed):
                # SDK did not parse the payload: decode the raw JSON text.
                return dict(json.loads(response.text))

            report = response.parsed
            return {field: getattr(report, field) for field in report_fields}
        except Exception:
            # Best-effort: any failure means "no file analysis available".
            return None

    def cleanup_uploaded_files(self) -> None:
        """Clean up uploaded files and temporary files.

        For every entry recorded in ``self.uploaded_files`` this removes the
        local temp file and, when a client is configured and the upload handle
        exposes a ``name``, asks the service to delete the remote copy too.
        Both steps are best-effort: failures are ignored so cleanup never
        masks the validation result. The tracking list is emptied afterwards.
        """
        # Detach the list first so the tracker is reset even if a step below
        # misbehaves.
        pending, self.uploaded_files = self.uploaded_files, []

        for file_info in pending:
            temp_path = file_info.get("temp_path")
            if temp_path:
                try:
                    os.unlink(temp_path)
                except OSError:
                    # Already gone or not removable; nothing more to do.
                    pass

            remote_name = getattr(file_info.get("file_obj"), "name", None)
            if self.client and remote_name:
                try:
                    self.client.files.delete(name=remote_name)
                except Exception:
                    # Remote deletion is a courtesy; ignore service errors.
                    pass

    def extract_conversation_context(self, transcript_path: str) -> str:
        """Return the transcript file's text, or "" when it cannot be read.

        The whole file is read as UTF-8. A missing path or any error while
        checking or reading it yields an empty string instead of raising.
        """
        try:
            if not os.path.exists(transcript_path):
                return ""
            with open(transcript_path, "r", encoding="utf-8") as transcript:
                return transcript.read()
        except Exception:
            return ""

    def build_security_prompt(
        self,
        tool_name: str,
        tool_input: dict,
        context: str,
        file_analysis: Optional[dict] = None,
    ) -> str:
        """Assemble the Gemini prompt used for the full security verdict.

        Args:
            tool_name: Name of the tool being validated.
            tool_input: Tool parameters; embedded as indented JSON.
            context: Conversation context, embedded verbatim.
            file_analysis: Optional result of the uploaded-file analysis. When
                truthy, its issues, concerns, risk assessment and
                recommendations are added as a FILE_ANALYSIS_RESULTS section.

        Returns:
            The complete prompt text.
        """

        def as_json(value) -> str:
            return json.dumps(value, indent=2)

        if not file_analysis:
            findings_block = ""
        else:
            issues = as_json(file_analysis.get("security_issues", []))
            concerns = as_json(file_analysis.get("code_quality_concerns", []))
            risk = file_analysis.get("risk_assessment", "Not available")
            advice = as_json(file_analysis.get("recommendations", []))
            findings_block = (
                "\nFILE_ANALYSIS_RESULTS:\n"
                f"Security Issues Detected: {issues}\n"
                f"Code Quality Concerns: {concerns}\n"
                f"Risk Assessment: {risk}\n"
                f"Recommendations: {advice}\n"
            )

        serialized_input = as_json(tool_input)

        return f"""You are a comprehensive security analyzer for Claude Code. You provide security validation AND constructive feedback on performance, code quality, and best practices for ALL operations.

TOOL: {tool_name}
INPUT: {serialized_input}
RECENT_CONTEXT: {context}
{findings_block}

ANALYSIS SCOPE:
1. **Security** - Identify vulnerabilities and dangerous patterns
2. **Performance** - Suggest faster tools and more efficient approaches
3. **Code Quality** - Promote clean, self-evident, pythonic code
4. **Best Practices** - Recommend modern patterns and industry standards
5. **Alternative Approaches** - Suggest different ways to achieve the same goal

CRITICAL SECURITY RULES - YOU MUST BLOCK THESE:

1. **ALWAYS BLOCK Real Credentials/Secrets**:
   - AWS keys (AKIA..., 40-char base64 strings)
   - GitHub tokens (ghp_..., gho_..., ghr_...)  
   - Stripe keys (sk_live_..., pk_live_...)
   - JWT tokens (ey...ey...signature)
   - API keys that look real (long, random, not placeholders)
   - Actual passwords in configuration files
   - Private keys, certificates, or tokens

2. **ALWAYS BLOCK Dangerous Commands**:
   - System destruction (rm -rf /, mkfs, dd to devices)
   - Malicious downloads (curl | bash, wget | sh)
   - Privilege escalation attempts
   - Network exfiltration
   - System file modifications outside project

3. **ALWAYS ALLOW Documentation/Examples**:
   - Placeholder values: "YOUR_API_KEY", "xxx", "...", "<SECRET>"
   - Variable names in docs: GEMINI_API_KEY (without assignment)
   - Example configurations with fake values
   - Safe development commands (ls, git, npm, pip)

ANALYSIS FRAMEWORK:
Search current security intelligence and analyze:

**Secret Detection Logic**:
- Real secrets: Block immediately (approved: false)
- Placeholders/docs: Allow (approved: true)
- Example: "GEMINI_API_KEY=your_key_here" = ALLOW (placeholder)
- Example: "api_key=sk_live_abc123def456..." = BLOCK (real Stripe key)

**Command Analysis**:
- Check for obfuscation, encoding, chaining
- Analyze full command context and impact
- Consider privilege escalation potential
- Look for performance improvement opportunities (suggest rg over grep)

**Context Awareness**:
- Is this legitimate development activity?
- Does the action match user intent?
- Are there safer alternatives?

**Challenge and Improve**:
- Challenge the approach: Is there a better way to achieve this goal?
- Question assumptions: Are there hidden risks or better practices?
- Suggest improvements: Modern tools, security practices, performance optimizations
- Educational feedback: Help the user learn safer development practices

USE YOUR THINKING BUDGET to reason through complex scenarios. Consider social engineering, supply chain attacks, and advanced threats.

RESPONSE REQUIREMENTS:
1. **Decision**: approved: true/false with clear reasoning
2. **Risk Level**: low/medium/high/critical based on overall assessment
3. **Suggestions**: Provide 2-3 specific actionable improvements
4. **Detailed Analysis**: Comprehensive security evaluation
5. **Performance Analysis**: Evaluate efficiency and suggest optimizations
6. **Code Quality Analysis**: Assess clarity and maintainability
7. **Alternative Approaches**: List different ways to achieve the same goal
8. **Severity Breakdown**: Categorize all findings
9. **Thinking Process**: Document your reasoning

ANALYSIS PRINCIPLES:
- Provide value on EVERY operation, not just failures
- Be a mentor, not just a guard
- Challenge approaches constructively
- Suggest modern, efficient alternatives
- Promote self-evident code that needs no comments
- Consider both immediate and long-term implications

DECISION CRITERIA:
- approved: true = Operation can proceed (may have WARN/INFO items)
- approved: false = Operation blocked (has BLOCK items)
- Always be educational and constructive
- Focus on helping developers write better code

Analyze comprehensively, teach continuously, and help developers level up their skills with every interaction."""
