#!/usr/bin/env python3
"""
Advanced Parsing for TuskLang Python SDK
========================================
Enhanced parsing capabilities with syntax validation and intelligent parsing

This module provides advanced parsing features for the TuskLang Python SDK,
including syntax highlighting, validation, error recovery, and intelligent parsing strategies.
"""

import re
import json
import ast
from typing import Any, Dict, List, Optional, Tuple, Union
from dataclasses import dataclass
from enum import Enum
import logging
from pathlib import Path


class TokenType(Enum):
    """Categories a token can be tagged with for syntax highlighting.

    Each member's value is the lowercase form of its name.
    """
    # Not emitted by AdvancedParser._get_next_token as written in this file.
    KEYWORD = "keyword"
    # Double-quoted text, backslash escapes allowed.
    STRING = "string"
    # Optionally negative integer or decimal literal.
    NUMBER = "number"
    # "@name" reference.
    OPERATOR = "operator"
    # "$name" reference.
    VARIABLE = "variable"
    # "#" through the end of the line.
    COMMENT = "comment"
    # Identifier followed by "{"; the token text includes the brace.
    SECTION = "section"
    # "@name(...)" call.
    FUNCTION = "function"
    # One of: { } [ ] , ; =
    PUNCTUATION = "punctuation"
    # A run of whitespace characters.
    WHITESPACE = "whitespace"


class ValidationLevel(Enum):
    """How much validation AdvancedParser applies to a token stream.

    The member values are the lowercase strings accepted by the module-level
    helper functions.
    """
    # No rule list is registered for this level, so no validators run.
    NONE = "none"
    # Brace matching, duplicate sections and variable names.
    BASIC = "basic"
    # BASIC plus operator syntax, data type and nesting checks.
    STRICT = "strict"
    # STRICT plus security, performance and compliance checks.
    ENTERPRISE = "enterprise"


@dataclass
class Token:
    """A single lexical token produced by AdvancedParser.tokenize."""
    # Category assigned by the tokenizer.
    type: TokenType
    # Exact source text covered by the token.
    value: str
    # Offset of the first character within its line (not within the file).
    start_pos: int
    # Offset one past the last character within its line.
    end_pos: int
    # Line number; the tokenizer counts from 1.
    line: int
    # Column of the first character; the tokenizer counts from 0.
    column: int


@dataclass
class ParseError:
    """A finding reported by parsing or validation (not an exception)."""
    # Human-readable description of the problem.
    message: str
    # Line the finding refers to.
    line: int
    # Column the finding refers to.
    column: int
    # Free-form severity label; this file uses "error", "warning" and "info".
    severity: str
    # Optional hint on how to resolve the finding.
    suggestion: Optional[str] = None


@dataclass
class SyntaxNode:
    """A node in the syntax tree built by AdvancedParser._build_syntax_tree."""
    # "root", "section", or the TokenType value of the token the node wraps.
    node_type: str
    # Section name for section nodes, raw token text for leaf nodes,
    # empty string for the root.
    value: Any
    # Child nodes in source order.
    children: List['SyntaxNode']
    # Position fields are copied from the originating token; the root uses
    # fixed placeholder values.
    start_pos: int
    end_pos: int
    line: int
    column: int


class AdvancedParser:
    """Advanced TuskLang parser with enhanced capabilities"""
    
    def __init__(self, validation_level: ValidationLevel = ValidationLevel.BASIC):
        self.validation_level = validation_level
        self.logger = logging.getLogger('tusklang.parser')
        
        # Initialize patterns
        self._init_patterns()
        
        # Initialize validation rules
        self._init_validation_rules()
    
    def _init_patterns(self):
        """Initialize regex patterns for parsing"""
        self.patterns = {
            'section': re.compile(r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*\{'),
            'variable': re.compile(r'^\$([a-zA-Z_][a-zA-Z0-9_]*)'),
            'string': re.compile(r'^"([^"\\]*(\\.[^"\\]*)*)"'),
            'number': re.compile(r'^-?\d+(\.\d+)?'),
            'operator': re.compile(r'^@([a-zA-Z_][a-zA-Z0-9_]*)'),
            'comment': re.compile(r'^#.*'),
            'whitespace': re.compile(r'^\s+'),
            'punctuation': re.compile(r'^[{}[\],;=]'),
            'function_call': re.compile(r'^@([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]*)\)'),
            'array': re.compile(r'^\[([^\]]*)\]'),
            'object': re.compile(r'^\{([^}]*)\}'),
            'range': re.compile(r'^(\d+)-(\d+)'),
            'ternary': re.compile(r'^(.+?)\s*\?\s*(.+?)\s*:\s*(.+)'),
            'concatenation': re.compile(r'^(.+?)\s*\+\s*(.+)')
        }
    
    def _init_validation_rules(self):
        """Initialize validation rules"""
        self.validation_rules = {
            ValidationLevel.BASIC: [
                self._validate_basic_syntax,
                self._validate_section_structure,
                self._validate_variable_names
            ],
            ValidationLevel.STRICT: [
                self._validate_basic_syntax,
                self._validate_section_structure,
                self._validate_variable_names,
                self._validate_operator_syntax,
                self._validate_data_types,
                self._validate_nesting
            ],
            ValidationLevel.ENTERPRISE: [
                self._validate_basic_syntax,
                self._validate_section_structure,
                self._validate_variable_names,
                self._validate_operator_syntax,
                self._validate_data_types,
                self._validate_nesting,
                self._validate_security,
                self._validate_performance,
                self._validate_compliance
            ]
        }
    
    def parse_with_validation(self, content: str) -> Tuple[Dict[str, Any], List[ParseError]]:
        """Parse content with validation"""
        errors = []
        
        # Tokenize content
        tokens = self.tokenize(content)
        
        # Validate tokens
        validation_errors = self.validate_tokens(tokens)
        errors.extend(validation_errors)
        
        # Parse content
        try:
            result = self.parse_content(content)
        except Exception as e:
            errors.append(ParseError(
                message=f"Parse error: {str(e)}",
                line=1,
                column=1,
                severity="error"
            ))
            result = {}
        
        return result, errors
    
    def tokenize(self, content: str) -> List[Token]:
        """Tokenize content for syntax analysis"""
        tokens = []
        lines = content.split('\n')
        
        for line_num, line in enumerate(lines, 1):
            pos = 0
            col = 0
            
            while pos < len(line):
                token = self._get_next_token(line, pos, line_num, col)
                if token:
                    tokens.append(token)
                    pos = token.end_pos
                    col = token.column + len(token.value)
                else:
                    # Skip unknown character
                    pos += 1
                    col += 1
        
        return tokens
    
    def _get_next_token(self, line: str, pos: int, line_num: int, col: int) -> Optional[Token]:
        """Get next token from line"""
        remaining = line[pos:]
        
        # Check each pattern
        for token_type, pattern in [
            (TokenType.WHITESPACE, self.patterns['whitespace']),
            (TokenType.COMMENT, self.patterns['comment']),
            (TokenType.SECTION, self.patterns['section']),
            (TokenType.VARIABLE, self.patterns['variable']),
            (TokenType.STRING, self.patterns['string']),
            (TokenType.NUMBER, self.patterns['number']),
            (TokenType.OPERATOR, self.patterns['operator']),
            (TokenType.FUNCTION, self.patterns['function_call']),
            (TokenType.PUNCTUATION, self.patterns['punctuation'])
        ]:
            match = pattern.match(remaining)
            if match:
                value = match.group(0)
                return Token(
                    type=token_type,
                    value=value,
                    start_pos=pos,
                    end_pos=pos + len(value),
                    line=line_num,
                    column=col
                )
        
        return None
    
    def validate_tokens(self, tokens: List[Token]) -> List[ParseError]:
        """Validate tokens based on validation level"""
        errors = []
        
        # Get validation rules for current level
        rules = self.validation_rules.get(self.validation_level, [])
        
        for rule in rules:
            rule_errors = rule(tokens)
            errors.extend(rule_errors)
        
        return errors
    
    def _validate_basic_syntax(self, tokens: List[Token]) -> List[ParseError]:
        """Validate basic syntax"""
        errors = []
        
        # Check for unmatched braces
        brace_stack = []
        
        for token in tokens:
            if token.value == '{':
                brace_stack.append(('{', token.line, token.column))
            elif token.value == '}':
                if not brace_stack or brace_stack[-1][0] != '{':
                    errors.append(ParseError(
                        message="Unmatched closing brace",
                        line=token.line,
                        column=token.column,
                        severity="error",
                        suggestion="Check for missing opening brace"
                    ))
                else:
                    brace_stack.pop()
        
        # Check for unclosed braces
        for brace, line, col in brace_stack:
            errors.append(ParseError(
                message="Unclosed brace",
                line=line,
                column=col,
                severity="error",
                suggestion="Add closing brace"
            ))
        
        return errors
    
    def _validate_section_structure(self, tokens: List[Token]) -> List[ParseError]:
        """Validate section structure"""
        errors = []
        sections = []
        
        for i, token in enumerate(tokens):
            if token.type == TokenType.SECTION:
                section_name = token.value.split('{')[0].strip()
                sections.append(section_name)
                
                # Check for duplicate sections
                if sections.count(section_name) > 1:
                    errors.append(ParseError(
                        message=f"Duplicate section: {section_name}",
                        line=token.line,
                        column=token.column,
                        severity="warning",
                        suggestion="Consider merging or renaming sections"
                    ))
        
        return errors
    
    def _validate_variable_names(self, tokens: List[Token]) -> List[ParseError]:
        """Validate variable names"""
        errors = []
        
        for token in tokens:
            if token.type == TokenType.VARIABLE:
                var_name = token.value[1:]  # Remove $ prefix
                
                # Check for reserved keywords
                reserved_keywords = ['true', 'false', 'null', 'section', 'import']
                if var_name in reserved_keywords:
                    errors.append(ParseError(
                        message=f"Variable name '{var_name}' is a reserved keyword",
                        line=token.line,
                        column=token.column,
                        severity="error",
                        suggestion=f"Use a different name for variable '{var_name}'"
                    ))
                
                # Check for naming conventions
                if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', var_name):
                    errors.append(ParseError(
                        message=f"Invalid variable name: {var_name}",
                        line=token.line,
                        column=token.column,
                        severity="warning",
                        suggestion="Use only letters, numbers, and underscores"
                    ))
        
        return errors
    
    def _validate_operator_syntax(self, tokens: List[Token]) -> List[ParseError]:
        """Validate operator syntax"""
        errors = []
        
        for token in tokens:
            if token.type == TokenType.FUNCTION:
                # Extract operator name and parameters
                match = self.patterns['function_call'].match(token.value)
                if match:
                    operator = match.group(1)
                    params = match.group(2)
                    
                    # Validate operator name
                    if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', operator):
                        errors.append(ParseError(
                            message=f"Invalid operator name: {operator}",
                            line=token.line,
                            column=token.column,
                            severity="error",
                            suggestion="Use only letters, numbers, and underscores"
                        ))
                    
                    # Validate parameters
                    if params and not self._validate_parameters(params):
                        errors.append(ParseError(
                            message=f"Invalid parameters for operator {operator}",
                            line=token.line,
                            column=token.column,
                            severity="warning",
                            suggestion="Check parameter syntax and types"
                        ))
        
        return errors
    
    def _validate_data_types(self, tokens: List[Token]) -> List[ParseError]:
        """Validate data types"""
        errors = []
        
        for token in tokens:
            if token.type == TokenType.STRING:
                # Check for unescaped quotes
                if token.value.count('"') % 2 != 0:
                    errors.append(ParseError(
                        message="Unescaped quote in string",
                        line=token.line,
                        column=token.column,
                        severity="error",
                        suggestion="Escape quotes with backslash"
                    ))
            
            elif token.type == TokenType.NUMBER:
                # Validate number format
                try:
                    float(token.value)
                except ValueError:
                    errors.append(ParseError(
                        message=f"Invalid number format: {token.value}",
                        line=token.line,
                        column=token.column,
                        severity="error",
                        suggestion="Use valid number format"
                    ))
        
        return errors
    
    def _validate_nesting(self, tokens: List[Token]) -> List[ParseError]:
        """Validate nesting structure"""
        errors = []
        nesting_stack = []
        
        for token in tokens:
            if token.value in ['{', '[', '(']:
                nesting_stack.append((token.value, token.line, token.column))
            elif token.value in ['}', ']', ')']:
                if not nesting_stack:
                    errors.append(ParseError(
                        message=f"Unmatched closing {token.value}",
                        line=token.line,
                        column=token.column,
                        severity="error",
                        suggestion="Check for missing opening bracket"
                    ))
                else:
                    expected = {'}': '{', ']': '[', ')': '('}[token.value]
                    if nesting_stack[-1][0] != expected:
                        errors.append(ParseError(
                            message=f"Mismatched brackets: expected {expected}, got {token.value}",
                            line=token.line,
                            column=token.column,
                            severity="error",
                            suggestion="Check bracket matching"
                        ))
                    else:
                        nesting_stack.pop()
        
        return errors
    
    def _validate_security(self, tokens: List[Token]) -> List[ParseError]:
        """Validate security aspects"""
        errors = []
        
        for token in tokens:
            if token.type == TokenType.FUNCTION:
                # Check for potentially dangerous operators
                dangerous_operators = ['exec', 'eval', 'system', 'shell']
                operator = token.value.split('(')[0][1:]  # Remove @ and get operator name
                
                if operator in dangerous_operators:
                    errors.append(ParseError(
                        message=f"Potentially dangerous operator: {operator}",
                        line=token.line,
                        column=token.column,
                        severity="warning",
                        suggestion="Review security implications"
                    ))
        
        return errors
    
    def _validate_performance(self, tokens: List[Token]) -> List[ParseError]:
        """Validate performance aspects"""
        errors = []
        
        # Check for expensive operations
        expensive_operators = ['query', 'file', 'network', 'api']
        
        for token in tokens:
            if token.type == TokenType.FUNCTION:
                operator = token.value.split('(')[0][1:]
                
                if operator in expensive_operators:
                    errors.append(ParseError(
                        message=f"Expensive operation: {operator}",
                        line=token.line,
                        column=token.column,
                        severity="info",
                        suggestion="Consider caching or optimization"
                    ))
        
        return errors
    
    def _validate_compliance(self, tokens: List[Token]) -> List[ParseError]:
        """Validate compliance requirements"""
        errors = []
        
        # Check for sensitive data patterns
        sensitive_patterns = [
            (r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', 'Credit card number'),
            (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', 'Email address'),
            (r'\b\d{3}-\d{2}-\d{4}\b', 'SSN pattern')
        ]
        
        for token in tokens:
            if token.type == TokenType.STRING:
                value = token.value.strip('"')
                for pattern, description in sensitive_patterns:
                    if re.search(pattern, value):
                        errors.append(ParseError(
                            message=f"Potential {description} detected",
                            line=token.line,
                            column=token.column,
                            severity="warning",
                            suggestion="Review data handling compliance"
                        ))
        
        return errors
    
    def _validate_parameters(self, params: str) -> bool:
        """Validate operator parameters"""
        try:
            # Basic parameter validation
            if not params.strip():
                return True
            
            # Check for balanced parentheses
            paren_count = 0
            for char in params:
                if char == '(':
                    paren_count += 1
                elif char == ')':
                    paren_count -= 1
                    if paren_count < 0:
                        return False
            
            return paren_count == 0
        except:
            return False
    
    def parse_content(self, content: str) -> Dict[str, Any]:
        """Parse content using enhanced parsing"""
        # This is a simplified version - in practice, you'd integrate with the main parser
        result = {}
        
        # Parse sections
        sections = re.findall(r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\{([^}]*)\}', content)
        
        for section_name, section_content in sections:
            result[section_name] = self._parse_section_content(section_content)
        
        return result
    
    def _parse_section_content(self, content: str) -> Dict[str, Any]:
        """Parse section content"""
        result = {}
        
        # Parse key-value pairs
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if '=' in line and not line.startswith('#'):
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip().rstrip(';')
                result[key] = self._parse_value(value)
        
        return result
    
    def _parse_value(self, value: str) -> Any:
        """Parse individual value"""
        value = value.strip()
        
        # Remove quotes
        if (value.startswith('"') and value.endswith('"')) or \
           (value.startswith("'") and value.endswith("'")):
            return value[1:-1]
        
        # Parse numbers
        if re.match(r'^-?\d+$', value):
            return int(value)
        elif re.match(r'^-?\d+\.\d+$', value):
            return float(value)
        
        # Parse booleans
        if value.lower() == 'true':
            return True
        elif value.lower() == 'false':
            return False
        elif value.lower() == 'null':
            return None
        
        # Parse arrays
        if value.startswith('[') and value.endswith(']'):
            return self._parse_array(value)
        
        # Parse objects
        if value.startswith('{') and value.endswith('}'):
            return self._parse_object(value)
        
        return value
    
    def _parse_array(self, value: str) -> List[Any]:
        """Parse array value"""
        content = value[1:-1].strip()
        if not content:
            return []
        
        items = []
        current = ""
        depth = 0
        in_string = False
        
        for char in content:
            if char == '"' and (not current or current[-1] != '\\'):
                in_string = not in_string
            
            if not in_string:
                if char in '[{(':
                    depth += 1
                elif char in ']}':
                    depth -= 1
                elif char == ',' and depth == 0:
                    items.append(self._parse_value(current.strip()))
                    current = ""
                    continue
            
            current += char
        
        if current.strip():
            items.append(self._parse_value(current.strip()))
        
        return items
    
    def _parse_object(self, value: str) -> Dict[str, Any]:
        """Parse object value"""
        content = value[1:-1].strip()
        if not content:
            return {}
        
        result = {}
        current_key = ""
        current_value = ""
        depth = 0
        in_string = False
        expecting_value = False
        
        for char in content:
            if char == '"' and (not current_value or current_value[-1] != '\\'):
                in_string = not in_string
            
            if not in_string:
                if char in '[{(':
                    depth += 1
                elif char in ']}':
                    depth -= 1
                elif char == ':' and depth == 0 and not expecting_value:
                    current_key = current_value.strip()
                    current_value = ""
                    expecting_value = True
                    continue
                elif char == ',' and depth == 0 and expecting_value:
                    result[current_key] = self._parse_value(current_value.strip())
                    current_key = ""
                    current_value = ""
                    expecting_value = False
                    continue
            
            current_value += char
        
        if current_key and current_value.strip():
            result[current_key] = self._parse_value(current_value.strip())
        
        return result
    
    def generate_syntax_tree(self, content: str) -> SyntaxNode:
        """Generate syntax tree from content"""
        tokens = self.tokenize(content)
        return self._build_syntax_tree(tokens)
    
    def _build_syntax_tree(self, tokens: List[Token]) -> SyntaxNode:
        """Build syntax tree from tokens"""
        root = SyntaxNode(
            node_type="root",
            value="",
            children=[],
            start_pos=0,
            end_pos=0,
            line=1,
            column=1
        )
        
        current_section = None
        
        for token in tokens:
            if token.type == TokenType.SECTION:
                current_section = SyntaxNode(
                    node_type="section",
                    value=token.value.split('{')[0].strip(),
                    children=[],
                    start_pos=token.start_pos,
                    end_pos=token.end_pos,
                    line=token.line,
                    column=token.column
                )
                root.children.append(current_section)
            elif current_section and token.type in [TokenType.VARIABLE, TokenType.FUNCTION]:
                child = SyntaxNode(
                    node_type=token.type.value,
                    value=token.value,
                    children=[],
                    start_pos=token.start_pos,
                    end_pos=token.end_pos,
                    line=token.line,
                    column=token.column
                )
                current_section.children.append(child)
        
        return root


# Shared module-level parser, created at import time with the default
# validation level (ValidationLevel.BASIC). The module-level helper functions
# below use it for tokenizing and for building syntax trees.
advanced_parser = AdvancedParser()


def parse_with_validation(content: str, validation_level: str = "basic") -> Tuple[Dict[str, Any], List[ParseError]]:
    """Parse and validate ``content`` with a fresh parser.

    Args:
        content: Source text to process.
        validation_level: Name of a ``ValidationLevel`` value; letter case is
            ignored.

    Returns:
        The ``(result, errors)`` pair from
        ``AdvancedParser.parse_with_validation``.

    Raises:
        ValueError: If ``validation_level`` does not name a ``ValidationLevel``.
    """
    chosen_level = ValidationLevel(validation_level.lower())
    return AdvancedParser(chosen_level).parse_with_validation(content)


def tokenize_content(content: str) -> List[Token]:
    """Tokenize ``content`` using the shared module-level parser."""
    token_stream = advanced_parser.tokenize(content)
    return token_stream


def validate_content(content: str, validation_level: str = "basic") -> List[ParseError]:
    """Tokenize ``content`` and run the validators for ``validation_level``.

    Tokenizing uses the shared module-level parser; the validators run on a
    fresh parser configured for the requested level.

    Args:
        content: Source text to check.
        validation_level: Name of a ``ValidationLevel`` value; letter case is
            ignored.

    Returns:
        The findings produced by ``AdvancedParser.validate_tokens``.

    Raises:
        ValueError: If ``validation_level`` does not name a ``ValidationLevel``.
    """
    token_stream = advanced_parser.tokenize(content)
    checker = AdvancedParser(ValidationLevel(validation_level.lower()))
    return checker.validate_tokens(token_stream)


def generate_syntax_tree(content: str) -> SyntaxNode:
    """Build a syntax tree for ``content`` using the shared module-level parser."""
    tree_root = advanced_parser.generate_syntax_tree(content)
    return tree_root


if __name__ == "__main__":
    # Small smoke-test demo: run the three public entry points on a sample
    # document and print summary counts.
    print("Advanced Parser for TuskLang Python SDK")
    print("=" * 50)

    # Sample document used by every step below.
    test_content = '''
    database {
        host = "localhost";
        port = 5432;
        credentials = {
            username = "admin";
            password = "secret123";
        };
    }
    
    api {
        endpoint = @env("API_ENDPOINT", "https://api.example.com");
        timeout = 30;
        retries = @if(production, 3, 1);
    }
    '''

    # Step 1: parse and validate at the "strict" level.
    print("\n1. Testing Parsing with Validation:")
    parsed, findings = parse_with_validation(test_content, "strict")
    print(f"Parse result: {len(parsed)} sections")
    print(f"Validation errors: {len(findings)}")

    for finding in findings:
        print(f"  - {finding.severity}: {finding.message} (line {finding.line})")

    # Step 2: tokenize and tally tokens per type, in first-seen order.
    print("\n2. Testing Tokenization:")
    token_stream = tokenize_content(test_content)
    print(f"Total tokens: {len(token_stream)}")

    tally = {}
    for tok in token_stream:
        kind = tok.type.value
        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1

    for kind in tally:
        print(f"  {kind}: {tally[kind]}")

    # Step 3: build the syntax tree and list its sections.
    print("\n3. Testing Syntax Tree:")
    tree_root = generate_syntax_tree(test_content)
    print(f"Root node: {tree_root.node_type}")
    print(f"Children: {len(tree_root.children)}")

    for section in tree_root.children:
        print(f"  Section: {section.value} ({len(section.children)} items)")

    print("\nAdvanced parsing testing completed!")