"""
AI Optimization Engine with Neural Architecture Search and Advanced Optimization
Supports hyperparameter tuning, neural architecture search, and model optimization.
"""

import asyncio
import json
import logging
import random
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score

# Neural Architecture Search components
# PyTorch is an optional dependency: TORCH_AVAILABLE records whether the import
# succeeded so torch-dependent code paths can check it before touching
# torch/nn/optim/DataLoader/TensorDataset (which are undefined otherwise).
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader, TensorDataset
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    # Printed rather than logged: the module logger is only created below.
    print("PyTorch not available. Neural Architecture Search features will be limited.")

# Configure root logging at INFO as an import-time side effect and create the
# module-level logger used by every class in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class OptimizationConfig:
    """Configuration for optimization tasks.

    Of these fields, the code visible in this file reads only ``task_type``,
    ``max_evaluations`` and ``optimization_algorithm`` (all in
    ``AIOptimizationEngine.optimize``). The remaining fields are stored on the
    object but nothing visible here consults them.
    """
    # Free-form label for the kind of run; copied into the engine's history.
    task_type: str  # 'hyperparameter', 'architecture', 'ensemble'
    # Intended optimization direction. NOTE(review): not read by any visible
    # code -- every optimizer in this file maximizes the objective.
    objective: str  # 'maximize', 'minimize'
    # Evaluation budget handed to the selected algorithm.
    max_evaluations: int = 100
    # NOTE(review): not read by any code visible in this file.
    timeout_seconds: Optional[int] = None
    # NOTE(review): not read by any code visible in this file.
    parallel_workers: int = 4
    # NOTE(review): not read by any code visible in this file.
    early_stopping_patience: int = 10
    # Algorithm selector. AIOptimizationEngine.optimize dispatches on
    # 'bayesian', 'genetic' and 'random'; any other value (including 'grid')
    # raises ValueError there.
    optimization_algorithm: str = 'bayesian'  # 'bayesian', 'genetic', 'random', 'grid'

@dataclass
class OptimizationResult:
    """Result from optimization process."""
    # Best parameter set found. The optimizers in this file initialise it to
    # None, so it can be None when no evaluation succeeded.
    best_params: Dict[str, Any]
    # Objective value for best_params (higher is better for every optimizer in
    # this file); float('-inf') when nothing was evaluated successfully.
    best_score: float
    # One dict per step. The keys depend on the algorithm: the genetic
    # optimizer records 'generation', 'best_score', 'avg_score', 'best_params';
    # Bayesian and random search record 'iteration', 'params', 'score',
    # 'timestamp'.
    optimization_history: List[Dict]
    # Number of objective evaluations reported by the algorithm.
    total_evaluations: int
    # Wall-clock duration in seconds, measured with time.time().
    optimization_time: float
    # Algorithm-specific metadata, e.g. {'algorithm': ..., 'generations': ...}.
    convergence_info: Dict[str, Any]

class GeneticAlgorithmOptimizer:
    """Genetic algorithm that maximizes an objective over a parameter space.

    A parameter space maps each parameter name to one of:

    * a ``list`` -- one element is drawn uniformly at random;
    * a 2-``tuple`` ``(low, high)`` -- an integer is drawn with
      ``random.randint`` when ``low`` is an ``int``, otherwise a float is
      drawn with ``random.uniform``;
    * anything else -- treated as a constant and copied through unchanged.
    """

    def __init__(self, population_size: int = 50, mutation_rate: float = 0.1,
                 crossover_rate: float = 0.7, elitism_rate: float = 0.2):
        """Store the GA hyperparameters.

        Args:
            population_size: Individuals evaluated per generation.
            mutation_rate: Per-parameter probability of resampling in ``mutate``.
            crossover_rate: Probability that two parents are recombined.
            elitism_rate: Fraction of ``population_size`` carried over unchanged
                into the next generation.
        """
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.elitism_rate = elitism_rate

    @staticmethod
    def _is_tunable(values: Any) -> bool:
        """Return True when ``values`` describes a choice list or a (low, high) range."""
        return isinstance(values, list) or (isinstance(values, tuple) and len(values) == 2)

    @staticmethod
    def _sample_value(values: Any) -> Any:
        """Draw one value from a single parameter-space entry."""
        if isinstance(values, list):
            return random.choice(values)
        if isinstance(values, tuple) and len(values) == 2:
            low, high = values
            if isinstance(low, int):
                return random.randint(low, high)
            return random.uniform(low, high)
        # Not a searchable entry: treat it as a fixed value.
        return values

    def create_random_individual(self, param_space: Dict) -> Dict:
        """Create a random individual (parameter set) covering every key of ``param_space``."""
        return {param: self._sample_value(values) for param, values in param_space.items()}

    def mutate(self, individual: Dict, param_space: Dict) -> Dict:
        """Return a copy of ``individual`` with some parameters resampled.

        Each searchable parameter is resampled with probability
        ``mutation_rate``; constant entries are never touched. The input dict
        is not modified.
        """
        mutated = individual.copy()
        for param, values in param_space.items():
            if random.random() < self.mutation_rate and self._is_tunable(values):
                mutated[param] = self._sample_value(values)
        return mutated

    def crossover(self, parent1: Dict, parent2: Dict) -> Tuple[Dict, Dict]:
        """Create two offspring through single-point crossover.

        With probability ``1 - crossover_rate`` the parents are returned as
        plain copies. Parents with fewer than two parameters have no valid cut
        point and are likewise returned as copies.
        """
        child1, child2 = parent1.copy(), parent2.copy()
        keys = list(parent1.keys())

        # A cut point needs at least one gene on each side; randint(1, 0)
        # would raise for single-parameter individuals.
        if len(keys) < 2 or random.random() > self.crossover_rate:
            return child1, child2

        crossover_point = random.randint(1, len(keys) - 1)
        for key in keys[crossover_point:]:
            child1[key], child2[key] = child2[key], child1[key]

        return child1, child2

    def _select_elites(self, population: List[Dict], fitness_scores: List[float]) -> List[Dict]:
        """Return the top ``elitism_rate`` share of ``population`` (ascending by fitness)."""
        elite_count = min(int(self.population_size * self.elitism_rate), len(population))
        # Guard the zero case explicitly: slicing with [-0:] would select the
        # whole population and suppress reproduction entirely.
        if elite_count <= 0:
            return []
        elite_indices = np.argsort(fitness_scores)[-elite_count:]
        return [population[i] for i in elite_indices]

    def optimize(self, objective_function: Callable, param_space: Dict,
                 generations: int = 50) -> "OptimizationResult":
        """Run genetic algorithm optimization.

        Args:
            objective_function: Called with one parameter dict; must return a
                score where higher is better. Any exception it raises is
                logged and the individual is scored ``-inf``.
            param_space: Search space (see the class docstring).
            generations: Number of generations to evaluate.

        Returns:
            OptimizationResult whose history holds one entry per generation.
            ``best_params`` is None when no evaluation beat ``-inf``.
        """
        start_time = time.time()
        failed_score = float('-inf')

        # Initialize population
        population = [self.create_random_individual(param_space)
                      for _ in range(self.population_size)]

        history = []
        best_individual = None
        best_score = failed_score

        for generation in range(generations):
            # Evaluate population; exactly one fitness entry per individual.
            fitness_scores = []
            for individual in population:
                try:
                    score = objective_function(individual)
                    improved = score > best_score
                except Exception as exc:
                    logger.warning("Objective evaluation failed for %s: %s", individual, exc)
                    score, improved = failed_score, False

                fitness_scores.append(score)
                if improved:
                    best_score = score
                    best_individual = individual.copy()

            # Record history
            generation_best = max(fitness_scores)
            valid_scores = [s for s in fitness_scores if s != failed_score]
            # NaN when every evaluation failed (avoids np.mean([]) warnings).
            generation_avg = float(np.mean(valid_scores)) if valid_scores else float('nan')

            history.append({
                'generation': generation,
                'best_score': generation_best,
                'avg_score': generation_avg,
                'best_params': population[fitness_scores.index(generation_best)].copy()
            })

            logger.info(f"Generation {generation}: Best={generation_best:.4f}, Avg={generation_avg:.4f}")

            # Selection and reproduction: elites first, then offspring.
            new_population = self._select_elites(population, fitness_scores)

            while len(new_population) < self.population_size:
                parent1 = self.tournament_selection(population, fitness_scores)
                parent2 = self.tournament_selection(population, fitness_scores)

                child1, child2 = self.crossover(parent1, parent2)
                child1 = self.mutate(child1, param_space)
                child2 = self.mutate(child2, param_space)

                new_population.extend([child1, child2])

            # Offspring arrive in pairs, so trim a possible overshoot of one.
            population = new_population[:self.population_size]

        optimization_time = time.time() - start_time

        return OptimizationResult(
            best_params=best_individual,
            best_score=best_score,
            optimization_history=history,
            total_evaluations=generations * self.population_size,
            optimization_time=optimization_time,
            convergence_info={'algorithm': 'genetic', 'generations': generations}
        )

    def tournament_selection(self, population: List[Dict], fitness_scores: List[float],
                             tournament_size: int = 3) -> Dict:
        """Select an individual using tournament selection.

        ``tournament_size`` distinct individuals are drawn (fewer when the
        population is smaller) and the fittest of them is returned.

        Raises:
            ValueError: If ``population`` is empty.
        """
        if not population:
            raise ValueError("Cannot run a tournament on an empty population")

        contenders = random.sample(range(len(population)),
                                   min(tournament_size, len(population)))
        winner_index = max(contenders, key=lambda i: fitness_scores[i])
        return population[winner_index]

class NeuralArchitectureSearch:
    """Neural Architecture Search for automatic neural network design.

    Candidate architectures are fully connected networks described by a config
    dict (see ``_define_search_space``). ``search`` explores the space with
    ``GeneticAlgorithmOptimizer`` and scores each candidate by training it and
    measuring validation performance (higher is better).
    """

    def __init__(self, input_dim: int, output_dim: int, task_type: str = 'classification'):
        """Store problem dimensions and build the search space.

        Args:
            input_dim: Number of input features.
            output_dim: Width of the final linear layer.
            task_type: ``'classification'`` selects cross-entropy loss and
                accuracy scoring; any other value selects MSE loss and
                negative-MSE scoring.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.task_type = task_type
        self.search_space = self._define_search_space()

    def _define_search_space(self) -> Dict:
        """Define the neural architecture search space.

        Lists are categorical choices and 2-tuples are ``(low, high)`` ranges,
        following the ``GeneticAlgorithmOptimizer`` conventions.
        """
        return {
            'num_layers': (2, 8),
            'layer_sizes': [32, 64, 128, 256, 512, 1024],
            'activation_functions': ['relu', 'tanh', 'sigmoid', 'leaky_relu'],
            'dropout_rates': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
            'batch_norm': [True, False],
            'learning_rate': (0.0001, 0.1),
            'optimizer': ['adam', 'sgd', 'rmsprop'],
            'batch_size': [16, 32, 64, 128, 256]
        }

    def _failure_score(self) -> float:
        """Score reported for an architecture whose evaluation raised.

        Classification scores are accuracies, so 0.0 is the worst value.
        Regression scores are negative losses, where 0.0 would be a perfect
        score, so failures must rank below every real result.
        """
        return 0.0 if self.task_type == 'classification' else float('-inf')

    @staticmethod
    def _align_outputs(outputs: Any, targets: Any) -> Any:
        """Reshape ``outputs`` to ``targets``' shape when only the layout differs.

        This prevents MSE between ``(B, 1)`` predictions and ``(B,)`` targets
        from silently broadcasting to a ``(B, B)`` comparison.
        """
        if outputs.shape != targets.shape and outputs.numel() == targets.numel():
            return outputs.reshape(targets.shape)
        return outputs

    def create_model_from_config(self, config: Dict) -> Any:
        """Create a PyTorch model from architecture configuration.

        ``config['num_layers']`` hidden blocks are built, each consisting of
        Linear, optional BatchNorm1d, an activation, and optional Dropout,
        followed by a final Linear layer of width ``output_dim``. For
        ``layer_sizes``, ``activation_functions`` and ``dropout_rates`` a list
        value means "pick one at random per layer"; a scalar is used as-is.
        An unrecognised activation name adds no activation layer.

        Raises:
            ImportError: If PyTorch is not installed.
        """
        if not TORCH_AVAILABLE:
            raise ImportError("PyTorch is required for Neural Architecture Search")

        activation_layers = {
            'relu': nn.ReLU,
            'tanh': nn.Tanh,
            'sigmoid': nn.Sigmoid,
            'leaky_relu': nn.LeakyReLU,
        }

        def pick(value):
            """Resolve a config entry that is either a scalar or a list of options."""
            return random.choice(value) if isinstance(value, list) else value

        class DynamicNN(nn.Module):
            def __init__(self, input_dim, output_dim, config):
                super().__init__()

                layers = []
                current_dim = input_dim

                # Build hidden layers
                for _ in range(config['num_layers']):
                    layer_size = pick(config['layer_sizes'])
                    layers.append(nn.Linear(current_dim, layer_size))

                    if config.get('batch_norm', False):
                        layers.append(nn.BatchNorm1d(layer_size))

                    activation = pick(config.get('activation_functions', 'relu'))
                    activation_cls = activation_layers.get(activation)
                    if activation_cls is not None:
                        layers.append(activation_cls())

                    dropout_rate = pick(config.get('dropout_rates', 0.0))
                    if dropout_rate > 0:
                        layers.append(nn.Dropout(dropout_rate))

                    current_dim = layer_size

                # Output layer
                layers.append(nn.Linear(current_dim, output_dim))

                self.model = nn.Sequential(*layers)

            def forward(self, x):
                return self.model(x)

        return DynamicNN(self.input_dim, self.output_dim, config)

    def _evaluate_with_sklearn(self, config: Dict, X_train: np.ndarray, y_train: np.ndarray,
                               X_val: np.ndarray, y_val: np.ndarray, epochs: int) -> float:
        """Fallback evaluation with a scikit-learn MLP when PyTorch is missing.

        Only ``num_layers`` and ``learning_rate`` are taken from ``config``;
        hidden widths are drawn at random from a small fixed set. Regression
        tasks use ``MLPRegressor`` and return negative MSE so that scores are
        comparable with the PyTorch path.
        """
        from sklearn.neural_network import MLPClassifier, MLPRegressor

        hidden_layer_sizes = tuple(random.choice([32, 64, 128])
                                   for _ in range(config.get('num_layers', 3)))
        is_classification = self.task_type == 'classification'
        estimator_cls = MLPClassifier if is_classification else MLPRegressor

        mlp = estimator_cls(
            hidden_layer_sizes=hidden_layer_sizes,
            learning_rate_init=config.get('learning_rate', 0.001),
            max_iter=epochs,
            early_stopping=True,
            validation_fraction=0.1,
            random_state=42
        )
        mlp.fit(X_train, y_train)

        if is_classification:
            return mlp.score(X_val, y_val)
        return -mean_squared_error(y_val, mlp.predict(X_val))

    def evaluate_architecture(self, config: Dict, X_train: np.ndarray, y_train: np.ndarray,
                              X_val: np.ndarray, y_val: np.ndarray, epochs: int = 50) -> float:
        """Evaluate a neural architecture configuration.

        Trains the model described by ``config`` for ``epochs`` epochs on the
        training split and scores it on the validation split.

        Returns:
            Validation accuracy for classification, negative validation MSE
            otherwise. When the PyTorch evaluation raises, the error is logged
            and ``_failure_score()`` is returned instead.
        """
        if not TORCH_AVAILABLE:
            return self._evaluate_with_sklearn(config, X_train, y_train, X_val, y_val, epochs)

        is_classification = self.task_type == 'classification'

        try:
            model = self.create_model_from_config(config)

            # Prepare data: class indices must be int64 for CrossEntropyLoss.
            target_dtype = torch.long if is_classification else torch.float32
            X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
            y_train_tensor = torch.as_tensor(y_train, dtype=target_dtype)
            X_val_tensor = torch.as_tensor(X_val, dtype=torch.float32)
            y_val_tensor = torch.as_tensor(y_val, dtype=target_dtype)

            train_loader = DataLoader(
                TensorDataset(X_train_tensor, y_train_tensor),
                batch_size=config.get('batch_size', 32),
                shuffle=True,
            )

            # Setup optimizer and loss; unknown optimizer names use RMSprop.
            optimizer_classes = {'adam': optim.Adam, 'sgd': optim.SGD}
            optimizer_cls = optimizer_classes.get(config.get('optimizer', 'adam'), optim.RMSprop)
            optimizer = optimizer_cls(model.parameters(), lr=config.get('learning_rate', 0.001))

            criterion = nn.CrossEntropyLoss() if is_classification else nn.MSELoss()

            # Training loop
            model.train()
            for _ in range(epochs):
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    if not is_classification:
                        outputs = self._align_outputs(outputs, batch_y)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()

            # Evaluation
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val_tensor)

                if is_classification:
                    predicted = val_outputs.argmax(dim=1)
                    return (predicted == y_val_tensor).sum().item() / len(y_val_tensor)

                val_loss = criterion(self._align_outputs(val_outputs, y_val_tensor), y_val_tensor)
                return -val_loss.item()  # Return negative loss for maximization

        except Exception as e:
            logger.error(f"Error evaluating architecture: {str(e)}")
            return self._failure_score()

    def search(self, X_train: np.ndarray, y_train: np.ndarray,
               X_val: np.ndarray, y_val: np.ndarray,
               search_iterations: int = 50) -> "OptimizationResult":
        """Perform neural architecture search.

        Runs a default-configured ``GeneticAlgorithmOptimizer`` for
        ``search_iterations`` generations, scoring every candidate with
        ``evaluate_architecture``.
        """
        logger.info("Starting Neural Architecture Search...")

        def objective_function(config):
            return self.evaluate_architecture(config, X_train, y_train, X_val, y_val)

        result = GeneticAlgorithmOptimizer().optimize(
            objective_function, self.search_space, generations=search_iterations
        )

        logger.info(f"Neural Architecture Search complete. Best score: {result.best_score:.4f}")

        return result

class AIOptimizationEngine:
    """
    Comprehensive AI Optimization Engine with multiple optimization algorithms.
    Supports hyperparameter tuning, neural architecture search, and model optimization.

    Parameter spaces follow the ``GeneticAlgorithmOptimizer`` conventions: a
    list is a categorical choice, a 2-tuple is a ``(low, high)`` range (integer
    when ``low`` is an int), and any other value is a constant passed through
    to the objective unchanged. Every algorithm maximizes its objective.
    """

    def __init__(self, n_jobs: int = -1):
        """Create the engine with an empty run history and a default genetic optimizer."""
        self.n_jobs = n_jobs
        self.optimization_history = []
        self.genetic_optimizer = GeneticAlgorithmOptimizer()

    def bayesian_optimization(self, objective_function: Callable, param_space: Dict,
                              n_iterations: int = 100) -> "OptimizationResult":
        """Bayesian optimization using Gaussian Process.

        Uses scikit-optimize's ``gp_minimize`` on the negated objective. If
        scikit-optimize is not installed, falls back to
        ``genetic_optimization`` with the same budget.

        An evaluation that raises or returns a non-finite value is given a
        large finite penalty internally and is reported with score ``-inf``
        in the returned history.
        """
        try:
            from skopt import gp_minimize
            from skopt.space import Real, Integer, Categorical
            from skopt.utils import use_named_args
        except ImportError:
            logger.warning("scikit-optimize not available, falling back to genetic algorithm")
            return self.genetic_optimization(objective_function, param_space, n_iterations)

        # Convert parameter space to skopt format. Names are recorded only for
        # entries that become a dimension, so they stay aligned with result.x.
        dimensions = []
        param_names = []
        constants = {}

        for param, values in param_space.items():
            if isinstance(values, list):
                dimension = Categorical(values, name=param)
            elif isinstance(values, tuple) and len(values) == 2:
                if isinstance(values[0], int):
                    dimension = Integer(values[0], values[1], name=param)
                else:
                    dimension = Real(values[0], values[1], name=param)
            else:
                constants[param] = values
                continue
            dimensions.append(dimension)
            param_names.append(param)

        # NOTE(review): assumes the GP surrogate rejects inf/NaN targets, so
        # failures are mapped to a finite sentinel loss; confirm against skopt.
        failure_loss = 1e12

        @use_named_args(dimensions)
        def objective(**params):
            try:
                loss = -float(objective_function({**constants, **params}))
            except Exception as e:
                logger.error(f"Error in objective function: {str(e)}")
                return failure_loss
            return loss if np.isfinite(loss) else failure_loss

        def to_score(loss):
            """Convert a minimized loss back to a score; failures become -inf."""
            return float('-inf') if loss >= failure_loss else -loss

        start_time = time.time()
        result = gp_minimize(objective, dimensions, n_calls=n_iterations, random_state=42)
        optimization_time = time.time() - start_time

        best_params = {**constants, **dict(zip(param_names, result.x))}
        best_score = to_score(result.fun)

        history = [
            {
                'iteration': i,
                'params': {**constants, **dict(zip(param_names, params))},
                'score': to_score(loss),
                'timestamp': datetime.now().isoformat()
            }
            for i, (params, loss) in enumerate(zip(result.x_iters, result.func_vals))
        ]

        return OptimizationResult(
            best_params=best_params,
            best_score=best_score,
            optimization_history=history,
            total_evaluations=n_iterations,
            optimization_time=optimization_time,
            convergence_info={'algorithm': 'bayesian', 'iterations': n_iterations}
        )

    def genetic_optimization(self, objective_function: Callable, param_space: Dict,
                             max_evaluations: int = 100) -> "OptimizationResult":
        """Genetic algorithm optimization.

        The budget is converted to whole generations of the shared genetic
        optimizer. At least one generation always runs, so the number of
        evaluations can exceed ``max_evaluations`` when it is smaller than the
        population size; the returned ``total_evaluations`` is the true count.
        """
        generations = max(1, max_evaluations // self.genetic_optimizer.population_size)
        return self.genetic_optimizer.optimize(objective_function, param_space, generations)

    def random_search_optimization(self, objective_function: Callable, param_space: Dict,
                                   max_evaluations: int = 100) -> "OptimizationResult":
        """Random search optimization.

        Draws ``max_evaluations`` independent parameter sets and keeps the
        best. Evaluations that raise are logged and left out of the history;
        ``total_evaluations`` still counts them as attempts.
        """
        start_time = time.time()

        history = []
        best_params = None
        best_score = float('-inf')

        for iteration in range(max_evaluations):
            # Same sampler as the genetic optimizer, so constant entries in
            # the space are passed through instead of being dropped.
            params = self.genetic_optimizer.create_random_individual(param_space)

            try:
                score = objective_function(params)

                if score > best_score:
                    best_score = score
                    best_params = params.copy()

                history.append({
                    'iteration': iteration,
                    'params': params.copy(),
                    'score': score,
                    'timestamp': datetime.now().isoformat()
                })

                if iteration % 20 == 0:
                    logger.info(f"Random Search Iteration {iteration}: Current best = {best_score:.4f}")

            except Exception as e:
                logger.error(f"Error in iteration {iteration}: {str(e)}")

        optimization_time = time.time() - start_time

        return OptimizationResult(
            best_params=best_params,
            best_score=best_score,
            optimization_history=history,
            total_evaluations=max_evaluations,
            optimization_time=optimization_time,
            convergence_info={'algorithm': 'random_search', 'iterations': max_evaluations}
        )

    @staticmethod
    def _flip_scores(result: Any) -> Any:
        """Negate every score in ``result`` in place and return it.

        Used to report results of a minimization run (executed internally as
        maximization of the negated objective) in the caller's own units.
        """
        result.best_score = -result.best_score
        for entry in result.optimization_history:
            for key in ('score', 'best_score', 'avg_score'):
                if key in entry:
                    entry[key] = -entry[key]
        return result

    def optimize(self, objective_function: Callable, param_space: Dict,
                 config: "OptimizationConfig") -> "OptimizationResult":
        """Main optimization function that routes to appropriate algorithm.

        Honors ``config.objective``: for ``'minimize'`` the objective is
        negated before it is handed to the (maximizing) algorithm, and all
        reported scores are flipped back, so ``best_score`` is the smallest
        objective value found. Any other value maximizes.

        A summary of the run is appended to ``self.optimization_history``.

        Raises:
            ValueError: If ``config.optimization_algorithm`` is not one of
                'bayesian', 'genetic' or 'random'.
        """
        logger.info(f"Starting optimization with {config.optimization_algorithm} algorithm...")

        runners = {
            'bayesian': self.bayesian_optimization,
            'genetic': self.genetic_optimization,
            'random': self.random_search_optimization,
        }
        runner = runners.get(config.optimization_algorithm)
        if runner is None:
            raise ValueError(f"Unknown optimization algorithm: {config.optimization_algorithm}")

        minimizing = config.objective == 'minimize'
        if minimizing:
            def target(params):
                return -objective_function(params)
        else:
            target = objective_function

        result = runner(target, param_space, config.max_evaluations)
        if minimizing:
            result = self._flip_scores(result)

        # Store in history (plain float keeps the record JSON-serializable).
        self.optimization_history.append({
            'timestamp': datetime.now().isoformat(),
            'algorithm': config.optimization_algorithm,
            'task_type': config.task_type,
            'result': {
                'best_score': float(result.best_score),
                'total_evaluations': result.total_evaluations,
                'optimization_time': result.optimization_time
            }
        })

        logger.info(f"Optimization complete. Best score: {result.best_score:.4f} "
                   f"(Time: {result.optimization_time:.2f}s, Evaluations: {result.total_evaluations})")

        return result

    def neural_architecture_search(self, X_train: np.ndarray, y_train: np.ndarray,
                                   X_val: np.ndarray, y_val: np.ndarray,
                                   task_type: str = 'classification',
                                   search_iterations: int = 50) -> "OptimizationResult":
        """Perform neural architecture search.

        The network's output width is the number of distinct training labels
        for classification. For other task types it is the number of target
        columns (1 for a 1-D target array).
        """
        if task_type == 'classification':
            output_dim = len(np.unique(y_train))
        else:
            targets = np.asarray(y_train)
            output_dim = 1 if targets.ndim == 1 else targets.shape[1]

        nas = NeuralArchitectureSearch(X_train.shape[1], output_dim, task_type)
        return nas.search(X_train, y_train, X_val, y_val, search_iterations)

    @staticmethod
    def _weighted_objective(objectives: List[Callable], weights: List[float]) -> Callable:
        """Build a single objective returning the weighted sum of ``objectives``.

        Exceptions from any objective propagate to the optimizer, which marks
        the whole evaluation as failed. Substituting a 0.0 score here could
        rank a failure above legitimately negative scores.
        """
        def combined_objective(params):
            return sum(weight * objective(params)
                       for weight, objective in zip(weights, objectives))

        return combined_objective

    def multi_objective_optimization(self, objectives: List[Callable], param_space: Dict,
                                     weights: Optional[List[float]] = None,
                                     max_evaluations: int = 100) -> "OptimizationResult":
        """Multi-objective optimization with weighted combination.

        Maximizes ``sum(w_i * objective_i(params))`` with the Bayesian
        algorithm. ``weights`` defaults to 1.0 for every objective.

        Raises:
            ValueError: If ``weights`` and ``objectives`` differ in length.
        """
        if weights is None:
            weights = [1.0] * len(objectives)

        if len(weights) != len(objectives):
            raise ValueError("Number of weights must match number of objectives")

        config = OptimizationConfig(
            task_type='multi_objective',
            objective='maximize',
            max_evaluations=max_evaluations,
            optimization_algorithm='bayesian'
        )

        return self.optimize(self._weighted_objective(objectives, weights), param_space, config)

    def save_optimization_history(self, filepath: Union[str, Path]) -> None:
        """Save optimization history to file as indented JSON."""
        filepath = Path(filepath)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.optimization_history, f, indent=2)
        logger.info(f"Optimization history saved to {filepath}")

    def get_optimization_summary(self) -> Dict:
        """Get summary of all optimization runs.

        Returns a dict with run count, distinct algorithms (sorted), best and
        mean of the per-run best scores, and total time and evaluations; or a
        single ``"message"`` entry when no run has been recorded.
        """
        if not self.optimization_history:
            return {"message": "No optimization history available"}

        run_results = [run['result'] for run in self.optimization_history]
        best_scores = [res['best_score'] for res in run_results]

        return {
            'total_optimizations': len(self.optimization_history),
            'algorithms_used': sorted({run['algorithm'] for run in self.optimization_history}),
            'best_overall_score': max(best_scores),
            'average_score': float(np.mean(best_scores)),
            'total_optimization_time': sum(res['optimization_time'] for res in run_results),
            'total_evaluations': sum(res['total_evaluations'] for res in run_results)
        }

# Example usage and testing
async def main():
    """Example usage of the AI Optimization Engine.

    Runs five demos on a synthetic binary classification problem:
    hyperparameter tuning of a random forest, neural architecture search,
    weighted multi-objective tuning, a comparison of the three algorithms,
    and a summary of all recorded runs. Results are printed to stdout.

    Declared ``async`` because the script entry point drives it with
    ``asyncio.run``; the body itself awaits nothing.
    """
    print("=== AI Optimization Engine Demo ===")

    # Create sample data
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)
    # The held-out test split is not used by the demo; only train/val are.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Initialize optimization engine
    opt_engine = AIOptimizationEngine()

    # Test 1: Hyperparameter optimization
    print("\n1. Testing hyperparameter optimization...")

    def rf_objective(params):
        """Mean 3-fold cross-validated accuracy of a random forest built from ``params``."""
        rf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)
        scores = cross_val_score(rf, X_train, y_train, cv=3, n_jobs=-1)
        return scores.mean()

    param_space = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, 20, None],
        'min_samples_split': (2, 20),
        'min_samples_leaf': (1, 10)
    }

    config = OptimizationConfig(
        task_type='hyperparameter',
        objective='maximize',
        max_evaluations=30,
        optimization_algorithm='bayesian'
    )

    result = opt_engine.optimize(rf_objective, param_space, config)
    print(f"Best hyperparameters: {result.best_params}")
    print(f"Best score: {result.best_score:.4f}")

    # Test 2: Neural Architecture Search
    print("\n2. Testing Neural Architecture Search...")

    nas_result = opt_engine.neural_architecture_search(
        X_train, y_train, X_val, y_val,
        task_type='classification',
        search_iterations=20
    )

    print(f"Best architecture score: {nas_result.best_score:.4f}")
    print(f"Best architecture config: {nas_result.best_params}")

    # Test 3: Multi-objective optimization
    print("\n3. Testing multi-objective optimization...")

    # max_depth=None means unbounded trees; for the complexity penalty it is
    # treated as deeper than every finite option in param_space (demo choice).
    unbounded_depth_proxy = 40

    def complexity_objective(params):
        """Penalize complexity (favor simpler models) via the negative log of trees * depth."""
        depth = params.get('max_depth', 10)
        if depth is None:
            depth = unbounded_depth_proxy
        complexity = params.get('n_estimators', 100) * depth
        return -np.log(complexity)  # Negative log to minimize complexity

    multi_result = opt_engine.multi_objective_optimization(
        [rf_objective, complexity_objective],  # accuracy objective is rf_objective
        param_space,
        weights=[0.8, 0.2],  # 80% accuracy, 20% simplicity
        max_evaluations=30
    )

    print(f"Multi-objective best parameters: {multi_result.best_params}")
    print(f"Multi-objective best score: {multi_result.best_score:.4f}")

    # Test 4: Algorithm comparison
    print("\n4. Testing different optimization algorithms...")

    comparison_results = {}

    for algorithm in ('bayesian', 'genetic', 'random'):
        # A fresh config per run avoids mutating the one used in Test 1.
        comparison_config = OptimizationConfig(
            task_type='hyperparameter',
            objective='maximize',
            max_evaluations=20,
            optimization_algorithm=algorithm
        )

        result = opt_engine.optimize(rf_objective, param_space, comparison_config)
        comparison_results[algorithm] = {
            'best_score': result.best_score,
            'optimization_time': result.optimization_time,
            'total_evaluations': result.total_evaluations
        }

    print("\nAlgorithm Comparison:")
    for algo, metrics in comparison_results.items():
        print(f"{algo}: Score={metrics['best_score']:.4f}, "
              f"Time={metrics['optimization_time']:.2f}s, "
              f"Evaluations={metrics['total_evaluations']}")

    # Test 5: Optimization summary
    print("\n5. Optimization summary:")
    summary = opt_engine.get_optimization_summary()
    for key, value in summary.items():
        print(f"{key}: {value}")

    print("\n=== AI Optimization Engine Demo Complete ===")

# Script entry point: main() is a coroutine function, so it is driven through
# asyncio.run rather than called directly.
if __name__ == "__main__":
    asyncio.run(main())