"""
TuskLang Python SDK - Deep Learning Engine (g13.2)
Production deep learning with TensorFlow/PyTorch integration and GPU acceleration
"""

import asyncio
import json
import logging
import os
import time
import uuid
from dataclasses import dataclass, field, asdict
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Set, Any, Callable, Union, Tuple
import tempfile
import pickle

try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False

try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers, models, optimizers, callbacks
    TENSORFLOW_AVAILABLE = True
except ImportError:
    TENSORFLOW_AVAILABLE = False

try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader, TensorDataset
    PYTORCH_AVAILABLE = True
except ImportError:
    PYTORCH_AVAILABLE = False


class Framework(Enum):
    """Deep learning backend used to build and train a model.

    ``AUTO`` is a placeholder: ``DeepLearningEngine.create_model`` resolves it
    to TensorFlow when that import succeeded, otherwise to PyTorch.
    """
    TENSORFLOW = "tensorflow"
    PYTORCH = "pytorch"
    AUTO = "auto"


class NetworkType(Enum):
    """High-level network family declared by a ``NetworkArchitecture``.

    Not every member has a dedicated builder in this module: the PyTorch
    builder dispatches on FEEDFORWARD, CNN and LSTM and treats every other
    member as FEEDFORWARD.
    """
    FEEDFORWARD = "feedforward"
    CNN = "cnn"
    RNN = "rnn"
    LSTM = "lstm"
    GRU = "gru"
    TRANSFORMER = "transformer"
    AUTOENCODER = "autoencoder"
    GAN = "gan"


class TaskType(Enum):
    """Kind of learning task a model is created for.

    The engine's PyTorch training path and both prediction paths only
    distinguish CLASSIFICATION from "anything else".
    """
    CLASSIFICATION = "classification"
    REGRESSION = "regression"
    GENERATION = "generation"
    SEGMENTATION = "segmentation"
    DETECTION = "detection"


class OptimizationType(Enum):
    """Optimizer algorithm selected through ``NetworkArchitecture.optimizer``."""
    SGD = "sgd"
    ADAM = "adam"
    ADAMW = "adamw"
    RMSPROP = "rmsprop"
    ADAGRAD = "adagrad"


@dataclass
class LayerConfig:
    """Configuration for a single layer of a ``NetworkArchitecture``.

    One flat record is shared by every layer kind; each builder reads only the
    fields that are relevant to ``layer_type`` and ignores the others.
    """
    # Layer kind; the TensorFlow builder recognizes "dense", "conv2d",
    # "maxpool2d", "flatten", "lstm" and "gru".
    layer_type: str = "dense"
    units: int = 64  # width of dense / lstm / gru layers
    activation: str = "relu"
    dropout_rate: float = 0.0  # > 0 makes the TensorFlow builder append a Dropout layer
    batch_norm: bool = False  # True makes the TensorFlow builder append BatchNormalization

    # CNN specific
    filters: int = 32
    kernel_size: Tuple[int, ...] = (3, 3)  # also passed as the pool size for "maxpool2d"
    strides: Tuple[int, ...] = (1, 1)
    padding: str = "same"

    # RNN specific
    return_sequences: bool = False

    # Custom parameters (not read by any code visible in this module)
    parameters: Dict[str, Any] = field(default_factory=dict)


@dataclass
class NetworkArchitecture:
    """Full description of a network: layers, compilation and training settings.

    Instances are passed to the framework builders to construct a model and
    are stored on ``DLModel`` so the training code can read batch size, epoch
    count and optimizer settings from them.
    """
    id: str = field(default_factory=lambda: str(uuid.uuid4()))  # random UUID per instance
    name: str = ""
    network_type: NetworkType = NetworkType.FEEDFORWARD

    # Architecture
    layers: List[LayerConfig] = field(default_factory=list)  # hidden layers, in order
    input_shape: Tuple[int, ...] = (784,)  # per-sample shape, without the batch dimension
    output_units: int = 10  # width of the final output layer
    output_activation: str = "softmax"  # applied by the TensorFlow builder's output layer

    # Compilation settings
    optimizer: OptimizationType = OptimizationType.ADAM
    learning_rate: float = 0.001
    loss_function: str = "sparse_categorical_crossentropy"  # passed to Keras compile()
    metrics: List[str] = field(default_factory=lambda: ["accuracy"])  # passed to Keras compile()

    # Training settings
    batch_size: int = 32
    epochs: int = 10
    # Used by the TensorFlow trainer only when no explicit validation data is given.
    validation_split: float = 0.2

    # Callbacks (each flag enables one Keras callback in the TensorFlow builder)
    early_stopping: bool = True
    reduce_lr: bool = True
    model_checkpoint: bool = True

    # Hardware
    use_gpu: bool = True  # reported (combined with GPU availability) in model summaries
    mixed_precision: bool = False  # NOTE(review): not read by any code visible here

    # Metadata
    created_at: datetime = field(default_factory=datetime.now)


@dataclass
class TrainingMetrics:
    """Metrics snapshot recorded once per training epoch.

    The training code appends one instance per epoch to
    ``DLModel.training_history``.
    """
    epoch: int = 0  # zero-based epoch index as supplied by the training loop
    train_loss: float = 0.0
    train_accuracy: float = 0.0
    val_loss: float = 0.0
    val_accuracy: float = 0.0
    learning_rate: float = 0.001
    timestamp: datetime = field(default_factory=datetime.now)  # creation time of the record


@dataclass
class DLModel:
    """Engine-side record of a model: configuration, state, history and backend object.

    ``DeepLearningEngine`` stores these in its ``models`` dict keyed by ``id``
    and updates the state and performance fields during training.
    """
    id: str = field(default_factory=lambda: str(uuid.uuid4()))  # key used in the engine registry
    name: str = ""
    description: str = ""

    # Configuration
    architecture: NetworkArchitecture = field(default_factory=NetworkArchitecture)
    framework: Framework = Framework.AUTO
    task_type: TaskType = TaskType.CLASSIFICATION

    # Model state
    is_compiled: bool = False  # set once a backend model object has been built
    is_trained: bool = False  # set from the training result, or after a successful load
    model_path: Optional[str] = None  # checkpoint path or last save/load path

    # Training history
    training_history: List[TrainingMetrics] = field(default_factory=list)  # one entry per epoch
    best_accuracy: float = 0.0  # highest validation accuracy seen so far
    best_loss: float = float('inf')  # lowest validation loss seen so far

    # Performance
    final_train_accuracy: float = 0.0
    final_val_accuracy: float = 0.0
    training_time: float = 0.0  # wall-clock seconds measured around the last training run

    # Framework-specific model objects (not serializable)
    tf_model: Optional[Any] = None  # Keras model when framework is TENSORFLOW
    torch_model: Optional[Any] = None  # nn.Module when framework is PYTORCH

    # Metadata
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)


class TensorFlowModelBuilder:
    """TensorFlow model builder"""
    
    def __init__(self):
        self.logger = logging.getLogger(__name__)
    
    def build_model(self, architecture: NetworkArchitecture) -> Any:
        """Build an uncompiled Keras ``Sequential`` model from ``architecture``.

        Layers are added in the order given by ``architecture.layers``. When the
        first configured layer is a dense, conv2d, lstm or gru layer it also
        receives ``architecture.input_shape``. Every configured layer may be
        followed by ``Dropout`` (when ``dropout_rate > 0``) and then by
        ``BatchNormalization`` (when ``batch_norm`` is set). A final ``Dense``
        output layer is always appended.

        Layer types that are not recognized add no layer; a warning is logged
        so a misspelled or unsupported type does not silently shrink the model.

        Args:
            architecture: Layer list, input shape and output settings.

        Returns:
            The assembled, not yet compiled, ``Sequential`` model.

        Raises:
            RuntimeError: If TensorFlow could not be imported.
        """
        if not TENSORFLOW_AVAILABLE:
            raise RuntimeError("TensorFlow not available")

        model = models.Sequential()

        for index, layer_config in enumerate(architecture.layers):
            # Only the first layer carries the input shape; later layers infer
            # theirs from the layer before them.
            shape_kwargs = {"input_shape": architecture.input_shape} if index == 0 else {}
            kind = layer_config.layer_type

            if kind == "dense":
                model.add(layers.Dense(
                    layer_config.units,
                    activation=layer_config.activation,
                    **shape_kwargs
                ))
            elif kind == "conv2d":
                model.add(layers.Conv2D(
                    layer_config.filters,
                    layer_config.kernel_size,
                    strides=layer_config.strides,
                    padding=layer_config.padding,
                    activation=layer_config.activation,
                    **shape_kwargs
                ))
            elif kind == "maxpool2d":
                # kernel_size doubles as the pooling window for this layer type.
                model.add(layers.MaxPooling2D(layer_config.kernel_size))
            elif kind == "flatten":
                model.add(layers.Flatten())
            elif kind == "lstm":
                model.add(layers.LSTM(
                    layer_config.units,
                    return_sequences=layer_config.return_sequences,
                    **shape_kwargs
                ))
            elif kind == "gru":
                model.add(layers.GRU(
                    layer_config.units,
                    return_sequences=layer_config.return_sequences,
                    **shape_kwargs
                ))
            else:
                self.logger.warning(
                    "Unsupported layer type %r at position %d; no layer added", kind, index
                )

            # Add dropout if specified
            if layer_config.dropout_rate > 0:
                model.add(layers.Dropout(layer_config.dropout_rate))

            # Add batch normalization if specified
            if layer_config.batch_norm:
                model.add(layers.BatchNormalization())

        # Output layer
        model.add(layers.Dense(
            architecture.output_units,
            activation=architecture.output_activation
        ))

        return model
    
    def compile_model(self, model: Any, architecture: NetworkArchitecture) -> Any:
        """Compile ``model`` with the optimizer, loss and metrics from ``architecture``.

        Every ``OptimizationType`` member is mapped to its Keras optimizer.
        ``AdamW`` is looked up dynamically because it is not exposed by every
        Keras release; when it is missing, plain Adam is used and a warning is
        logged. Unknown optimizer values also fall back to Adam.

        Args:
            model: Keras model to compile (modified in place).
            architecture: Source of optimizer type, learning rate, loss and metrics.

        Returns:
            The same ``model`` object, compiled.
        """
        rate = architecture.learning_rate
        choice = architecture.optimizer

        if choice == OptimizationType.SGD:
            optimizer = optimizers.SGD(learning_rate=rate)
        elif choice == OptimizationType.RMSPROP:
            optimizer = optimizers.RMSprop(learning_rate=rate)
        elif choice == OptimizationType.ADAGRAD:
            optimizer = optimizers.Adagrad(learning_rate=rate)
        elif choice == OptimizationType.ADAMW:
            adamw_cls = getattr(optimizers, "AdamW", None)
            if adamw_cls is None:
                self.logger.warning("AdamW not provided by this Keras version; using Adam")
                optimizer = optimizers.Adam(learning_rate=rate)
            else:
                optimizer = adamw_cls(learning_rate=rate)
        else:
            # ADAM and any unrecognized value.
            optimizer = optimizers.Adam(learning_rate=rate)

        model.compile(
            optimizer=optimizer,
            loss=architecture.loss_function,
            metrics=architecture.metrics
        )

        return model
    
    def get_callbacks(self, architecture: NetworkArchitecture, model_path: str) -> List[Any]:
        """Return the Keras callbacks enabled by ``architecture``.

        Args:
            architecture: Supplies the ``early_stopping``, ``reduce_lr`` and
                ``model_checkpoint`` flags.
            model_path: File path handed to ``ModelCheckpoint``.

        Returns:
            A list containing, in this order and only when enabled: early
            stopping on ``val_loss``, learning-rate reduction on ``val_loss``,
            and best-only checkpointing on ``val_accuracy``.
        """
        # (enabled?, factory) pairs; a callback is only constructed when its flag is set.
        candidates = (
            (
                architecture.early_stopping,
                lambda: callbacks.EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True,
                ),
            ),
            (
                architecture.reduce_lr,
                lambda: callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-7,
                ),
            ),
            (
                architecture.model_checkpoint,
                lambda: callbacks.ModelCheckpoint(
                    filepath=model_path,
                    monitor='val_accuracy',
                    save_best_only=True,
                    save_weights_only=False,
                ),
            ),
        )
        return [make() for enabled, make in candidates if enabled]


class PyTorchModelBuilder:
    """PyTorch model builder"""
    
    def __init__(self):
        self.logger = logging.getLogger(__name__)
    
    def build_model(self, architecture: NetworkArchitecture) -> Any:
        """Build the PyTorch module matching ``architecture.network_type``.

        CNN and LSTM have dedicated builders; every other network type,
        including FEEDFORWARD, is built as a feedforward network.

        Raises:
            RuntimeError: If PyTorch could not be imported.
        """
        if not PYTORCH_AVAILABLE:
            raise RuntimeError("PyTorch not available")

        specialised = {
            NetworkType.CNN: self._build_cnn,
            NetworkType.LSTM: self._build_lstm,
        }
        # Anything without a dedicated builder falls back to feedforward.
        construct = specialised.get(architecture.network_type, self._build_feedforward)
        return construct(architecture)
    
    def _build_feedforward(self, architecture: NetworkArchitecture) -> Any:
        """Build feedforward network"""
        
        class FeedForwardNet(nn.Module):
            def __init__(self, input_size, hidden_layers, output_size, dropout_rate=0.0):
                super().__init__()
                
                layers = []
                prev_size = input_size
                
                # Hidden layers
                for units in hidden_layers:
                    layers.append(nn.Linear(prev_size, units))
                    layers.append(nn.ReLU())
                    if dropout_rate > 0:
                        layers.append(nn.Dropout(dropout_rate))
                    prev_size = units
                
                # Output layer
                layers.append(nn.Linear(prev_size, output_size))
                
                self.network = nn.Sequential(*layers)
            
            def forward(self, x):
                return self.network(x)
        
        # Extract layer sizes
        hidden_layers = [layer.units for layer in architecture.layers if layer.layer_type == "dense"]
        input_size = architecture.input_shape[0] if architecture.input_shape else 784
        dropout_rate = architecture.layers[0].dropout_rate if architecture.layers else 0.0
        
        return FeedForwardNet(input_size, hidden_layers, architecture.output_units, dropout_rate)
    
    def _build_cnn(self, architecture: NetworkArchitecture) -> Any:
        """Build CNN model"""
        
        class CNNNet(nn.Module):
            def __init__(self, input_channels, num_classes):
                super().__init__()
                
                self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, padding=1)
                self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
                self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
                
                self.pool = nn.MaxPool2d(2, 2)
                self.dropout = nn.Dropout(0.5)
                
                # Calculate flattened size (assumes 32x32 input -> 4x4 after 3 pools)
                self.fc1 = nn.Linear(64 * 4 * 4, 512)
                self.fc2 = nn.Linear(512, num_classes)
            
            def forward(self, x):
                x = self.pool(F.relu(self.conv1(x)))
                x = self.pool(F.relu(self.conv2(x)))
                x = self.pool(F.relu(self.conv3(x)))
                
                x = x.view(-1, 64 * 4 * 4)
                x = F.relu(self.fc1(x))
                x = self.dropout(x)
                x = self.fc2(x)
                
                return x
        
        input_channels = architecture.input_shape[0] if len(architecture.input_shape) == 3 else 3
        return CNNNet(input_channels, architecture.output_units)
    
    def _build_lstm(self, architecture: NetworkArchitecture) -> Any:
        """Build LSTM model"""
        
        class LSTMNet(nn.Module):
            def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.0):
                super().__init__()
                
                self.hidden_size = hidden_size
                self.num_layers = num_layers
                
                self.lstm = nn.LSTM(
                    input_size, hidden_size, num_layers,
                    batch_first=True, dropout=dropout if num_layers > 1 else 0
                )
                self.fc = nn.Linear(hidden_size, num_classes)
                self.dropout = nn.Dropout(dropout)
            
            def forward(self, x):
                h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
                c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
                
                out, _ = self.lstm(x, (h0, c0))
                out = self.dropout(out[:, -1, :])  # Last time step
                out = self.fc(out)
                
                return out
        
        lstm_layers = [layer for layer in architecture.layers if layer.layer_type in ["lstm", "gru"]]
        hidden_size = lstm_layers[0].units if lstm_layers else 128
        num_layers = len(lstm_layers) if lstm_layers else 1
        input_size = architecture.input_shape[-1] if architecture.input_shape else 128
        dropout = lstm_layers[0].dropout_rate if lstm_layers else 0.0
        
        return LSTMNet(input_size, hidden_size, num_layers, architecture.output_units, dropout)


class DeepLearningEngine:
    """Main deep learning engine"""
    
    def __init__(self):
        """Set up the model registry, both framework builders and device info."""
        # The logger has to exist before GPU detection, which logs what it finds.
        self.logger = logging.getLogger(__name__)

        self.tf_builder = TensorFlowModelBuilder()
        self.torch_builder = PyTorchModelBuilder()

        # Registry of every model created through this engine, keyed by model id.
        self.models: Dict[str, DLModel] = {}

        # Probe the hardware once; later calls reuse these cached values.
        self.gpu_available = self._check_gpu_availability()
        self.device = self._get_device()
    
    def _check_gpu_availability(self) -> bool:
        """Return True when any imported framework reports at least one GPU.

        Each framework that finds a GPU logs the number of devices it sees.
        """
        tf_gpus = tf.config.experimental.list_physical_devices('GPU') if TENSORFLOW_AVAILABLE else []
        if tf_gpus:
            self.logger.info(f"TensorFlow: Found {len(tf_gpus)} GPU(s)")

        torch_has_cuda = bool(PYTORCH_AVAILABLE and torch.cuda.is_available())
        if torch_has_cuda:
            self.logger.info(f"PyTorch: Found {torch.cuda.device_count()} GPU(s)")

        return bool(tf_gpus) or torch_has_cuda
    
    def _get_device(self) -> str:
        """Name the computation device: "cuda", "gpu" or "cpu".

        "cuda" means PyTorch sees a CUDA device. "gpu" means only TensorFlow
        reports a GPU. Anything else is "cpu".
        """
        if PYTORCH_AVAILABLE and torch.cuda.is_available():
            return "cuda"
        if not TENSORFLOW_AVAILABLE:
            return "cpu"
        tf_gpus = tf.config.experimental.list_physical_devices('GPU')
        return "gpu" if tf_gpus else "cpu"
    
    def create_model(self, name: str, architecture: NetworkArchitecture,
                    framework: Framework = Framework.AUTO,
                    task_type: TaskType = TaskType.CLASSIFICATION) -> str:
        """Create, build and register a model, returning its id.

        ``Framework.AUTO`` resolves to TensorFlow when it is importable,
        otherwise to PyTorch. TensorFlow models are built and compiled;
        PyTorch models are built and moved to CUDA when the engine device is
        "cuda". If a framework is requested explicitly but is not installed,
        the model is still registered (uncompiled, so it cannot be trained)
        and a warning is logged instead of failing silently.

        Args:
            name: Human-readable model name.
            architecture: Network description used by the builder.
            framework: Backend to use, or ``AUTO`` to pick one.
            task_type: Task the model is meant for.

        Returns:
            The id under which the model is stored in ``self.models``.

        Raises:
            RuntimeError: If ``AUTO`` is requested and neither framework is
                importable.
        """
        # Auto-select framework
        if framework == Framework.AUTO:
            if TENSORFLOW_AVAILABLE:
                framework = Framework.TENSORFLOW
            elif PYTORCH_AVAILABLE:
                framework = Framework.PYTORCH
            else:
                raise RuntimeError("No deep learning framework available")

        model = DLModel(
            name=name,
            architecture=architecture,
            framework=framework,
            task_type=task_type
        )

        # Build the backend object for the chosen framework
        if framework == Framework.TENSORFLOW and TENSORFLOW_AVAILABLE:
            tf_model = self.tf_builder.build_model(architecture)
            tf_model = self.tf_builder.compile_model(tf_model, architecture)
            model.tf_model = tf_model
            model.is_compiled = True
        elif framework == Framework.PYTORCH and PYTORCH_AVAILABLE:
            torch_model = self.torch_builder.build_model(architecture)
            if self.device == "cuda":
                torch_model = torch_model.cuda()
            model.torch_model = torch_model
            model.is_compiled = True
        else:
            # Explicitly requested backend is not installed: nothing was built.
            self.logger.warning(
                f"Framework {framework.value} is not available; "
                f"model {name} was registered without a backend model"
            )

        self.models[model.id] = model
        self.logger.info(f"Created {framework.value} model: {name} ({model.id})")

        return model.id
    
    async def train_model(self, model_id: str, train_data: Any, train_labels: Any,
                         val_data: Any = None, val_labels: Any = None) -> bool:
        """Train a registered model with its framework-specific trainer.

        On completion the model's ``training_time``, ``is_trained`` and
        ``updated_at`` are refreshed. Any exception raised while training is
        logged and reported as ``False``; in that case those fields are left
        untouched.

        Args:
            model_id: Id returned by ``create_model``.
            train_data: Training inputs.
            train_labels: Training targets.
            val_data: Optional validation inputs.
            val_labels: Optional validation targets.

        Returns:
            True when the trainer reported success, False otherwise.

        Raises:
            ValueError: If the id is unknown or the model is not compiled.
        """
        model = self.models.get(model_id)
        if not model:
            raise ValueError(f"Model not found: {model_id}")
        if not model.is_compiled:
            raise ValueError("Model not compiled")

        started_at = time.time()

        try:
            # Pick the coroutine that matches the model's backend, if any.
            if model.framework == Framework.TENSORFLOW:
                trainer = self._train_tensorflow_model
            elif model.framework == Framework.PYTORCH:
                trainer = self._train_pytorch_model
            else:
                trainer = None

            if trainer is None:
                success = False
            else:
                success = await trainer(model, train_data, train_labels, val_data, val_labels)

            model.training_time = time.time() - started_at
            model.is_trained = success
            model.updated_at = datetime.now()

            return success

        except Exception as e:
            self.logger.error(f"Training failed: {e}")
            return False
    
    async def _train_tensorflow_model(self, model: DLModel, train_data: Any, train_labels: Any,
                                     val_data: Any = None, val_labels: Any = None) -> bool:
        """Train ``model.tf_model`` with Keras ``fit`` and record per-epoch metrics.

        Inputs are converted to NumPy arrays. When explicit validation data is
        given it is used as-is; otherwise ``architecture.validation_split`` of
        the training data is held out. The checkpoint path is placed in the
        system temp directory and stored on ``model.model_path``. A callback
        appends one ``TrainingMetrics`` per epoch and tracks the best
        validation accuracy and loss.

        Note: although declared ``async``, this method contains no ``await``;
        ``fit`` runs synchronously.

        Returns:
            True when ``fit`` finished, False when TensorFlow/NumPy or the
            model object is missing or ``fit`` raised.
        """
        # Compare against None explicitly instead of relying on the model's truthiness.
        if not TENSORFLOW_AVAILABLE or model.tf_model is None:
            return False

        if not NUMPY_AVAILABLE:
            self.logger.error("NumPy required for training")
            return False

        X_train = np.array(train_data)
        y_train = np.array(train_labels)

        validation_data = None
        if val_data is not None and val_labels is not None:
            validation_data = (np.array(val_data), np.array(val_labels))

        # Create model checkpoint path
        model_path = os.path.join(tempfile.gettempdir(), f"model_{model.id}.h5")
        model.model_path = model_path

        callback_list = self.tf_builder.get_callbacks(model.architecture, model_path)

        class MetricsCallback(tf.keras.callbacks.Callback):
            """Copies Keras epoch logs into the engine's ``DLModel`` record."""

            def __init__(self, dl_model):
                # Let the Keras base class initialize its own state first.
                super().__init__()
                self.dl_model = dl_model

            def on_epoch_end(self, epoch, logs=None):
                if not logs:
                    return

                # The learning-rate key differs between Keras versions
                # ('lr' vs 'learning_rate'); accept either.
                learning_rate = logs.get('lr', logs.get('learning_rate', 0))

                self.dl_model.training_history.append(TrainingMetrics(
                    epoch=epoch,
                    train_loss=logs.get('loss', 0),
                    train_accuracy=logs.get('accuracy', 0),
                    val_loss=logs.get('val_loss', 0),
                    val_accuracy=logs.get('val_accuracy', 0),
                    learning_rate=learning_rate
                ))

                # Update best scores (only move when validation values exist)
                val_accuracy = logs.get('val_accuracy', 0)
                if val_accuracy > self.dl_model.best_accuracy:
                    self.dl_model.best_accuracy = val_accuracy
                val_loss = logs.get('val_loss', float('inf'))
                if val_loss < self.dl_model.best_loss:
                    self.dl_model.best_loss = val_loss

        callback_list.append(MetricsCallback(model))

        try:
            history = model.tf_model.fit(
                X_train, y_train,
                batch_size=model.architecture.batch_size,
                epochs=model.architecture.epochs,
                validation_data=validation_data,
                # Explicit validation data takes precedence over the split.
                validation_split=model.architecture.validation_split if validation_data is None else 0,
                callbacks=callback_list,
                verbose=1
            )

            # Store final metrics
            if history.history:
                model.final_train_accuracy = history.history.get('accuracy', [0])[-1]
                if 'val_accuracy' in history.history:
                    model.final_val_accuracy = history.history.get('val_accuracy', [0])[-1]

            return True

        except Exception as e:
            self.logger.error(f"TensorFlow training error: {e}")
            return False
    
    async def _train_pytorch_model(self, model: DLModel, train_data: Any, train_labels: Any,
                                  val_data: Any = None, val_labels: Any = None) -> bool:
        """Train ``model.torch_model`` with a manual epoch loop.

        Classification uses ``CrossEntropyLoss`` with integer labels; every
        other task uses ``MSELoss`` with float targets (reshaped to the output
        shape when the element counts match, so ``(N,)`` targets work with
        ``(N, 1)`` outputs). The optimizer is chosen from
        ``architecture.optimizer`` with Adam as fallback. One
        ``TrainingMetrics`` entry is appended per epoch; losses are averaged
        per batch. ``best_accuracy`` and ``best_loss`` are only updated when
        validation data is present.

        Note: although declared ``async``, this method contains no ``await``;
        the loop runs synchronously.

        Returns:
            True when all epochs completed, False when PyTorch/NumPy or the
            model object is missing, the training set is empty, or the loop
            raised.
        """
        if not PYTORCH_AVAILABLE or model.torch_model is None:
            return False

        if not NUMPY_AVAILABLE:
            self.logger.error("NumPy required for training")
            return False

        net = model.torch_model
        arch = model.architecture
        is_classification = model.task_type == TaskType.CLASSIFICATION

        # CrossEntropyLoss needs integer class indices; MSELoss needs float targets.
        label_dtype = torch.long if is_classification else torch.float32

        def as_features(values):
            return torch.as_tensor(np.array(values), dtype=torch.float32)

        def as_labels(values):
            return torch.as_tensor(np.array(values), dtype=label_dtype)

        def match_targets(outputs, targets):
            # MSELoss would broadcast (N,) against (N, 1); reshape when sizes agree.
            if (not is_classification and targets.shape != outputs.shape
                    and targets.numel() == outputs.numel()):
                return targets.reshape(outputs.shape)
            return targets

        # Prepare data
        train_dataset = TensorDataset(as_features(train_data), as_labels(train_labels))
        if len(train_dataset) == 0:
            self.logger.error("PyTorch training error: no training samples")
            return False
        train_loader = DataLoader(train_dataset, batch_size=arch.batch_size, shuffle=True)

        val_loader = None
        if val_data is not None and val_labels is not None:
            val_dataset = TensorDataset(as_features(val_data), as_labels(val_labels))
            val_loader = DataLoader(val_dataset, batch_size=arch.batch_size)
        # An empty validation loader is treated like no validation at all.
        has_validation = val_loader is not None and len(val_loader) > 0

        # Setup optimizer and loss
        optimizer_classes = {
            OptimizationType.ADAM: optim.Adam,
            OptimizationType.SGD: optim.SGD,
            OptimizationType.ADAMW: optim.AdamW,
            OptimizationType.RMSPROP: optim.RMSprop,
            OptimizationType.ADAGRAD: optim.Adagrad,
        }
        optimizer_cls = optimizer_classes.get(arch.optimizer, optim.Adam)
        optimizer = optimizer_cls(net.parameters(), lr=arch.learning_rate)

        criterion = nn.CrossEntropyLoss() if is_classification else nn.MSELoss()

        # self.device may be "gpu" (TensorFlow-only GPU), which torch.device
        # rejects; only "cuda" maps to a PyTorch accelerator.
        device = torch.device("cuda" if self.device == "cuda" else "cpu")
        net.to(device)
        criterion.to(device)

        try:
            for epoch in range(arch.epochs):
                # Training phase
                net.train()
                train_loss = 0.0
                train_correct = 0
                train_total = 0

                for batch_data, batch_labels in train_loader:
                    batch_data = batch_data.to(device)
                    batch_labels = batch_labels.to(device)

                    optimizer.zero_grad()
                    outputs = net(batch_data)
                    loss = criterion(outputs, match_targets(outputs, batch_labels))
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()

                    if is_classification:
                        _, predicted = torch.max(outputs.data, 1)
                        train_total += batch_labels.size(0)
                        train_correct += (predicted == batch_labels).sum().item()

                # Validation phase
                val_loss = 0.0
                val_correct = 0
                val_total = 0

                if has_validation:
                    net.eval()
                    with torch.no_grad():
                        for batch_data, batch_labels in val_loader:
                            batch_data = batch_data.to(device)
                            batch_labels = batch_labels.to(device)

                            outputs = net(batch_data)
                            loss = criterion(outputs, match_targets(outputs, batch_labels))
                            val_loss += loss.item()

                            if is_classification:
                                _, predicted = torch.max(outputs.data, 1)
                                val_total += batch_labels.size(0)
                                val_correct += (predicted == batch_labels).sum().item()

                # Record metrics (losses are per-batch averages)
                train_acc = train_correct / train_total if train_total > 0 else 0
                val_acc = val_correct / val_total if val_total > 0 else 0
                avg_train_loss = train_loss / len(train_loader)
                avg_val_loss = val_loss / len(val_loader) if has_validation else 0

                model.training_history.append(TrainingMetrics(
                    epoch=epoch,
                    train_loss=avg_train_loss,
                    train_accuracy=train_acc,
                    val_loss=avg_val_loss,
                    val_accuracy=val_acc,
                    learning_rate=optimizer.param_groups[0]['lr']
                ))

                # Update best scores only from real validation results, using
                # the same averaged loss that is stored in the history.
                if has_validation:
                    if val_acc > model.best_accuracy:
                        model.best_accuracy = val_acc
                    if avg_val_loss < model.best_loss:
                        model.best_loss = avg_val_loss

                # Log progress every tenth epoch
                if epoch % 10 == 0:
                    self.logger.info(f"Epoch {epoch}: Train Acc: {train_acc:.3f}, Val Acc: {val_acc:.3f}")

            # Store final metrics
            if model.training_history:
                last_metrics = model.training_history[-1]
                model.final_train_accuracy = last_metrics.train_accuracy
                model.final_val_accuracy = last_metrics.val_accuracy

            return True

        except Exception as e:
            self.logger.error(f"PyTorch training error: {e}")
            return False
    
    def predict(self, model_id: str, data: Any) -> List[Any]:
        """Run inference with a trained model and return a plain list.

        Args:
            model_id: Id returned by ``create_model``.
            data: Input samples, convertible to a NumPy array.

        Returns:
            Predicted class indices for classification, flattened raw outputs
            otherwise.

        Raises:
            ValueError: If the id is unknown, the model is untrained, or no
                backend model object matches the model's framework.
        """
        model = self.models.get(model_id)
        if not (model and model.is_trained):
            raise ValueError(f"Trained model not found: {model_id}")

        backend = model.framework
        if backend == Framework.TENSORFLOW and model.tf_model:
            return self._predict_tensorflow(model, data)
        if backend == Framework.PYTORCH and model.torch_model:
            return self._predict_pytorch(model, data)

        raise ValueError("No valid model for prediction")
    
    def _predict_tensorflow(self, model: DLModel, data: Any) -> List[Any]:
        """Predict with the Keras model stored on ``model``.

        Classification returns the arg-max over axis 1 of the model output;
        any other task returns the output flattened into one list.
        """
        raw_output = model.tf_model.predict(np.array(data))

        if model.task_type != TaskType.CLASSIFICATION:
            return raw_output.flatten().tolist()

        class_indices = np.argmax(raw_output, axis=1)
        return class_indices.tolist()
    
    def _predict_pytorch(self, model: DLModel, data: Any) -> List[Any]:
        """Predict with the PyTorch module stored on ``model``.

        The input is moved to the device the module's parameters live on, so
        input and weights always agree. This also avoids passing the engine's
        "gpu" device name (TensorFlow-only GPU) to ``torch.device``, which
        rejects it. Inference runs in eval mode without gradient tracking.

        Returns:
            Arg-max class indices along dim 1 for classification, otherwise
            the flattened outputs, both as Python lists.
        """
        net = model.torch_model

        # Follow the model's own placement; parameter-free modules run on CPU.
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

        X = torch.FloatTensor(np.array(data)).to(device)

        net.eval()
        with torch.no_grad():
            outputs = net(X)

            if model.task_type == TaskType.CLASSIFICATION:
                _, predicted = torch.max(outputs.data, 1)
                return predicted.cpu().tolist()
            else:
                return outputs.cpu().flatten().tolist()
    
    def get_model_summary(self, model_id: str) -> Dict[str, Any]:
        """Return a flat dict describing a registered model.

        The dict holds identity, framework, task and network type, training
        state and accuracy figures, the number of recorded epochs and whether
        a GPU is both available and requested. ``total_params`` is added when
        a backend model object exists. An unknown id yields an empty dict.
        """
        model = self.models.get(model_id)
        if not model:
            return {}

        summary = dict(
            id=model.id,
            name=model.name,
            framework=model.framework.value,
            task_type=model.task_type.value,
            network_type=model.architecture.network_type.value,
            is_trained=model.is_trained,
            training_time=model.training_time,
            best_accuracy=model.best_accuracy,
            final_train_accuracy=model.final_train_accuracy,
            final_val_accuracy=model.final_val_accuracy,
            total_epochs=len(model.training_history),
            gpu_used=self.gpu_available and model.architecture.use_gpu,
        )

        # Parameter count comes from whichever backend object is present.
        param_count = None
        if model.framework == Framework.TENSORFLOW and model.tf_model:
            param_count = model.tf_model.count_params()
        elif model.framework == Framework.PYTORCH and model.torch_model:
            param_count = sum(p.numel() for p in model.torch_model.parameters())

        if param_count is not None:
            summary['total_params'] = param_count

        return summary
    
    def get_training_history(self, model_id: str) -> List[Dict[str, Any]]:
        """Return the per-epoch metrics of a model as a list of dicts.

        Each ``TrainingMetrics`` entry is converted with ``asdict``. An
        unknown id yields an empty list.
        """
        model = self.models.get(model_id)
        return [asdict(entry) for entry in model.training_history] if model else []
    
    def save_model(self, model_id: str, file_path: str) -> bool:
        """Save a trained model to ``file_path``.

        TensorFlow models are written with Keras ``save``; for PyTorch models
        only the ``state_dict`` is written. ``model.model_path`` is updated
        only after something was actually saved.

        Args:
            model_id: Id returned by ``create_model``.
            file_path: Destination path.

        Returns:
            True when the model was written. False when the id is unknown,
            the model is untrained, no backend model object exists for its
            framework, or saving raised.
        """
        model = self.models.get(model_id)
        if not model or not model.is_trained:
            return False

        try:
            if model.framework == Framework.TENSORFLOW and model.tf_model is not None:
                model.tf_model.save(file_path)
            elif model.framework == Framework.PYTORCH and model.torch_model is not None:
                torch.save(model.torch_model.state_dict(), file_path)
            else:
                # Nothing to write: do not report success or record a path.
                self.logger.error(
                    f"Model save failed: no {model.framework.value} model object to save"
                )
                return False

            model.model_path = file_path
            return True

        except Exception as e:
            self.logger.error(f"Model save failed: {e}")
            return False
    
    def load_model(self, model_id: str, file_path: str) -> bool:
        """Load saved weights/model from ``file_path`` into a registered model.

        TensorFlow models are replaced by the result of Keras ``load_model``.
        PyTorch models must already have a built module; the saved
        ``state_dict`` is loaded into it. The model is marked trained and its
        ``model_path`` updated only when something was actually loaded.

        Args:
            model_id: Id returned by ``create_model``.
            file_path: Path previously written by ``save_model``.

        Returns:
            True on success. False when the id is unknown, there is no module
            to load into, or loading raised.
        """
        model = self.models.get(model_id)
        if not model:
            return False

        try:
            if model.framework == Framework.TENSORFLOW:
                model.tf_model = tf.keras.models.load_model(file_path)
            elif model.framework == Framework.PYTORCH and model.torch_model is not None:
                # NOTE: torch.load unpickles the file; only load files from a
                # trusted source. Mapping to CPU lets checkpoints saved on a
                # GPU load on CPU-only machines; load_state_dict then copies
                # the values into the module on whatever device it lives on.
                state = torch.load(file_path, map_location="cpu")
                model.torch_model.load_state_dict(state)
            else:
                # No module to receive the weights: do not mark as trained.
                self.logger.error(
                    f"Model load failed: no {model.framework.value} model object to load into"
                )
                return False

            model.model_path = file_path
            model.is_trained = True
            return True

        except Exception as e:
            self.logger.error(f"Model load failed: {e}")
            return False


def create_sample_architecture(network_type: NetworkType = NetworkType.FEEDFORWARD,
                              input_shape: Tuple[int, ...] = (784,),
                              output_units: int = 10) -> NetworkArchitecture:
    """Return a preset NetworkArchitecture for the requested network family.

    Presets:
        FEEDFORWARD: two dense ReLU layers (128 then 64 units, dropout 0.2).
        CNN: two conv2d/maxpool2d pairs (32 then 64 filters), a flatten
            layer, and a dense ReLU layer (64 units, dropout 0.5).
        LSTM: two stacked LSTM layers (128 units returning sequences, then
            64 units), each with dropout 0.2.
        Any other type: a single 64-unit dense ReLU layer.

    Args:
        network_type: Network family selecting the preset layer stack.
        input_shape: Passed through to the architecture unchanged.
        output_units: Passed through to the architecture unchanged.

    Returns:
        A NetworkArchitecture named after the network type, configured with
        epochs=20 and batch_size=32.
    """

    def feedforward_stack():
        return [
            LayerConfig(layer_type="dense", units=128, activation="relu", dropout_rate=0.2),
            LayerConfig(layer_type="dense", units=64, activation="relu", dropout_rate=0.2),
        ]

    def cnn_stack():
        return [
            LayerConfig(layer_type="conv2d", filters=32, kernel_size=(3, 3), activation="relu"),
            LayerConfig(layer_type="maxpool2d", kernel_size=(2, 2)),
            LayerConfig(layer_type="conv2d", filters=64, kernel_size=(3, 3), activation="relu"),
            LayerConfig(layer_type="maxpool2d", kernel_size=(2, 2)),
            LayerConfig(layer_type="flatten"),
            LayerConfig(layer_type="dense", units=64, activation="relu", dropout_rate=0.5),
        ]

    def lstm_stack():
        return [
            LayerConfig(layer_type="lstm", units=128, dropout_rate=0.2, return_sequences=True),
            LayerConfig(layer_type="lstm", units=64, dropout_rate=0.2, return_sequences=False),
        ]

    # Ordered (family, builder) pairs; builders run lazily so only the
    # selected preset's layers are ever constructed.
    presets = (
        (NetworkType.FEEDFORWARD, feedforward_stack),
        (NetworkType.CNN, cnn_stack),
        (NetworkType.LSTM, lstm_stack),
    )

    for family, build_stack in presets:
        if network_type == family:
            hidden_layers = build_stack()
            break
    else:
        # No preset for this family: fall back to one small dense layer.
        hidden_layers = [LayerConfig(layer_type="dense", units=64, activation="relu")]

    return NetworkArchitecture(
        name=f"Sample {network_type.value} Architecture",
        network_type=network_type,
        layers=hidden_layers,
        input_shape=input_shape,
        output_units=output_units,
        epochs=20,
        batch_size=32
    )


if __name__ == "__main__":
    async def run_demo(dl_engine):
        """Train a small feedforward classifier on random data and print the results."""
        # Synthetic MNIST-like data: 784 features per sample, 10 classes.
        # The seed is fixed so repeated runs draw the same arrays.
        np.random.seed(42)
        X_train = np.random.rand(1000, 784)
        y_train = np.random.randint(0, 10, 1000)
        X_val = np.random.rand(200, 784)
        y_val = np.random.randint(0, 10, 200)

        print(f"Training data shape: {X_train.shape}")
        print(f"Training labels shape: {y_train.shape}")

        architecture = create_sample_architecture(
            NetworkType.FEEDFORWARD, input_shape=(784,), output_units=10
        )
        architecture.epochs = 5  # shortened from the preset so the demo stays quick

        print("\nCreating and training feedforward model...")
        model_id = dl_engine.create_model(
            "Sample Feedforward Model",
            architecture,
            Framework.AUTO,
            TaskType.CLASSIFICATION,
        )

        success = await dl_engine.train_model(model_id, X_train, y_train, X_val, y_val)
        print(f"Training success: {success}")
        if not success:
            # Without a trained model there is nothing further to report.
            return

        summary = dl_engine.get_model_summary(model_id)
        print(f"\nModel Summary:")
        print(f"Framework: {summary['framework']}")
        print(f"Total Parameters: {summary.get('total_params', 'N/A')}")
        print(f"Training Time: {summary['training_time']:.2f}s")
        print(f"Best Accuracy: {summary['best_accuracy']:.3f}")
        print(f"Final Val Accuracy: {summary['final_val_accuracy']:.3f}")

        # Predict on the first five validation samples and show their labels.
        sample_batch = X_val[:5]
        predictions = dl_engine.predict(model_id, sample_batch)
        print(f"\nTest Predictions: {predictions}")
        print(f"Actual Labels: {y_val[:5].tolist()}")

        history = dl_engine.get_training_history(model_id)
        print(f"\nTraining History: {len(history)} epochs recorded")

    async def main():
        """Print the runtime environment, run the demo if possible, then sign off."""
        dl_engine = DeepLearningEngine()

        print(f"GPU Available: {dl_engine.gpu_available}")
        print(f"Computation Device: {dl_engine.device}")
        print(f"TensorFlow Available: {TENSORFLOW_AVAILABLE}")
        print(f"PyTorch Available: {PYTORCH_AVAILABLE}")

        # The demo needs NumPy for the data plus at least one DL backend.
        backend_ready = TENSORFLOW_AVAILABLE or PYTORCH_AVAILABLE
        if not (NUMPY_AVAILABLE and backend_ready):
            print("Required libraries not available for full demo")
        else:
            await run_demo(dl_engine)

        print("\ng13.2: Deep Learning Engine with GPU Support - COMPLETED ✅")

    asyncio.run(main())