#!/usr/bin/env python3
"""
Real-Time Analytics for TuskLang Python SDK
===========================================
Advanced real-time analytics and streaming data processing

This module provides real-time analytics capabilities for the TuskLang Python SDK,
including streaming analytics, real-time monitoring, live data processing, and
instant insights generation.
"""

import asyncio
import threading
import time
import json
import statistics
from typing import Any, Dict, List, Optional, Callable, Union, Tuple
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
from enum import Enum
import logging
from collections import deque, defaultdict
import queue
import random
import math


class AnalyticsType(Enum):
    """Kinds of analytics processing, each identified by a lowercase string value.

    NOTE(review): nothing in the visible part of this module reads these
    members; they are presumably consumed by callers elsewhere -- confirm
    before renaming or changing a value.
    """
    STREAMING = "streaming"
    BATCH = "batch"
    REAL_TIME = "real_time"
    PREDICTIVE = "predictive"


class MetricType(Enum):
    """Kinds of metric a DataPoint can carry, identified by a string value.

    The member is stored on each DataPoint as ``metric_type``;
    ``RealTimeAnalytics.add_data_point`` defaults it to ``GAUGE``. The
    visible code does not treat the different kinds differently when
    aggregating.
    """
    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
    SUMMARY = "summary"


@dataclass
class DataPoint:
    """A single recorded metric sample.

    Instances are created by ``RealTimeAnalytics.add_data_point`` and stored
    both in the owning stream's buffer and in the global per-metric history.
    """
    # Time the sample was recorded (add_data_point uses naive datetime.now()).
    timestamp: datetime
    # The sampled numeric value.
    value: float
    # Free-form string tags attached to the sample (empty dict when none given).
    labels: Dict[str, str]
    # Name of the metric this sample belongs to; used for grouping and filtering.
    metric_name: str
    # Kind of metric (see MetricType).
    metric_type: MetricType


@dataclass
class AnalyticsEvent:
    """An event queued for asynchronous processing by RealTimeAnalytics.

    Events are built by ``RealTimeAnalytics.add_event`` and consumed by the
    event-processor thread.
    """
    # Identifier string; add_event generates it from a random 7-digit integer.
    event_id: str
    # Event category; "performance", "security" and "operation" get
    # dedicated handling in RealTimeAnalytics._process_event.
    event_type: str
    # Time the event was created.
    timestamp: datetime
    # Payload. An optional "metrics" mapping (name -> numeric value) is
    # turned into data points; other keys are read by the type-specific handlers.
    data: Dict[str, Any]
    # Origin of the event; copied into the labels of derived data points.
    source: str
    # NOTE(review): priority is stored but not read by the visible code; the
    # queue used is a plain FIFO -- confirm whether ordering was intended.
    priority: int


@dataclass
class StreamConfig:
    """Per-stream settings used by RealTimeAnalytics."""
    # Name of the stream this configuration belongs to.
    stream_id: str
    # Maximum number of raw data points kept (maxlen of the stream's data buffer).
    buffer_size: int
    # Maximum number of aggregated entries kept (maxlen of the stream's
    # aggregated_data deque). NOTE(review): the default streams annotate this
    # value as a duration ("5 minutes"), but the code uses it as a count.
    window_size: int
    # Look-back span, in seconds, covered by each aggregation pass.
    aggregation_interval: int
    # NOTE(review): not read anywhere in the visible code; presumably a
    # retention time in seconds -- confirm.
    retention_period: int
    # Metric name -> threshold; an alert is raised when a new value exceeds it.
    alert_thresholds: Dict[str, float]


class RealTimeAnalytics:
    """Real-time analytics system for TuskLang"""
    
    def __init__(self, config: Dict[str, Any] = None):
        self.config = config or {}
        self.logger = logging.getLogger('tusklang.analytics')
        
        # Initialize components
        self.streams = {}
        self.metrics = defaultdict(list)
        self.alerts = []
        self.event_queue = queue.Queue()
        self.analytics_active = True
        
        # Initialize streams
        self._init_default_streams()
        
        # Start background processes
        self._start_background_processes()
    
    def _init_default_streams(self):
        """Register the built-in "performance", "operations" and "security" streams."""
        # Columns: stream id, buffer size, window size, aggregation interval (s),
        # retention period (s), alert thresholds.
        presets = (
            ("performance", 1000, 300, 60, 3600,
             {"cpu_usage": 80.0, "memory_usage": 85.0}),
            ("operations", 5000, 600, 30, 7200,
             {"error_rate": 5.0, "latency": 1000.0}),
            ("security", 1000, 300, 60, 86400,
             {"failed_logins": 10.0, "suspicious_activity": 1.0}),
        )

        for name, buffer_size, window_size, interval, retention, thresholds in presets:
            settings = StreamConfig(
                stream_id=name,
                buffer_size=buffer_size,
                window_size=window_size,
                aggregation_interval=interval,
                retention_period=retention,
                alert_thresholds=thresholds,
            )
            self.create_stream(name, settings)
    
    def _start_background_processes(self):
        """Start background analytics processes"""
        # Event processor
        self.event_processor_thread = threading.Thread(target=self._event_processor_loop, daemon=True)
        self.event_processor_thread.start()
        
        # Stream aggregator
        self.stream_aggregator_thread = threading.Thread(target=self._stream_aggregator_loop, daemon=True)
        self.stream_aggregator_thread.start()
        
        # Alert processor
        self.alert_processor_thread = threading.Thread(target=self._alert_processor_loop, daemon=True)
        self.alert_processor_thread.start()
    
    def create_stream(self, stream_id: str, config: StreamConfig) -> bool:
        """Create a new analytics stream"""
        try:
            self.streams[stream_id] = {
                "config": config,
                "data_buffer": deque(maxlen=config.buffer_size),
                "aggregated_data": deque(maxlen=config.window_size),
                "alerts": [],
                "subscribers": []
            }
            
            self.logger.info(f"Created analytics stream: {stream_id}")
            return True
            
        except Exception as e:
            self.logger.error(f"Failed to create stream {stream_id}: {e}")
            return False
    
    def add_data_point(self, stream_id: str, metric_name: str, value: float,
                      labels: Dict[str, str] = None, metric_type: MetricType = MetricType.GAUGE):
        """Record one metric sample on a stream and evaluate alert thresholds.

        An unknown ``stream_id`` is created on the fly with default settings.
        The sample is appended to the stream's bounded buffer and to the
        global per-metric history in ``self.metrics``.

        The global history is capped so a long-running process does not grow
        without bound: at most ``config["max_points_per_metric"]`` samples
        (default 10000) are kept per metric name, oldest dropped first. Set
        that config value to ``None`` to disable the cap.

        Args:
            stream_id: Target stream.
            metric_name: Name of the metric being sampled.
            value: Sampled value.
            labels: Optional string tags; ``None`` becomes an empty dict.
            metric_type: Kind of metric, GAUGE by default.
        """
        if stream_id not in self.streams:
            self.logger.warning(f"Stream {stream_id} not found, creating default")
            self.create_stream(stream_id, StreamConfig(
                stream_id=stream_id,
                buffer_size=1000,
                window_size=300,
                aggregation_interval=60,
                retention_period=3600,
                alert_thresholds={}
            ))

        data_point = DataPoint(
            timestamp=datetime.now(),
            value=value,
            labels=labels or {},
            metric_name=metric_name,
            metric_type=metric_type
        )

        # Per-stream buffer is a deque with maxlen, so it bounds itself.
        self.streams[stream_id]["data_buffer"].append(data_point)

        # Global history is a plain list; trim it explicitly.
        history = self.metrics[metric_name]
        history.append(data_point)
        max_points = self.config.get("max_points_per_metric", 10000)
        if max_points is not None and len(history) > max_points:
            del history[:len(history) - max_points]

        self._check_alerts(stream_id, metric_name, value)
    
    def add_event(self, event_type: str, data: Dict[str, Any], source: str = "system", priority: int = 5):
        """Wrap the arguments in an AnalyticsEvent and queue it for processing.

        Args:
            event_type: Event category.
            data: Event payload.
            source: Origin of the event.
            priority: Stored on the event as given.
        """
        # The id is a random 7-digit number rendered as a string.
        generated_id = str(random.randint(1000000, 9999999))
        created_at = datetime.now()

        self.event_queue.put(
            AnalyticsEvent(
                event_id=generated_id,
                event_type=event_type,
                timestamp=created_at,
                data=data,
                source=source,
                priority=priority,
            )
        )
    
    def get_stream_data(self, stream_id: str, window_minutes: int = 5) -> List[Dict[str, Any]]:
        """Get stream data for specified window"""
        if stream_id not in self.streams:
            return []
        
        stream = self.streams[stream_id]
        cutoff_time = datetime.now() - timedelta(minutes=window_minutes)
        
        # Get data points within window
        data_points = [
            point for point in stream["data_buffer"]
            if point.timestamp >= cutoff_time
        ]
        
        # Convert to dictionary format
        return [asdict(point) for point in data_points]
    
    def get_aggregated_metrics(self, stream_id: str, metric_name: str = None, 
                              window_minutes: int = 5) -> Dict[str, Any]:
        """Get aggregated metrics for stream"""
        if stream_id not in self.streams:
            return {}
        
        stream = self.streams[stream_id]
        cutoff_time = datetime.now() - timedelta(minutes=window_minutes)
        
        # Filter data points
        data_points = [
            point for point in stream["data_buffer"]
            if point.timestamp >= cutoff_time and (metric_name is None or point.metric_name == metric_name)
        ]
        
        if not data_points:
            return {}
        
        # Calculate aggregations
        values = [point.value for point in data_points]
        
        return {
            "count": len(values),
            "sum": sum(values),
            "average": statistics.mean(values),
            "median": statistics.median(values),
            "min": min(values),
            "max": max(values),
            "std_dev": statistics.stdev(values) if len(values) > 1 else 0,
            "window_minutes": window_minutes,
            "last_updated": datetime.now().isoformat()
        }
    
    def get_real_time_insights(self, stream_id: str) -> Dict[str, Any]:
        """Get real-time insights for stream"""
        if stream_id not in self.streams:
            return {}
        
        # Get recent data
        recent_data = self.get_stream_data(stream_id, window_minutes=5)
        
        if not recent_data:
            return {"message": "No recent data available"}
        
        # Group by metric
        metrics = defaultdict(list)
        for point in recent_data:
            metrics[point["metric_name"]].append(point["value"])
        
        insights = {
            "stream_id": stream_id,
            "analysis_time": datetime.now().isoformat(),
            "data_points": len(recent_data),
            "metrics": {}
        }
        
        # Analyze each metric
        for metric_name, values in metrics.items():
            if values:
                insights["metrics"][metric_name] = {
                    "current_value": values[-1],
                    "trend": self._calculate_trend(values),
                    "volatility": statistics.stdev(values) if len(values) > 1 else 0,
                    "range": max(values) - min(values),
                    "anomaly_score": self._calculate_anomaly_score(values)
                }
        
        return insights
    
    def subscribe_to_stream(self, stream_id: str, callback: Callable) -> bool:
        """Subscribe to stream updates"""
        if stream_id not in self.streams:
            return False
        
        self.streams[stream_id]["subscribers"].append(callback)
        return True
    
    def set_alert_threshold(self, stream_id: str, metric_name: str, threshold: float):
        """Set alert threshold for metric"""
        if stream_id in self.streams:
            self.streams[stream_id]["config"].alert_thresholds[metric_name] = threshold
    
    def get_alerts(self, stream_id: str = None, severity: str = None) -> List[Dict[str, Any]]:
        """Get alerts"""
        alerts = []
        
        for alert in self.alerts:
            if stream_id and alert["stream_id"] != stream_id:
                continue
            if severity and alert["severity"] != severity:
                continue
            alerts.append(alert)
        
        return alerts
    
    def _event_processor_loop(self):
        """Event processor background loop"""
        while self.analytics_active:
            try:
                # Process events from queue
                try:
                    event = self.event_queue.get(timeout=1)
                    self._process_event(event)
                except queue.Empty:
                    continue
                    
            except Exception as e:
                self.logger.error(f"Event processor error: {e}")
                time.sleep(1)
    
    def _process_event(self, event: AnalyticsEvent):
        """Process analytics event"""
        try:
            # Extract metrics from event
            if "metrics" in event.data:
                for metric_name, value in event.data["metrics"].items():
                    self.add_data_point(
                        event.event_type,
                        metric_name,
                        float(value),
                        {"source": event.source, "event_id": event.event_id}
                    )
            
            # Process event-specific logic
            if event.event_type == "performance":
                self._process_performance_event(event)
            elif event.event_type == "security":
                self._process_security_event(event)
            elif event.event_type == "operation":
                self._process_operation_event(event)
            
            self.logger.debug(f"Processed event: {event.event_id}")
            
        except Exception as e:
            self.logger.error(f"Error processing event {event.event_id}: {e}")
    
    def _process_performance_event(self, event: AnalyticsEvent):
        """Process performance event"""
        # Extract performance metrics
        if "cpu_usage" in event.data:
            self.add_data_point("performance", "cpu_usage", event.data["cpu_usage"])
        
        if "memory_usage" in event.data:
            self.add_data_point("performance", "memory_usage", event.data["memory_usage"])
        
        if "response_time" in event.data:
            self.add_data_point("performance", "response_time", event.data["response_time"])
    
    def _process_security_event(self, event: AnalyticsEvent):
        """Process security event"""
        # Extract security metrics
        if "failed_logins" in event.data:
            self.add_data_point("security", "failed_logins", event.data["failed_logins"])
        
        if "suspicious_activity" in event.data:
            self.add_data_point("security", "suspicious_activity", event.data["suspicious_activity"])
        
        if "access_attempts" in event.data:
            self.add_data_point("security", "access_attempts", event.data["access_attempts"])
    
    def _process_operation_event(self, event: AnalyticsEvent):
        """Process operation event"""
        # Extract operation metrics
        if "operation_count" in event.data:
            self.add_data_point("operations", "operation_count", event.data["operation_count"])
        
        if "error_count" in event.data:
            self.add_data_point("operations", "error_count", event.data["error_count"])
        
        if "latency" in event.data:
            self.add_data_point("operations", "latency", event.data["latency"])
    
    def _stream_aggregator_loop(self):
        """Stream aggregator background loop"""
        while self.analytics_active:
            try:
                current_time = datetime.now()
                
                for stream_id, stream in self.streams.items():
                    config = stream["config"]
                    
                    # Check if it's time to aggregate
                    if (current_time - config.aggregation_interval).second % config.aggregation_interval == 0:
                        self._aggregate_stream(stream_id)
                
                time.sleep(1)
                
            except Exception as e:
                self.logger.error(f"Stream aggregator error: {e}")
                time.sleep(5)
    
    def _aggregate_stream(self, stream_id: str):
        """Aggregate stream data"""
        stream = self.streams[stream_id]
        config = stream["config"]
        
        # Get recent data points
        cutoff_time = datetime.now() - timedelta(seconds=config.aggregation_interval)
        recent_points = [
            point for point in stream["data_buffer"]
            if point.timestamp >= cutoff_time
        ]
        
        if recent_points:
            # Group by metric
            metrics = defaultdict(list)
            for point in recent_points:
                metrics[point.metric_name].append(point.value)
            
            # Calculate aggregations
            aggregations = {}
            for metric_name, values in metrics.items():
                aggregations[metric_name] = {
                    "count": len(values),
                    "sum": sum(values),
                    "average": statistics.mean(values),
                    "min": min(values),
                    "max": max(values)
                }
            
            # Store aggregated data
            aggregated_point = {
                "timestamp": datetime.now(),
                "aggregations": aggregations,
                "interval_seconds": config.aggregation_interval
            }
            
            stream["aggregated_data"].append(aggregated_point)
            
            # Notify subscribers
            for callback in stream["subscribers"]:
                try:
                    callback(stream_id, aggregated_point)
                except Exception as e:
                    self.logger.error(f"Subscriber callback error: {e}")
    
    def _alert_processor_loop(self):
        """Alert processor background loop"""
        while self.analytics_active:
            try:
                # Process alerts
                current_alerts = self.alerts.copy()
                self.alerts.clear()
                
                for alert in current_alerts:
                    self._process_alert(alert)
                
                time.sleep(5)
                
            except Exception as e:
                self.logger.error(f"Alert processor error: {e}")
                time.sleep(10)
    
    def _process_alert(self, alert: Dict[str, Any]):
        """Process alert"""
        # Log alert
        self.logger.warning(f"Alert: {alert['message']} (Severity: {alert['severity']})")
        
        # In a real system, you'd send notifications, emails, etc.
        # For now, we just log them
    
    def _check_alerts(self, stream_id: str, metric_name: str, value: float):
        """Check for alerts based on thresholds"""
        if stream_id not in self.streams:
            return
        
        config = self.streams[stream_id]["config"]
        threshold = config.alert_thresholds.get(metric_name)
        
        if threshold and value > threshold:
            alert = {
                "alert_id": str(random.randint(1000000, 9999999)),
                "stream_id": stream_id,
                "metric_name": metric_name,
                "value": value,
                "threshold": threshold,
                "severity": "high" if value > threshold * 1.5 else "medium",
                "message": f"{metric_name} exceeded threshold: {value} > {threshold}",
                "timestamp": datetime.now().isoformat()
            }
            
            self.alerts.append(alert)
    
    def _calculate_trend(self, values: List[float]) -> str:
        """Calculate trend direction"""
        if len(values) < 2:
            return "stable"
        
        # Simple linear regression
        x = list(range(len(values)))
        y = values
        
        n = len(x)
        sum_x = sum(x)
        sum_y = sum(y)
        sum_xy = sum(x[i] * y[i] for i in range(n))
        sum_x2 = sum(x[i] ** 2 for i in range(n))
        
        slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x ** 2)
        
        if slope > 0.01:
            return "increasing"
        elif slope < -0.01:
            return "decreasing"
        else:
            return "stable"
    
    def _calculate_anomaly_score(self, values: List[float]) -> float:
        """Calculate anomaly score using statistical methods"""
        if len(values) < 3:
            return 0.0
        
        mean = statistics.mean(values)
        std_dev = statistics.stdev(values)
        
        if std_dev == 0:
            return 0.0
        
        # Calculate z-score for the latest value
        latest_value = values[-1]
        z_score = abs((latest_value - mean) / std_dev)
        
        # Convert to anomaly score (0-1)
        return min(1.0, z_score / 3.0)  # Normalize to 0-1 range


class StreamingAnalytics:
    """Sliding-window processor for dictionary records.

    The most recent ``window_size`` records are kept. Every numeric field of
    an incoming record is forwarded to the "streaming" stream of an internal
    RealTimeAnalytics instance, together with a moving average of that field
    over the current window.
    """

    def __init__(self, window_size: int = 100):
        """Create an empty window of at most ``window_size`` records."""
        self.window_size = window_size
        self.data_window = deque(maxlen=window_size)
        self.analytics = RealTimeAnalytics()

    def add_data(self, data: Dict[str, Any]):
        """Append a record and publish its numeric fields and moving averages.

        Args:
            data: Record to add; only ``int``/``float`` values are published.
        """
        self.data_window.append(data)

        for key, value in data.items():
            if isinstance(value, (int, float)):
                self.analytics.add_data_point("streaming", key, float(value))

        self._process_streaming_analytics()

    def _numeric_series(self) -> Dict[str, List[float]]:
        """Collect, per field name, the numeric values present in the window.

        Every record is inspected, so a field that first appears in a later
        record (or is non-numeric in the oldest one) is still picked up.
        Fields are returned in order of first appearance, values oldest first.
        """
        series = {}
        for record in self.data_window:
            for key, value in record.items():
                if isinstance(value, (int, float)):
                    series.setdefault(key, []).append(value)
        return series

    def _process_streaming_analytics(self):
        """Publish ``<field>_moving_avg`` for each numeric field in the window.

        Skipped while the window holds fewer than two records.
        """
        if len(self.data_window) < 2:
            return

        for key, values in self._numeric_series().items():
            moving_avg = statistics.mean(values)
            self.analytics.add_data_point("streaming", f"{key}_moving_avg", moving_avg)

    def get_streaming_insights(self) -> Dict[str, Any]:
        """Summarise the current window.

        Returns:
            ``{"message": "No data available"}`` for an empty window,
            otherwise a dict with ``window_size`` and ``data_points`` (both
            the number of records held), ``latest_data`` (the newest record)
            and ``numeric_fields`` mapping each numeric field to its current
            value, average, min, max and trend.
        """
        if not self.data_window:
            return {"message": "No data available"}

        insights = {
            "window_size": len(self.data_window),
            "data_points": len(self.data_window),
            "latest_data": self.data_window[-1]
        }

        insights["numeric_fields"] = {
            key: {
                "current": values[-1],
                "average": statistics.mean(values),
                "min": min(values),
                "max": max(values),
                "trend": self.analytics._calculate_trend(values)
            }
            for key, values in self._numeric_series().items()
        }
        return insights


# Global analytics instance shared by the module-level convenience functions.
# NOTE: it is constructed at import time, and RealTimeAnalytics.__init__
# starts three daemon worker threads, so importing this module starts them.
real_time_analytics = RealTimeAnalytics()


def add_analytics_data(stream_id: str, metric_name: str, value: float, labels: Dict[str, str] = None):
    """Record one metric sample on the shared analytics instance.

    Args:
        stream_id: Target stream.
        metric_name: Name of the metric being sampled.
        value: Sampled value.
        labels: Optional string tags for the sample.
    """
    shared = real_time_analytics
    shared.add_data_point(stream_id, metric_name, value, labels=labels)


def add_analytics_event(event_type: str, data: Dict[str, Any], source: str = "system", priority: int = 5):
    """Queue an event on the shared analytics instance.

    Args:
        event_type: Event category.
        data: Event payload.
        source: Origin of the event.
        priority: Priority stored on the event; defaults to 5, the same
            default ``RealTimeAnalytics.add_event`` uses.
    """
    real_time_analytics.add_event(event_type, data, source, priority)


def get_analytics_insights(stream_id: str) -> Dict[str, Any]:
    """Return the shared instance's real-time insights for ``stream_id``."""
    insights = real_time_analytics.get_real_time_insights(stream_id)
    return insights


def get_analytics_metrics(stream_id: str, metric_name: str = None, window_minutes: int = 5) -> Dict[str, Any]:
    """Return aggregated statistics from the shared analytics instance.

    Args:
        stream_id: Stream to read.
        metric_name: Restrict the summary to this metric; ``None`` covers
            every metric of the stream.
        window_minutes: Look-back window in minutes.
    """
    return real_time_analytics.get_aggregated_metrics(
        stream_id,
        metric_name=metric_name,
        window_minutes=window_minutes,
    )


def subscribe_to_analytics(stream_id: str, callback: Callable) -> bool:
    """Register ``callback`` for aggregate updates of a stream on the shared instance.

    Returns:
        True when the subscription was added, False for an unknown stream.
    """
    subscribed = real_time_analytics.subscribe_to_stream(stream_id, callback)
    return subscribed


def set_analytics_alert(stream_id: str, metric_name: str, threshold: float):
    """Set the alert threshold of one metric on the shared analytics instance.

    Unknown streams are ignored by the underlying call.
    """
    shared = real_time_analytics
    shared.set_alert_threshold(stream_id, metric_name, threshold)


def get_analytics_alerts(stream_id: str = None, severity: str = None) -> List[Dict[str, Any]]:
    """Return pending alerts from the shared analytics instance.

    Args:
        stream_id: When given, keep only alerts of this stream.
        severity: When given, keep only alerts of this severity
            ("high" or "medium" for alerts raised by threshold checks).
    """
    return real_time_analytics.get_alerts(stream_id, severity)


if __name__ == "__main__":
    # Smoke-test / demo of the public helpers; prints results to stdout.
    print("Real-Time Analytics for TuskLang Python SDK")
    print("=" * 50)

    # Test real-time analytics
    print("\n1. Testing Real-Time Analytics:")

    # Add performance data
    add_analytics_data("performance", "cpu_usage", 75.5)
    add_analytics_data("performance", "memory_usage", 82.3)
    add_analytics_data("performance", "response_time", 150.0)

    # Add operation data
    add_analytics_data("operations", "operation_count", 100)
    add_analytics_data("operations", "error_count", 2)
    add_analytics_data("operations", "latency", 45.0)

    # Add security data
    add_analytics_data("security", "failed_logins", 5)
    add_analytics_data("security", "suspicious_activity", 0)

    # Set alert thresholds
    set_analytics_alert("performance", "cpu_usage", 80.0)
    set_analytics_alert("security", "failed_logins", 10.0)

    # Get insights
    print("\n2. Performance Insights:")
    performance_insights = get_analytics_insights("performance")
    print(f"  CPU Usage Trend: {performance_insights.get('metrics', {}).get('cpu_usage', {}).get('trend', 'N/A')}")
    print(f"  Memory Usage: {performance_insights.get('metrics', {}).get('memory_usage', {}).get('current_value', 'N/A')}")

    # Get metrics. Query each metric by name: an unfiltered query mixes
    # operation_count, error_count and latency into one set of statistics,
    # which makes both figures below meaningless.
    print("\n3. Operation Metrics:")
    latency_metrics = get_analytics_metrics("operations", "latency", window_minutes=5)
    operation_metrics = get_analytics_metrics("operations", "operation_count", window_minutes=5)
    print(f"  Average Latency: {latency_metrics.get('average', 'N/A')}")
    print(f"  Total Operations: {operation_metrics.get('sum', 'N/A')}")

    # Get alerts
    print("\n4. Alerts:")
    alerts = get_analytics_alerts()
    if not alerts:
        print("  (none)")
    for alert in alerts:
        print(f"  {alert['message']} (Severity: {alert['severity']})")

    # Test streaming analytics
    print("\n5. Testing Streaming Analytics:")
    streaming = StreamingAnalytics(window_size=10)

    for _ in range(15):
        streaming.add_data({
            "value": random.uniform(10, 100),
            "timestamp": datetime.now().isoformat()
        })

    streaming_insights = streaming.get_streaming_insights()
    print(f"  Current Value: {streaming_insights.get('latest_data', {}).get('value', 'N/A')}")
    print(f"  Average Value: {streaming_insights.get('numeric_fields', {}).get('value', {}).get('average', 'N/A')}")

    print("\nReal-time analytics testing completed!")