"""
Performance benchmark for require_fastbinary parameter.

This script measures the performance difference between:
- require_fastbinary=True (uses C++ fastbinary extension)
- require_fastbinary=False (uses pure Python Thrift implementation)

Expected: ~2-3x performance improvement with fastbinary enabled
"""

import os
import time
import sys
import statistics
from datetime import datetime

from e6data_python_connector import Connection


# Connection settings, read once from the process environment at import time.
# Variables that are not set come back as None; only the port has a fallback.
username = os.getenv('E6DATA_USERNAME')
password = os.getenv('E6DATA_PASSWORD')
host = os.getenv('E6DATA_HOST')
database = os.getenv('E6DATA_DATABASE')
# The port is converted to int; it defaults to 443 when E6DATA_PORT is unset.
port = int(os.getenv('E6DATA_PORT', '443'))
catalog_name = os.getenv('E6DATA_CATALOG')
cluster_name = os.getenv('E6DATA_CLUSTER_NAME')

# Benchmark queries keyed by size label. Every query reads the same table;
# only the LIMIT (and therefore the result-set size) differs.
TEST_QUERIES = {
    size_label: f'select * from date_dim limit {row_limit}'
    for size_label, row_limit in (
        ('small', 10),
        ('medium', 100),
        ('large', 1000),
        ('very_large', 10000),
    )
}


class PerformanceBenchmark:
    """Benchmark query round-trips with and without the fastbinary extension.

    Every query in ``TEST_QUERIES`` is executed ``iterations`` times with
    ``require_fastbinary=True`` and again with ``require_fastbinary=False``.
    Timing records of successful runs are collected in ``self.results`` and
    compared by :meth:`print_summary`.
    """

    # Timing fields compared in the per-query section of the summary.
    _SUMMARY_METRICS = (
        'connection_time',
        'query_execution_time',
        'data_fetch_time',
        'total_time',
    )

    def __init__(self):
        # Maps mode -> query name -> list of timing dicts from successful runs.
        self.results = {
            'with_fastbinary': {},
            'without_fastbinary': {}
        }

    @staticmethod
    def _close_quietly(*resources):
        """Close each non-None resource in order, reporting failures instead of raising."""
        for resource in resources:
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as e:
                print(f"⚠️  Warning: failed to close {type(resource).__name__}: {e}")

    @staticmethod
    def _positive_values(runs, metric):
        """Return ``run[metric]`` for every run where that value is greater than zero."""
        return [run[metric] for run in runs if run[metric] > 0]

    @staticmethod
    def _compare(with_values, without_values):
        """Compare two samples of timings.

        Returns:
            ``None`` when either sample is empty, otherwise a tuple
            ``(avg_with, avg_without, speedup, improvement_percent)``.
            When either mean is not positive the ratio is meaningless, so a
            neutral ``speedup`` of 1.0 and ``improvement`` of 0.0 are returned.
        """
        if not with_values or not without_values:
            return None

        avg_with = statistics.mean(with_values)
        avg_without = statistics.mean(without_values)

        # Both means are used as divisors below, so both must be positive.
        if avg_with > 0 and avg_without > 0:
            speedup = avg_without / avg_with
            improvement = ((avg_without - avg_with) / avg_without) * 100
        else:
            speedup = 1.0
            improvement = 0.0

        return avg_with, avg_without, speedup, improvement

    @staticmethod
    def _report_fastbinary_availability():
        """Print whether the connector reports fastbinary as available."""
        try:
            from e6data_python_connector.datainputstream import is_fastbinary_available
            fastbinary_available = is_fastbinary_available()
        except Exception as e:
            # Best effort only: the benchmark still runs if the check fails.
            print(f"Error checking fastbinary availability: {e}")
            return

        print(f"FastBinary Available: {fastbinary_available}")
        if not fastbinary_available:
            print("\n⚠️  WARNING: FastBinary is NOT available!")
            print("Performance comparison will show minimal difference.")
            print("Both tests will use pure Python implementation.\n")

    def run_query_with_timing(self, query, require_fastbinary, query_name):
        """Open a connection, run ``query`` once and time each phase.

        Args:
            query: SQL text passed to ``cursor.execute``.
            require_fastbinary: Forwarded unchanged to ``Connection``.
            query_name: Label printed in the progress banner.

        Returns:
            A ``(times, error)`` tuple. ``times`` holds ``connection_time``,
            ``query_execution_time``, ``data_fetch_time`` and ``total_time``
            (seconds, from ``time.perf_counter``) plus ``row_count``; phases
            that were not reached stay at 0. ``error`` is ``None`` on success,
            otherwise ``str(exception)``.

        The cursor and connection are closed whether or not the run succeeds.
        A failure while closing is printed as a warning and does not discard
        timings that were already measured.
        """
        print(f"\n{'='*60}")
        print(f"Running: {query_name}")
        print(f"require_fastbinary: {require_fastbinary}")
        print(f"Query: {query}")
        print(f"{'='*60}")

        times = {
            'connection_time': 0,
            'query_execution_time': 0,
            'data_fetch_time': 0,
            'total_time': 0,
            'row_count': 0
        }

        # Initialised up front so the finally clause can tell what was opened.
        conn = None
        cursor = None

        # Time connection creation
        conn_start = time.perf_counter()
        try:
            conn = Connection(
                host=host,
                port=port,
                username=username,
                database=database,
                catalog=catalog_name,
                password=password,
                secure=True,
                cluster_name=cluster_name,
                debug=False,
                auto_resume=False,
                require_fastbinary=require_fastbinary
            )
            times['connection_time'] = time.perf_counter() - conn_start
            print(f"✓ Connection created: {times['connection_time']:.4f}s")

            cursor = conn.cursor()

            # Time query execution
            exec_start = time.perf_counter()
            query_id = cursor.execute(query)
            times['query_execution_time'] = time.perf_counter() - exec_start
            print(f"✓ Query executed (ID: {query_id}): {times['query_execution_time']:.4f}s")

            # Time data fetching (this is where fastbinary makes a difference)
            fetch_start = time.perf_counter()
            all_records = cursor.fetchall()
            times['data_fetch_time'] = time.perf_counter() - fetch_start
            times['row_count'] = len(all_records)
            print(f"✓ Data fetched ({times['row_count']} rows): {times['data_fetch_time']:.4f}s")

            # Total is the sum of the three measured phases.
            times['total_time'] = (
                times['connection_time']
                + times['query_execution_time']
                + times['data_fetch_time']
            )
        except Exception as e:
            print(f"✗ Error: {str(e)}")
            return times, str(e)
        finally:
            # Runs on success and on failure so a failed query cannot leak
            # an open connection across iterations.
            self._close_quietly(cursor, conn)

        return times, None

    def _run_iterations(self, query, query_name, require_fastbinary, iterations):
        """Run ``query`` ``iterations`` times in one mode; return the successful timing dicts."""
        mode = 'with' if require_fastbinary else 'without'
        successful = []
        for i in range(iterations):
            print(f"\nIteration {i+1}/{iterations}:")
            times, error = self.run_query_with_timing(
                query,
                require_fastbinary=require_fastbinary,
                query_name=f"{query_name} ({mode} fastbinary)"
            )
            if error is None:
                successful.append(times)
            else:
                print(f"Skipping iteration due to error: {error}")
        return successful

    def run_benchmark(self, iterations=3):
        """Run every query in ``TEST_QUERIES`` in both modes, then print the summary.

        Args:
            iterations: Number of runs per query and per mode.
        """
        print(f"\n{'#'*60}")
        print("# FastBinary Performance Benchmark")
        print(f"# Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"# Iterations per test: {iterations}")
        print(f"{'#'*60}\n")

        self._report_fastbinary_availability()

        # Run benchmarks for each query size
        for query_name, query in TEST_QUERIES.items():
            print(f"\n{'='*60}")
            print(f"Testing Query Size: {query_name.upper()}")
            print(f"{'='*60}")

            print("\n--- WITH FASTBINARY (require_fastbinary=True) ---")
            with_fb_times = self._run_iterations(query, query_name, True, iterations)

            print("\n--- WITHOUT FASTBINARY (require_fastbinary=False) ---")
            without_fb_times = self._run_iterations(query, query_name, False, iterations)

            self.results['with_fastbinary'][query_name] = with_fb_times
            self.results['without_fastbinary'][query_name] = without_fb_times

        self.print_summary()

    def print_summary(self):
        """Print per-query averages for both modes and an overall recommendation.

        Queries lacking a successful run in either mode are reported as
        "Insufficient data". Only timing values greater than zero are averaged.
        The recommendation is based on the mean data-fetch speedup across
        queries.
        """
        print(f"\n{'#'*60}")
        print("# PERFORMANCE SUMMARY")
        print(f"{'#'*60}\n")

        # Data-fetch speedup per query, collected for the recommendation below.
        fetch_speedups = []

        for query_name in TEST_QUERIES:
            with_fb = self.results['with_fastbinary'].get(query_name, [])
            without_fb = self.results['without_fastbinary'].get(query_name, [])

            if not with_fb or not without_fb:
                print(f"\n{query_name.upper()}: Insufficient data")
                continue

            print(f"\n{'='*60}")
            print(f"Query Size: {query_name.upper()}")
            print(f"{'='*60}")

            for metric in self._SUMMARY_METRICS:
                comparison = self._compare(
                    self._positive_values(with_fb, metric),
                    self._positive_values(without_fb, metric),
                )
                if comparison is None:
                    continue
                avg_with, avg_without, speedup, improvement = comparison

                if metric == 'data_fetch_time':
                    fetch_speedups.append(speedup)

                print(f"\n{metric.replace('_', ' ').title()}:")
                print(f"  With FastBinary:    {avg_with:.4f}s")
                print(f"  Without FastBinary: {avg_without:.4f}s")
                print(f"  Speedup:            {speedup:.2f}x")
                print(f"  Improvement:        {improvement:.1f}%")

            # Row count is taken from the first fastbinary run
            # (expected to match the pure Python runs).
            row_count = with_fb[0]['row_count']
            print(f"\nRows Fetched: {row_count:,}")

        # Overall recommendation
        print(f"\n{'='*60}")
        print("RECOMMENDATIONS")
        print(f"{'='*60}")

        if fetch_speedups:
            avg_speedup = statistics.mean(fetch_speedups)
            print(f"\nAverage Data Fetch Speedup: {avg_speedup:.2f}x")

            if avg_speedup > 1.5:
                print("\n✓ RECOMMENDATION: Use require_fastbinary=True (default)")
                print("  FastBinary provides significant performance improvement.")
            elif avg_speedup > 1.1:
                print("\n• RECOMMENDATION: Use require_fastbinary=True (default)")
                print("  FastBinary provides moderate performance improvement.")
            else:
                print("\n⚠️  RECOMMENDATION: FastBinary may not be available or working")
                print("  Performance difference is minimal.")
                print("  Check if system dependencies are installed correctly.")

        print("\nNOTE: For production use with large datasets:")
        print("  - require_fastbinary=True (recommended for best performance)")
        print("  - require_fastbinary=False (only if fastbinary cannot be installed)")


def main():
    """Run the benchmark from the command line.

    An optional first command-line argument sets the number of iterations per
    test. A value that is not an integer, or is less than 1, is rejected and
    the default of 3 is used instead. The process exits with status 1 when the
    benchmark is interrupted or raises.
    """
    print("\n" + "="*60)
    print("FastBinary Performance Benchmark")
    print("="*60)

    # Check command line arguments
    iterations = 3
    if len(sys.argv) > 1:
        try:
            requested = int(sys.argv[1])
        except ValueError:
            requested = None

        # A zero or negative count would run nothing and produce an empty
        # summary, so it is rejected the same way as non-numeric input.
        if requested is None or requested < 1:
            print(f"\nInvalid iteration count, using default: {iterations}")
        else:
            iterations = requested
            print(f"\nUsing {iterations} iterations per test")

    # Create and run benchmark
    benchmark = PerformanceBenchmark()

    try:
        benchmark.run_benchmark(iterations=iterations)
    except KeyboardInterrupt:
        print("\n\nBenchmark interrupted by user.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback, then exit non-zero.
        print(f"\n\nBenchmark failed with error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    print("\n" + "="*60)
    print("Benchmark Complete!")
    print("="*60 + "\n")


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
