import os
from math import radians, sin, cos, sqrt, atan2
from typing import Dict, Optional

import pandas as pd

def process_team_location(df_match: pd.DataFrame, teams_path: str) -> pd.DataFrame:
    """
    Attach the home team's stadium location to a single match row.

    The CSV at ``teams_path`` is used as a small location database with the
    columns ``name``, ``longitude``, ``latitude`` and ``altitude``:

    - The CSV (and its parent directory) is created if it doesn't exist
    - The home team is looked up by ``home_team_name``
    - If the team is unknown, the user is asked for its coordinates on stdin
      and the answer is appended to the CSV
    - The location is copied into a copy of ``df_match``

    Args:
        df_match: DataFrame with ONE match row (must have 'home_team_name' column).
            If it has more rows, only the first one is processed.
        teams_path: Path to teams CSV file

    Returns:
        Copy of df_match with longitude, latitude, and altitude columns.
        An empty df_match is returned unchanged. When the row has no home
        team name the location columns are left empty and nothing is asked
        or written.

    Raises:
        ValueError: If df_match has no 'home_team_name' column.
    """

    def _ask_optional_float(prompt: str):
        """Ask until the answer parses as a float; an empty answer means unknown (None)."""
        while True:
            answer = input(prompt).strip()
            if answer == '':
                return None
            try:
                return float(answer)
            except ValueError:
                print("   Invalid input. Please enter a valid number or press Enter to skip.")

    # Validate input
    if df_match.empty:
        print("[WARNING] Empty DataFrame provided, returning as is")
        return df_match

    # Checked before touching the filesystem so a malformed frame has no side effects
    if 'home_team_name' not in df_match.columns:
        raise ValueError("df_match must have a 'home_team_name' column")

    if len(df_match) > 1:
        print(f"[WARNING] DataFrame has {len(df_match)} rows, only processing first row")
        df_match = df_match.iloc[[0]].copy()
    else:
        df_match = df_match.copy()

    # Ensure parent directory exists (dirname is '' for a bare file name)
    teams_dir = os.path.dirname(teams_path)
    if teams_dir:
        os.makedirs(teams_dir, exist_ok=True)

    # Load the teams CSV, creating an empty one on first use
    if os.path.exists(teams_path):
        df_teams = pd.read_csv(teams_path, encoding='utf-8')
    else:
        df_teams = pd.DataFrame(columns=['name', 'longitude', 'latitude', 'altitude'])
        df_teams.to_csv(teams_path, index=False, encoding='utf-8')

    # Add location info columns to df_match if they don't exist
    for col in ('longitude', 'latitude', 'altitude'):
        if col not in df_match.columns:
            df_match[col] = None

    team_name = df_match['home_team_name'].iloc[0]

    if pd.isna(team_name) or team_name == '':
        # Without a name there is nothing to look up; returning here also
        # avoids prompting for (and persisting) a nameless team.
        print("[WARNING] No home_team_name found in match, skipping location processing")
        return df_match

    known_team = df_teams[df_teams['name'] == team_name]

    if not known_team.empty:
        # Team found - reuse the stored location (first match wins)
        stored = known_team.iloc[0]
        longitude = stored['longitude']
        latitude = stored['latitude']
        altitude = stored['altitude']
    else:
        # Team not found - ask user for data
        print("Team NOT found in database. Please provide home stadium location:")
        longitude = _ask_optional_float("   Longitude (decimal degrees, e.g., -3.688): ")
        latitude = _ask_optional_float("   Latitude (decimal degrees, e.g., 40.453): ")
        altitude = _ask_optional_float("   Altitude (meters, e.g., 667): ")

        # Persist the new team so it is only asked for once
        new_team = pd.DataFrame([{
            'name': team_name,
            'longitude': longitude,
            'latitude': latitude,
            'altitude': altitude
        }])
        df_teams = pd.concat([df_teams, new_team], ignore_index=True)
        df_teams.to_csv(teams_path, index=False, encoding='utf-8')

    # Update df_match with team location info
    row_label = df_match.index[0]
    df_match.loc[row_label, 'longitude'] = longitude
    df_match.loc[row_label, 'latitude'] = latitude
    df_match.loc[row_label, 'altitude'] = altitude

    return df_match


def haversine_distance(
    lon1: float,
    lat1: float,
    lon2: float,
    lat2: float,
    radius_km: float = 6371.0
) -> Optional[float]:
    """
    Calculate the great circle distance between two points
    on a sphere (specified in decimal degrees) with the haversine formula.

    Args:
        lon1: Longitude of the first point
        lat1: Latitude of the first point
        lon2: Longitude of the second point
        lat2: Latitude of the second point
        radius_km: Sphere radius in kilometers (defaults to 6371)

    Returns:
        Distance in kilometers, or None if any coordinate is missing (None/NaN).
    """
    if pd.isna(lon1) or pd.isna(lat1) or pd.isna(lon2) or pd.isna(lat2):
        return None

    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Mathematically 0 <= a <= 1, but rounding can push it a hair outside that
    # range for (near-)antipodal points, which would make sqrt(1 - a) fail.
    a = min(1.0, max(0.0, a))

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius_km * c


def get_season_from_date(date: pd.Timestamp) -> str:
    """
    Map a date to its football season label.

    Dates from August onwards belong to the season that starts in the
    same calendar year; dates from January to July belong to the season
    that started the previous year.

    Args:
        date: Date to classify (anything exposing ``year`` and ``month``)

    Returns:
        Season string in format "YYYY-YYYY", e.g. "2023-2024"
    """
    # The season is named after the year it starts in and the following year
    first_year = date.year if date.month >= 8 else date.year - 1
    return f"{first_year}-{first_year + 1}"


def calculate_accumulated_distance(
    team_name: str,
    current_date: pd.Timestamp,
    df: pd.DataFrame,
    df_teams: pd.DataFrame
) -> Optional[float]:
    """
    Calculate accumulated distance traveled by a team in the current season,
    counting only matches played strictly BEFORE current_date.

    The season window is August 1st to June 30th of the season that
    current_date falls in. Every away match in that window contributes the
    one-way great-circle distance from the team's home stadium to the host's
    stadium; home matches contribute nothing. Away matches whose host has no
    (complete) location in df_teams are ignored.

    Args:
        team_name: Name of the team
        current_date: Current match date
        df: Complete DataFrame with all matches (needs 'home_team_name',
            'away_team_name' and 'date_of_match'; dates are coerced to
            datetime, unparseable ones never match)
        df_teams: DataFrame with team locations ('name', 'longitude', 'latitude')

    Returns:
        Accumulated distance in kilometers for current season, 0.0 if the team
        has no earlier matches this season, or None if current_date is missing
        or the team's own location is unknown/incomplete.
    """

    def _stadium_of(name):
        """Return (longitude, latitude) for a team, or None if unknown or incomplete."""
        rows = df_teams[df_teams['name'] == name]
        if rows.empty:
            return None
        lon = rows['longitude'].iloc[0]
        lat = rows['latitude'].iloc[0]
        if pd.isna(lon) or pd.isna(lat):
            return None
        return lon, lat

    current_date = pd.to_datetime(current_date)
    if pd.isna(current_date):
        # No date means no season; avoids building a "nan-nan" season label
        return None

    home = _stadium_of(team_name)
    if home is None:
        return None
    home_lon, home_lat = home

    # Define season boundaries
    season_start_year = int(get_season_from_date(current_date).split('-')[0])
    season_start = pd.Timestamp(f"{season_start_year}-08-01")
    season_end = pd.Timestamp(f"{season_start_year + 1}-06-30")

    # Coerce on a separate Series so the caller's frame is never modified
    match_dates = pd.to_datetime(df['date_of_match'], errors='coerce')
    earlier_this_season = (
        (match_dates >= season_start) &
        (match_dates < current_date) &
        (match_dates <= season_end)
    )

    # Only away matches add distance: the team is treated as starting every
    # trip from its home stadium, so a home match is a zero-length trip.
    played_away = (df['away_team_name'] == team_name) & (df['home_team_name'] != team_name)

    total_accumulated = 0.0
    for host in df.loc[earlier_this_season & played_away, 'home_team_name']:
        venue = _stadium_of(host)
        if venue is None:
            continue  # Skip if host location not found

        distance = haversine_distance(home_lon, home_lat, venue[0], venue[1])
        if distance is not None:
            total_accumulated += distance

    return total_accumulated


def calculate_team_travel_distance(
    match_row: pd.DataFrame, 
    df: pd.DataFrame,
    teams_path: str = "app/data/raw/teams/teams.csv"
) -> pd.DataFrame:
    """
    Calculate travel distances for both teams in a match.

    The match venue is the home team's stadium (resolved with
    process_team_location). For each team:
    1. Get their home coordinates from the teams CSV, asking the user on
       stdin (and saving the answer) if the team is unknown
    2. Find their last match before the current date (as home or away)
    3. Calculate distance from last match location to their home
    4. Calculate distance from their home to current match location
    5. Calculate accumulated distance for the current season (INCLUDING current match)

    A team with no earlier match travels 0 km when playing at home, or the
    distance from its home to the venue when playing away.

    Args:
        match_row: DataFrame with ONE match row (only the first row is used)
        df: Complete DataFrame with all historical matches
        teams_path: Path to teams CSV file

    Returns:
        Updated copy of match_row with columns:
        - home_team_distance: km traveled by home team for this match
        - away_team_distance: km traveled by away team for this match
        - home_team_accumulated_distance: total km traveled by home team this season (including current match)
        - away_team_accumulated_distance: total km traveled by away team this season (including current match)
        - altitude: altitude of the match venue
        Distance columns stay empty (None) for a team whose location, or whose
        previous host's location, is unknown. An empty match_row is returned
        unchanged.

    Raises:
        ValueError: If match_row lacks 'home_team_name', 'away_team_name'
            or 'date_of_match'.
    """

    def _ask_optional_float(prompt: str):
        """Ask until the answer parses as a float; an empty answer means unknown (None)."""
        while True:
            answer = input(prompt).strip()
            if answer == '':
                return None
            try:
                return float(answer)
            except ValueError:
                print("   Invalid input. Please enter a valid number or press Enter to skip.")

    # Validate input
    if match_row.empty:
        print("[WARNING] Empty DataFrame provided")
        return match_row

    if len(match_row) > 1:
        print(f"[WARNING] DataFrame has {len(match_row)} rows, only processing first row")
        match_row = match_row.iloc[[0]].copy()
    else:
        match_row = match_row.copy()

    # Required columns
    required_cols = ['home_team_name', 'away_team_name', 'date_of_match']
    missing = [col for col in required_cols if col not in match_row.columns]
    if missing:
        raise ValueError(f"match_row must have columns: {missing}")

    # Get current match info
    home_team = match_row['home_team_name'].iloc[0]
    away_team = match_row['away_team_name'].iloc[0]
    current_date = pd.to_datetime(match_row['date_of_match'].iloc[0])

    # Process current match location (home team's stadium)
    match_row = process_team_location(match_row, teams_path=teams_path)

    current_lon = match_row['longitude'].iloc[0]
    current_lat = match_row['latitude'].iloc[0]
    current_alt = match_row['altitude'].iloc[0]

    # Work on a copy so the caller's frame keeps its original date column
    df = df.copy()
    if 'date_of_match' in df.columns:
        df['date_of_match'] = pd.to_datetime(df['date_of_match'], errors='coerce')

    # Load teams database (re-read: process_team_location may have added the home team)
    if os.path.exists(teams_path):
        df_teams = pd.read_csv(teams_path, encoding='utf-8')
    else:
        df_teams = pd.DataFrame(columns=['name', 'longitude', 'latitude', 'altitude'])
        teams_dir = os.path.dirname(teams_path)
        if teams_dir:  # dirname is '' for a bare file name; makedirs('') would fail
            os.makedirs(teams_dir, exist_ok=True)
        df_teams.to_csv(teams_path, index=False, encoding='utf-8')

    # Initialize distance columns
    match_row['home_team_distance'] = None
    match_row['away_team_distance'] = None
    match_row['home_team_accumulated_distance'] = None
    match_row['away_team_accumulated_distance'] = None
    match_row['altitude'] = current_alt

    row_label = match_row.index[0]

    # Process each team
    for team_type, team_name in (('home', home_team), ('away', away_team)):

        # Get team's home coordinates (or ask for them if the team is unknown)
        team_info = df_teams[df_teams['name'] == team_name]

        if team_info.empty:
            print(f"{team_name} not found in teams database.")
            print(f"Please provide {team_name}'s home stadium location:")

            team_home_lon = _ask_optional_float("   Longitude (decimal degrees, e.g., -3.688): ")
            team_home_lat = _ask_optional_float("   Latitude (decimal degrees, e.g., 40.453): ")
            team_altitude = _ask_optional_float("   Altitude (meters, e.g., 667): ")

            # Persist the answer so the team is only asked for once
            new_team = pd.DataFrame([{
                'name': team_name,
                'longitude': team_home_lon,
                'latitude': team_home_lat,
                'altitude': team_altitude
            }])

            df_teams = pd.concat([df_teams, new_team], ignore_index=True)
            df_teams.to_csv(teams_path, index=False, encoding='utf-8')

        else:
            team_home_lon = team_info['longitude'].iloc[0]
            team_home_lat = team_info['latitude'].iloc[0]

        if pd.isna(team_home_lon) or pd.isna(team_home_lat):
            print(f"{team_name} has incomplete location data, skipping distance calculation")
            continue

        # =================================================================
        # CALCULATE MATCH DISTANCE (last match → home → current match)
        # =================================================================

        # Find team's matches before current date
        team_matches = df[
            ((df['home_team_name'] == team_name) | (df['away_team_name'] == team_name)) &
            (df['date_of_match'] < current_date)
        ]

        if team_matches.empty:
            # No previous match: the team starts from home
            if team_type == 'home':
                current_match_distance = 0.0  # Playing at home
            else:
                # Distance from the away team's home to the match stadium
                current_match_distance = haversine_distance(
                    team_home_lon, team_home_lat,
                    current_lon, current_lat
                )
        else:
            # Most recent match first
            last_match = team_matches.sort_values('date_of_match', ascending=False).iloc[0]

            # Determine where the team played their last match
            if last_match['home_team_name'] == team_name:
                last_match_lon = team_home_lon
                last_match_lat = team_home_lat
            else:
                opponent = last_match['home_team_name']
                opponent_info = df_teams[df_teams['name'] == opponent]

                if opponent_info.empty or pd.isna(opponent_info['longitude'].iloc[0]):
                    print(f"   Cannot find location for opponent {opponent}")
                    # Leaves both distance columns of this team empty
                    continue

                last_match_lon = opponent_info['longitude'].iloc[0]
                last_match_lat = opponent_info['latitude'].iloc[0]

            # Calculate distance from last match to their home
            dist_last_to_home = haversine_distance(
                last_match_lon, last_match_lat,
                team_home_lon, team_home_lat
            )

            # Calculate distance from their home to current match
            dist_home_to_current = haversine_distance(
                team_home_lon, team_home_lat,
                current_lon, current_lat
            )

            # Total distance traveled for this match
            if dist_last_to_home is not None and dist_home_to_current is not None:
                current_match_distance = dist_last_to_home + dist_home_to_current
            else:
                current_match_distance = None
                print("   Could not calculate total distance")

        # Store match distance
        match_row.loc[row_label, f'{team_type}_team_distance'] = current_match_distance

        # =================================================================
        # CALCULATE ACCUMULATED DISTANCE FOR THE SEASON
        # =================================================================

        # Accumulated distance of PREVIOUS matches (current one not included)
        accumulated_distance_previous = calculate_accumulated_distance(
            team_name=team_name,
            current_date=current_date,
            df=df,
            df_teams=df_teams
        )

        if accumulated_distance_previous is not None and current_match_distance is not None:
            total_accumulated = accumulated_distance_previous + current_match_distance
            match_row.loc[row_label, f'{team_type}_team_accumulated_distance'] = total_accumulated
        elif current_match_distance is not None:
            # No previous accumulated value available: use only the current distance
            match_row.loc[row_label, f'{team_type}_team_accumulated_distance'] = current_match_distance
        else:
            print("   Could not calculate accumulated distance")

    return match_row


def compute_rest_times(df_row: pd.Series, historical_matches_path: str) -> pd.Series:
    """
    Compute rest time in hours for home and away teams based on the immediately
    previous match each team played (home or away) in the historical file.

    Kick-off times are built from 'date_of_match' plus 'hour_of_the_match'
    (HH:MM). A missing or unparseable hour counts as 00:00.

    Args:
        df_row: Single row (Series) containing the match information
        historical_matches_path: Path to CSV file with all historical matches

    Returns:
        Series with only the computed columns:
        - home_team_rest_time
        - away_team_rest_time
        A value stays pd.NA when the team has no earlier match, when the
        current match date cannot be parsed, or when the historical file is
        missing or empty.
    """

    def _to_datetime(date_str, hour_str):
        """
        Build a datetime from date + hour-of-match strings.
        If hour is missing or not HH:MM, default to 00:00.
        """
        dt = pd.to_datetime(date_str, errors="coerce")
        if pd.isna(dt):
            return pd.NaT

        if pd.notna(hour_str):
            try:
                time_obj = pd.to_datetime(hour_str, format="%H:%M", errors="coerce")
                if pd.notna(time_obj):
                    dt = dt.replace(hour=time_obj.hour, minute=time_obj.minute)
            except (ValueError, TypeError, OverflowError):
                pass  # Best effort: keep dt at 00:00 if the hour cannot be applied

        return dt

    # Initialize result with only the columns we're computing
    result = pd.Series({
        'home_team_rest_time': pd.NA,
        'away_team_rest_time': pd.NA
    })

    # Read historical matches
    try:
        df_historical = pd.read_csv(historical_matches_path)
    except FileNotFoundError:
        print(f"Warning: Historical matches file not found at {historical_matches_path}")
        return result
    except pd.errors.EmptyDataError:
        print(f"Warning: Historical matches file is empty at {historical_matches_path}")
        return result

    # A header-only file holds no previous matches. Returning here also keeps
    # the row-wise apply() below away from an empty frame, where it yields a
    # DataFrame instead of a Series.
    if df_historical.empty:
        return result

    # Build current match datetime
    current_dt = _to_datetime(
        df_row.get("date_of_match"),
        df_row.get("hour_of_the_match", None)
    )
    if pd.isna(current_dt):
        return result

    # Ensure required columns exist in historical data
    if "hour_of_the_match" not in df_historical.columns:
        df_historical["hour_of_the_match"] = pd.NA

    # Build datetime for all historical matches
    df_historical["match_dt"] = df_historical.apply(
        lambda row: _to_datetime(row["date_of_match"], row.get("hour_of_the_match")),
        axis=1
    )

    # Keep only matches with a valid datetime strictly before the current match
    earlier = df_historical[
        df_historical["match_dt"].notna() &
        (df_historical["match_dt"] < current_dt)
    ]

    # Same computation for both sides of the current match
    for side in ("home", "away"):
        team = df_row.get(f"{side}_team_name")
        played = earlier[
            (earlier["home_team_name"] == team) |
            (earlier["away_team_name"] == team)
        ]
        if played.empty:
            continue

        previous_dt = played["match_dt"].max()
        result[f"{side}_team_rest_time"] = (current_dt - previous_dt).total_seconds() / 3600.0

    return result


def calculate_accumulated_matches_for_row(
    df_row: pd.Series,
    df: pd.DataFrame
) -> pd.Series:
    """
    Count the matches each team of a row has already played this season.

    The season window is August 1st to June 30th of the season the row's
    date falls in; only matches strictly BEFORE the row's date are counted,
    whether the team played them at home or away.

    Args:
        df_row: Single row (Series) containing the match information
        df: Complete DataFrame with all matches (needs 'home_team_name',
            'away_team_name' and 'date_of_match'; dates are coerced to
            datetime, unparseable ones are never counted)

    Returns:
        Series with only the computed columns:
        - home_team_accumulated_matches
        - away_team_accumulated_matches
    """
    # Get current match info
    home_team = df_row.get("home_team_name")
    away_team = df_row.get("away_team_name")
    current_date = pd.to_datetime(df_row.get("date_of_match"))

    # Define season boundaries
    season_start_year = int(get_season_from_date(current_date).split('-')[0])
    season_start = pd.Timestamp(f"{season_start_year}-08-01")
    season_end = pd.Timestamp(f"{season_start_year + 1}-06-30")

    # Coerce only the date column; copying the whole history frame on every
    # call is unnecessary and the caller's frame is left untouched either way.
    match_dates = pd.to_datetime(df['date_of_match'], errors='coerce')
    earlier_this_season = (
        (match_dates >= season_start) &
        (match_dates < current_date) &
        (match_dates <= season_end)
    )

    def _matches_played(team) -> int:
        """Number of earlier in-season matches that involve the given team."""
        involved = (df['home_team_name'] == team) | (df['away_team_name'] == team)
        return int((involved & earlier_this_season).sum())

    # Return only the calculated columns
    return pd.Series({
        'home_team_accumulated_matches': _matches_played(home_team),
        'away_team_accumulated_matches': _matches_played(away_team)
    })


def calculate_team_travel_distance_for_row(
    df_row: pd.Series,
    df: pd.DataFrame,
    teams_path: str = "app/data/raw/teams/teams.csv"
) -> pd.Series:
    """
    Row-wise adapter around calculate_team_travel_distance.

    Wraps the row in a one-row DataFrame, delegates the work and keeps only
    the computed travel/geographic values.

    Args:
        df_row: Single row (Series) containing the match information
        df: Complete DataFrame with all historical matches
        teams_path: Path to teams CSV file

    Returns:
        Series with only the computed columns:
        - home_team_distance
        - away_team_distance
        - home_team_accumulated_distance
        - away_team_accumulated_distance
        - altitude
        - longitude
        - latitude
    """
    output_columns = (
        'home_team_distance',
        'away_team_distance',
        'home_team_accumulated_distance',
        'away_team_accumulated_distance',
        'altitude',
        'longitude',
        'latitude',
    )

    # The underlying function works on a one-row DataFrame
    single_match = df_row.to_frame().T
    enriched = calculate_team_travel_distance(
        match_row=single_match,
        df=df,
        teams_path=teams_path
    ).iloc[0]

    # .get() yields None for any column the underlying function did not produce
    return pd.Series({column: enriched.get(column) for column in output_columns})


def enrich_with_team_history(
    df: pd.DataFrame,
    teams_columns: Dict[str, type],
    csv_path: str,
    country: str = None
) -> pd.DataFrame:
    """
    Enrich dataframe with team historical information.

    For every match row, the home and away teams are looked up by name in
    the CSV at ``csv_path``. Known teams have their values copied into
    ``home_<column>`` / ``away_<column>``; unknown teams are asked for on
    stdin, one value per entry of ``teams_columns``, and saved to the CSV.
    Missing values are filled with a default (0, or '' for str columns).

    Args:
        df: DataFrame with matches (must have home_team_name, away_team_name)
        teams_columns: Dictionary with column names and types (base names, will add home_/away_ prefix)
        csv_path: Path to CSV file with team historical information
            (created, together with its parent directory, if it doesn't exist)
        country: Country of the competition. Currently not used by this
            function; accepted for interface compatibility.

    Returns:
        Copy of df with enriched team history columns
    """

    def _default_for(col_type):
        """Placeholder for a missing value: '' for text columns, 0 for everything else."""
        # A text column cannot hold the integer 0 once it has pandas' "string" dtype
        return '' if col_type == str else 0

    # Read or create teams CSV
    if os.path.exists(csv_path):
        df_teams = pd.read_csv(csv_path, encoding='utf-8')
    else:
        print(f"Creating new teams history CSV at {csv_path}")
        df_teams = pd.DataFrame(columns=['name'])
        csv_dir = os.path.dirname(csv_path)
        if csv_dir:  # dirname is '' for a bare file name; makedirs('') would fail
            os.makedirs(csv_dir, exist_ok=True)
        df_teams.to_csv(csv_path, index=False, encoding='utf-8')

    # Make a copy
    df_result = df.copy()

    # Initialize columns with home_ and away_ prefixes
    type_mapping = {int: "Int64", float: "float64", str: "string"}

    for base_col, col_type in teams_columns.items():
        dtype = type_mapping.get(col_type, "object")
        for prefix in ('home', 'away'):
            target_col = f"{prefix}_{base_col}"
            if target_col not in df_result.columns:
                df_result[target_col] = pd.Series(
                    [None] * len(df_result), index=df_result.index, dtype=dtype
                )

    def ask_team_data(team_name: str, prefix: str, idx) -> dict:
        """Ask user for all team data and copy it into row idx of df_result."""
        print(f"\n{'='*60}")
        print(f"Team: {team_name} (position: {prefix})")
        print(f"{'='*60}\n")

        new_team_data = {'name': team_name}

        # Simply iterate through all columns and ask
        for base_col, col_type in teams_columns.items():
            # Make display name user-friendly
            display_name = base_col.replace('_', ' ').title()

            while True:
                try:
                    user_input = input(f"{display_name}: ").strip()

                    if user_input == '':
                        value = _default_for(col_type)
                    elif col_type == int:
                        value = int(user_input)
                    elif col_type == float:
                        value = float(user_input)
                    else:
                        value = user_input
                    break

                except ValueError:
                    print(f"Invalid input. Please enter a valid {col_type.__name__}.")

            # Save to CSV (without prefix)
            new_team_data[base_col] = value

            # Save to df_result (WITH prefix)
            target_col = f"{prefix}_{base_col}"
            if target_col in df_result.columns:
                df_result.at[idx, target_col] = value

        return new_team_data

    def load_team_data(prefix: str, idx, team_row: pd.Series):
        """Copy an existing team's CSV values into row idx of df_result."""
        for base_col, col_type in teams_columns.items():
            target_col = f"{prefix}_{base_col}"

            if base_col in team_row.index and pd.notna(team_row[base_col]):
                df_result.at[idx, target_col] = team_row[base_col]
            else:
                df_result.at[idx, target_col] = _default_for(col_type)
                # Create missing column in CSV if needed
                if base_col not in df_teams.columns:
                    df_teams[base_col] = _default_for(col_type)

    # Process each match, home team first
    for idx, row in df_result.iterrows():
        for prefix in ('home', 'away'):
            team = row.get(f'{prefix}_team_name')
            if pd.isna(team):
                continue

            team_data = df_teams[df_teams['name'] == team]

            if team_data.empty:
                new_team_data = ask_team_data(team, prefix, idx)
                df_teams = pd.concat([df_teams, pd.DataFrame([new_team_data])], ignore_index=True)
                # Saved right away so answers survive an interrupted session
                df_teams.to_csv(csv_path, index=False, encoding='utf-8')
                print(f"✓ {team} saved\n")
            else:
                load_team_data(prefix, idx, team_data.iloc[0])

    # Final save picks up any columns added while loading existing teams
    df_teams.to_csv(csv_path, index=False, encoding='utf-8')
    return df_result

def update_league_winners(
    ranking_csv_path: str,
    trophies_csv_path: str,
    verbose: bool = False
) -> None:
    """
    Update first and second place trophies after gameweek 38.

    Reads the ranking CSV, keeps the rows whose 'gameweek' equals 38, orders
    them by 'team_rank' and updates the trophies CSV:
    - 1st place team: +1 to first_place_league
    - 2nd place team: +1 to second_place_league

    Teams that are not listed in the trophies CSV are not added; a warning is
    printed for them when ``verbose`` is set. If the ranking CSV holds no
    gameweek 38 rows the function returns without touching the trophies CSV.
    Otherwise the trophies CSV is (re)written, and created with the standard
    columns if it did not exist.

    Missing (blank/NaN) trophy counts are treated as 0 so that an increment
    is never lost, and the two trophy columns are stored as integers.

    Args:
        ranking_csv_path: Path to the ranking CSV file (needs the columns
            'gameweek', 'team_rank' and 'team_name')
        trophies_csv_path: Path to the team trophies CSV file
        verbose: Print debugging information

    Raises:
        FileNotFoundError: If the ranking CSV does not exist
    """

    if verbose:
        print("[TROPHIES] Updating league winners...")

    # Read ranking CSV
    if not os.path.exists(ranking_csv_path):
        raise FileNotFoundError(f"Ranking CSV not found: {ranking_csv_path}")

    df_ranking = pd.read_csv(ranking_csv_path, encoding='utf-8')

    if verbose:
        print(f"[TROPHIES] Loaded ranking CSV with {len(df_ranking)} rows")

    # Filter for gameweek 38
    df_gw38 = df_ranking[df_ranking['gameweek'] == 38]

    if df_gw38.empty:
        if verbose:
            print("[TROPHIES] No gameweek 38 data found in ranking CSV")
        return

    # Sort by team_rank to ensure correct order
    df_gw38 = df_gw38.sort_values('team_rank')

    # Get 1st and 2nd place teams
    first_place_team = df_gw38.iloc[0]['team_name']
    second_place_team = df_gw38.iloc[1]['team_name'] if len(df_gw38) > 1 else None

    if verbose:
        print(f"[TROPHIES] 1st place: {first_place_team}")
        if second_place_team:
            print(f"[TROPHIES] 2nd place: {second_place_team}")

    # Read or create trophies CSV
    if not os.path.exists(trophies_csv_path):
        if verbose:
            print(f"[TROPHIES] Creating new trophies CSV at {trophies_csv_path}")
        df_trophies = pd.DataFrame(columns=['name', 'country', 'first_place_league',
                                            'second_place_league', 'years_total_league',
                                            'years_consecutive_league'])
        # A bare filename has an empty dirname and os.makedirs('') raises,
        # so only create the parent directory when there is one.
        trophies_dir = os.path.dirname(trophies_csv_path)
        if trophies_dir:
            os.makedirs(trophies_dir, exist_ok=True)
    else:
        df_trophies = pd.read_csv(trophies_csv_path, encoding='utf-8')

    # Ensure required columns exist and hold usable integers: a blank cell is
    # read as NaN and NaN + 1 stays NaN, which would silently drop the trophy.
    for col in ['first_place_league', 'second_place_league']:
        if col not in df_trophies.columns:
            df_trophies[col] = 0
        df_trophies[col] = pd.to_numeric(df_trophies[col]).fillna(0).astype(int)

    # (team, column to increment) pairs, champion first
    podium = [(first_place_team, 'first_place_league')]
    if second_place_team:
        podium.append((second_place_team, 'second_place_league'))

    for team, column in podium:
        is_team = df_trophies['name'] == team
        if is_team.any():
            df_trophies.loc[is_team, column] += 1
            if verbose:
                new_count = df_trophies.loc[is_team, column].iloc[0]
                print(f"[TROPHIES] {team}: {column} = {new_count}")
        elif verbose:
            print(f"[TROPHIES] Warning: {team} not found in trophies CSV")

    # Save updated CSV
    df_trophies.to_csv(trophies_csv_path, index=False, encoding='utf-8')

    if verbose:
        print(f"[TROPHIES] Updated trophies saved to {trophies_csv_path}")


def update_season_participation(
    df_matches: pd.DataFrame,
    country: str,
    trophies_csv_path: str,
    teams_columns: Dict[str, type] = None,
    verbose: bool = False
) -> pd.DataFrame:
    """
    Update team participation stats before gameweek 1 of a new season.
    Returns df_matches enriched with initial ranking and team history data.

    For teams participating in this season's league (present in df_matches
    and listed in the trophies CSV):
    - Increments years_total_league by 1
    - Increments years_consecutive_league by 1
    - Creates gameweek 0 ranking based on trophy history
    - Enriches df_matches with ranking columns (all stats at 0 except team_rank)
    - Enriches df_matches with team historical information

    For teams from the same country NOT participating:
    - Resets years_consecutive_league to 0

    Notes:
    - Participating teams that are missing from the trophies CSV are not
      added to it; they are left out of the ranking, so their ranking
      columns in the result are NaN.
    - Blank (NaN) counters in the trophies CSV are treated as 0 and the
      four counter columns are stored as integers.
    - If a team name appears several times in the trophies CSV, only its
      best-ranked entry is used for the gameweek 0 ranking, so the number
      of match rows never changes.
    - The returned DataFrame has a fresh 0..n-1 index (result of the merge).
    - Existing home_*/away_* ranking columns in df_matches are replaced.
    - The trophies CSV is rewritten on every call.

    Args:
        df_matches: DataFrame with matches (must have home_team and/or away_team
            columns; home_team_name / away_team_name are accepted as well)
        country: Country code to filter teams (e.g., 'ESP', 'ENG')
        trophies_csv_path: Path to the team trophies CSV file
        teams_columns: Dictionary with column names and types for team history (optional)
        verbose: Print debugging information

    Returns:
        DataFrame (df_matches) enriched with ranking and team history columns

    Raises:
        KeyError: If df_matches has none of the team columns. Raised before
            the trophies CSV is modified.
    """
    count_cols = ['first_place_league', 'second_place_league',
                  'years_total_league', 'years_consecutive_league']
    stat_cols = ['matchs_played', 'matchs_won', 'matchs_drawn', 'matchs_lost',
                 'team_goals_for', 'team_goals_against',
                 'team_goals_difference', 'team_points']

    if verbose:
        print("[PARTICIPATION] Updating season participation...")
        print(f"[PARTICIPATION] Country: {country}")

    # Resolve the merge columns first: without any team column every team of
    # the country would look "not participating" and lose its streak.
    home_col = next((c for c in ('home_team', 'home_team_name') if c in df_matches.columns), None)
    away_col = next((c for c in ('away_team', 'away_team_name') if c in df_matches.columns), None)
    if home_col is None and away_col is None:
        raise KeyError(
            "df_matches must have at least one of: home_team, away_team, "
            "home_team_name, away_team_name"
        )

    # Read or create trophies CSV
    if not os.path.exists(trophies_csv_path):
        if verbose:
            print(f"[PARTICIPATION] Creating new trophies CSV at {trophies_csv_path}")
        df_trophies = pd.DataFrame(columns=['name', 'country'] + count_cols)
        # A bare filename has an empty dirname and os.makedirs('') raises
        trophies_dir = os.path.dirname(trophies_csv_path)
        if trophies_dir:
            os.makedirs(trophies_dir, exist_ok=True)
    else:
        df_trophies = pd.read_csv(trophies_csv_path, encoding='utf-8')

    # Ensure required columns exist
    for col in ['country', 'years_total_league', 'years_consecutive_league',
                'first_place_league', 'second_place_league']:
        if col not in df_trophies.columns:
            df_trophies[col] = '' if col == 'country' else 0

    # Blank cells are read as NaN and NaN + 1 stays NaN: count them as 0
    for col in count_cols:
        df_trophies[col] = pd.to_numeric(df_trophies[col]).fillna(0).astype(int)

    # Get unique teams from matches (every known home/away column is checked)
    participating_teams = set()
    for col in ('home_team', 'away_team', 'home_team_name', 'away_team_name'):
        if col in df_matches.columns:
            participating_teams.update(df_matches[col].dropna().unique())

    if verbose:
        print(f"[PARTICIPATION] Found {len(participating_teams)} participating teams")

    is_country = df_trophies['country'] == country
    is_participating = df_trophies['name'].isin(participating_teams)

    if verbose:
        print(f"[PARTICIPATION] Found {int(is_country.sum())} teams from {country} in trophies CSV")

    # Update participating teams
    if is_participating.any():
        df_trophies.loc[is_participating, 'years_total_league'] += 1
        df_trophies.loc[is_participating, 'years_consecutive_league'] += 1

    if verbose:
        for team in participating_teams:
            team_rows = df_trophies.loc[df_trophies['name'] == team]
            if team_rows.empty:
                print(f"[PARTICIPATION] Warning: {team} not found in trophies CSV")
                continue
            total = team_rows['years_total_league'].iloc[0]
            consecutive = team_rows['years_consecutive_league'].iloc[0]
            print(f"[PARTICIPATION] {team}: total={total}, consecutive={consecutive}")

    # Reset consecutive years for non-participating teams from this country
    # (restricted to this country's rows, not every row sharing the name)
    to_reset = is_country & ~is_participating
    if to_reset.any():
        df_trophies.loc[to_reset, 'years_consecutive_league'] = 0
        if verbose:
            for team in df_trophies.loc[to_reset, 'name']:
                print(f"[PARTICIPATION] {team}: Reset consecutive years to 0 (not participating)")

    # Save updated trophies CSV
    df_trophies.to_csv(trophies_csv_path, index=False, encoding='utf-8')

    if verbose:
        print(f"[PARTICIPATION] Updated participation saved to {trophies_csv_path}")

    # Create gameweek 0 ranking based on trophy history
    # Priority: 1st place > 2nd place > total years > consecutive years
    # Duplicate names are collapsed to their best entry; otherwise the left
    # merges below would multiply the match rows.
    df_participating = (
        df_trophies[is_participating]
        .sort_values(by=count_cols, ascending=False)
        .drop_duplicates(subset='name', keep='first')
        .reset_index(drop=True)
    )

    df_ranking = pd.DataFrame({
        'team_name': df_participating['name'].to_numpy(),
        'team_rank': range(1, len(df_participating) + 1),
    })
    for col in stat_cols:
        df_ranking[col] = 0

    if verbose:
        print(f"\n[PARTICIPATION] Gameweek 0 ranking created:")
        print(df_ranking[['team_rank', 'team_name']].to_string(index=False))

    # Enrich df_matches with ranking data, once per side
    df_matches_enriched = df_matches.copy()
    merge_key = '_ranking_team_key'

    for side, team_col in (('home', home_col), ('away', away_col)):
        if team_col is None:
            # This side has no team column in df_matches: nothing to merge on
            continue

        # Prefix the ranking columns before merging so they can never clash
        # with (or cause the loss of) columns already present in df_matches.
        renamed = {col: f"{side}_{col}" for col in ['team_rank'] + stat_cols}
        side_ranking = df_ranking.rename(columns={'team_name': merge_key, **renamed})

        # Replace ranking columns left over from a previous enrichment
        stale = [col for col in renamed.values() if col in df_matches_enriched.columns]

        df_matches_enriched = (
            df_matches_enriched.drop(columns=stale)
            .merge(side_ranking, left_on=team_col, right_on=merge_key, how='left')
            .drop(columns=[merge_key])
        )

    if verbose:
        print(f"\n[PARTICIPATION] Enriched df_matches with ranking columns")
        print(f"[PARTICIPATION] Added home/away columns: team_rank, matchs_played, matchs_won, etc.")

    # Enrich with team history if teams_columns provided
    if teams_columns:
        if verbose:
            print(f"\n[PARTICIPATION] Enriching with team history...")

        df_matches_enriched = enrich_with_team_history(
            df=df_matches_enriched,
            teams_columns=teams_columns,
            csv_path=trophies_csv_path,
            country=country
        )

        if verbose:
            print(f"[PARTICIPATION] Team history enrichment complete")

    if verbose:
        print(f"[PARTICIPATION] Final shape: {df_matches_enriched.shape}")

    return df_matches_enriched