"""
Dataset updater module.
Contains the Predicting class that orchestrates all data collection,
processing, and feature engineering steps.
"""

import os
from typing import Optional, List, Dict, Tuple, Any
import pandas as pd

# Import custom modules
from scrapers.fbref_scraper import FBRefScraper
from scrapers.fotmob_scraper import FotMobScraper
from processing.teams.teams import update_season_participation
from feature_engineering.feature_engineering import MatchStatsCalculator
from processing.ranking.league_ranking import LeagueRanking
from feature_engineering.team_notes_fixtures import compute_team_notes_for_fixtures
from utils.date import get_next_days, validate_execution_day, season_from_date
from utils.format import _format_paths

class Predicting:
    """Orchestrate the dataset update for upcoming fixtures.

    For every configured competition the class scrapes the upcoming matches,
    keeps only fixtures whose teams have enough history, enriches them with
    ranking / team-note information and hands them to the feature-engineering
    step.
    """

    # Minimum number of historical matches the home team must have played at
    # home, and the away team away, for an upcoming fixture to be processed.
    MINIMUM_PAST_MATCHES = 18

    def __init__(
        self,
        features_config: Dict[str, Any],
        competitions_config: Dict[str, Dict],
        paths: Dict[str, str],
    ):
        """
        Initialize Predicting with configuration from Settings.

        Args:
            features_config: Configuration for feature engineering. Must
                contain the keys 'fbref_informations', 'fbref_work_features',
                'ranking_informations', 'teams_informations' and
                'fbref_features'.
            competitions_config: Configuration for competitions, keyed by
                competition name.
            paths: Path templates, resolved per competition with
                `_format_paths`.

        Raises:
            KeyError: If one of the required `features_config` keys is missing.
        """
        # Kept whole because the feature-engineering step receives the full
        # configuration (the original read this attribute without setting it).
        self.features_config = features_config
        self.fbref_informations = features_config['fbref_informations']
        self.fbref_work_features = features_config['fbref_work_features']
        self.ranking_informations = features_config['ranking_informations']
        self.teams_informations = features_config['teams_informations']
        self.fbref_features = features_config['fbref_features']
        # NOTE(review): `self.teams` is passed to MatchStatsCalculator but was
        # never assigned, which raised AttributeError. Defaulting it to the
        # teams column configuration is an assumption - confirm what
        # MatchStatsCalculator expects as its last argument.
        self.teams = self.teams_informations
        self.competitions_config = competitions_config
        self.paths = paths

    def update_dataset(
        self, date: Optional[str] = None, competition: Optional[str] = None
    ) -> None:
        """
        Main method to update the dataset with new matches and features.

        Process:
        0. Validate the execution day with `validate_execution_day` (per this
           module's contract it accepts Tuesdays and Fridays only; today is
           used when no date is given) and derive the days to cover with
           `get_next_days`.
        1. Build the list of competitions (all or a specific one).
        2. For each competition, resolve its CSV paths, make sure the parent
           folder of the matches CSV exists and run `_process_competition`.

        A failure while processing one competition is reported and does not
        prevent the remaining competitions from being processed.

        Args:
            date: Date to process (format: YYYY-MM-DD). If None, uses today.
            competition: Specific competition to update (None for all)

        Returns:
            None. Results are handed to the feature-engineering step inside
            `_process_competition`.

        Raises:
            ValueError: If the date is rejected by `validate_execution_day`,
                or if `competition` is not present in the configuration.
        """
        print("        Starting feature engineering process")

        # Step 0: Validate execution day
        try:
            execution_date = validate_execution_day(date)
        except ValueError as e:
            print(f"\nERROR: {e}")
            raise

        # Determine date range based on validated day
        days = get_next_days(execution_date)

        last_season = season_from_date(execution_date)

        # Step 1: Determine which competitions to process
        competitions_list = self._get_competitions(competition, last_season)

        # Step 2: Process each competition
        for comp_info in competitions_list:
            comp_name = comp_info["name"]
            comp_country = comp_info["country"]

            fotmob_url = comp_info["information_urls"]["fotmob"]

            formatted_paths = _format_paths(
                self.paths,
                last_season=last_season,
                country=comp_country,
                competition=comp_name,
            )

            matches_csv_path = formatted_paths["matches"]

            # Ensure the parent folder of the matches CSV exists
            matches_parent = os.path.dirname(matches_csv_path)
            if matches_parent:
                os.makedirs(matches_parent, exist_ok=True)

            print(f"            Processing competition: {comp_name} - {comp_country}")

            try:
                self._process_competition(
                    comp_name,
                    comp_country,
                    matches_csv_path,
                    formatted_paths["players"],
                    formatted_paths["keepers"],
                    formatted_paths["ranking"],
                    formatted_paths["dataset_global"],
                    formatted_paths["dataset_country"],
                    formatted_paths["dataset_competition"],
                    formatted_paths["teams"],
                    days,
                    last_season,
                    fotmob_url,
                )
            except Exception as exc:
                # Best effort: keep going with the other competitions, but
                # never hide why this one failed.
                print(
                    f"            ERROR while processing {comp_name} - "
                    f"{comp_country}: {exc!r}"
                )
                continue

    def _get_competitions(
        self, competition: Optional[str], last_season: Tuple[str, int, int]
    ) -> List[Dict[str, Any]]:
        """
        Get list of competitions to process with their full configuration.

        Args:
            competition: Specific competition or None (or empty) for all
            last_season: Season value substituted into the URL templates as
                `last_season`

        Returns:
            List of dictionaries containing competition data with structure:
            [
                {
                    'name': 'liga',
                    'country': 'spain',
                    'information_urls': {
                        '<url key>': '<formatted url>',
                        'fotmob': '<formatted url that still contains {page}>'
                    }
                },
                ...
            ]
            'country' falls back to 'unknown' and 'information_urls' to an
            empty dict when the configuration does not provide them.

        Raises:
            ValueError: If specified competition doesn't exist in config
        """
        # Determine which competitions to process
        if competition:
            if competition not in self.competitions_config:
                available = list(self.competitions_config.keys())
                raise ValueError(
                    f"Competition '{competition}' not found in configuration. "
                    f"Available competitions: {available}"
                )
            competitions_to_process = [competition]
        else:
            competitions_to_process = list(self.competitions_config.keys())

        detailed_competitions = []

        for comp_name in competitions_to_process:
            competition_config = self.competitions_config.get(comp_name, {})
            info_urls = competition_config.get("information_scraping_urls", {})

            formatted_info_urls = {}
            for url_key, url_template in info_urls.items():
                format_args = {"last_season": last_season}
                if url_key == "fotmob":
                    # The FotMob URL is paginated: re-insert the placeholder
                    # so the page can be filled in later by the scraper.
                    format_args["page"] = "{page}"
                formatted_info_urls[url_key] = url_template.format(**format_args)

            detailed_competitions.append(
                {
                    "name": comp_name,
                    "country": competition_config.get("country", "unknown"),
                    "information_urls": formatted_info_urls,
                }
            )

        return detailed_competitions

    @staticmethod
    def _filter_matches_with_history(
        df_matches: pd.DataFrame,
        past_matches: pd.DataFrame,
        minimum_matches: int,
    ) -> pd.DataFrame:
        """
        Keep the fixtures whose two teams have enough historical matches.

        A fixture is kept when its home team appears at least
        `minimum_matches` times as home team in `past_matches` AND its away
        team appears at least `minimum_matches` times as away team.

        Args:
            df_matches: Upcoming fixtures with 'home_team_name' and
                'away_team_name' columns. Not modified.
            past_matches: Historical matches with the same two columns.
            minimum_matches: Required number of historical matches per side.

        Returns:
            Filtered copy of `df_matches` with a fresh 0..n-1 index.
        """
        home_counts = past_matches.groupby("home_team_name").size()
        away_counts = past_matches.groupby("away_team_name").size()

        # Teams absent from the history map to NaN, i.e. zero past matches.
        home_played = df_matches["home_team_name"].map(home_counts).fillna(0)
        away_played = df_matches["away_team_name"].map(away_counts).fillna(0)

        enough_history = (home_played >= minimum_matches) & (
            away_played >= minimum_matches
        )
        return df_matches[enough_history].reset_index(drop=True)

    def _process_competition(
        self,
        comp_name: str,
        comp_country: str,
        matches_csv_path: str,
        players_csv_path: str,
        keepers_csv_path: str,
        ranking_csv_path: str,
        dataset_global_csv_path: str,
        dataset_country_csv_path: str,
        dataset_competition_csv_path: str,
        teams_csv_path: str,
        days: List[str],
        last_season: Tuple[str, int, int],
        fotmob_url: str,
    ) -> None:
        """
        Process a single competition and update datasets.

        Steps: scrape the upcoming matches, drop fixtures whose teams lack
        history in the matches CSV, and - for league competitions only - add
        ranking information, team notes and run the feature engineering.
        Returns early (after printing a message) when there is nothing to
        process.

        Args:
            comp_name: Name of the competition (key of `competitions_config`)
            comp_country: Country of the competition; overridden by the
                'country' entry of the competition configuration when present
            matches_csv_path: Path to matches CSV file
            players_csv_path: Path to players CSV file
            keepers_csv_path: Path to keepers CSV file
            ranking_csv_path: Path to ranking CSV file
            dataset_global_csv_path: Path to global dataset CSV file
            dataset_country_csv_path: Path to country dataset CSV file
            dataset_competition_csv_path: Path to competition dataset CSV file
            teams_csv_path: Path to teams CSV file
            days: List of dates to process
            last_season: Season information forwarded to the FBRef scraper
            fotmob_url: URL for FotMob scraping
        """
        # Get competition configuration
        comp_config = self.competitions_config[comp_name]
        comp_country = comp_config.get("country", comp_country)

        # Infer competition type from the competition name
        comp_type = self._infer_competition_type(comp_name)

        # Scrape match data from FBRef
        # NOTE(review): `comp_config` is passed twice (2nd and 4th argument);
        # one of them may have been meant to be something else (e.g. the
        # country) - confirm against the FBRefScraper signature.
        scraper = FBRefScraper(
            last_season,
            comp_config,
            comp_name,
            comp_config,
            comp_type,
            days,
        )
        df_matches = scraper.run_before()

        # If no matches found, stop processing
        if df_matches.empty:
            print("                No matches to process")
            return

        # Read past matches CSV. A missing, empty or unparsable file means
        # there is no usable history (pandas' EmptyDataError and ParserError
        # are ValueError subclasses).
        try:
            past_matches = pd.read_csv(matches_csv_path)
        except (OSError, ValueError):
            print("                Not enough historical data to process matches")
            return

        # Keep only the historical matches of this competition
        past_matches = past_matches[past_matches["competition"] == comp_name].copy()

        # Now filter out fixtures whose teams lack enough history
        df_matches = self._filter_matches_with_history(
            df_matches, past_matches, self.MINIMUM_PAST_MATCHES
        )

        if df_matches.empty:
            print("                Not enough historical data to process matches")
            return

        print("                Imputing rankings...")

        # Only league competitions are processed past this point.
        if comp_type != "league":
            return

        current_gameweek = int(df_matches['gameweek'].max())

        if current_gameweek == 1:
            # First gameweek: handled by the season-participation update
            # instead of the ranking lookup used for later gameweeks.
            df_matches = update_season_participation(
                df_matches=df_matches,
                country=comp_country,
                trophies_csv_path=teams_csv_path,
                teams_columns=self.teams_informations,
                verbose=True,
            )
        else:
            league_ranker = LeagueRanking(
                ranking_csv_path,
                self.ranking_informations,
                teams_csv_path,
                self.teams_informations,
                comp_name,
                comp_country,
            )
            df_matches = league_ranker.get_last_match_stats(df_matches, past_matches)

        print("                Building teams' notes...")

        fotmob_df = self._prepare_target_matches(df_matches)
        scraper_fotmob = FotMobScraper()
        injured_unique = scraper_fotmob.scrape_all_injuries(fotmob_df, fotmob_url)

        # NOTE(review): the original passed `transfers_in` / `transfers_out`
        # without ever defining them (NameError on every run). No transfer
        # data is collected in this method, so None is passed as "no data" -
        # confirm compute_team_notes_for_fixtures accepts None here.
        transfers_in = None
        transfers_out = None

        # Build the team notes for the upcoming fixtures
        df_matches = compute_team_notes_for_fixtures(
            df_matches,
            past_matches,
            players_csv_path,
            keepers_csv_path,
            unavailable_list=injured_unique,
            transfers_in=transfers_in,
            transfers_out=transfers_out,
        )

        print("                Making feature engineering...")

        feature_engineer = MatchStatsCalculator(
            dataset_global_csv_path,
            dataset_country_csv_path,
            dataset_competition_csv_path,
            comp_type,
            comp_country,
            self.features_config,
            self.teams,
        )

        feature_engineer.data_update_dataset(df_matches, injured_unique)

    def _infer_competition_type(self, competition: str) -> str:
        """
        Infer competition type from competition name.

        Matching is by case-insensitive substring, evaluated in this order:
        governing body ('uefa' / 'fifa'), league keywords, supercup keywords,
        cup keywords. The governing-body checks come first because both
        'uefa' and 'fifa' contain the cup keyword 'fa'; supercups come before
        cups because 'supercoppa' contains 'coppa'.

        Args:
            competition: Competition identifier

        Returns:
            Competition type: 'league', 'cup', 'supercup', 'european', or
            'international'. Names matching no keyword default to 'league'.
        """
        comp_lower = competition.lower()

        def mentions(*keywords: str) -> bool:
            return any(keyword in comp_lower for keyword in keywords)

        if mentions("uefa"):
            return "european"
        if mentions("fifa"):
            return "international"
        if mentions("league", "liga", "serie", "bundesliga", "ligue"):
            return "league"
        if mentions("supercup", "supercopa", "supercoppa", "shield"):
            return "supercup"
        # NOTE(review): 'fa' is a very short substring and may match
        # unrelated names; kept for backward compatibility.
        if mentions("fa", "carabao", "rey", "pokal", "coppa"):
            return "cup"
        return "league"

    def _prepare_target_matches(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Extract home_team, away_team, and date from a DataFrame to prepare
        target matches for FotMob scraper.

        Args:
            df: DataFrame with columns ['home_team_name', 'away_team_name', 'date_of_match']

        Returns:
            DataFrame with columns ['date', 'home_team', 'away_team'], without
            duplicate rows and with a fresh 0..n-1 index
        """
        column_names = {
            "date_of_match": "date",
            "home_team_name": "home_team",
            "away_team_name": "away_team",
        }

        # Select the relevant columns, rename them to the scraper's expected
        # format, then deduplicate and renumber the rows.
        return (
            df[list(column_names)]
            .rename(columns=column_names)
            .drop_duplicates()
            .reset_index(drop=True)
        )
