# ranking_generator.py
# - FootballRanking: builds week-by-week ranking table from matches_<season>.csv
# - MatchRankUpdater: merges nearest historical ranking (before match date) back into matches_<season>.csv

import os
import pandas as pd

from app.scrapers.fbref_scraper import CompetitionScraper


class FootballRanking:
    """Compute season rankings gameweek by gameweek with full tiebreakers."""

    def __init__(self, last_season: str, seasons: list[str]) -> None:
        """Remember the season selection and start with empty ranking state.

        Args:
            last_season: Season identifier stored as ``self.last_season``.
            seasons: Season identifiers stored as ``self.seasons``; the list
                is kept by reference, not copied.
        """
        # Working state: no matches table yet, no per-team totals yet.
        self.team_stats: dict[str, dict[str, float]] = {}
        self.df: pd.DataFrame | None = None

        # Caller-supplied configuration.
        self.seasons = seasons
        self.last_season = last_season

    # ----------------- Core stats updates -----------------
    def update_team_stats(
        self,
        home_team: str,
        away_team: str,
        home_goals: int,
        away_goals: int,
        home_yellow_cards: int,
        home_red_cards: int,
        home_second_yellow_cards: int,
        away_yellow_cards: int,
        away_red_cards: int,
        away_second_yellow_cards: int,
    ) -> None:
        """Fold one match result into the running totals of both teams.

        A team seen for the first time gets a zeroed entry in
        ``self.team_stats``. The winner receives 3 points, the loser 0, and
        both receive 1 point when neither side scored more than the other.

        Args:
            home_team: Name of the home side.
            away_team: Name of the away side.
            home_goals: Goals scored by the home side.
            away_goals: Goals scored by the away side.
            home_yellow_cards: Yellow cards shown to the home side.
            home_red_cards: Red cards shown to the home side.
            home_second_yellow_cards: Second yellow cards shown to the home side.
            away_yellow_cards: Yellow cards shown to the away side.
            away_red_cards: Red cards shown to the away side.
            away_second_yellow_cards: Second yellow cards shown to the away side.
        """
        stat_keys = (
            "points",
            "goals_for",
            "goals_against",
            "matches_played",
            "yellow_cards",
            "red_cards",
            "second_yellow_cards",
        )

        # Make sure both sides have a totals record before touching it.
        for name in (home_team, away_team):
            self.team_stats.setdefault(name, dict.fromkeys(stat_keys, 0))

        # A draw is the fallback; only a strict winner changes the split.
        home_points, away_points = 1, 1
        if home_goals > away_goals:
            home_points, away_points = 3, 0
        elif home_goals < away_goals:
            home_points, away_points = 0, 3

        # One tuple per side: (team, points, scored, conceded, yellows, reds, second yellows)
        per_side = (
            (
                home_team,
                home_points,
                home_goals,
                away_goals,
                home_yellow_cards,
                home_red_cards,
                home_second_yellow_cards,
            ),
            (
                away_team,
                away_points,
                away_goals,
                home_goals,
                away_yellow_cards,
                away_red_cards,
                away_second_yellow_cards,
            ),
        )
        for name, earned, scored, conceded, yellows, reds, second_yellows in per_side:
            totals = self.team_stats[name]
            totals["points"] += earned
            totals["goals_for"] += scored
            totals["goals_against"] += conceded
            totals["yellow_cards"] += yellows
            totals["red_cards"] += reds
            totals["second_yellow_cards"] += second_yellows
            totals["matches_played"] += 1

    # ----------------- Helpers for ordering blocks -----------------
    def reorder_teams_by_new_order(
        self, sorted_teams: list[dict], new_order: list[tuple[str, float]]
    ) -> list[dict]:
        """Return a copy of the table with one block of teams re-sequenced.

        The teams named in ``new_order`` are emitted together, in that order,
        at the position where the first of them appears in ``sorted_teams``.
        Every other row keeps its relative position. Names in ``new_order``
        that have no row in ``sorted_teams`` are ignored.

        Args:
            sorted_teams: Table rows; each row is a dict with a ``"team"`` key.
            new_order: ``(team_name, sort_value)`` pairs; only the names are used.

        Returns:
            A new list; ``sorted_teams`` itself is not modified.
        """
        block_names = {team for team, _value in new_order}
        rows_by_team = {entry["team"]: entry for entry in sorted_teams}
        block_rows = [
            rows_by_team[team] for team, _value in new_order if team in rows_by_team
        ]

        result: list[dict] = []
        block_emitted = False
        for entry in sorted_teams:
            if entry["team"] not in block_names:
                result.append(entry)
                continue
            # First member of the block marks where the whole block goes;
            # later members were already emitted with it.
            if not block_emitted:
                result.extend(block_rows)
                block_emitted = True
        return result

    # ----------------- Tiebreakers dispatcher -----------------
    def check_tiebreaker_type(
        self, sorted_teams: list[dict], current_gameweek: int, match_index: int
    ) -> list[dict]:
        """Find teams level on points and hand each group to the right tiebreaker.

        Rows are bucketed by their ``"points"`` value. A bucket of exactly two
        teams goes to ``apply_tiebreaker_two_teams``; a bucket of three or more
        goes to ``apply_tiebreaker_multiple_teams``. Buckets with a single team
        are left alone.

        Args:
            sorted_teams: Table rows, each a dict with at least ``"points"``.
            current_gameweek: Forwarded unchanged to the tiebreaker methods.
            match_index: Forwarded unchanged to the tiebreaker methods.

        Returns:
            The table as returned by the last tiebreaker applied, or the input
            list itself when no two teams share a points total.
        """
        # Buckets are built from the incoming order, before any reordering.
        by_points: dict[int, list[dict]] = {}
        for entry in sorted_teams:
            by_points.setdefault(entry["points"], []).append(entry)

        for shared_points, level_teams in by_points.items():
            group_size = len(level_teams)
            if group_size < 2:
                continue
            resolver = (
                self.apply_tiebreaker_two_teams
                if group_size == 2
                else self.apply_tiebreaker_multiple_teams
            )
            sorted_teams = resolver(
                sorted_teams, current_gameweek, match_index, level_teams, shared_points
            )
        return sorted_teams

    # ----------------- Two-team tiebreaker -----------------
    def apply_tiebreaker_two_teams(
        self,
        sorted_teams: list[dict],
        gameweek: int,
        match_id: int,
        teams_list: list[dict],
        points: int,
    ) -> list[dict]:
        """Decide which of two teams level on points is placed higher.

        Criteria, tried in order until one separates the teams:

        1. goals scored in the matches between the two teams,
        2. overall goal difference,
        3. overall goals scored,
        4. fairplay score from the table rows (lower wins),
        5. alphabetically smaller team name.

        Match data comes from the rows of ``self.df`` selected by the label
        slice ``.loc[: match_id - 1]``.

        Args:
            sorted_teams: Table rows containing both teams; rows need
                ``"team"`` and, for criterion 4, ``"fairplay"``.
            gameweek: Only used in the progress message.
            match_id: Upper bound used for the label slice described above.
            teams_list: Exactly two rows, one per tied team.
            points: Only used in the progress message.

        Returns:
            ``sorted_teams`` itself; the two rows are swapped in place when the
            winner was listed below the other team.

        Raises:
            ValueError: If ``teams_list`` does not hold exactly two rows.
        """
        if len(teams_list) != 2:
            raise ValueError(
                "apply_tiebreaker_two_teams must receive exactly two teams."
            )

        first = teams_list[0]["team"]
        second = teams_list[1]["team"]

        print(
            f"\nChecking tiebreaker for teams: {first} vs {second} (Points: {points}. Gameweek: {gameweek}. Match id: {match_id})"
        )

        # Label-based slice: every criterion below reads the same rows.
        history = self.df.loc[: match_id - 1]  # type: ignore

        def direct_goals() -> tuple:
            """Goals each team scored in the matches between the two of them."""
            scored_first = 0
            scored_second = 0
            for _, rec in history.iterrows():
                home, away = rec["home_team_name"], rec["away_team_name"]
                home_goals, away_goals = rec["home_goals"], rec["away_goals"]
                if home == first and away == second:
                    scored_first += home_goals
                    scored_second += away_goals
                elif home == second and away == first:
                    scored_first += away_goals
                    scored_second += home_goals
            return scored_first, scored_second

        def overall_goals(team: str) -> tuple:
            """Goals scored and conceded by ``team`` across all selected rows."""
            scored = conceded = 0
            for _, rec in history.iterrows():
                if rec["home_team_name"] == team:
                    scored += rec["home_goals"]
                    conceded += rec["away_goals"]
                elif rec["away_team_name"] == team:
                    scored += rec["away_goals"]
                    conceded += rec["home_goals"]
            return scored, conceded

        def pick_winner() -> str:
            """Walk the criteria and return the name of the better-placed team."""
            # 1) Goals in direct meetings
            direct_first, direct_second = direct_goals()
            if direct_first != direct_second:
                return first if direct_first > direct_second else second

            # 2) Overall goal difference
            for_first, against_first = overall_goals(first)
            for_second, against_second = overall_goals(second)
            diff_first = for_first - against_first
            diff_second = for_second - against_second
            if diff_first != diff_second:
                return first if diff_first > diff_second else second

            # 3) Overall goals scored
            if for_first != for_second:
                return first if for_first > for_second else second

            # 4) Fairplay (lower is better)
            row_first = next(t for t in sorted_teams if t["team"] == first)
            row_second = next(t for t in sorted_teams if t["team"] == second)
            if row_first["fairplay"] != row_second["fairplay"]:
                if row_first["fairplay"] < row_second["fairplay"]:
                    return first
                return second

            # 5) Alphabetical
            return min(first, second)

        winner = pick_winner()

        # Swap only when the winner currently sits below the other team.
        pos_first = next(i for i, t in enumerate(sorted_teams) if t["team"] == first)
        pos_second = next(i for i, t in enumerate(sorted_teams) if t["team"] == second)
        if winner == first:
            winner_pos, other_pos = pos_first, pos_second
        else:
            winner_pos, other_pos = pos_second, pos_first
        if winner_pos > other_pos:
            sorted_teams[pos_first], sorted_teams[pos_second] = (
                sorted_teams[pos_second],
                sorted_teams[pos_first],
            )

        return sorted_teams

    # ----------------- Multi-team tiebreaker -----------------
    def apply_tiebreaker_multiple_teams(
        self,
        sorted_teams: list[dict],
        gameweek: int,
        match_id: int,
        teams_list: list[dict],
        points: int,
    ) -> list[dict]:
        """Order three or more teams that are level on points.

        Criteria are applied in order, each one only to the teams that the
        previous criterion left level with each other:

        1. points won in matches between the tied teams,
        2. goal difference in matches between the teams still level,
        3. overall goal difference,
        4. overall goals scored,
        5. fairplay score from the table rows (lower is better).

        Every criterion reads the same matches: the rows of ``self.df`` whose
        ``id`` is ``<= match_id``. Teams still level after step 5 keep the
        relative order they had going in.

        Args:
            sorted_teams: Table rows containing the tied teams; rows need
                ``"team"`` and, for step 5, ``"fairplay"``.
            gameweek: Not used by the computation; kept so the signature
                matches the two-team tiebreaker.
            match_id: Highest match ``id`` taken into account.
            teams_list: Rows of the tied teams.
            points: Not used by the computation; kept for the same reason as
                ``gameweek``.

        Returns:
            A new list with the tied block reordered; rows outside the block
            keep their positions.
        """
        tied_names = [entry["team"] for entry in teams_list]

        # Read the relevant matches once; every criterion below reuses them.
        played = [
            (
                row["home_team_name"],
                row["away_team_name"],
                row["home_goals"],
                row["away_goals"],
            )
            for _, row in self.df.iterrows()  # type: ignore
            if row["id"] <= match_id
        ]

        def head_to_head_points(names: list[str]) -> dict:
            """Points from matches in which both sides belong to ``names``."""
            totals = dict.fromkeys(names, 0)
            for home, away, home_goals, away_goals in played:
                if home in totals and away in totals:
                    if home_goals > away_goals:
                        totals[home] += 3
                    elif home_goals < away_goals:
                        totals[away] += 3
                    else:
                        totals[home] += 1
                        totals[away] += 1
            return totals

        def head_to_head_goal_diff(names: list[str]) -> dict:
            """Goal difference from matches in which both sides belong to ``names``."""
            totals = dict.fromkeys(names, 0)
            for home, away, home_goals, away_goals in played:
                # Both sides must be in the group; matching on either side
                # would turn this into the overall goal difference.
                if home in totals and away in totals:
                    totals[home] += home_goals - away_goals
                    totals[away] += away_goals - home_goals
            return totals

        def overall_goal_diff(names: list[str]) -> dict:
            """Goal difference over every match of each team in ``names``."""
            totals = dict.fromkeys(names, 0)
            for home, away, home_goals, away_goals in played:
                if home in totals:
                    totals[home] += home_goals - away_goals
                if away in totals:
                    totals[away] += away_goals - home_goals
            return totals

        def overall_goals_scored(names: list[str]) -> dict:
            """Goals scored over every match of each team in ``names``."""
            totals = dict.fromkeys(names, 0)
            for home, away, home_goals, away_goals in played:
                if home in totals:
                    totals[home] += home_goals
                if away in totals:
                    totals[away] += away_goals
            return totals

        def fairplay_score(names: list[str]) -> dict:
            """Fairplay value taken from each team's table row."""
            return {
                name: int(
                    next(r for r in sorted_teams if r["team"] == name)["fairplay"]
                )
                for name in names
            }

        def still_level(ordered: list[tuple]) -> list[list[str]]:
            """Groups of two or more names that share the same score."""
            buckets: dict = {}
            for name, value in ordered:
                buckets.setdefault(value, []).append(name)
            return [names for names in buckets.values() if len(names) >= 2]

        # (scoring function, higher value ranks first?)
        criteria = (
            (head_to_head_points, True),
            (head_to_head_goal_diff, True),
            (overall_goal_diff, True),
            (overall_goals_scored, True),
            (fairplay_score, False),
        )

        unresolved = [tied_names]
        for score, higher_first in criteria:
            carried_over: list[list[str]] = []
            for group in unresolved:
                # sorted() is stable, so level teams keep their current order.
                ordered = sorted(
                    score(group).items(), key=lambda item: item[1], reverse=higher_first
                )
                sorted_teams = self.reorder_teams_by_new_order(sorted_teams, ordered)
                carried_over.extend(still_level(ordered))
            unresolved = carried_over
            if not unresolved:
                break

        return sorted_teams

    # ----------------- Fairplay scoring -----------------
    @staticmethod
    def calculate_fairplay(
        yellow_cards: int, red_cards: int, second_yellow_cards: int
    ) -> int:
        """Compute fairplay penalty (lower is better)."""
        return int(
            yellow_cards + 3 * (red_cards - second_yellow_cards) + second_yellow_cards
        )

    # ----------------- Main processing -----------------
    def process_matches(self) -> None:
        """
        Walk matches in chronological CSV order, save a ranking snapshot per gameweek.
        If paths are not provided, defaults to app/data/raw/{season}.
        """
        # --- Paths (default to unified app/data/raw) ---
        idx = self.seasons.index(self.last_season)
        prev_season = self.seasons[idx - 1] if idx > 0 else None

        matches_file_last = (
            f"app/data/raw/{self.last_season}/matches_{self.last_season}.csv"
        )

        rankings_file_last = (
            f"app/data/raw/{self.last_season}/rankings_{self.last_season}.csv"
        )
        rankings_file_prev = f"app/data/raw/{prev_season}/rankings_{prev_season}.csv"
        rankings_file_first = f"app/data/initial/rankings_2016-2017.csv"

        teams_file_last = (
            f"app/data/raw/{self.last_season}/teams_{self.last_season}.csv"
        )

        trophies_file_last = (
            f"app/data/raw/{self.last_season}/trophies_{self.last_season}.csv"
        )
        trophies_file_prev = f"app/data/raw/{prev_season}/trophies_{prev_season}.csv"
        trophies_file_first = f"app/data/initial/trophies_2016-2017.csv"

        # --- Load matches ---
        self.df = pd.read_csv(matches_file_last, low_memory=False)

        # Ensure stable dtypes
        if "date_of_match" in self.df.columns:
            self.df["date_of_match"] = pd.to_datetime(
                self.df["date_of_match"], errors="coerce"
            )
        if "gameweek" in self.df.columns:
            self.df["gameweek"] = pd.to_numeric(
                self.df["gameweek"], errors="coerce"
            ).astype("Int64")

        # Optional: sort to be safe (by date then id)
        if "date_of_match" in self.df.columns:
            self.df = self.df.sort_values(
                ["date_of_match", "id"], kind="stable", na_position="last"
            )

        # Fresh rebuild for rankings CSV
        if os.path.exists(rankings_file_last):
            os.remove(rankings_file_last)

        if os.path.exists(teams_file_last):
            teams_df = pd.read_csv(teams_file_last, low_memory=False)
        else:
            scraper = CompetitionScraper(
                season=self.last_season, max_future_matches=0, competition="liga"
            )
            teams_df = scraper.build_season_team_list(
                self.last_season, competition="liga"
            )

            teams_df.to_csv(teams_file_last, index=False)

            out_dir = os.path.dirname(os.path.abspath(trophies_file_last))
            os.makedirs(out_dir, exist_ok=True)

            if os.path.exists(rankings_file_prev):
                df_rankings_prev = pd.read_csv(rankings_file_prev)
            else:
                df_rankings_prev = pd.read_csv(rankings_file_first)

            if os.path.exists(trophies_file_prev):
                df_trophies = pd.read_csv(trophies_file_prev)
            else:
                df_trophies = pd.read_csv(trophies_file_first)

            for team in teams_df["teams"]:
                if team in df_trophies["teams"].values:
                    if team in df_rankings_prev["team"].values:
                        latest_entry = df_rankings_prev[
                            df_rankings_prev["team"] == team
                        ].iloc[-1]
                        df_trophies.loc[
                            df_trophies["teams"].eq(team), "years_first_division_total"
                        ] = (
                            df_trophies.loc[
                                df_trophies["teams"].eq(team),
                                "years_first_division_total",
                            ]
                            .fillna(0)
                            .add(1)
                        )
                        df_trophies.loc[
                            df_trophies["teams"].eq(team),
                            "years_first_division_consecutive",
                        ] = (
                            df_trophies.loc[
                                df_trophies["teams"].eq(team),
                                "years_first_division_consecutive",
                            ]
                            .fillna(0)
                            .add(1)
                        )
                        if latest_entry["rank"] == 1:
                            df_trophies.loc[
                                df_trophies["teams"].eq(team), "first_place"
                            ] = (
                                df_trophies.loc[
                                    df_trophies["teams"].eq(team), "first_place"
                                ]
                                .fillna(0)
                                .add(1)
                            )
                        elif latest_entry["rank"] == 2:
                            df_trophies.loc[
                                df_trophies["teams"].eq(team), "second_place"
                            ] = (
                                df_trophies.loc[
                                    df_trophies["teams"].eq(team), "second_place"
                                ]
                                .fillna(0)
                                .add(1)
                            )
                        elif latest_entry["rank"] == 3:
                            df_trophies.loc[
                                df_trophies["teams"].eq(team), "third_place"
                            ] = (
                                df_trophies.loc[
                                    df_trophies["teams"].eq(team), "third_place"
                                ]
                                .fillna(0)
                                .add(1)
                            )
                else:
                    print(
                        f"Warning: Team '{team}' not found in previous seasons. Team to put manually."
                    )

            missing = set(df_trophies["teams"]) - set(teams_df["teams"])
            for team in missing:
                df_trophies.loc[
                    df_trophies["teams"].eq(team), "years_first_division_consecutive"
                ] = 0

            df_trophies.to_csv(trophies_file_last, index=False)

        full_teams = set(teams_df["teams"])
        remaining_teams = set(full_teams)

        current_gw = None
        bucket: list[dict] = []

        for idx, row in self.df.iterrows():
            is_last = idx == self.df.index[-1]

            gw = row["gameweek"]

            if len(remaining_teams) == 20:
                # Update season stats from this match
                self.update_team_stats(
                    home_team=row["home_team_name"],
                    away_team=row["away_team_name"],
                    home_goals=row["home_goals"],
                    away_goals=row["away_goals"],
                    home_yellow_cards=row.get("home_PlayersYellowCards", 0),
                    home_red_cards=row.get("home_PlayersRedCards", 0),
                    home_second_yellow_cards=row.get(
                        "home_PlayersSecondYellowCards", 0
                    ),
                    away_yellow_cards=row.get("away_PlayersYellowCards", 0),
                    away_red_cards=row.get("away_PlayersRedCards", 0),
                    away_second_yellow_cards=row.get(
                        "away_PlayersSecondYellowCards", 0
                    ),
                )

                # Remove teams the GW
                home_team = row["home_team_name"]
                away_team = row["away_team_name"]
                remaining_teams.discard(home_team)
                remaining_teams.discard(away_team)

                # Snapshot both teams **after** this match
                for side in ("home", "away"):
                    team = row[f"{side}_team_name"]
                    stats = self.team_stats.get(team, {})
                    bucket.append(
                        {
                            "gameweek": gw,
                            "team": team,
                            "goals": row[f"{side}_goals"],
                            "yellow_cards": row.get(f"{side}_PlayersYellowCards", 0),
                            "red_cards": row.get(f"{side}_PlayersRedCards", 0),
                            "second_yellow_cards": row.get(
                                f"{side}_PlayersSecondYellowCards", 0
                            ),
                            "date_of_match": row["date_of_match"],
                            "matches_played": stats.get("matches_played", 0),
                            "points": stats.get("points", 0),
                            "goals_for": stats.get("goals_for", 0),
                            "goals_against": stats.get("goals_against", 0),
                            "goal_difference": stats.get("goals_for", 0)
                            - stats.get("goals_against", 0),
                            "fairplay": self.calculate_fairplay(
                                stats.get("yellow_cards", 0),
                                stats.get("red_cards", 0),
                                stats.get("second_yellow_cards", 0),
                            ),
                        }
                    )

                current_gw = gw

            elif len(remaining_teams) < 20:
                if gw == current_gw:
                    # Update season stats from this match
                    self.update_team_stats(
                        home_team=row["home_team_name"],
                        away_team=row["away_team_name"],
                        home_goals=row["home_goals"],
                        away_goals=row["away_goals"],
                        home_yellow_cards=row.get("home_PlayersYellowCards", 0),
                        home_red_cards=row.get("home_PlayersRedCards", 0),
                        home_second_yellow_cards=row.get(
                            "home_PlayersSecondYellowCards", 0
                        ),
                        away_yellow_cards=row.get("away_PlayersYellowCards", 0),
                        away_red_cards=row.get("away_PlayersRedCards", 0),
                        away_second_yellow_cards=row.get(
                            "away_PlayersSecondYellowCards", 0
                        ),
                    )

                    # Remove teams the GW
                    home_team = row["home_team_name"]
                    away_team = row["away_team_name"]
                    remaining_teams.discard(home_team)
                    remaining_teams.discard(away_team)

                    # Snapshot both teams **after** this match
                    for side in ("home", "away"):
                        team = row[f"{side}_team_name"]
                        stats = self.team_stats.get(team, {})
                        bucket.append(
                            {
                                "gameweek": gw,
                                "team": team,
                                "goals": row[f"{side}_goals"],
                                "yellow_cards": row.get(
                                    f"{side}_PlayersYellowCards", 0
                                ),
                                "red_cards": row.get(f"{side}_PlayersRedCards", 0),
                                "second_yellow_cards": row.get(
                                    f"{side}_PlayersSecondYellowCards", 0
                                ),
                                "date_of_match": row["date_of_match"],
                                "matches_played": stats.get("matches_played", 0),
                                "points": stats.get("points", 0),
                                "goals_for": stats.get("goals_for", 0),
                                "goals_against": stats.get("goals_against", 0),
                                "goal_difference": stats.get("goals_for", 0)
                                - stats.get("goals_against", 0),
                                "fairplay": self.calculate_fairplay(
                                    stats.get("yellow_cards", 0),
                                    stats.get("red_cards", 0),
                                    stats.get("second_yellow_cards", 0),
                                ),
                            }
                        )

                    if not remaining_teams:
                        for entry in bucket:
                            entry["index"] = idx

                        bucket.sort(key=lambda x: x["points"], reverse=True)
                        bucket = self.check_tiebreaker_type(bucket, current_gw, idx)

                        for r, entry in enumerate(bucket, start=1):
                            entry["rank"] = r

                        if not is_last:
                            remaining_teams = set(full_teams)

                        gw_df = pd.DataFrame(bucket)
                        gw_df.to_csv(
                            rankings_file_last,
                            mode="a",
                            index=False,
                            header=not os.path.exists(rankings_file_last),
                        )

                        bucket = []

                else:
                    if os.path.exists(teams_file_last):
                        prev_df = pd.read_csv(teams_file_last, low_memory=False)
                        last_per_team = prev_df.reset_index(drop=True).set_index(
                            "teams"
                        )

                    for team in sorted(remaining_teams):
                        if team in last_per_team.index:
                            r = last_per_team.loc[team]
                            # Carry forward cumulative stats; day stats are zeros
                            bucket.append(
                                {
                                    "gameweek": current_gw,
                                    "team": team,
                                    "goals": 0,
                                    "yellow_cards": 0,
                                    "red_cards": 0,
                                    "second_yellow_cards": 0,
                                    "date_of_match": row["date_of_match"],
                                    "matches_played": r.get("matches_played", 0),
                                    "points": r.get("points", 0),
                                    "goals_for": r.get("goals_for", 0),
                                    "goals_against": r.get("goals_against", 0),
                                    "goal_difference": r.get("goal_difference", 0),
                                    "fairplay": r.get("fairplay", 0),
                                }
                            )
                        else:
                            # No previous snapshot for this team -> start from zeros
                            s = self.team_stats.get(team, {})
                            bucket.append(
                                {
                                    "gameweek": current_gw,
                                    "team": team,
                                    "goals": 0,
                                    "yellow_cards": 0,
                                    "red_cards": 0,
                                    "second_yellow_cards": 0,
                                    "date_of_match": None,
                                    "matches_played": s.get("matches_played", 0),
                                    "points": s.get("points", 0),
                                    "goals_for": s.get("goals_for", 0),
                                    "goals_against": s.get("goals_against", 0),
                                    "goal_difference": s.get("goals_for", 0)
                                    - s.get("goals_against", 0),
                                    "fairplay": self.calculate_fairplay(
                                        s.get("yellow_cards", 0),
                                        s.get("red_cards", 0),
                                        s.get("second_yellow_cards", 0),
                                    ),
                                }
                            )

                    for entry in bucket:
                        entry["index"] = idx

                    bucket.sort(key=lambda x: x["points"], reverse=True)
                    bucket = self.check_tiebreaker_type(bucket, current_gw, idx)

                    for r, entry in enumerate(bucket, start=1):
                        entry["rank"] = r

                    gw_df = pd.DataFrame(bucket)
                    gw_df.to_csv(
                        rankings_file_last,
                        mode="a",
                        index=False,
                        header=not os.path.exists(rankings_file_last),
                    )

                    current_gw = gw

                    if not is_last:
                        remaining_teams = set(full_teams)

                    bucket = []

                    # Update season stats from this match
                    self.update_team_stats(
                        home_team=row["home_team_name"],
                        away_team=row["away_team_name"],
                        home_goals=row["home_goals"],
                        away_goals=row["away_goals"],
                        home_yellow_cards=row.get("home_PlayersYellowCards", 0),
                        home_red_cards=row.get("home_PlayersRedCards", 0),
                        home_second_yellow_cards=row.get(
                            "home_PlayersSecondYellowCards", 0
                        ),
                        away_yellow_cards=row.get("away_PlayersYellowCards", 0),
                        away_red_cards=row.get("away_PlayersRedCards", 0),
                        away_second_yellow_cards=row.get(
                            "away_PlayersSecondYellowCards", 0
                        ),
                    )

                    # Remove teams the GW
                    home_team = row["home_team_name"]
                    away_team = row["away_team_name"]
                    remaining_teams.discard(home_team)
                    remaining_teams.discard(away_team)

                    # Snapshot both teams **after** this match
                    for side in ("home", "away"):
                        team = row[f"{side}_team_name"]
                        stats = self.team_stats.get(team, {})
                        bucket.append(
                            {
                                "gameweek": gw,
                                "team": team,
                                "goals": row[f"{side}_goals"],
                                "yellow_cards": row.get(
                                    f"{side}_PlayersYellowCards", 0
                                ),
                                "red_cards": row.get(f"{side}_PlayersRedCards", 0),
                                "second_yellow_cards": row.get(
                                    f"{side}_PlayersSecondYellowCards", 0
                                ),
                                "date_of_match": row["date_of_match"],
                                "matches_played": stats.get("matches_played", 0),
                                "points": stats.get("points", 0),
                                "goals_for": stats.get("goals_for", 0),
                                "goals_against": stats.get("goals_against", 0),
                                "goal_difference": stats.get("goals_for", 0)
                                - stats.get("goals_against", 0),
                                "fairplay": self.calculate_fairplay(
                                    stats.get("yellow_cards", 0),
                                    stats.get("red_cards", 0),
                                    stats.get("second_yellow_cards", 0),
                                ),
                            }
                        )

        if remaining_teams:
            if os.path.exists(teams_file_last):
                prev_df = pd.read_csv(teams_file_last, low_memory=False)
                last_per_team = prev_df.reset_index(drop=True).set_index("teams")

            for team in sorted(remaining_teams):
                if team in last_per_team.index:
                    r = last_per_team.loc[team]
                    # Carry forward cumulative stats; day stats are zeros
                    bucket.append(
                        {
                            "gameweek": current_gw,
                            "team": team,
                            "goals": 0,
                            "yellow_cards": 0,
                            "red_cards": 0,
                            "second_yellow_cards": 0,
                            "date_of_match": row["date_of_match"],
                            "matches_played": r.get("matches_played", 0),
                            "points": r.get("points", 0),
                            "goals_for": r.get("goals_for", 0),
                            "goals_against": r.get("goals_against", 0),
                            "goal_difference": r.get("goal_difference", 0),
                            "fairplay": r.get("fairplay", 0),
                        }
                    )
                else:
                    # No previous snapshot for this team -> start from zeros
                    s = self.team_stats.get(team, {})
                    bucket.append(
                        {
                            "gameweek": current_gw,
                            "team": team,
                            "goals": 0,
                            "yellow_cards": 0,
                            "red_cards": 0,
                            "second_yellow_cards": 0,
                            "date_of_match": None,
                            "matches_played": s.get("matches_played", 0),
                            "points": s.get("points", 0),
                            "goals_for": s.get("goals_for", 0),
                            "goals_against": s.get("goals_against", 0),
                            "goal_difference": s.get("goals_for", 0)
                            - s.get("goals_against", 0),
                            "fairplay": self.calculate_fairplay(
                                s.get("yellow_cards", 0),
                                s.get("red_cards", 0),
                                s.get("second_yellow_cards", 0),
                            ),
                        }
                    )

            for entry in bucket:
                entry["index"] = idx

            bucket.sort(key=lambda x: x["points"], reverse=True)
            bucket = self.check_tiebreaker_type(bucket, current_gw, idx)

            for r, entry in enumerate(bucket, start=1):
                entry["rank"] = r

            gw_df = pd.DataFrame(bucket)
            gw_df.to_csv(
                rankings_file_last,
                mode="a",
                index=False,
                header=not os.path.exists(rankings_file_last),
            )


class MatchRankUpdater:
    """
    Enrich matches_<season>.csv with nearest historical ranking/points/goals snapshot
    from rankings_<season>.csv before each match date, plus per-team trophy and
    first-division counters looked up in trophies_<season>.csv.
    """

    # Suffixes of the injected "<side>_team_<suffix>" columns, listed in the same
    # order as the tuple returned by _get_closest_stats.
    _STAT_SUFFIXES = (
        "rank",
        "points",
        "goals_for",
        "goals_against",
        "goals_difference",
    )

    # (column in the trophies CSV, suffix of the injected "<side>_team_<suffix>" column).
    # The misspelled suffixes ("thirst", "yearst") are the column names this class
    # has always written to the matches CSV; they are kept unchanged on purpose.
    _TROPHY_COLUMNS = (
        ("first_place", "first_place"),
        ("second_place", "second_place"),
        ("third_place", "thirst_place"),
        ("years_first_division_total", "yearst_first_division_global"),
        ("years_first_division_consecutive", "yearst_first_division_consecutive"),
    )

    _SIDES = ("home", "away")

    def __init__(self, last_season: str) -> None:
        self.last_season = last_season
        # Despite their names, both attributes hold DataFrames (not paths):
        # input_file is the matches table, output_file the rankings table.
        self.input_file: pd.DataFrame | None = None
        self.output_file: pd.DataFrame | None = None

    def _get_closest_stats(
        self, team: str, match_date: pd.Timestamp
    ) -> tuple[float, float, float, float, float]:
        """Find latest snapshot (strictly before match_date) for given team.

        Returns ``(rank, points, goals_for, goals_against, goal_difference)`` as
        floats, or five zeros when the team has no snapshot dated before
        ``match_date``. Requires ``self.output_file`` (the rankings table) to be
        loaded with a datetime ``date_of_match`` column.
        """
        assert self.output_file is not None
        rankings = self.output_file
        earlier = rankings[
            (rankings["team"] == team) & (rankings["date_of_match"] < match_date)
        ]
        if earlier.empty:
            return (0.0, 0.0, 0.0, 0.0, 0.0)

        # idxmax returns the first row carrying the most recent date
        latest = earlier.loc[earlier["date_of_match"].idxmax()]
        return (
            float(latest["rank"]),
            float(latest["points"]),
            float(latest["goals_for"]),
            float(latest["goals_against"]),
            float(latest["goal_difference"]),
        )

    def _load_trophy_maps(self, trophies_path: str) -> dict[str, dict]:
        """Build ``{trophies column -> {team -> value}}`` lookups.

        A missing trophies file is reported with a warning and yields empty
        lookups, so every trophy column falls back to 0 instead of aborting
        the whole update. Columns absent from the file also fall back to 0.
        """
        sources = [src for src, _ in self._TROPHY_COLUMNS]

        if not os.path.exists(trophies_path):
            print(
                f"[WARN] trophies CSV not found; trophy columns default to 0 → {trophies_path}"
            )
            return {src: {} for src in sources}

        df_trophies = pd.read_csv(trophies_path)

        # Ensure columns exist (fallback to 0 if missing)
        for col in ["teams", *sources]:
            if col not in df_trophies.columns:
                df_trophies[col] = 0

        return {
            src: dict(zip(df_trophies["teams"], df_trophies[src])) for src in sources
        }

    def update_and_save(self) -> None:
        """Inject ranking and trophy columns into matches CSV in-place.

        Reads ``matches_<season>.csv`` and ``rankings_<season>.csv`` from
        ``app/data/raw/<season>/``; if either is missing, or the matches table
        is empty, a warning is printed and nothing is written. For every match
        and each side, the latest ranking snapshot strictly before the match
        date is attached (zeros when none exists), followed by the team's
        trophy counters (0 when the team, a column, or the whole trophies file
        is missing). The matches CSV is then overwritten.
        """
        input_path = f"app/data/raw/{self.last_season}/matches_{self.last_season}.csv"
        rankings_path = (
            f"app/data/raw/{self.last_season}/rankings_{self.last_season}.csv"
        )
        trophies_path = (
            f"app/data/raw/{self.last_season}/trophies_{self.last_season}.csv"
        )

        if not os.path.exists(input_path):
            print(f"[WARN] matches CSV not found → {input_path}")
            return
        if not os.path.exists(rankings_path):
            print(
                f"[WARN] rankings CSV not found; run FootballRanking.process_matches() first → {rankings_path}"
            )
            return

        self.input_file = pd.read_csv(input_path, low_memory=False)
        if self.input_file.empty:
            print("[WARN] matches CSV is empty, nothing to update.")
            return

        self.output_file = pd.read_csv(rankings_path, low_memory=False)

        # Parse to datetime for proper comparison.
        # NOTE(review): the parsed column is what gets written back, so dates
        # that fail to parse (coerced to NaT) end up empty in the saved CSV.
        self.input_file["date_of_match"] = pd.to_datetime(
            self.input_file["date_of_match"], errors="coerce"
        )
        self.output_file["date_of_match"] = pd.to_datetime(
            self.output_file["date_of_match"], errors="coerce"
        )

        trophy_maps = self._load_trophy_maps(trophies_path)

        # Per-side accumulators, one list per injected column
        stats = {
            side: {suffix: [] for suffix in self._STAT_SUFFIXES}
            for side in self._SIDES
        }
        trophies = {
            side: {src: [] for src, _ in self._TROPHY_COLUMNS}
            for side in self._SIDES
        }

        matches = self.input_file
        rows = zip(
            matches["date_of_match"],
            matches["home_team_name"],
            matches["away_team_name"],
        )
        for match_date, home, away in rows:
            for side, team in zip(self._SIDES, (home, away)):
                snapshot = self._get_closest_stats(team, match_date)
                for suffix, value in zip(self._STAT_SUFFIXES, snapshot):
                    stats[side][suffix].append(value)

                # Trophies lookups (use 0 if team not found)
                for src, _ in self._TROPHY_COLUMNS:
                    trophies[side][src].append(trophy_maps[src].get(team, 0))

        # Assign columns back. Order matters for newly created columns:
        # ranking stats alternate home/away per stat, trophies go all-home
        # then all-away.
        for suffix in self._STAT_SUFFIXES:
            for side in self._SIDES:
                matches[f"{side}_team_{suffix}"] = stats[side][suffix]

        for side in self._SIDES:
            for src, dst in self._TROPHY_COLUMNS:
                matches[f"{side}_team_{dst}"] = trophies[side][src]

        matches.to_csv(input_path, index=False)
