import argparse
import logging
from functools import partial
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import numpy
import pandas

import annofabcli
from annofabcli.common.cli import AbstractCommandLineWithoutWebapiInterface, ArgumentParser, get_list_from_args
from annofabcli.common.utils import read_multiheader_csv

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Radix used when converting a hash value into upper-case letters (see `create_masked_name`).
ALPHABET_SIZE = 26
# Exponent applied to ALPHABET_SIZE to size the masked-name space (ALPHABET_SIZE ** DIGIT);
# also used as the prefix length kept when a numeric suffix is appended to a colliding name.
DIGIT = 2


def _create_unique_masked_name(masked_name_set: Set[str], masked_name: str) -> str:
    """
    マスクされたユニークな名前を返す。
    `masked_name_set` に含まれている名前なら、末尾に数字をつけて、ユニークにする。
    """
    if masked_name not in masked_name_set:
        masked_name_set.add(masked_name)
        return masked_name
    else:
        # 末尾に数字を付ける
        base_masked_name = masked_name[0:DIGIT]
        try:
            # 末尾が数字の場合(末尾の数字が２桁になると処理がおかしくなるけど、許容する）
            now_index = int(masked_name[-1])
        except ValueError:
            # 末尾が数字でない場合
            now_index = 0

        new_masked_name = base_masked_name + str(now_index + 1)
        return _create_unique_masked_name(masked_name_set, new_masked_name)


def _create_replaced_dict(name_set: Set[str]) -> Dict[str, str]:
    """
    Build a mapping from each original name to its unique masked name.

    Names are processed in sorted order. Iteration order of a ``set`` of strings can differ
    between interpreter runs, which would make the numeric suffix given to colliding masked
    names vary from run to run; sorting keeps the result reproducible for the same input.

    Args:
        name_set: Names to be masked.

    Returns:
        Dict whose keys are the original names and whose values are the masked names.
    """
    replaced_dict: Dict[str, str] = {}
    masked_name_set: Set[str] = set()
    for name in sorted(name_set):
        masked_name = create_masked_name(name)
        replaced_dict[name] = _create_unique_masked_name(masked_name_set, masked_name)
    return replaced_dict


def create_replaced_biography_dict(name_set: Set[str]) -> Dict[str, str]:
    """
    Build a mapping from each original biography value to its unique masked name.

    This applies exactly the same masking rule as `_create_replaced_dict`, so it delegates
    to that helper instead of repeating its loop.

    Args:
        name_set: Biography values to be masked.

    Returns:
        Dict whose keys are the original values and whose values are the masked names.
    """
    return _create_replaced_dict(name_set)


def create_masked_name(name: str) -> str:
    """
    Derive a short upper-case alphabetic masked name from ``name``.

    The input is hashed with a fixed polynomial hash truncated to 64 bits, reduced to the
    range ``1 .. ALPHABET_SIZE ** DIGIT`` and rendered as letters ("A" .. "Z", "AA", "AB", ...).
    The same input always produces the same masked name; different inputs may collide.
    """
    mask_64bit = (1 << 64) - 1

    # Polynomial rolling hash (seed 7, multiplier 31), kept within 64 bits.
    digest = 7
    for char in name:
        digest = (digest * 31 + ord(char)) & mask_64bit

    number = digest % (ALPHABET_SIZE**DIGIT) + 1

    # Convert the 1-based number to letters, least significant letter first.
    letters: List[str] = []
    while number > 0:
        number, offset = divmod(number - 1, ALPHABET_SIZE)
        letters.append(chr(ord("A") + offset))
    return "".join(reversed(letters))


def get_replaced_user_id_set_from_biography(
    df: pandas.DataFrame, not_masked_location_set: Optional[Set[str]] = None
) -> Set[str]:
    """
    Return the set of ``user_id`` values whose rows are subject to masking.

    Args:
        df: DataFrame with ``user_id`` and ``biography`` columns.
        not_masked_location_set: Biography values whose rows are excluded from masking.
            When None, no row is excluded.

    Returns:
        The non-missing ``user_id`` values of the rows that remain after the exclusion.
    """
    if not_masked_location_set is None:
        filtered_df = df
    else:
        # `isin` always yields a boolean mask, also for a frame with zero rows
        # (an object-dtype mask would not be treated as a boolean indexer).
        is_masked_row = ~df["biography"].isin(not_masked_location_set)
        filtered_df = df[is_masked_row]

    # The user_id column may contain NaN, so drop missing values.
    return set(filtered_df["user_id"].dropna())


def _get_header_row_count(df: pandas.DataFrame) -> int:
    if isinstance(df.columns, pandas.MultiIndex):
        return len(df.columns.levels)
    else:
        return 1


def _get_tuple_column(df: pandas.DataFrame, column: str) -> Union[str, Tuple]:
    """
    Return the key that addresses ``column`` in ``df``.

    Args:
        df: DataFrame whose header depth decides the key shape.
        column: Column name (top header row).

    Returns:
        ``column`` itself when the header has a single row; otherwise a tuple whose first
        element is ``column`` and whose remaining elements are empty strings, one per
        additional header row.
    """
    header_rows = _get_header_row_count(df)
    if header_rows < 2:
        return column
    return (column,) + ("",) * (header_rows - 1)


def replace_by_columns(
    df: pandas.DataFrame,
    replacement_dict: Dict[str, str],
    main_column: Union[str, Tuple],
    sub_columns: Optional[List[Any]] = None,
) -> None:
    """Mask user information in ``df`` in place, keyed on the values of ``main_column``.

    Args:
        df: DataFrame to modify in place.
        replacement_dict: Mapping from an original ``main_column`` value (e.g. a user_id)
            to its replacement.
        main_column: Column whose values are looked up in ``replacement_dict`` (e.g. user_id).
        sub_columns: Columns (e.g. username) that receive the same replacement value on every
            row whose ``main_column`` value is in ``replacement_dict``; other rows are unchanged.

    Returns:
        None. ``df`` is modified in place.
    """
    # Keep the unmasked key values: the sub columns must be derived from them
    # before the main column itself is rewritten.
    main_values = df[main_column]

    if sub_columns is not None:
        for sub_column in sub_columns:
            # Column-wise pass instead of a row-wise `df.apply`: it is cheaper and also
            # works for a frame with zero rows (where `apply(axis=1)` returns a DataFrame).
            df[sub_column] = [
                replacement_dict[main_value] if main_value in replacement_dict else sub_value
                for main_value, sub_value in zip(main_values, df[sub_column])
            ]

    # Only replace in string-like columns; `replace` with string keys is not applicable to
    # e.g. numeric columns. Both object dtype and pandas string dtypes are accepted so that
    # string-typed user_id columns are not left unmasked.
    main_dtype = main_values.dtype
    if pandas.api.types.is_object_dtype(main_dtype) or pandas.api.types.is_string_dtype(main_dtype):
        df[main_column] = main_values.replace(replacement_dict)


def get_masked_username_series(df: pandas.DataFrame, replace_dict_by_user_id: Dict[str, str]) -> pandas.Series:
    """
    Compute the username of every row after masking.

    Rows whose user_id is a key of ``replace_dict_by_user_id`` get the mapped value;
    all other rows keep their current username.
    """
    id_key = _get_tuple_column(df, "user_id")
    name_key = _get_tuple_column(df, "username")

    def _masked_username(row: pandas.Series) -> str:
        user_id = row[id_key]
        return replace_dict_by_user_id[user_id] if user_id in replace_dict_by_user_id else row[name_key]

    return df.apply(_masked_username, axis=1)


def get_masked_account_id(df: pandas.DataFrame, replace_dict_by_user_id: Dict[str, str]) -> pandas.Series:
    """
    Compute the account_id of every row after masking.

    Rows whose user_id is a key of ``replace_dict_by_user_id`` get the mapped value;
    all other rows keep their current account_id.
    """
    id_key = _get_tuple_column(df, "user_id")
    account_key = _get_tuple_column(df, "account_id")

    def _masked_account_id(row: pandas.Series) -> str:
        user_id = row[id_key]
        return replace_dict_by_user_id[user_id] if user_id in replace_dict_by_user_id else row[account_key]

    return df.apply(_masked_account_id, axis=1)


def get_replaced_biography_set(df: pandas.DataFrame, not_masked_location_set: Optional[Set[str]] = None) -> Set[str]:
    """
    Return the distinct non-missing ``biography`` values of ``df`` that are to be masked.

    Values listed in ``not_masked_location_set`` (when given) are left out of the result.
    """
    candidates = set(df["biography"].dropna())
    if not_masked_location_set is not None:
        candidates.difference_update(not_masked_location_set)
    return candidates


def create_replacement_dict_by_user_id(
    df: pandas.DataFrame,
    not_masked_biography_set: Optional[Set[str]] = None,
    not_masked_user_id_set: Optional[Set[str]] = None,
) -> Dict[str, str]:
    """
    Build a dict whose keys are the user_ids to replace and whose values are the masked user_ids.

    Args:
        df: DataFrame with a ``user_id`` column and optionally a ``biography`` column.
        not_masked_biography_set: Biography values whose users are not masked. Only
            consulted when ``df`` has a ``biography`` column.
        not_masked_user_id_set: user_ids that are never masked.

    Returns:
        Mapping from original user_id to masked user_id.
    """
    if "biography" in df:
        replaced_user_id_set = get_replaced_user_id_set_from_biography(
            df, not_masked_location_set=not_masked_biography_set
        )
    else:
        # Without a biography column there is nothing to filter on, so every user is a
        # masking candidate. An empty set here would silently leave all users unmasked.
        replaced_user_id_set = set(df["user_id"].dropna())

    if not_masked_user_id_set is not None:
        replaced_user_id_set = replaced_user_id_set - not_masked_user_id_set

    return _create_replaced_dict(replaced_user_id_set)


def create_replacement_dict_by_biography(
    df: pandas.DataFrame,
    not_masked_biography_set: Optional[Set[str]] = None,
) -> Dict[str, str]:
    """
    Build a dict whose keys are the biography values to replace and whose values are the masked ones.

    Each masked value has the form ``category-<masked name>``.

    Args:
        df: DataFrame with a ``biography`` column.
        not_masked_biography_set: Biography values that must not be masked.

    Returns:
        Mapping from original biography to masked biography.
    """
    replaced_biography_set = get_replaced_biography_set(df, not_masked_location_set=not_masked_biography_set)
    # Report through the logger rather than stdout: stdout may carry the resulting CSV,
    # and these are the unmasked values.
    logger.debug("replaced_biography_set=%s", replaced_biography_set)
    tmp_replace_dict_by_biography = _create_replaced_dict(replaced_biography_set)
    return {key: f"category-{value}" for key, value in tmp_replace_dict_by_biography.items()}


def replace_user_info_by_user_id(df: pandas.DataFrame, replacement_dict_by_user_id: Dict[str, str]):
    """
    Mask the user_id column and, when present, the username and account_id columns of ``df``.

    Args:
        df: DataFrame to modify in place.
        replacement_dict_by_user_id: Mapping from original user_id to masked user_id.
    """
    user_id_column = _get_tuple_column(df, "user_id")
    # Optional columns that follow the user_id replacement, in this fixed order.
    sub_columns = [_get_tuple_column(df, name) for name in ("username", "account_id") if name in df]
    replace_by_columns(df, replacement_dict_by_user_id, main_column=user_id_column, sub_columns=sub_columns)


def replace_biography(
    df: pandas.DataFrame, replacement_dict_by_user_id: Dict[str, str], replacement_dict_by_biography: Dict[str, str]
):
    """
    Mask the biography column of ``df`` in place.

    A row's biography is replaced only when the row's user_id is a key of
    ``replacement_dict_by_user_id`` and the biography itself is a key of
    ``replacement_dict_by_biography``. Every other row keeps its value.

    Args:
        df: DataFrame to modify in place.
        replacement_dict_by_user_id: Mapping from original user_id to masked user_id;
            decides which rows belong to users being masked.
        replacement_dict_by_biography: Mapping from original biography to masked biography.
    """
    user_id_column = _get_tuple_column(df, "user_id")
    biography_column = _get_tuple_column(df, "biography")

    # Column-wise pass instead of a row-wise `df.apply`: it is cheaper and also works for
    # a frame with zero rows (where `apply(axis=1)` returns a DataFrame, not a Series).
    df[biography_column] = [
        replacement_dict_by_biography.get(biography, biography)
        if user_id in replacement_dict_by_user_id
        else biography
        for user_id, biography in zip(df[user_id_column], df[biography_column])
    ]


def create_masked_user_info_df(
    df: pandas.DataFrame,
    *,
    not_masked_biography_set: Optional[Set[str]] = None,
    not_masked_user_id_set: Optional[Set[str]] = None,
) -> pandas.DataFrame:
    """
    Return a copy of ``df`` with the user information masked.

    Args:
        df: Source DataFrame. It is returned as-is (with a warning) when it has no
            ``user_id`` column.
        not_masked_biography_set: Biography values that are not masked.
        not_masked_user_id_set: user_ids that are not masked.

    Returns:
        A masked copy of ``df``, or ``df`` itself when there is no ``user_id`` column.
    """
    if "user_id" not in df:
        logger.warning("引数`df`に`user_id`列が存在しないため、ユーザ情報をマスクできません。")
        return df

    user_id_replacements = create_replacement_dict_by_user_id(
        df,
        not_masked_biography_set=not_masked_biography_set,
        not_masked_user_id_set=not_masked_user_id_set,
    )

    masked_df = df.copy()

    # Mask biography first: it is decided from the still-unmasked user_id values.
    if "biography" in masked_df:
        biography_replacements = create_replacement_dict_by_biography(
            masked_df, not_masked_biography_set=not_masked_biography_set
        )
        replace_biography(
            masked_df,
            replacement_dict_by_user_id=user_id_replacements,
            replacement_dict_by_biography=biography_replacements,
        )

    replace_user_info_by_user_id(masked_df, user_id_replacements)
    return masked_df


class MaskUserInfo(AbstractCommandLineWithoutWebapiInterface):
    """Command that reads a CSV, masks its user information and prints the result as CSV."""

    def main(self) -> None:
        """Read the CSV named by the arguments, mask it and print the masked CSV."""
        args = self.args

        def _to_optional_set(raw_values: Optional[List[str]]) -> Optional[Set[str]]:
            # An option that was not given stays None (meaning "no exclusion").
            if raw_values is None:
                return None
            return set(get_list_from_args(raw_values))

        not_masked_biography_set = _to_optional_set(args.not_masked_biography)
        not_masked_user_id_set = _to_optional_set(args.not_masked_user_id)

        header_row_count: int = args.csv_header
        source_path: Path = args.csv
        if header_row_count != 1:
            original_df = read_multiheader_csv(str(source_path), header_row_count=header_row_count)
        else:
            original_df = pandas.read_csv(str(source_path))

        masked_df = create_masked_user_info_df(
            df=original_df,
            not_masked_biography_set=not_masked_biography_set,
            not_masked_user_id_set=not_masked_user_id_set,
        )
        self.print_csv(masked_df)


def main(args: argparse.Namespace) -> None:
    """Entry point of the sub-command: run `MaskUserInfo` with the parsed arguments."""
    command = MaskUserInfo(args)
    command.main()


def parse_args(parser: argparse.ArgumentParser) -> None:
    """Register the options of this sub-command on ``parser`` and set `main` as its handler."""
    common_options = ArgumentParser(parser)

    # Input CSV (required).
    parser.add_argument(
        "--csv",
        type=Path,
        required=True,
        help="ユーザ情報が記載されたCSVファイルを指定してください。CSVには`user_id`列が必要です。",
    )

    # Exclusions from masking.
    parser.add_argument(
        "--not_masked_biography", type=str, nargs="+", help="マスクしないユーザの`biography`を指定してください。"
    )
    parser.add_argument(
        "--not_masked_user_id", type=str, nargs="+", help="マスクしないユーザの`user_id`を指定してください。"
    )

    # Number of header rows in the CSV.
    parser.add_argument("--csv_header", type=int, default=1, help="CSVのヘッダ行数")

    common_options.add_output()
    common_options.add_csv_format()

    parser.set_defaults(subcommand_func=main)


def add_parser(subparsers: Optional[argparse._SubParsersAction] = None) -> argparse.ArgumentParser:
    """Create the ``mask_user_info`` sub-command parser, register its options and return it."""
    parser = annofabcli.common.cli.add_parser(
        subparsers,
        "mask_user_info",
        "CSVに記載されたユーザ情報をマスクします。",
        "CSVに記載されたユーザ情報をマスクします。CSVの`user_id`,`username`,`biography`,`account_id` 列をマスクします。",
    )
    parse_args(parser)
    return parser
