"""File parser implementation (with single/multi delimiter & templates)"""

import os
from dataclasses import fields
from typing import Callable, Dict, Generic, Iterator, List, Optional, Type, TypeVar

from .dto import FileDTO
from .exceptions import FileNotFoundError, ParseError, ValidationError
from .templates import LinePattern, TEMPLATES

# Names exported by `from <module> import *`.
__all__ = ["FileParser"]

# Type variable used as the generic parameter of FileParser (Generic[T]).
T = TypeVar('T', bound=FileDTO)
# Type variable tied to the dto_class argument of a single call, so the
# result type follows the class that was passed in.
U = TypeVar('U', bound=FileDTO)  # ← for parse(..., dto_class=...)

def _split_by_ordered_delimiters(s: str, delimiters: List[str]) -> List[str]:
    """
    Последовательно режет строку по каждому разделителю ОДИН раз.
    На выходе len(delimiters) + 1 частей.
    Если какой-то разделитель не найден — бросаем ParseError.
    """
    parts: List[str] = []
    rest = s
    for delim in delimiters:
        idx = rest.find(delim)
        if idx == -1:
            raise ParseError(f"Delimiter '{delim}' not found in: {rest}")
        left = rest[:idx]
        rest = rest[idx + len(delim):]
        parts.append(left)
    parts.append(rest)
    return parts


class FileParser(Generic[T]):
    """
    Flexible line-oriented parser that builds one DTO per non-blank line.

    Per-call configuration (dto_class, file_path, delimiter[s], template/pattern,
    etc.) is passed to parse(...) / parse_batch(...), NOT to __init__; the
    constructor only stores settings shared by every call.

    A line is split by exactly one mechanic, tried in this order: the pattern's
    regex, ordered delimiters (the pattern's, then the argument), a single
    delimiter (the pattern's, then the argument), and finally "the whole line
    is the only field".
    """

    def __init__(
        self,
        *,
        encoding: str = 'utf-8',
        skip_invalid: bool = True,
        strip_whitespace: bool = True,
    ):
        """
        Args:
            encoding: Text encoding passed to open() when reading files.
            skip_invalid: If true, lines that raise ParseError/ValidationError
                are reported with print() and skipped, and DTOs whose
                validate() returns a false value are skipped silently.
                If false, the exception propagates to the caller.
            strip_whitespace: Strip whitespace around every value produced by
                delimiter-based splitting (regex groups are left untouched).
        """
        self.encoding = encoding
        self.skip_invalid = skip_invalid
        self.strip_whitespace = strip_whitespace

    # --- Single-line parsing helpers ---
    def _parse_line_by_regex(self, line: str, pattern: LinePattern) -> Dict[str, str]:
        """
        Match `line` from its start against `pattern.regex` and return named groups.

        When `pattern.fields` is set, only those keys are returned (a key that
        is not a group name maps to ""); otherwise the full groupdict is returned.

        Raises:
            ValueError: if the pattern has no regex configured.
            ParseError: if the regex does not match the line.
        """
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if not pattern.regex:
            raise ValueError("Regex pattern not configured")
        m = pattern.regex.match(line)
        if not m:
            raise ParseError(f"Regex did not match: {line}")
        groups = m.groupdict()
        if pattern.fields:
            return {k: groups.get(k, "") for k in pattern.fields}
        return groups

    def _parse_line_by_multi(self, line: str, delimiters: List[str], fields_: List[str]) -> Dict[str, str]:
        """
        Split `line` once on each delimiter, in order, and pair the pieces with `fields_`.

        Raises:
            ParseError: if a delimiter is missing or the number of pieces
                differs from the number of fields.
        """
        parts = _split_by_ordered_delimiters(line, delimiters)
        if len(parts) != len(fields_):
            raise ParseError(
                f"Expected {len(fields_)} parts by ordered delimiters, got {len(parts)}: {line}"
            )
        if self.strip_whitespace:
            parts = [p.strip() for p in parts]
        return dict(zip(fields_, parts))

    def _parse_line_by_single(self, line: str, delimiter: str, fields_: List[str]) -> Dict[str, str]:
        """
        Split `line` on one repeated delimiter and pair the pieces with `fields_`.

        With a single field the delimiter is ignored and the whole line becomes
        that field's value. Otherwise at most len(fields_) - 1 splits are made,
        so the last field keeps any further occurrences of the delimiter.

        Raises:
            ParseError: if the line yields fewer pieces than there are fields.
        """
        if len(fields_) == 1:
            val = line.strip() if self.strip_whitespace else line
            return {fields_[0]: val}
        parts = line.split(delimiter, maxsplit=len(fields_) - 1)
        if len(parts) != len(fields_):
            raise ParseError(
                f"Expected {len(fields_)} fields with delimiter '{delimiter}', got {len(parts)}: {line}"
            )
        if self.strip_whitespace:
            parts = [p.strip() for p in parts]
        return dict(zip(fields_, parts))

    def _apply_post_map(self, data: Dict[str, str], pattern: Optional[LinePattern]) -> Dict[str, str]:
        """Run the pattern's `post_map` callable over `data` if one is set; else return `data` as is."""
        if pattern and pattern.post_map:
            data = pattern.post_map(data)
        return data

    def _extract_fields(
        self,
        line: str,
        *,
        patt: Optional[LinePattern],
        data_fields: List[str],
        delimiter: Optional[str],
        delimiters: Optional[List[str]],
    ) -> Dict[str, str]:
        """
        Pick the parsing mechanic for `line` and return the raw field mapping.

        Shared by parse() and parse_batch() so both apply the same precedence.

        Raises:
            ParseError: if the line cannot be split, or no mechanic is
                configured while more than one field is expected.
        """
        if patt and patt.regex:
            return self._parse_line_by_regex(line, patt)
        if patt and patt.delimiters:
            return self._parse_line_by_multi(line, patt.delimiters, data_fields)
        if delimiters:
            return self._parse_line_by_multi(line, delimiters, data_fields)
        if patt and patt.delimiter:
            return self._parse_line_by_single(line, patt.delimiter, data_fields)
        if delimiter is not None:
            return self._parse_line_by_single(line, delimiter, data_fields)
        # Fallback: no mechanic configured, the whole line is the single field.
        if len(data_fields) == 1:
            val = line.strip() if self.strip_whitespace else line
            return {data_fields[0]: val}
        raise ParseError("No valid parsing mechanics configured")

    def _build_pattern_and_fields(
        self,
        *,
        dto_class: Type[U],
        field_order: Optional[List[str]],
        pattern: Optional[LinePattern],
        template: Optional[str],
    ) -> tuple[Optional[LinePattern], List[str]]:
        """
        Resolve the effective pattern and the ordered list of data field names.

        Pattern precedence: a named `template` (looked up in TEMPLATES) overrides
        an explicit `pattern`. Field precedence: `field_order`, then the
        pattern's `fields`, then the DTO's dataclass fields except the
        metadata fields `line_number` and `raw_line`.

        Raises:
            ValueError: if `template` is not a key of TEMPLATES.
        """
        # DTO "data" fields, i.e. everything except the per-line metadata.
        excluded = {'line_number', 'raw_line'}
        dto_fields = [f.name for f in fields(dto_class) if f.name not in excluded]

        # Choose the pattern: template > pattern > None.
        patt = pattern
        if template:
            if template not in TEMPLATES:
                raise ValueError(f"Unknown template '{template}'. Available: {list(TEMPLATES)}")
            patt = TEMPLATES[template]

        # Choose the final field order.
        if field_order:
            data_fields = field_order
        elif patt and patt.fields:
            data_fields = patt.fields
        else:
            data_fields = dto_fields

        return patt, data_fields

    def _validate_config(
        self,
        *,
        file_path: str,
        patt: Optional[LinePattern],
        data_fields: List[str],
        delimiter: Optional[str],
        delimiters: Optional[List[str]],
    ) -> None:
        """
        Check that the file exists and that the parse configuration is unambiguous.

        Raises:
            FileNotFoundError: (the project exception imported by this module)
                if `file_path` does not exist.
            ValueError: if more than one of regex / ordered delimiters / single
                delimiter is configured, or if several fields are expected but
                no mechanic is configured at all.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        has_regex = bool(patt and patt.regex)
        has_multi = bool(patt and patt.delimiters) or bool(delimiters)
        has_single = bool(patt and patt.delimiter) or bool(delimiter)
        mechanics = sum([has_regex, has_multi, has_single])

        if mechanics > 1:
            raise ValueError("Ambiguous parse config: specify only one of regex / ordered delimiters / single delimiter.")

        if len(data_fields) > 1 and mechanics == 0:
            raise ValueError(
                f"DTO expects {len(data_fields)} fields; specify delimiter(s) or a pattern/template."
            )

    def _iter_dtos(
        self,
        *,
        dto_class: Type[U],
        file_path: str,
        patt: Optional[LinePattern],
        data_fields: List[str],
        delimiter: Optional[str],
        delimiters: Optional[List[str]],
        skip_lines: int,
        line_filter: Optional[Callable[[str], bool]],
    ) -> Iterator[U]:
        """
        Read `file_path` and yield one validated DTO per accepted line.

        Single implementation of the read loop used by parse() and
        parse_batch(). For every line it: skips the first `skip_lines`
        physical lines, drops the trailing newline, skips blank lines and
        lines rejected by `line_filter`, extracts the fields, applies the
        pattern's post_map, builds the DTO with `line_number` / `raw_line`,
        calls post_parse() and then validate().

        Raises:
            ParseError, ValidationError: only when `skip_invalid` is false.
            FileNotFoundError: (project exception) wrapping any OSError raised
                while the file is open.
        """
        try:
            with open(file_path, 'r', encoding=self.encoding) as f:
                for line_number, raw in enumerate(f, start=1):
                    if line_number <= skip_lines:
                        continue

                    line = raw.rstrip('\n\r')
                    if not line.strip():
                        continue
                    if line_filter and not line_filter(line):
                        continue

                    try:
                        data = self._extract_fields(
                            line,
                            patt=patt,
                            data_fields=data_fields,
                            delimiter=delimiter,
                            delimiters=delimiters,
                        )
                        data = self._apply_post_map(data, patt)

                        dto_instance = dto_class(
                            line_number=line_number,
                            raw_line=line,
                            **data
                        )
                        dto_instance.post_parse()

                        if not dto_instance.validate():
                            if self.skip_invalid:
                                continue
                            raise ValidationError(f"Line {line_number}: validation failed: {line}")

                    except (ParseError, ValidationError) as e:
                        if self.skip_invalid:
                            print(f"Warning: {e}")
                            continue
                        raise

                    # Yield outside the inner try so that nothing raised at the
                    # suspension point is mistaken for a per-line parse error.
                    yield dto_instance

        except OSError as e:
            # Chain the cause so the original OS error stays visible.
            raise FileNotFoundError(f"Error reading file {file_path}: {e}") from e

    # ------------------------------- PUBLIC API -------------------------------
    def parse(
        self,
        dto_class: Type[U],
        file_path: str,
        delimiter: Optional[str] = None,
        delimiters: Optional[List[str]] = None,
        *,
        pattern: Optional[LinePattern] = None,
        template: Optional[str] = None,
        field_order: Optional[List[str]] = None,
        max_lines: Optional[int] = None,
        skip_lines: int = 0,
        line_filter: Optional[Callable[[str], bool]] = None,
    ) -> List[U]:
        """
        Parse the whole file and return the accepted DTOs as a list.

        Takes the DTO class together with the full configuration; the result is
        typed as List[U], so tools can infer the element type from `dto_class`.

        Args:
            dto_class: Dataclass to instantiate for every accepted line.
            file_path: Path of the text file to read.
            delimiter: Single delimiter used to split each line.
            delimiters: Ordered delimiters, each applied once per line.
            pattern: Explicit LinePattern describing the line format.
            template: Name of a predefined pattern in TEMPLATES; overrides `pattern`.
            field_order: Field names to assign the split values to, in order.
            max_lines: Stop after this many DTOs were accepted; None or 0
                means no limit.
            skip_lines: Number of leading physical lines to skip.
            line_filter: Predicate on the newline-stripped line; lines for
                which it returns a false value are skipped.

        Raises:
            ValueError: for an unknown template or an ambiguous/insufficient
                parse configuration.
            FileNotFoundError: (project exception) if the file is missing or
                cannot be read.
            ParseError, ValidationError: only when `skip_invalid` is false.
        """
        patt, data_fields = self._build_pattern_and_fields(
            dto_class=dto_class,
            field_order=field_order,
            pattern=pattern,
            template=template,
        )
        self._validate_config(
            file_path=file_path,
            patt=patt,
            data_fields=data_fields,
            delimiter=delimiter,
            delimiters=delimiters,
        )

        items: List[U] = []
        stream = self._iter_dtos(
            dto_class=dto_class,
            file_path=file_path,
            patt=patt,
            data_fields=data_fields,
            delimiter=delimiter,
            delimiters=delimiters,
            skip_lines=skip_lines,
            line_filter=line_filter,
        )
        try:
            for dto_instance in stream:
                items.append(dto_instance)
                if max_lines and len(items) >= max_lines:
                    break
        finally:
            # Closes the file right away when we stop early at max_lines.
            stream.close()

        return items

    def parse_batch(
        self,
        dto_class: Type[U],
        *,
        file_path: str,
        batch_size: int = 100,
        skip_lines: int = 0,
        delimiter: Optional[str] = None,
        delimiters: Optional[List[str]] = None,
        pattern: Optional[LinePattern] = None,
        template: Optional[str] = None,
        field_order: Optional[List[str]] = None,
        line_filter: Optional[Callable[[str], bool]] = None,
    ) -> Iterator[List[U]]:
        """
        Lazily parse the file, yielding lists of at most `batch_size` DTOs.

        Accepts the same configuration as parse() and applies the same
        per-line rules, including raising ValidationError for a DTO that fails
        validate() when `skip_invalid` is false. The last batch may be
        shorter. Because this is a generator, the configuration is checked on
        the first iteration rather than at call time.

        Raises:
            ValueError: if `batch_size` is less than 1, for an unknown
                template, or for an ambiguous/insufficient parse configuration.
            FileNotFoundError: (project exception) if the file is missing or
                cannot be read.
            ParseError, ValidationError: only when `skip_invalid` is false.
        """
        # A non-positive size would emit a (possibly empty) batch for every line.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        patt, data_fields = self._build_pattern_and_fields(
            dto_class=dto_class,
            field_order=field_order,
            pattern=pattern,
            template=template,
        )
        self._validate_config(
            file_path=file_path,
            patt=patt,
            data_fields=data_fields,
            delimiter=delimiter,
            delimiters=delimiters,
        )

        batch: List[U] = []
        stream = self._iter_dtos(
            dto_class=dto_class,
            file_path=file_path,
            patt=patt,
            data_fields=data_fields,
            delimiter=delimiter,
            delimiters=delimiters,
            skip_lines=skip_lines,
            line_filter=line_filter,
        )
        try:
            for dto_instance in stream:
                batch.append(dto_instance)
                if len(batch) >= batch_size:
                    yield batch
                    batch = []

            if batch:
                yield batch
        finally:
            # Closes the file if the consumer stops iterating early.
            stream.close()

    def count_lines(self, *, file_path: str, skip_empty: bool = True) -> int:
        """
        Count the lines of `file_path`, ignoring whitespace-only lines when `skip_empty` is true.

        Raises:
            FileNotFoundError: (project exception) wrapping any OSError raised
                while opening or reading the file.
        """
        count = 0
        try:
            with open(file_path, 'r', encoding=self.encoding) as f:
                for line in f:
                    if skip_empty and not line.strip():
                        continue
                    count += 1
        except OSError as e:
            raise FileNotFoundError(f"Error reading file {file_path}: {e}") from e
        return count