# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/02_utils.ipynb (unless otherwise specified).

# Public API of this module: the names exported by a star-import of it.
__all__ = ['recursive_walk', 'get_files', 'bin2int', 'bin2str', 'bin2date', 'bin2time', 'optimize_floats',
           'optimize_ints', 'optimize_objects', 'df_optimize', 'public_attrs', 'getattrs', 'cached']

# Cell
import os
import functools
import platform
from typing import *
from dataclasses import make_dataclass
import pandas as pd
from fastcore.foundation import L
from fastcore.xtras import Path
from fastcore.basics import listify, setify
from .constants import EXCLUDE_ATTRS

# Cell
def _get_files(p, fs, extensions=None):
    "Join `p` with each name in `fs` that is not hidden and whose suffix is in `extensions` (any suffix when `extensions` is empty)."
    base = Path(p)

    def _wanted(name):
        # Hidden entries (leading dot) are always dropped.
        if name.startswith("."):
            return False
        # No extension filter means every remaining name is accepted.
        if not extensions:
            return True
        # The text after the last dot, lower-cased and re-prefixed with ".".
        suffix = f'.{name.split(".")[-1].lower()}'
        return suffix in extensions

    return L(base / name for name in fs if _wanted(name))

def recursive_walk(path, folders, extensions, followlinks) -> L:
    """Walk `path` with `os.walk` and collect the matching files into an `L`.

    On the first directory yielded (the root) a non-empty `folders` restricts
    the descent to the sub-directories it names, and the root's own files are
    kept only when "." is listed in `folders`. Everywhere else, directories
    whose name starts with "." are pruned from the walk.
    """
    collected = L()
    walker = os.walk(path, followlinks=followlinks)  # yields (dirpath, dirnames, filenames)
    for step, (dirpath, dirnames, filenames) in enumerate(walker):
        restrict_root = step == 0 and len(folders) != 0
        # Assigning through the slice edits the list os.walk will descend into.
        if restrict_root:
            dirnames[:] = [name for name in dirnames if name in folders]
        else:
            dirnames[:] = [name for name in dirnames if not name.startswith(".")]
        if restrict_root and "." not in folders:
            # Root files were not requested; only the selected folders are searched.
            continue
        collected += _get_files(dirpath, filenames, extensions)
    return collected

# Cell
def get_files(path, extensions=None, recurse=True, folders=None, followlinks=True)->L:
    "Get all the files in `path` with optional `extensions`, optionally with `recurse`, only in `folders`, if specified."
    root, only_in = Path(path), L(folders)
    # Extensions are compared case-insensitively, so lower-case the requested ones up front.
    exts = None if extensions is None else {ext.lower() for ext in setify(extensions)}
    if not recurse:
        # Flat listing: only the regular files sitting directly in `root`.
        names = [entry.name for entry in os.scandir(root) if entry.is_file()]
        return _get_files(root, names, exts)
    return recursive_walk(root, only_in, exts, followlinks)

# Cell
def bin2int(binary_data: bytes, is_signed: bool = True) -> int:
    """Decode a little-endian byte string into an int.

    :param binary_data: raw bytes holding the integer, least-significant byte first
    :param is_signed: read the bytes as a two's-complement signed value (default) or as unsigned
    :return: decoded int
    """
    decoded = int.from_bytes(binary_data, "little", signed=is_signed)
    return decoded


def bin2str(binary_data: bytes) -> str:
    r"""Decode bytes into a str.

    :param binary_data: raw bytes holding the encoded text
    :return: decoded str

    The bytes are decoded as UTF-8 and decoder errors are ignored.
    Trailing '\x00' characters, used only to pad fields to a fixed size,
    are stripped from the end of the result.
    """
    text = binary_data.decode("utf-8", "ignore")
    return text.rstrip("\x00")


def bin2date(binary_data: bytes) -> L:
    """Unpack `binary_data`, minus its last byte, into an `L` of ints holding the date.

    The third value is increased by 2000 (presumably a two-digit year -- confirm
    against the binary format).
    """
    without_last = binary_data[:-1]
    fields = L(*without_last)
    fields[2] = fields[2] + 2000
    return fields


def bin2time(binary_data: bytes) -> L:
    """Unpack every byte of `binary_data` into an `L` of ints holding the time fields"""
    fields = tuple(binary_data)  # iterating bytes yields one int per byte
    return L(*fields)

# Cell
def optimize_floats(df: pd.DataFrame, exclude=None) -> pd.DataFrame:
    """Downcast the ``float64`` columns of `df` with ``pd.to_numeric(downcast="float")``.

    :param df: frame to optimize; its columns are reassigned in place
    :param exclude: column name(s) to leave untouched, normalized with `listify`
    :return: the same `df` object
    """
    # Normalize once: calling `listify` per column repeats the work and would
    # exhaust a one-shot iterable after the first column is checked.
    excluded = listify(exclude)
    floats = [
        c
        for c in df.select_dtypes(include=["float64"]).columns.tolist()
        if c not in excluded
    ]
    if floats:  # skip the assignment entirely when no column qualifies
        df[floats] = df[floats].apply(pd.to_numeric, downcast="float")
    return df


def optimize_ints(df: pd.DataFrame, exclude=None) -> pd.DataFrame:
    """Downcast the ``int64`` columns of `df` with ``pd.to_numeric(downcast="integer")``.

    :param df: frame to optimize; its columns are reassigned in place
    :param exclude: column name(s) to leave untouched, normalized with `listify`
    :return: the same `df` object
    """
    # Normalize once: calling `listify` per column repeats the work and would
    # exhaust a one-shot iterable after the first column is checked.
    excluded = listify(exclude)
    ints = [
        c
        for c in df.select_dtypes(include=["int64"]).columns.tolist()
        if c not in excluded
    ]
    if ints:  # skip the assignment entirely when no column qualifies
        df[ints] = df[ints].apply(pd.to_numeric, downcast="integer")
    return df


def optimize_objects(
    df: pd.DataFrame, datetime_features: List[str], exclude=None
) -> pd.DataFrame:
    """Convert the ``object`` columns of `df` to dates, categories or strings.

    Columns named in `datetime_features` are parsed with ``pd.to_datetime`` and
    reduced to their ``.dt.date`` part; this happens even if the column is also
    listed in `exclude`. Every other object column not in `exclude` becomes
    ``"category"`` when fewer than half of its values are distinct, and
    ``"string"`` otherwise.

    :param df: frame to optimize; its columns are reassigned in place
    :param datetime_features: names of the columns holding dates
    :param exclude: column name(s) to leave untouched, normalized with `listify`
    :return: the same `df` object
    """
    # Normalize once instead of once per column (also keeps a one-shot iterable usable).
    excluded = listify(exclude)
    for col in df.select_dtypes(include=["object"]).columns.tolist():
        if col in datetime_features:
            df[col] = pd.to_datetime(df[col]).dt.date
            continue
        if col in excluded:
            continue
        num_unique_values = df[col].nunique(dropna=False)
        num_total_values = len(df[col])
        # Integer form of `unique / total < 0.5`: same outcome for non-empty
        # columns, and no ZeroDivisionError when the frame has zero rows.
        if 2 * num_unique_values < num_total_values:
            dtype = "category"
        else:
            dtype = "string"
        df[col] = df[col].astype(dtype)
    return df


def df_optimize(
    df: pd.DataFrame, datetime_features: Optional[List[str]] = None, exclude=None
) -> pd.DataFrame:
    """Run the object, integer and float dtype optimizations on `df`, in that order.

    :param df: frame to optimize; the individual passes reassign its columns in place
    :param datetime_features: names of the object columns holding dates (none by default)
    :param exclude: column name(s) every pass must leave untouched
    :return: the same `df` object
    """
    # `None` replaces the former mutable `[]` default, which was shared across calls.
    if datetime_features is None:
        datetime_features = []
    # Normalize once so the three passes all see the same names, even when
    # `exclude` is a one-shot iterable.
    exclude = listify(exclude)
    df = optimize_objects(df, datetime_features, exclude)
    df = optimize_ints(df, exclude)
    return optimize_floats(df, exclude)

# Cell
def public_attrs(obj: Any) -> Tuple[str, ...]:
    """Receives an object and returns a tuple with the names of its public attributes (not starting with underscore _), excluding those listed in `EXCLUDE_ATTRS`"""
    # `dir` returns the names sorted, so the resulting tuple is in alphabetical order.
    return tuple(
        name
        for name in dir(obj)
        if not name.startswith("_") and name not in EXCLUDE_ATTRS
    )


def getattrs(
    obj: Any, attrs: Optional[Iterable[str]] = None, as_tuple: bool = False
) -> Union[Dict[str, Any], Tuple[Iterable[str], Tuple[Any, ...]]]:
    """Receives an object and returns the attributes listed in `attrs`; if `attrs` is None, its public attributes are used.

    :param obj: object to read the attributes from
    :param attrs: attribute names to fetch; defaults to `public_attrs(obj)`
    :param as_tuple: when True return the pair ``(attrs, values)`` instead of a dict
    :return: ``{name: value}`` dict, or ``(attrs, tuple_of_values)`` when `as_tuple` is True
    """
    if attrs is None:
        attrs = public_attrs(obj)
    if not as_tuple:
        return {name: getattr(obj, name) for name in attrs}
    if iter(attrs) is attrs:
        # A one-shot iterator would be consumed by the lookup below and handed
        # back empty; materialize it so the returned names stay usable.
        attrs = tuple(attrs)
    return attrs, tuple(getattr(obj, name) for name in attrs)

# Cell
def cached(f):
    """Turn method `f` into a property whose value is computed once and then reused.

    `functools.cached_property` is used when the running interpreter provides it;
    otherwise the result is a `property` wrapped around `functools.lru_cache`.

    :param f: method taking only `self`
    :return: descriptor to be placed in a class body
    :raises NotImplementedError: if `functools` offers neither caching helper
    """
    # Feature-detect instead of parsing the version string: slicing
    # `platform.python_version()[:3]` turned "3.10.x" into 3.1, so every
    # Python >= 3.10 was wrongly rejected as "older than 3.2".
    if hasattr(functools, "cached_property"):
        return functools.cached_property(f)
    if hasattr(functools, "lru_cache"):
        # NOTE: lru_cache keys on `self`, so instances stay referenced by the cache.
        return property(functools.lru_cache()(f))
    raise NotImplementedError(
        "There is no cache attribute implemented for python < 3.2"
    )