import re
import os
import hashlib
from functools import reduce
import operator
import collections
from pathlib import Path
import tarfile
import shutil

import gdown
from tqdm import tqdm
from best_download import download_file


def touch(x):
    """Create the file at path ``x`` if it is missing, else refresh its mtime."""
    target = Path(x)
    target.touch()


# A download location: `type` selects the fetch method used by download()
# ('direct', 'gdrive' or 'gcloud'), `url` is the address handed to it.
Source = collections.namedtuple('Source', ('type', 'url'))


def download(fname, checksum, sources, extract=False):
    """Fetch ``fname`` from the first working source and mark it as done.

    A marker file ``fname + '.done'`` records success; if it already exists
    the call returns immediately.

    Args:
        fname: Destination path (string) of the file to download.
        checksum: Expected SHA-256 hex digest of the downloaded file.
        sources: Iterable of ``Source`` tuples, tried in order.
        extract: If true, the downloaded file is unpacked with ``tar_xf``
            into its parent directory and then deleted.

    Raises:
        Exception: If every source fails. ``KeyboardInterrupt`` and
            ``SystemExit`` are never swallowed.
    """
    if os.path.exists(fname + '.done'): return

    print('Finding source for', fname)

    parentdir = Path(fname).parent
    os.makedirs(parentdir, exist_ok=True)

    for source in sources:
        try:
            # todo: implement torrent handling
            if source.type == 'direct':
                download_file(source.url, fname, checksum)
            elif source.type == 'gdrive':
                # Reuse a file left over from an earlier run when it verifies.
                already_valid = False
                if os.path.exists(fname):
                    print(fname, 'already exists.')
                    try:
                        sha256sum(fname, expected=checksum)
                        already_valid = True
                    except AssertionError:
                        print('{} exists but doesn\'t match checksum!'.format(fname))
                        rm_if_exists(fname)

                if not already_valid:
                    gdown.download(source.url, fname, quiet=False)
                    sha256sum(fname, expected=checksum)
            elif source.type == 'gcloud':
                raise NotImplementedError('gcloud download not implemented!')
            else:
                # Without this, an unrecognised type would fall through and
                # write the .done marker although nothing was downloaded.
                raise ValueError('Unknown source type: {!r}'.format(source.type))

            # Shared success path: a reused, already-valid file is extracted
            # too instead of being marked done while still packed.
            if extract:
                tar_xf(fname)
                rm_if_exists(fname)
            touch(fname + '.done')
            return
        except Exception:
            # `Exception` excludes KeyboardInterrupt and SystemExit, so those
            # still propagate to the caller.
            import traceback
            traceback.print_exc()
            print('Download method [{}] {} failed, trying next option'.format(source.type, source.url))
            # fname is not removed here, so whatever was written stays on
            # disk when the next source is tried.
            continue

    raise Exception('Failed to download {} from any source'.format(fname))


def tar_xf(x):
    """Extract the tar archive at path ``x`` into the directory containing it.

    The archive is opened with ``tarfile.open`` in its default mode and is
    closed again once extraction finishes or fails.
    """
    parentdir = Path(x).parent
    # NOTE(review): extractall() is called without an explicit `filter`, so
    # how member paths are sanitised depends on the Python version's default.
    # Only use this on archives from trusted sources.
    with tarfile.open(x) as tf:
        tf.extractall(parentdir)


class ExitCodeError(Exception):
    """Raised by sh() when the executed command reports a non-zero status."""


def stableorder(x):
    """Return the strings in ``x`` as a list sorted by their SHA-256 digest.

    Each element is UTF-8 encoded and hashed with ``sha256str``; the hex
    digests are the sort keys, so the order depends only on the contents.
    """
    def digest_of(elem):
        return sha256str(elem.encode('utf-8'))

    return sorted(x, key=digest_of)


def id(x):
    """Identity function: hand back ``x`` unchanged.

    Note that within this module the name replaces the builtin ``id``.
    """
    return x


def utf8len(s):
    """Return how many bytes ``s`` occupies once encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return len(encoded)


def sh(x):
    """Run the command string ``x`` through ``os.system``.

    Raises:
        ExitCodeError: If ``os.system`` returns a non-zero status.
    """
    status = os.system(x)
    if status != 0:
        raise ExitCodeError()


def fwrite(fname, content):
    """Write the string ``content`` to ``fname``, replacing any previous contents.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(fname, 'w') as out:
        out.write(content)


def fread(fname):
    """Return the entire contents of ``fname`` as a string.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(fname) as src:
        contents = src.read()
    return contents


def ls(x):
    """List the entries of directory ``x`` as ``x + '/' + name`` strings.

    The entry names are ordered by ``stableorder`` rather than in whatever
    order ``os.listdir`` happens to return them.
    """
    names = stableorder(os.listdir(x))
    return [x + '/' + name for name in names]


def cycle_documents(dataset):
    """Yield documents from ``dataset.documents()`` over and over, forever.

    ``dataset.documents()`` is called again at the start of every pass.
    Documents for which this module's ``id`` (the identity function) gives a
    falsy result -- i.e. falsy documents -- are skipped.
    """
    while True:
        for doc in dataset.documents():
            if id(doc):
                yield doc


def concat(xs):
    """Lazily chain the iterables in ``xs`` into one stream of their items."""
    for inner in xs:
        for item in inner:
            yield item


def flatMap(f, x):
    """Apply ``f`` to every element of ``x`` and flatten the results one level.

    Args:
        f: Callable returning an iterable (typically a list) per element.
        x: Iterable of inputs.

    Returns:
        A new list holding the items of ``f(x0)``, ``f(x1)``, ... in order.
    """
    # One pass over the results; repeatedly adding lists would copy the
    # growing accumulator on every step.
    return [item for chunk in map(f, x) for item in chunk]


def sha256str(s):
    """Return the SHA-256 hex digest of the bytes-like object ``s``."""
    return hashlib.sha256(s).hexdigest()


def sha256sum(filename, expected=None):
    """Compute the SHA-256 of a file, optionally checking it against ``expected``.

    The file is read unbuffered in 128 KiB chunks into a reusable buffer
    while a tqdm progress bar tracks the bytes hashed.

    Args:
        filename: Path of the file to hash.
        expected: Expected hex digest. If falsy, the digest is only printed.

    Raises:
        AssertionError: If ``expected`` is given and does not match.
    """
    h = hashlib.sha256()
    b = bytearray(128 * 1024)
    mv = memoryview(b)
    # Context manager so the bar is closed even if reading the file fails.
    with tqdm(total=os.path.getsize(filename), unit="byte", unit_scale=1) as progress:
        tqdm.write(f"Verifying checksum for {filename}")
        with open(filename, 'rb', buffering=0) as f:
            # readinto() returns 0 at end of file, which stops the iterator.
            for n in iter(lambda: f.readinto(mv), 0):
                h.update(mv[:n])
                progress.update(n)

    digest = h.hexdigest()
    if expected:
        # Raised explicitly instead of via an `assert` statement so the check
        # still runs under `python -O`. The type stays AssertionError because
        # download() catches exactly that to detect a mismatch.
        if digest != expected:
            raise AssertionError(
                f'Checksum mismatch for {filename}: expected {expected}, got {digest}')
        print('CHECKSUM OK', filename)
    else:
        print(filename, digest)


def rm_if_exists(path):
    """Delete ``path`` whether it is a file, a directory tree or a symlink.

    Does nothing when ``path`` does not exist. Real directories are removed
    recursively. Everything else -- regular files and symlinks, including
    dangling ones and links to directories -- is unlinked, so a symlink's
    target is never touched.
    """
    # Decide by type up front rather than relying on which exception
    # shutil.rmtree raises for non-directories.
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
    elif os.path.lexists(path):
        os.remove(path)


# https://stackoverflow.com/questions/12523586/python-format-size-application-converting-b-to-kb-mb-gb-tb/37423778
def humanbytes(B, units=None):
    """Return the given bytes as a human friendly KiB, MiB, GiB, or TiB string.

    Args:
        B: Number of bytes (anything ``float()`` accepts).
        units: Optional unit to force: ``"B"``, ``"KiB"``, ``"MiB"``,
            ``"GiB"`` or ``"TiB"``. When ``None`` the unit is picked from
            the magnitude of ``B``.

    Returns:
        The formatted string, or ``None`` if ``units`` is some other value.
    """
    B = float(B)
    KB = float(1024)
    MB = float(KB ** 2)  # 1,048,576
    GB = float(KB ** 3)  # 1,073,741,824
    TB = float(KB ** 4)  # 1,099,511,627,776

    if (B < KB and units is None) or units == "B":
        # Singular only for exactly one byte. The former `0 == B > 1` was a
        # chained comparison that is always False, so "Bytes" never appeared.
        return '{0} {1}'.format(B, 'Byte' if B == 1 else 'Bytes')
    elif (KB <= B < MB and units is None) or units == "KiB":
        return '{0:.2f} KiB'.format(B / KB)
    elif (MB <= B < GB and units is None) or units == "MiB":
        return '{0:.2f} MiB'.format(B / MB)
    elif (GB <= B < TB and units is None) or units == "GiB":
        return '{0:.2f} GiB'.format(B / GB)
    elif (TB <= B and units is None) or units == "TiB":
        return '{0:.2f} TiB'.format(B / TB)


def strip_markdown_colons(x):
    """Drop every newline-terminated line of ``x`` that starts with ``:::``."""
    colon_line = re.compile(r'^:::.*?\n', flags=re.MULTILINE)
    return colon_line.sub('', x)


def remove_advertisement(x):
    """Drop every line of ``x`` that consists solely of ``Advertisement``.

    Only lines ending in a newline are removed.
    """
    ad_line = re.compile(r'^Advertisement\n', flags=re.MULTILINE)
    return ad_line.sub('', x)


def compose(*fs):
    """Compose functions right to left: ``compose(f, g)(x)`` is ``f(g(x))``.

    With no functions given, the result returns its argument unchanged.
    """
    def _f(x):
        # Fold the value through the functions, last argument first.
        return reduce(lambda acc, fn: fn(acc), reversed(fs), x)

    return _f


def parse_size(sizestr):
    """Convert a size string such as ``'10K'`` or ``'1.5g'`` into bytes.

    The last character is the unit (case-insensitive ``B``, ``K``, ``M``,
    ``G`` or ``T``, in powers of 1024) and everything before it is parsed
    as a float.

    Returns:
        The size in bytes as a float, or ``None`` for an unrecognised unit.
    """
    unit = sizestr[-1]
    size = float(sizestr[:-1])

    multipliers = {
        'B': 1,
        'K': 1024,
        'M': 1024 * 1024,
        'G': 1024 * 1024 * 1024,
        'T': 1024 * 1024 * 1024 * 1024,
    }
    factor = multipliers.get(unit.upper())
    if factor is None:
        return None
    return size * factor


def dummy_meta(xs):
    """Lazily pair each item of ``xs`` with its own fresh, empty dict."""
    paired = ((item, dict()) for item in xs)
    return paired


def chunk_at_even_lines(it, chunksize):
    """Split each document of ``it`` into chunks made of whole line pairs.

    Lines (split on ``'\\n'``) are collected until their combined length,
    not counting the newlines, exceeds ``chunksize``. A chunk is only cut
    after a line with an odd 0-based index within the document. Whatever
    lines remain at the end of a document are yielded as a final chunk, so
    chunks never span two documents.
    """
    for doc in it:
        pending = []
        pending_len = 0
        for idx, line in enumerate(doc.split('\n')):
            pending.append(line)
            pending_len += len(line)

            if pending_len > chunksize and idx % 2 == 1:
                yield '\n'.join(pending)
                pending, pending_len = [], 0
        if pending:
            yield '\n'.join(pending)