#!python
import argparse
import glob
import sys
from deviaTE import deviaTE_IO as IO


# Command-line interface. Options without an explicit type stay strings
# (the argparse default) and are passed on unchanged by the code below.
parser = argparse.ArgumentParser()
# args for prepping
parser.add_argument('--library', help='path to alternative reference library')
parser.add_argument('--read_type', default='phred+33', choices=['phred+64', 'phred+33'], help='quality encoding can either be phred+33 for sanger and illumina 1.8+, or phred+64 for illumina 1.3 - 1.8')
parser.add_argument('--min_read_len', default='1')
parser.add_argument('--quality_threshold', default='15')
parser.add_argument('--min_alignment_len', default='1')
parser.add_argument('--threads', default='1')
# args for analysis
# --families is split on ',' later in this script, one analysis per entry
parser.add_argument('--families', required=True, help='comma-separated names of families to analyze')
parser.add_argument('--annotation', help='alternative annotation in gff-format')
parser.add_argument('--no_freq_corr', action='store_true', help='deactivate frequency correction for internal deletions')
parser.add_argument('--hq_threshold', default='20')
# normalization options - mutually exclusive, both optional
norm_group = parser.add_mutually_exclusive_group()
norm_group.add_argument('--rpm', action='store_true', help='normalize all abundances by reads per million')
norm_group.add_argument('--single_copy_genes', help='comma-separated names of single-copy genes in reference')
# args for plotting
parser.add_argument('--free_yaxis', action='store_true', help='flag to free the y-axis; for highly differential coverage')
# input arguments - mutually exclusive, exactly one is required
inp_group = parser.add_mutually_exclusive_group(required=True)
inp_group.add_argument('--input_fq', help='fastq file for prepping and analysis')
inp_group.add_argument('--input_bam', help='alignment file to be analyzed')
# The two *_dir options are flags, not paths: the script globs the current
# working directory for '*.fastq' / '*.bam' when they are set.
inp_group.add_argument('--input_fq_dir', action='store_true', help='flag: use all *.fastq files in the current working directory as input')
inp_group.add_argument('--input_bam_dir', action='store_true', help='flag: use all *.bam files in the current working directory as input')
args = parser.parse_args()


def process_fq(file, families):
    """Prepare one fastq file, then analyze and plot each requested family.

    Args:
        file: path of the fastq file; also used as the sample id and as the
            prefix of every output name.
        families: iterable of family names; one analysis and one plot are
            requested per entry.

    For a family ``fam`` the table name is ``<file>.<fam>`` and the plot name
    is ``<file>.<fam>.pdf``. All settings are read from the module-level
    ``args`` namespace; the value of ``--no_freq_corr`` is passed on as
    ``corr``.
    """
    reads = IO.fq_file(inp=file)
    reads.prep(
        lib=args.library,
        qual_tr=args.quality_threshold,
        min_rl=args.min_read_len,
        min_al=args.min_alignment_len,
        read_ty=args.read_type,
        thr=args.threads,
    )

    # Options that are identical for every per-family analysis call.
    shared_opts = dict(
        lib=args.library,
        anno=args.annotation,
        corr=args.no_freq_corr,
        hqt=args.hq_threshold,
        scgs=args.single_copy_genes,
        rpm=args.rpm,
    )

    for fam in families:
        table_name = '.'.join((file, fam))
        plot_name = '.'.join((table_name, 'pdf'))

        # The alignment is looked up next to the prepped fastq object's path.
        alignment = IO.bam_file(inp=reads.path + '.fused.sort.bam', orig_name=file)
        alignment.analyze(fam=fam, sid=file, out=table_name, **shared_opts)

        result = IO.analysis_table(inp=table_name)
        result.plot(out=plot_name, free_y=args.free_yaxis)


def process_bam(bam, families):
    """Fuse one alignment file, then analyze and plot each requested family.

    Args:
        bam: path of the alignment file; also used as the sample id and as
            the prefix of every output name.
        families: iterable of family names; one analysis and one plot are
            requested per entry.

    For a family ``fam`` the table name is ``<bam>.<fam>`` and the plot name
    is ``<bam>.<fam>.pdf``. The analysis input is ``<bam>.fused.sort.bam``.
    All settings are read from the module-level ``args`` namespace; the value
    of ``--no_freq_corr`` is passed on as ``corr``.
    """
    IO.bam_file(inp=bam).fuse()

    # Options that are identical for every per-family analysis call.
    shared_opts = dict(
        lib=args.library,
        anno=args.annotation,
        corr=args.no_freq_corr,
        hqt=args.hq_threshold,
        scgs=args.single_copy_genes,
        rpm=args.rpm,
    )

    for fam in families:
        table_name = '.'.join((bam, fam))
        plot_name = '.'.join((table_name, 'pdf'))

        fused = IO.bam_file(inp=bam + '.fused.sort.bam')
        fused.analyze(fam=fam, sid=bam, out=table_name, **shared_opts)

        result = IO.analysis_table(inp=table_name)
        result.plot(out=plot_name, free_y=args.free_yaxis)


# Family names come in as one comma-separated string. Strip surrounding
# whitespace and drop empty entries (e.g. from "a, b" or a trailing comma) so
# that no blank family name reaches the analysis.
fams = [name.strip() for name in args.families.split(',') if name.strip()]
if not fams:
    sys.exit('No families provided')

if args.input_fq:
    print('Fastq provided: preparing for alignment')
    process_fq(file=args.input_fq, families=fams)

elif args.input_bam:
    print('Alignment file provided: skipping preparation')
    process_bam(bam=args.input_bam, families=fams)

elif args.input_fq_dir:
    print('Directory of fastq files provided: preparing for alignment')
    # glob matches relative to the current working directory and returns them
    # in arbitrary order; sort so repeated runs process files in the same order
    fq_list = sorted(glob.glob('*.fastq'))
    if not fq_list:
        sys.stderr.write('Warning: no *.fastq files found in current directory\n')

    for f in fq_list:
        print(f)
        process_fq(file=f, families=fams)

elif args.input_bam_dir:
    print('Directory of bam-files provided: skipping preparation')
    # same as above: current working directory, sorted for reproducibility
    bam_list = sorted(glob.glob('*.bam'))
    if not bam_list:
        sys.stderr.write('Warning: no *.bam files found in current directory\n')

    for b in bam_list:
        print(b)
        process_bam(bam=b, families=fams)

else:
    # Reached only when the chosen input option carries an empty value.
    sys.exit('No input provided')

print('done')

