#!/usr/bin/env python
"""
Intervene: a tool for intersection and visualization of multiple genomic region sets
Created on January 10, 2017
@author: <Aziz Khan>aziz.khan@ncmm.uio.no
For more details please check the documentation at http://intervene.readthedocs.io'

"""

import sys
import os
from matplotlib import use as mpl_use
mpl_use('Agg')
import matplotlib.pyplot as plt
#import pybedtools
from intervene.modules.pairwise import pairwise
from intervene.modules.upset import upset
from intervene.modules.venn import list_venn, genomic_venn
from intervene import helpers
from argparse import ArgumentParser, RawTextHelpFormatter
import intervene

#main function
def main():
    
    desc = """
    Intervene: a tool for intersection and visualization of multiple genomic region and gene sets.
    For more details check documentation: http://intervene.readthedocs.io
    """
    #print desc
    
    parser = ArgumentParser(usage='intervene <subcommand> [options]', description=desc, formatter_class=RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command',help='List of subcommands')

    #venn module argument parser
    venn_parser = subparsers.add_parser('venn', usage='intervene venn [options]', formatter_class=RawTextHelpFormatter,
        description='Create Venn diagram upto 6-way after intersection of genomic regions in (BED/GTF/GFF) format or list sets.', 
        help='Venn diagram of intersection of genomic regions or list sets (upto 6-way).')
    
    venn_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format or lists of genes/SNPs IDs.\n'
        'For files in a directory use *.<extension>. e.g. *.bed\n\n')

    venn_parser.add_argument('--type', dest='type', default='genomic', choices=('genomic','list'),
                  help='Type of input sets. Genomic regions or lists of genes/SNPs. '
                        'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--names', dest='labels',
                  help='Comma-separated list of names as labels for input files.\nIf it is not set file names will be used as labels.\n'
                       'For example: --names=A,B,C,D,E,F\n\n')
   
    venn_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. '
                       'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--colors', dest='colors',
                  help='Comma-separated list of matplotlib-valid colors. '
                       'E.g., --colors=r,b,k\n\n')
    venn_parser.add_argument('-o', '--output', dest='output', 
                  help='Output folder path where results will be stored. '
                       'Default is current working directory.\n\n')
    venn_parser.add_argument('--project', type=str, default="Intervene",
                  help='Project name for the output. '
                       'Default is: "%(default)s"\n\n')  
    venn_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'), 
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    venn_parser.add_argument('--figsize',  nargs=2, type=int, default=(12,12),
                   help='Figure size as width and height.'
                       'e.g. --figsize 12 12.\n\n') 
    venn_parser.add_argument('--fontsize', dest='fontsize', type=int, default=14,
                   help='Font size for the plot labels.'
                       'Default is: "%(default)s"\n\n')  
      
    venn_parser.add_argument('--dpi', type=int, default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s"\n\n')  
    venn_parser.add_argument('--fill', choices=('number','percentage'),default='number',
                  help='Report number or  percentage of overlaps (Only if --type=list). '
                       'Default is "%(default)s"\n\n')
    venn_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')

    #upset module argument parser
    upset_parser = subparsers.add_parser('upset', usage='intervene upset [options]', formatter_class=RawTextHelpFormatter,
        description='Create UpSet diagram after intersection of genomic regions in (BED/GTF/GFF) or list sets.', 
        help='UpSet diagram of intersection of genomic regions or list sets.')
    
    upset_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format or lists of genes/SNPs IDs.\n '
        'For files in a directory use *.<extension>. e.g. *.bed\n\n')
    '''
    upset_parser.add_argument('-c', dest='compare', nargs=1,
        help='Input genomic region file (BED/GTF/GFF) format to compare with input files.')
    '''

    upset_parser.add_argument('--type', dest='type', choices=('genomic','list'),default='genomic',
                  help='Type of input sets. Genomic regions or lists of genes/SNPs sets. '
                       'Default is "%(default)s".\n\n') 
   
    upset_parser.add_argument('--names', dest='labels',
                  help='Comma-separated list of names as labels for input files.\nIf it is not set file names will be used as labels.\n'
                       'For example: --names=A,B,C,D,E,F\n\n')
    
    upset_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. '
                       'Default is "%(default)s".\n\n')
   
    upset_parser.add_argument('-o', '--output', dest='output', 
                  help='Output folder path where results will be stored. '
                       'Default is current working directory.\n\n')
    upset_parser.add_argument('--project', type=str, default="Intervene",
                  help='Project name for the output. '
                       'Default is: "%(default)s"\n\n')  
 
    upset_parser.add_argument('--order', dest="order",choices=("freq", "degree"), 
                   default='freq',help='The order of intersections of sets. '
                       'e.g. --order degree. Default is "%(default)s".\n\n')
    
    upset_parser.add_argument('--ninter', type=int, default=30, 
                  help='Number of top intersections to show in plot. '
                       'Default is "%(default)s".\n\n')

    upset_parser.add_argument('--showzero', action='store_true', default=False,
                    help='Show empty intersection combinations. '
                       'Default is "%(default)s".\n\n')

    upset_parser.add_argument('--showsize', action='store_false', default=True,
                    help='Show intersection sizes above bars. '
                       'Default is "%(default)s".\n\n')
   
    upset_parser.add_argument('--mbcolor', type=str, default='#ea5d4e', 
                  help='Color of the main bar plot. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--sbcolor', type=str, default='#317eab', 
                  help='Color of set size bar plot. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--mblabel', type=str, default='No. of Intersections', 
                  help='The y-axis label of the intersection size bars. '
                       'Default is: "%(default)s".\n\n')
    upset_parser.add_argument('--sxlabel', type=str, default='Set size', 
                  help='The x-axis label of the set size bars. '
                       'Default is: "%(default)s".\n\n')
           
    upset_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'), 
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    upset_parser.add_argument('--figsize', nargs=2, type=int, default=(14,8),
                   help='Figure size for the output plot (width,height). '
                        'e.g.  --figsize 14 8\n\n')
    upset_parser.add_argument('--dpi', type=int, dest='dpi', default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s".\n\n')

    upset_parser.add_argument('--scriptonly', action='store_true', default=False,
                    help='Set to generate Rscript only, if R/UpSetR package is not installed. '
                       'Default is "%(default)s".\n\n')

    upset_parser.add_argument('--showshiny', action='store_true', default=False,
                    help='Print the combinations of intersections to input to Shiny App. '
                       'Default is "%(default)s".\n\n')
  
    upset_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')


    #pairwise module argument parser
    pairwise_parser = subparsers.add_parser('pairwise', usage='intervene pairwise [options]', formatter_class=RawTextHelpFormatter,
        description='Pairwise intersection and heatmap of N genomic region sets in (BED/GTF/GFF) format.',
        help='Pairwise intersection and heatmap of N genomic region sets in <BED/GTF/GFF> format.')
    
    pairwise_parser.add_argument('-i', dest='input', nargs="*",
        help='Input genomic regions in (BED/GTF/GFF) format. \n'
        'For files in a directory use *.<extension>. e.g. *.bed\n')
    
    pairwise_parser.add_argument('--type', dest='type', choices=('genomic','list'),default='genomic',
                  help='Type of input sets. Genomic regions or lists of genes/SNPs sets. '
                       'Default is "%(default)s".\n\n') 
   
    pairwise_parser.add_argument('--compute', choices=('count','frac','jaccard','fisher','reldist'),
        default='frac', help='Compute count/fraction of overlaps or other statistical relationships. \n'
                       'count: calculates the number of overlaps. \n'
                       'frac: calculates the fraction of overlap. (Default)\n'
                       'jaccard: calculate the Jaccard statistic. \n'
                       'reldist: calculate the distribution of relative distances.\n'
                       'fisher: calculate Fisher`s statistic. \n'
                       'Note: For jaccard and reldist regions should be pre-shorted or set --sort.\n\n')

    pairwise_parser.add_argument('--htype', dest="htype",choices=("tribar","color", "pie","circle", "square", "ellipse", "number", "shade"), 
                   default='tribar',help='Heatmap plot type. '
                           'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--triangle', dest="triangle",choices=("lower","upper"), 
                   default='lower',help='Show lower/upper triangle of the matrix as heatmap. '
                           'Default is "%(default)s".\n\n')
    
    pairwise_parser.add_argument('--diagonal', action='store_true', default=False,
                  help='Show the diagonal values in the heatmap. \n'
                       'Default is "%(default)s".\n\n')

    pairwise_parser.add_argument('--names', dest='labels',
                  help='Comma-separated list of names as labels for input files.\nIf it is not set file names will be used as labels.\n'
                       'For example: --names=A,B,C,D,E,F\n\n')

    pairwise_parser.add_argument('--filenames', action='store_true', default=False,
                  help='Use file names as labels instead. \n'
                       'Default is "%(default)s".\n\n')

    pairwise_parser.add_argument('--sort', action='store_true', default=False,
                  help='Set this only if your files are not sorted. \n'
                       'Default is "%(default)s".\n\n')

    '''
    pairwise_parser.add_argument('--enrichment', action='store_true',
                    help='Run randomizations (default 1000, specify otherwise '
                    'with --iterations) on each pairwise comparison and '
                    'compute the enrichment score as '
                    '(actual intersection count + 1) / (median randomized + 1)')
    pairwise_parser.add_argument('--iterations', default=1000, type=int,
                    help='Number of randomizations to perform for enrichement '
                    'scores')
    pairwise_parser.add_argument('--processes', default=1, type=int,
                    help='Number of processors to perform for enrichement '
                    'scores')
    '''
    pairwise_parser.add_argument('--genome', help='Required argument if --type=fisher.\n'
                    'Needs to be a string assembly name like "mm10" or "hg38"\n\n')
    pairwise_parser.add_argument('-o', '--output', dest='output', 
                    help='Output folder path where results will be stored. \n'
                       'Default is current working directory.\n\n')
    pairwise_parser.add_argument('--project', type=str, default="Intervene",
                  help='Project name for the output. '
                       'Default is: "%(default)s"\n\n')  
    pairwise_parser.add_argument('--barlabel', dest="blabel", default='Set size',
                    help='x-axis label of boxplot if --htype=tribar. '
                       'Default is "%(default)s"\n\n')
    pairwise_parser.add_argument('--barcolor', dest="barcolor", default='#53cfff',
                    help='Boxplot color (hex vlaue or name, e.g. blue). '
                       'Default is "%(default)s".\n\n') 
    pairwise_parser.add_argument('--fontsize', default=8,
                   help='Label font size. '
                       'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--title', type=str, default="Pairwise intersection",
                   help='Heatmap main title. '
                       'Default is "%(default)s".\n\n')
    
    pairwise_parser.add_argument('--space', type=float, default=1.3,
                   help='White space between barplt and heatmap, if --htype=tribar. '
                       'Default is "%(default)s".\n\n')
    pairwise_parser.add_argument('--figtype', dest="figtype",choices=('pdf','svg','ps','tiff','png'),  
                   default='pdf',help='Figure type for the plot. '
                       'e.g. --figtype svg. Default is "%(default)s"\n\n')    
    pairwise_parser.add_argument('--figsize', nargs=2, type=int,
                   help='Figure size for the output plot (width,height). '
                        'e.g.  --figsize 8 8\n\n')          
    pairwise_parser.add_argument('--dpi', type=int, default=300,
                  help='Dots-per-inch (DPI) for the output. '
                       'Default is: "%(default)s".\n\n')
    pairwise_parser.add_argument('--scriptonly', action='store_true', default=False,
                    help='Set to generate Rscript only, if R/Corrplot package are not installed. '
                       'Default is "%(default)s".\n\n')
  
    pairwise_parser.add_argument('--test', action='store_true', help='This will run the program on test data.\n\n')
    
    
    parser.add_argument('-v','--version', dest='version', action='version', version='%(prog)s version '+intervene.__version__)
   
    options = parser.parse_args()

    if not options.command:
        parser.print_help()
        sys.stderr.write('Missing required arguments. ')
        sys.exit(1)

    # if options.cite:
    #     if options.cite == 'bib':
    #       sys.stderr.write('Missing required arguments. ')
    #       sys.exit(1)
    #     else:
    #       sys.stderr.write('Khan A. and Mathelier A., Intervene: a tool for intersection and visualization of multiple gene or genomic region sets, Submitted, 2017. ')
    #       sys.exit(0)

    #checke if there are at least two input bed/list files
    reqd_args = ['input']
    if not options.test:
        for ra in reqd_args:
            if not getattr(options,ra):
                if options.command == 'venn':
                    venn_parser.print_help()
                else:
                    upset_parser.print_help()
                sys.stderr.write('Missing required arg "%s"\n' % ra)
                sys.exit(1)
        if len(options.input) < 2:
          sys.stderr.write('Input should have at least two files.\n')
          sys.exit(1)
        if len(options.input) > 6 and options.command =='venn':
          sys.stderr.write('Input should not have more than six files. You can use upset or pairwise module instead.\nType --help or read documentation for more details - http://intervene.readthedocs.io\n')
          sys.exit(1)
    
    if options.command == 'upset':
        plot_type = 'upset'
    else:
        plot_type = 'venn'

    #making the out folder if it doesn't exist
    if options.output:
        output_dir = helpers.create_dir(options.output)
    else:
        options.output = helpers.create_dir(os.getcwd()+"/Intervene_results")

    #check if user want to user test data to plot and get get test files.
    if options.test:
        print("\nRunning Intervene with test data.\n")
        options.input = helpers.get_test_data(options.command)
        options.filenames = True
        options.htype = "tribar"
        if options.command == 'venn':
          options.figsize = (7, 7)

    #get the label names
    if options.filenames:
          label_names = helpers.get_filenames(options.input) 
    elif options.labels:
      label_names = options.labels.split(',')
    else:
      label_names = helpers.get_filenames(options.input) 
            
    #check if the number of label names are equal to number of input files
    if len(label_names) != len(options.input):
      err_message = '\nInput error: The number label names('+str(len(label_names))+') are not equal to the number of input files('+str(len(options.input))+').\n'
      print(err_message)
      sys.exit(1)

    #================== Run Pairwise module ================#
    if options.command =='pairwise':
        if options.type == 'fisher':
          if not options.genome:
            sys.stderr.write('Fisher test requires genome name. Please set the --genome argument.\nType --help or read documentation for more details - http://intervene.readthedocs.io\n')
            sys.exit(1)
        
        #If input type is list then set the compute to count. 
        if options.type == 'list':
          options.compute == 'count'

        print("\nPerforming a pairwise intersection analysis. Please wait...\n")
        pairwise.pairwise_intersection(options)
        sys.exit(0)
    
    #================== Run Upset module ================#
    if options.command == 'upset':
        #create an RScript
        print("\nRunning UpSet module. Please wait...\n")
        
        #if options.compare:
        #  upset.one_vs_rest_intersection(options.input,options.compare,options.output)
        # sys.exit(0)
        #options.filenames = True
        
        if options.type == 'list':
          labels = upset.list_upset(options.input)
        else:
          labels = upset.genomic_upset(options.input)
        upset.create_r_script(labels, label_names, options)
        sys.exit(0)

    
    #================== Run Venn module ================#
    #create output file name
    output_name =  options.output+'/'+str(options.project)+'_'+options.command+'.'+options.figtype

    #check file name already exists and ask if user want to overwrite
    if os.path.exists(output_name):
      print("The file name "+str(output_name)+" already exist.\n")
      answer = str(raw_input("Do you want to overwrite? [yes/no]: "))
      if answer != 'yes':
        print("You can run Intervene by setting a different --output path.\nThanks for using Intervene!")
        sys.exit(1)
    
    #Cheeck colors for Venn
    if options.command == 'venn':
      if options.colors:
        options.colors = options.colors.split(',')
        options.colors = [helpers.get_colors(options.colors)[i] for i in range(helpers.venn_order(options.input))]
      else:
        options.colors = [helpers.default_colors()[i] for i in range(helpers.venn_order(options.input))]
        
    #Cheeck figure size
    if options.figsize:
      options.figsize = tuple(options.figsize)

    if options.command == 'venn' or options.command == 'upset':
        if not options.type:
            if options.command == 'venn':
                venn_parser.print_help()
            else:
                upset_parser.print_help()
            sys.stderr.write('Missing required arg --type {genomic,list}')
            sys.exit(0)
    
    #print(options.filenames)
    #sys.exit(1)
    if helpers.venn_order(options.input) == 2:
        
        print('\nGenerating a 2-way "%s" diagram. Please wait...\n' %options.command)
        
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.a, 'r').read().splitlines()
          b = open(options.b, 'r').read().splitlines()
          labels = list_venn.get_labels([a, b], fill=[str(options.fill)])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)
          else:
            fig, ax = list_venn.venn2(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
        
        #else input considered as bed file
        else:
          labels = upset.genomic_upset(options.input)
          fig, ax = list_venn.venn2(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
          
          #fig, ax = genomic_venn.venn2(input_files=options.input,
          #      options=options, names=label_names, plot_type=plot_type)     
    elif helpers.venn_order(options.input) == 3:
        
        print('\nGenerating a 3-way "%s" diagram. Please wait...\n' %options.command)
        
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').read().splitlines()
          b = open(options.input[1], 'r').read().splitlines()
          c = open(options.input[2], 'r').read().splitlines()
          labels = list_venn.get_labels([a, b, c], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn3(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)          
        #else input considered as bed file
        else:
          labels = upset.genomic_upset(options.input)
          fig, ax = list_venn.venn3(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
          
          #fig, ax = genomic_venn.venn3(input_files=options.input, options=options, names=label_names, plot_type=plot_type)

    elif helpers.venn_order(options.input) == 4:
        
        print('\nGenerating a 4-way "%s" diagram. Please wait...\n' %options.command)
        
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').read().splitlines()
          b = open(options.input[1], 'r').read().splitlines()
          c = open(options.input[2], 'r').read().splitlines()
          d = open(options.input[3], 'r').read().splitlines()
          labels = list_venn.get_labels([a, b, c, d], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn4(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
            
        #else input considered as bed file
        else:
          labels = upset.genomic_upset(options.input)
          fig, ax = list_venn.venn4(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
          #fig, ax = genomic_venn.venn4(input_files=options.input, options=options, names=label_names, plot_type=plot_type)
            
                   
    elif helpers.venn_order(options.input) == 5:
        
        print('\nGenerating a 5-way "%s" diagram. Please wait...\n' %options.command)
        
        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').read().splitlines()
          b = open(options.input[1], 'r').read().splitlines()
          c = open(options.input[2], 'r').read().splitlines()
          d = open(options.input[3], 'r').read().splitlines()
          e = open(options.input[4], 'r').read().splitlines()

          labels = list_venn.get_labels([a, b, c, d, e], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)

          else:
            fig, ax = list_venn.venn5(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
        #else input considered as genomic regions file
        else:
            labels = upset.genomic_upset(options.input)
            fig, ax = list_venn.venn5(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
            #fig, ax = genomic_venn.venn5(input_files=options.input, options=options, names=label_names, plot_type=plot_type)
        
    elif helpers.venn_order(options.input) == 6:
        
        print('\nGenerating a 6-way "%s" diagram. Please wait...\n' %options.command)

        #If the input is a gene list
        if options.type == 'list':
          a = open(options.input[0], 'r').read().splitlines()
          b = open(options.input[1], 'r').read().splitlines()
          c = open(options.input[2], 'r').read().splitlines()
          d = open(options.input[3], 'r').read().splitlines()
          e = open(options.input[4], 'r').read().splitlines()
          f = open(options.input[5], 'r').read().splitlines()

          labels = list_venn.get_labels([a, b, c, d, e, f], fill=['number'])
          if options.command == 'upset':
            #create an RScript
            upset.create_r_script(labels, label_names, options)
          else:
            fig, ax = list_venn.venn6(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
                
        #else input considered as bed file
        else:
            labels = upset.genomic_upset(options.input)
            fig, ax = list_venn.venn6(labels, names=label_names, dpi=options.dpi, colors=options.colors,  figsize=options.figsize, fontsize=options.fontsize)
            #fig, ax = genomic_venn.venn6(input_files=options.input,options=options, names=label_names, plot_type=plot_type)
   
    else:
        parser.print_help()
        sys.stderr.write('Please make sure your arguments are correct.\n')
        sys.exit(1)

    fig.savefig(output_name, dpi=options.dpi, bbox_inches='tight')
    plt.close()
    print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using Intervene!\n')

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("I got interrupted. :-( Bye!\n")
        sys.exit(0)
