#!/usr/bin/env python
# -*- coding:UTF-8 -*-
__author__ = 'Zhengtao Xiao'

import argparse
import os

def parsing_transcript():
	"""Parse and validate the command-line options of the transcript preparation step.

	Options are read from ``sys.argv``. The output directory is created when it
	does not exist yet; the GTF and genome fasta paths must already exist.

	Returns:
		argparse.Namespace: parsed options (``gtfFile``, ``genomeFasta``, ``out_dir``).

	Raises:
		OSError: if the output directory cannot be created.
		ValueError: if the GTF file or the genome fasta file is not found.
	"""
	parser = argparse.ArgumentParser(
		description="This script is designed for preparing transcripts annotation files."
	)
	parser.add_argument("-g","--gtf",dest="gtfFile",required=True,type=str,
	                    help='Default, suitable for GENCODE and ENSEMBL GTF file, \
	                          please refer: https://en.wikipedia.org/wiki/GENCODE')
	parser.add_argument("-f","--fasta",dest="genomeFasta",required=True,type=str,
	                    help="The genome sequences file in fasta format.")
	parser.add_argument("-o","--out_dir",required=True,type=str,dest="out_dir",help="annotation directory name.")
	args = parser.parse_args()

	# os.mkdir raises OSError on failure; it is left to propagate unchanged.
	if not os.path.exists(args.out_dir):
		os.mkdir(args.out_dir)

	if not os.path.exists(args.gtfFile):
		raise ValueError("Error, gtf file not found:%s.\n" % args.gtfFile)

	if not os.path.exists(args.genomeFasta):
		# The attribute is ``genomeFasta`` (dest of -f); a misspelled name here
		# would turn the intended ValueError into an AttributeError.
		raise ValueError("Error, genomic fasta not found: %s\n" % args.genomeFasta)

	return args
def parsing_metaplots():
	"""Parse and validate the command-line options of the metaplots step.

	Options are read from ``sys.argv``. After validation, ``stranded`` is
	converted from the "yes"/"reverse" choice into a boolean (True for "yes").

	Returns:
		argparse.Namespace: parsed options (``annot_dir``, ``rpf_mapping_file``,
		``stranded``, ``minLength``, ``maxLength``, ``pvalue1_cutoff``,
		``pvalue2_cutoff``, ``frame0_percent``, ``outname``).

	Raises:
		ValueError: if the annotation directory or the mapping file does not
			exist, if the minimum length exceeds the maximum length, or if
			either length is not positive.
	"""
	parser = argparse.ArgumentParser(
		description="""
		This script create aggregate plots of distances from the 5'end of reads to start or stop codons,
		which help determine the length range of the RPF reads that are most likely originated from the
		translating ribosomes and identify the P-site locations for each reads lengths.
		"""
	)
	parser.add_argument("-a","--annot_dir",dest="annot_dir",required=True,type=str,
	                    help="transcripts annotation directory, generated by prepare_transcripts.")
	parser.add_argument("-r","--rpf_mapping_file",dest="rpf_mapping_file",required=True,type=str,
	                    help="ribo-seq BAM/SAM file aligned to the transcriptome.")
	parser.add_argument("-s","--stranded",dest="stranded",required=False,type=str,choices=["yes","reverse"],
	                    default="yes",help="whether the data is strand-specific, \
	                    reverse means reversed strand interpretation.(default: yes)")
	parser.add_argument("-m","--minimum-length",dest="minLength",required=False,type=int,default=24,
	                    help="minimum length of read to output, default 24")
	parser.add_argument("-M","--maximum-length",dest="maxLength",required=False,type=int,default=35,
	                    help="maximum length of read to output, default 35")
	# The two cutoffs need distinct help texts; previously both read "frame0 > frame2".
	parser.add_argument("-pv1","--pvalue1_cutoff",dest="pvalue1_cutoff",required=False,type=float,default=0.001,
	                    help="pvalue cutoff of frame0 > frame1 for automatically predicting P-site location, default 0.001")
	parser.add_argument("-pv2","--pvalue2_cutoff",dest="pvalue2_cutoff",required=False,type=float,default=0.001,
	                    help="pvalue cutoff of frame0 > frame2 for automatically predicting P-site location, default 0.001")
	parser.add_argument("-f0_percent","--frame0_percent",dest="frame0_percent",required=False,type=float,default=0.6,
	                    help="proportion threshold of the number of reads in frame0, defined by f0/(f0+f1+f2), default 0.6")
	parser.add_argument("-o","--outname",dest="outname",required=False,type=str,default="metaplots",
	                    help="name of output pdf file(default: metaplots)")
	args = parser.parse_args()

	if not os.path.exists(args.annot_dir):
		raise ValueError("Error, the transcript annotation directory not found: {} \n \
		                  pleas run prepare_transcripts.py first.".format(args.annot_dir))
	if args.minLength > args.maxLength:
		raise ValueError("minimum length must be <= maximum length (currently %d and %d, respectively)" % (args.minLength, args.maxLength))
	if args.minLength <= 0 or args.maxLength <= 0:
		raise ValueError("minimum length or maximum length must be larger than 0.")
	if not os.path.exists(args.rpf_mapping_file):
		raise ValueError("Error, the rpf mapping file not found: %s\n" % args.rpf_mapping_file)

	# Downstream code receives a boolean: True for "yes", False for "reverse".
	args.stranded = args.stranded == "yes"
	# pvalue1_cutoff, pvalue2_cutoff and frame0_percent are already floats (type=float).

	return args

def parsing_ribo():
	"""Parse the command-line options of the main ORF detection program.

	Options are read from ``sys.argv``. The only validation performed here is
	that the transcript annotation directory exists.

	Returns:
		argparse.Namespace: the parsed options.

	Raises:
		ValueError: if the annotation directory does not exist.
	"""
	cli = argparse.ArgumentParser(
		description="The main function designed for detecting ORF using ribosome-profiling data."
	)

	# Required inputs.
	cli.add_argument(
		"-a", "--annot_dir", dest="annot_dir", type=str, required=True,
		help="transcripts annotation directory, generated by prepare_transcripts.",
	)
	cli.add_argument(
		"-c", "--config_file", dest="config_file", type=str, required=True,
		help="list bam file and P-sites information in this file, \
	                    please refer to the example file in data folder.",
	)
	# NOTE: a "-n/--num-threads" option (dest "threads_num") is currently disabled.

	# ORF calling behaviour.
	cli.add_argument(
		"-l", "--longest-orf", dest="longest_orf", type=str, required=False,
		choices=["yes","no"], default="yes",
		help="Default: yes, the region from most distal AUG to stop was defined as an ORF. \
	                          If set to no , the position of start codon will be automatically determined by program.",
	)
	cli.add_argument(
		"-p", "--pval-cutoff", dest="pval_cutoff", type=float, required=False,
		default=0.05,
		help="P-value cutoff for ORF filtering, default 0.05",
	)
	cli.add_argument(
		"-s", "--start_codon", dest="start_codon", type=str,
		default="ATG",
		help="The canonical start codon. default: ATG",
	)
	cli.add_argument(
		"-A", "--alt_start_codons", dest="alternative_start_codons", type=str,
		default="",
		help="The alternative start codon, such as CTG,GTG, default: None. Multiple codons should be separated by comma.",
	)
	cli.add_argument(
		"-S", "--stop_codon", dest="stop_codon", type=str,
		default="TAA,TAG,TGA",
		help="Stop codon, default: TAA,TAG,TGA",
	)
	cli.add_argument(
		"-t", "--transl_table", dest="transl_table", type=int,
		default=1,
		help="ORF translation table(Default: 1). Assign the correct genetic code based on your organism, \
	                    [please refer: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi]",
	)
	# The string default is converted by argparse through ``type=int``.
	cli.add_argument(
		"-m", "--min-AA-length", dest="min_AA_length", type=int, required=False,
		default="20",
		help="The minimal length of predicted peptides,default 20",
	)

	# Output control.
	cli.add_argument(
		"-o", "--output-name", dest="output_name", type=str, required=False,
		default="final_result",
		help="output file name, default: final_result",
	)
	cli.add_argument(
		"-g", "--output-gtf", dest="output_gtf", action='store_true', required=False,
		default=False,
		help="output the gtf file of predicted ORFs",
	)
	cli.add_argument(
		"-b", "--output-bed", dest="output_bed", action='store_true', required=False,
		default=False,
		help="output the bed file of predicted ORFs",
	)

	options = cli.parse_args()

	if os.path.exists(options.annot_dir):
		return options

	raise ValueError("Error, the transcript annotation directory not found: {} \n \
		             pls run prepare_transcripts.py first. ".format(options.annot_dir))

def parsing_plot_orf_density():
	"""Parse the command-line options for plotting the P-site profile of one ORF.

	Options are read from ``sys.argv``. On success the ORF start coordinate is
	decremented by one (converted to 0-based) before the options are returned.

	Returns:
		argparse.Namespace: the parsed options, with ``orf_tstart`` shifted by -1.

	Raises:
		ValueError: if the annotation directory does not exist.
	"""
	cli = argparse.ArgumentParser(
		description="This script is designed for plot the P-site profile of specified ORF."
	)
	cli.add_argument(
		"-a", "--annot_dir", dest="annot_dir", type=str, required=True,
		help="transcripts annotation directory, generated by prepare_transcripts.",
	)
	cli.add_argument(
		"-c", "--config_file", dest="config_file", type=str, required=True,
		help="defile bam file information in this file, \
	                    please refer to the example file in data folder.",
	)
	cli.add_argument(
		"-t", "--transcript_id", dest="transcript_id", type=str, required=True,
		help="the transcript id",
	)
	cli.add_argument(
		"-s", "--orf_tstart", dest="orf_tstart", type=int, required=True,
		help="transcript-level coordinates of start of ORF (orf_tstart)",
	)
	cli.add_argument(
		"-e", "--orf_tstop", dest="orf_tstop", type=int, required=True,
		help="transcript-level coordinates of end of ORF (orf_tstop)",
	)
	cli.add_argument(
		"-o", "--outname", dest="outname", type=str, required=False,
		default="",
		help="output file name,default is transcriptid_tstart_tstop.pdf",
	)
	opts = cli.parse_args()

	if not os.path.exists(opts.annot_dir):
		raise ValueError("Error, the transcript annotation directory not found: {} \n \
		             pls run prepare_transcripts.py first. ".format(opts.annot_dir))

	# Convert the start coordinate to 0-based.
	opts.orf_tstart -= 1
	return opts

def parsing_ORF_count():
	"""Parse and validate the command-line options of the ORF read-counting step.

	Options are read from ``sys.argv``.

	Returns:
		argparse.Namespace: the parsed options.

	Raises:
		ValueError: if the ORF GTF file does not exist, or if the codons
			excluded at both ORF ends (3 nt each) add up to at least
			``exclude_min_ORF`` nucleotides.
	"""
	parser = argparse.ArgumentParser(
		description="This script is designed for calculating the number of reads mapping to ORF with the alignment files \
		in SAM/BAM format (aligned to genome) and a feature file in GTF format"
	)
	parser.add_argument("-s","--stranded",dest="stranded",required=False,type=str,choices=["yes","reverse"],
	                    default="yes",help="whether the data is strand-specific, \
	                    reverse means reversed strand interpretation. (default: yes)")
	parser.add_argument("-a","--minaqual",dest="min_quality",required=False,type=int,
	                    default=10,help="skip all reads with alignment quality lower than the given minimum value (default:10)")
	parser.add_argument("-c","--count_mode",dest="count_mode",required=False,type=str,choices=["union","intersection-strict"],
	                    default="intersection-strict",help="mode to handle reads overlapping more than one ORF (choices:\
	                    union,intersection-strict;default: intersection-strict)")
	# The help text must name the real default file, not just its prefix.
	parser.add_argument("-g","--gtf",dest="gtf_file",required=False,type=str,default="final_result_collapsed.gtf",
	                    help="ORF gtf file generated by RiboCode, default: final_result_collapsed.gtf")
	parser.add_argument("-r","--rpf_mapping_file",dest="rpf_mapping_file",required=True,type=str,
	                    help="ribo-seq BAM/SAM file aligned to the genome, multiple files should be separated with \",\"")
	parser.add_argument("-f","--first_exclude_codons",dest="first_exclude_codons",required=False,type=int,default=15,
	                    help="excluding the reads aligned to the first few codons of the ORF, default:15")
	parser.add_argument("-l","--last_exclude_codons",dest="last_exclude_codons",required=False,type=int,default=5,
	                    help="excluding the reads aligned to the last few codons of the ORF, default:5")
	parser.add_argument("-e","--exclude_min_ORF",dest="exclude_min_ORF",required=False,type=int,default=100,
	                    help="the minimal length(nt) of ORF for excluding the reads aligned to first and last few codons, default:100")
	parser.add_argument("-m","--min_read",dest="min_read",required=False,type=int,default=26,
	                    help="minimal read length for the counting of RPF,default:26")
	parser.add_argument("-M","--max_read",dest="max_read",required=False,type=int,default=34,
	                    help="maximal read length for the counting of RPF,default:34")
	parser.add_argument("-o","--output",dest="output_file",required=False,type=str,
	                    default="-",help="write out all ORF counts into a file, default is to write to standard output")
	args = parser.parse_args()

	if not os.path.exists(args.gtf_file):
		raise ValueError("Error, the gtf file not found: {}".format(args.gtf_file))
	# Each excluded codon covers 3 nt; the excluded span must stay below the
	# minimal ORF length to which the exclusion applies.
	if args.first_exclude_codons * 3 + args.last_exclude_codons * 3 >= args.exclude_min_ORF:
		raise ValueError("Error, the exclude_min_ORF is too small: %i" % args.exclude_min_ORF)

	return args
