# -*- coding: utf-8 -*-
"""
Created on Tue Aug  4 14:47:21 2020

@author: Megha Mathur
"""

import argparse  
import warnings
from collections  import defaultdict
import  pandas as pd
import os

# Silence all warnings (pandas/numpy included) for the rest of the run.
warnings.filterwarnings('ignore')
parser = argparse.ArgumentParser(description='Please provide following arguments to proceed')

## Read Arguments from command
# The input is either a literal A/C/G/T sequence (no file extension) or the
# path of a FASTA file; the help texts below describe what the script accepts.
parser.add_argument("-i", "--input", type=str, required=True, help="Input: DNA sequence, or path to a file of DNA sequences in FASTA format")
parser.add_argument("-p", "--property", type=str, nargs='+', required=True, help="Refer the property list (p1-p38 for k=2, p1-p12 for k=3) and enter property names separated by spaces, or 'all'")
parser.add_argument("-l", "--lagvalue", type=int, help="Enter the lag value and its default value is 2")
parser.add_argument("-k", "--kvalue", type=int, help="Enter the k value and its default value is 2")
parser.add_argument("-o", "--output", type=str, help="Enter the output file name")

# Parameter initialization or assigning variable for command level arguments
args = parser.parse_args()
out = "outfile.csv" if args.output is None else args.output
f1 = args.input
lag = 2 if args.lagvalue is None else int(args.lagvalue)
k = 2 if args.kvalue is None else int(args.kvalue)

# Property tables exist only for k=2 and k=3. Stop here for any other value:
# otherwise the script would continue without a table and crash later with a
# NameError.
if k not in (2, 3):
    print("Invalid value of k")
    exit()

prop = args.property
property_list = []

# Inline property tables: one row per physicochemical property and one column
# per k-mer (di-nucleotides p1-p38 for k=2, tri-nucleotides p1-p12 for k=3).
if k==2:
    data = pd.DataFrame()
    data['Physicochemical properties']=['p1','p2','p3','p4','p5','p6','p7','p8','p9','p10','p11','p12','p13','p14','p15','p16','p17','p18','p19','p20','p21','p22','p23','p24','p25','p26','p27','p28','p29','p30','p31','p32','p33','p34','p35','p36','p37','p38']
    data['GA']=[-0.654, -0.14400000000000002, 1.112, 0.0, 1.023, -0.27399999999999997, 0.27, -0.5, -0.21899999999999997, -0.11800000000000001, 0.8270000000000001, 0.15, -0.19, -0.5, -1.105, -1.251, 0.0, -0.413, -0.495, 1.023, -0.402, 0.0, 0.17, -0.026000000000000002, -0.036000000000000004, -0.434, 0.516, 0.6509999999999999, -0.23600000000000002, 0.08800000000000001, 0.191, 0.0, -0.081, 0.502, 0.266, 0.126, -0.39399999999999996, 0.711]
    data['GC']=[-2.455, -0.301, 0.7859999999999999, 1.369, 0.322, 0.47200000000000003, -1.232, 1.4040000000000001, 2.353, 0.6659999999999999, -0.22399999999999998, 0.588, 1.5190000000000001, 1.4040000000000001, 1.35, 1.162, 1.369, -1.7069999999999999, 1.4080000000000001, 0.322, -1.7069999999999999, 1.369, 1.9969999999999999, 2.354, 2.1, 0.076, 2.517, 2.45, 1.3530000000000002, 1.04, 0.8440000000000001, 1.369, -0.081, 0.215, 1.331, -0.348, 0.6459999999999999, 1.585]
    data['GG']=[-0.07, 0.355, -0.055999999999999994, 1.369, -1.1909999999999998, 1.3969999999999998, -1.232, 1.4040000000000001, 0.67, 2.076, -0.496, -0.579, -0.498, 1.4040000000000001, 1.306, 1.1440000000000001, 1.369, -0.485, 1.4080000000000001, -1.1909999999999998, -0.488, 1.369, 0.86, -0.726, -1.276, -0.789, 0.49700000000000005, 0.068, 0.956, 1.264, 1.2930000000000001, 1.369, 0.063, 1.077, 0.08900000000000001, 0.56, -0.8220000000000001, 0.242]
    data['GT']=[-0.9179999999999999, -0.831, -0.653, 0.0, -1.359, -0.156, 0.27, -0.8809999999999999, 1.107, -0.11800000000000001, -1.041, 1.025, 0.259, -0.8809999999999999, -0.703, -0.556, 0.0, -1.276, -0.887, -1.359, -1.278, 0.0, 0.10300000000000001, 0.604, 0.6759999999999999, 0.852, 0.971, 0.915, -0.23600000000000002, 0.424, 0.6409999999999999, 0.0, 1.5019999999999998, 0.502, 0.799, 0.126, 1.2890000000000001, 1.044]
    data['AA']=[1.0190000000000001, -0.644, -0.002, -1.369, 0.995, -1.8869999999999998, 0.833, -0.11900000000000001, -0.8420000000000001, -0.9009999999999999, 0.36, 0.515, -0.9329999999999999, -0.11900000000000001, 0.45799999999999996, 0.6679999999999999, -1.369, 0.593, -0.132, 0.995, 0.593, -1.369, -0.81, 0.46399999999999997, 0.8340000000000001, -0.7, -0.77, -1.02, -0.831, -0.361, -0.16899999999999998, -1.369, 0.063, 0.502, 0.266, 1.587, 0.111, -0.109]
    data['AC']=[-0.9179999999999999, -0.831, -0.653, 0.0, -1.359, -0.156, 0.27, -0.8809999999999999, 1.107, -0.11800000000000001, -1.041, 1.025, 0.259, -0.8809999999999999, -0.703, -0.556, 0.0, -1.276, -0.887, -1.359, -1.278, 0.0, 0.10300000000000001, 0.604, 0.6759999999999999, 0.852, 0.971, 0.915, -0.23600000000000002, 0.424, 0.6409999999999999, 0.0, 1.5019999999999998, 0.502, 0.799, 0.126, 1.2890000000000001, 1.044]
    data['AG']=[0.488, -0.894, -1.3319999999999999, 0.0, -0.799, -0.436, 0.27, -0.5, 0.016, -0.11800000000000001, -0.885, 0.15, -0.99, -0.5, -0.12300000000000001, 0.083, 0.0, 0.23399999999999999, -0.495, -0.799, 0.233, 0.0, -0.498, -1.147, -1.1440000000000001, -0.5670000000000001, -0.612, -0.489, -0.23600000000000002, -1.145, -1.406, 0.0, 0.7829999999999999, 0.359, 0.08900000000000001, 0.679, -0.24100000000000002, -0.623]
    data['AT']=[0.5670000000000001, -1.05, 2.089, -1.369, -0.098, -0.7509999999999999, 1.396, -1.3880000000000001, -0.5760000000000001, -1.371, -1.896, 1.973, 1.03, -0.627, 0.23399999999999999, 0.65, -1.369, -0.485, -0.615, -0.098, -0.488, -1.369, -1.456, -0.866, -0.43200000000000005, 3.159, -0.669, -0.568, -1.4269999999999998, -1.705, -1.676, -1.369, 1.071, 0.215, 0.621, -1.0190000000000001, 2.513, 1.171]
    data['CA']=[0.5670000000000001, 1.51, 0.596, 0.0, 1.1909999999999998, 0.98, -0.106, -0.11900000000000001, -0.915, -0.11800000000000001, 1.216, -1.38, 0.45399999999999996, -0.11900000000000001, -1.015, -1.361, 0.0, 1.0959999999999999, -0.132, 1.1909999999999998, 1.091, 0.0, -0.008, -0.23600000000000002, -0.3, 0.032, -0.043, -0.568, 0.161, -0.249, -0.371, 0.0, -1.376, -1.364, -0.266, -0.861, -0.623, -1.254]
    data['CC']=[-0.07, 0.355, -0.055999999999999994, 1.369, -1.1909999999999998, 1.3969999999999998, -1.232, 1.4040000000000001, 0.67, 2.076, -0.496, -0.579, -0.498, 1.4040000000000001, 1.306, 1.1440000000000001, 1.369, -0.485, 1.4080000000000001, -1.1909999999999998, -0.488, 1.369, 0.86, -0.726, -1.276, -0.789, -0.762, 0.068, 0.956, 1.264, 1.2930000000000001, 1.369, 0.063, 1.077, 0.08900000000000001, 0.56, -0.8220000000000001, 0.242]
    data['CG']=[-0.579, 2.229, -1.1420000000000001, 1.369, -0.266, 0.799, -2.17, 2.039, 0.187, 0.6659999999999999, 0.7490000000000001, -1.818, 2.36, 2.039, 1.7069999999999999, 1.3630000000000002, 1.369, 0.665, 2.042, -0.266, 0.662, 1.369, 1.5730000000000002, 1.6540000000000001, 1.335, -0.41200000000000003, -0.762, 0.606, 2.346, 1.768, 1.4280000000000002, 1.369, -1.6640000000000001, -1.22, -0.444, -0.8220000000000001, -0.287, -1.389]
    data['CT']=[0.488, -0.894, -1.3319999999999999, 0.0, -0.799, -0.436, 0.27, -0.5, 0.016, -0.11800000000000001, -0.885, 0.15, -0.99, -0.5, -0.12300000000000001, 0.083, 0.0, 0.23399999999999999, -0.495, -0.799, 0.233, 0.0, -0.498, -1.147, -1.1440000000000001, -0.5670000000000001, 0.49700000000000005, -0.489, -0.23600000000000002, -1.145, -1.406, 0.0, 0.7829999999999999, 0.359, 0.08900000000000001, 0.679, -0.24100000000000002, -0.623]
    data['TA']=[1.6030000000000002, 0.418, -1.061, -1.369, 0.322, 0.233, 1.396, -0.627, -1.598, -1.371, 1.41, -0.506, -1.114, -1.3880000000000001, -0.9259999999999999, -0.629, -1.369, 2.031, -1.37, 0.322, 2.036, -1.369, -1.746, -1.006, -0.511, 0.387, -1.486, -1.6030000000000002, -1.4269999999999998, -1.145, -0.956, -1.369, -1.2329999999999999, -2.3680000000000003, -0.444, -2.2430000000000003, -1.511, -1.389]
    data['TC']=[-0.654, -0.14400000000000002, 1.112, 0.0, 1.023, -0.27399999999999997, 0.27, -0.5, -0.21899999999999997, -0.11800000000000001, 0.8270000000000001, 0.15, -0.19, -0.5, -1.105, -1.251, 0.0, -0.413, -0.495, 1.023, -0.402, 0.0, 0.17, -0.026000000000000002, -0.036000000000000004, -0.434, 0.516, 0.6509999999999999, -0.23600000000000002, 0.08800000000000001, 0.191, 0.0, -0.081, 0.502, 0.266, 0.126, -0.39399999999999996, 0.711]
    data['TG']=[0.5670000000000001, 1.51, 0.596, 0.0, 1.1909999999999998, 0.98, -0.106, -0.11900000000000001, -0.915, -0.11800000000000001, 1.216, -1.38, 0.45399999999999996, -0.11900000000000001, -1.015, -1.361, 0.0, 1.0959999999999999, -0.132, 1.1909999999999998, 1.091, 0.0, -0.008, -0.23600000000000002, -0.3, 0.032, -0.612, -0.568, 0.161, -0.249, -0.371, 0.0, -1.376, -1.364, -0.266, -0.861, -0.623, -1.254]
    data['TT']=[1.0190000000000001, -0.644, -0.002, -1.369, 0.995, -1.8869999999999998, 0.833, -0.11900000000000001, -0.8420000000000001, -0.9009999999999999, 0.36, 0.515, -0.9329999999999999, -0.11900000000000001, 0.45799999999999996, 0.6679999999999999, -1.369, 0.593, -0.132, 0.995, 0.593, -1.369, -0.81, 0.46399999999999997, 0.8340000000000001, -0.7, -0.77, -1.02, -0.831, -0.361, -0.16899999999999998, -1.369, 0.063, 0.502, -3.284, 1.587, 0.111, -0.109]
elif k==3:
    data = pd.DataFrame()
    data['Physicochemical properties']=['p1','p2','p3','p4','p5','p6','p7','p8','p9','p10','p11','p12']
    data['AAA']=[-2.0869999999999997, -2.745, -1.732, -2.349, -2.7439999999999998, -2.7439999999999998, 2.274, 2.1180000000000003, -1.0, -1.0, -2.342, 2.386]
    data['AAC']=[-1.5090000000000001, -1.354, -0.5770000000000001, -0.561, -1.3630000000000002, -1.3630000000000002, 1.105, 1.516, -1.0, -1.0, -0.555, 0.5479999999999999]
    data['AAG']=[-0.506, -0.257, -0.5770000000000001, 0.155, -0.26, -0.26, 0.193, 0.493, -1.0, -1.0, 0.16899999999999998, -0.179]
    data['AAT']=[-2.126, -2.585, -1.732, -1.9909999999999999, -2.591, -2.591, 2.141, 2.158, -1.0, -1.0, -2.004, 2.032]
    data['ACA']=[0.111, 0.171, -0.5770000000000001, 0.155, 0.16399999999999998, 0.16399999999999998, -0.153, -0.12300000000000001, 1.0, 1.0, 0.16899999999999998, -0.179]
    data['ACC']=[-0.121, 0.064, 0.5770000000000001, 0.27399999999999997, 0.071, 0.071, -0.078, 0.107, 1.0, 1.0, 0.266, -0.275]
    data['ACG']=[-0.121, 0.064, 0.5770000000000001, 0.27399999999999997, 0.065, 0.065, -0.07400000000000001, 0.107, 1.0, 1.0, 0.266, -0.275]
    data['ACT']=[-1.354, -0.685, -0.5770000000000001, 0.45299999999999996, -0.6759999999999999, -0.6759999999999999, 0.536, 1.357, 1.0, 1.0, 0.45899999999999996, -0.466]
    data['AGA']=[0.381, -0.15, -0.5770000000000001, -0.74, -0.158, -0.158, 0.109, -0.389, 1.0, 1.0, -0.748, 0.743]
    data['AGC']=[0.304, 0.92, 0.5770000000000001, 1.287, 0.9109999999999999, 0.9109999999999999, -0.753, -0.313, 1.0, 1.0, 1.28, -1.272]
    data['AGG']=[-0.313, -0.07, 0.5770000000000001, 0.27399999999999997, -0.07, -0.07, 0.039, 0.3, 1.0, 1.0, 0.266, -0.275]
    data['AGT']=[-1.354, -0.685, -0.5770000000000001, 0.45299999999999996, -0.6759999999999999, -0.6759999999999999, 0.536, 1.357, 1.0, 1.0, 0.45899999999999996, -0.466]
    data['ATA']=[1.615, 0.5720000000000001, -1.732, -0.978, 0.584, 0.584, -0.491, -1.585, -1.0, -1.0, -0.99, 0.988]
    data['ATC']=[-0.737, -0.391, -0.5770000000000001, 0.214, -0.397, -0.397, 0.307, 0.727, -1.0, -1.0, 0.217, -0.22699999999999998]
    data['ATG']=[1.229, 1.348, -0.5770000000000001, 0.87, 1.358, 1.358, -1.112, -1.215, -1.0, -1.0, 0.893, -0.894]
    data['ATT']=[-2.126, -2.585, -1.732, -1.9909999999999999, -2.591, -2.591, 2.141, 2.158, -1.0, -1.0, -2.004, 2.032]
    data['CAA']=[0.265, -0.231, -0.5770000000000001, -0.74, -0.226, -0.226, 0.166, -0.275, -1.0, -1.0, -0.748, 0.743]
    data['CAC']=[0.496, 0.7859999999999999, 0.5770000000000001, 0.81, 0.773, 0.773, -0.6459999999999999, -0.503, -1.0, -1.0, 0.797, -0.8]
    data['CAG']=[1.5759999999999998, 0.92, 0.5770000000000001, -0.322, 0.92, 0.92, -0.762, -1.5490000000000002, -1.0, -1.0, -0.314, 0.304]
    data['CAT']=[1.229, 1.348, -0.5770000000000001, 0.87, 1.358, 1.358, -1.112, -1.215, -1.0, -1.0, 0.893, -0.894]
    data['CCA']=[-1.8559999999999999, -1.14, 0.5770000000000001, 0.27399999999999997, -1.139, -1.139, 0.917, 1.876, 1.0, 1.0, 0.266, -0.275]
    data['CCC']=[0.07200000000000001, 0.358, 1.732, 0.5720000000000001, 0.345, 0.345, -0.3, -0.084, 1.0, 1.0, 0.555, -0.562]
    data['CCG']=[-0.9690000000000001, -0.7120000000000001, 1.732, -0.084, -0.705, -0.705, 0.5579999999999999, 0.9620000000000001, 1.0, 1.0, -0.07200000000000001, 0.062]
    data['CCT']=[-0.313, -0.07, 0.5770000000000001, 0.27399999999999997, -0.07, -0.07, 0.039, 0.3, 1.0, 1.0, 0.266, -0.275]
    data['CGA']=[0.111, 1.0, 0.5770000000000001, 1.645, 1.012, 1.012, -0.8340000000000001, -0.12300000000000001, 1.0, 1.0, 1.666, -1.646]
    data['CGC']=[-0.46799999999999997, 0.385, 1.732, 1.287, 0.379, 0.379, -0.326, 0.455, 1.0, 1.0, 1.28, -1.272]
    data['CGG']=[-0.9690000000000001, -0.7120000000000001, 1.732, -0.084, -0.705, -0.705, 0.5579999999999999, 0.9620000000000001, 1.0, 1.0, -0.07200000000000001, 0.062]
    data['CGT']=[-0.121, 0.064, 0.5770000000000001, 0.27399999999999997, 0.065, 0.065, -0.07400000000000001, 0.107, 1.0, 1.0, 0.266, -0.275]
    data['CTA']=[0.882, -0.09699999999999999, -0.5770000000000001, -1.276, -0.09699999999999999, -0.09699999999999999, 0.062, -0.88, -1.0, -1.0, -1.28, 1.285]
    data['CTC']=[0.419, 0.43799999999999994, 0.5770000000000001, 0.27399999999999997, 0.42700000000000005, 0.42700000000000005, -0.365, -0.42700000000000005, -1.0, -1.0, 0.266, -0.275]
    data['CTG']=[1.5759999999999998, 0.92, 0.5770000000000001, -0.322, 0.92, 0.92, -0.762, -1.5490000000000002, -1.0, -1.0, -0.314, 0.304]
    data['CTT']=[-0.506, -0.257, -0.5770000000000001, 0.155, -0.26, -0.26, 0.193, 0.493, -1.0, -1.0, 0.16899999999999998, -0.179]
    data['GAA']=[-0.159, -0.605, -0.5770000000000001, -0.9179999999999999, -0.6, -0.6, 0.474, 0.146, -1.0, -1.0, -0.893, 0.89]
    data['GAC']=[0.034, 0.171, 0.5770000000000001, 0.27399999999999997, 0.17800000000000002, 0.17800000000000002, -0.165, -0.046, -1.0, -1.0, 0.266, -0.275]
    data['GAG']=[0.419, 0.43799999999999994, 0.5770000000000001, 0.27399999999999997, 0.42700000000000005, 0.42700000000000005, -0.365, -0.42700000000000005, -1.0, -1.0, 0.266, -0.275]
    data['GAT']=[-0.737, -0.391, -0.5770000000000001, 0.214, -0.397, -0.397, 0.307, 0.727, -1.0, -1.0, 0.217, -0.22699999999999998]
    data['GCA']=[0.7659999999999999, 0.8390000000000001, 0.5770000000000001, 0.5720000000000001, 0.8420000000000001, 0.8420000000000001, -0.7020000000000001, -0.767, 1.0, 1.0, 0.555, -0.562]
    data['GCC']=[1.036, 2.097, 1.732, 2.479, 2.089, 2.089, -1.6869999999999998, -1.0290000000000001, 1.0, 1.0, 2.487, -2.4330000000000003]
    data['GCG']=[-0.46799999999999997, 0.385, 1.732, 1.287, 0.379, 0.379, -0.326, 0.455, 1.0, 1.0, 1.28, -1.272]
    data['GCT']=[0.304, 0.92, 0.5770000000000001, 1.287, 0.9109999999999999, 0.9109999999999999, -0.753, -0.313, 1.0, 1.0, 1.28, -1.272]
    data['GGA']=[0.265, -0.09699999999999999, 0.5770000000000001, -0.501, -0.10300000000000001, -0.10300000000000001, 0.066, -0.275, 1.0, 1.0, -0.507, 0.499]
    data['GGC']=[1.036, 2.097, 1.732, 2.479, 2.089, 2.089, -1.6869999999999998, -1.0290000000000001, 1.0, 1.0, 2.487, -2.4330000000000003]
    data['GGG']=[0.07200000000000001, 0.358, 1.732, 0.5720000000000001, 0.345, 0.345, -0.3, -0.084, 1.0, 1.0, 0.555, -0.562]
    data['GGT']=[-0.121, 0.064, 0.5770000000000001, 0.27399999999999997, 0.071, 0.071, -0.078, 0.107, 1.0, 1.0, 0.266, -0.275]
    data['GTA']=[0.342, -0.07, -0.5770000000000001, -0.561, -0.062, -0.062, 0.031, -0.35100000000000003, -1.0, -1.0, -0.555, 0.5479999999999999]
    data['GTC']=[0.034, 0.171, 0.5770000000000001, 0.27399999999999997, 0.17800000000000002, 0.17800000000000002, -0.165, -0.046, -1.0, -1.0, 0.266, -0.275]
    data['GTG']=[0.496, 0.7859999999999999, 0.5770000000000001, 0.81, 0.773, 0.773, -0.6459999999999999, -0.503, -1.0, -1.0, 0.797, -0.8]
    data['GTT']=[-1.5090000000000001, -1.354, -0.5770000000000001, -0.561, -1.3630000000000002, -1.3630000000000002, 1.105, 1.516, -1.0, -1.0, -0.555, 0.5479999999999999]
    data['TAA']=[0.6890000000000001, -0.284, -1.732, -1.395, -0.275, -0.275, 0.20600000000000002, -0.6920000000000001, -1.0, -1.0, -1.376, 1.3840000000000001]
    data['TAC']=[0.342, -0.07, -0.5770000000000001, -0.561, -0.062, -0.062, 0.031, -0.35100000000000003, -1.0, -1.0, -0.555, 0.5479999999999999]
    data['TAG']=[0.882, -0.09699999999999999, -0.5770000000000001, -1.276, -0.09699999999999999, -0.09699999999999999, 0.062, -0.88, -1.0, -1.0, -1.28, 1.285]
    data['TAT']=[1.615, 0.5720000000000001, -1.732, -0.978, 0.584, 0.584, -0.491, -1.585, -1.0, -1.0, -0.99, 0.988]
    data['TCA']=[1.73, 1.348, -0.5770000000000001, 0.27399999999999997, 1.348, 1.348, -1.103, -1.696, 1.0, 1.0, 0.266, -0.275]
    data['TCC']=[0.265, -0.09699999999999999, 0.5770000000000001, -0.501, -0.10300000000000001, -0.10300000000000001, 0.066, -0.275, 1.0, 1.0, -0.507, 0.499]
    data['TCG']=[0.111, 1.0, 0.5770000000000001, 1.645, 1.012, 1.012, -0.8340000000000001, -0.12300000000000001, 1.0, 1.0, 1.666, -1.646]
    data['TCT']=[0.381, -0.15, -0.5770000000000001, -0.74, -0.158, -0.158, 0.109, -0.389, 1.0, 1.0, -0.748, 0.743]
    data['TGA']=[1.73, 1.348, -0.5770000000000001, 0.27399999999999997, 1.348, 1.348, 4.522, -1.696, 1.0, 1.0, 0.266, -0.275]
    data['TGC']=[0.7659999999999999, 0.8390000000000001, 0.5770000000000001, 0.5720000000000001, 0.8420000000000001, 0.8420000000000001, -0.7020000000000001, -0.767, 1.0, 1.0, 0.555, -0.562]
    data['TGG']=[-1.8559999999999999, -1.14, 0.5770000000000001, 0.27399999999999997, -1.139, -1.139, 0.917, 1.876, 1.0, 1.0, 0.266, -0.275]
    data['TGT']=[0.111, 0.171, -0.5770000000000001, 0.155, 0.16399999999999998, 0.16399999999999998, -0.153, -0.12300000000000001, 1.0, 1.0, 0.16899999999999998, -0.179]
    data['TTA']=[0.6890000000000001, -0.284, -1.732, -1.395, -0.275, -0.275, 0.20600000000000002, -0.6920000000000001, -1.0, -1.0, -1.376, 1.3840000000000001]
    data['TTC']=[-0.159, -0.605, -0.5770000000000001, -0.9179999999999999, -0.6, -0.6, 0.474, 0.146, -1.0, -1.0, -0.893, 0.89]
    data['TTG']=[0.265, -0.231, -0.5770000000000001, -0.74, -0.226, -0.226, 0.166, -0.275, -1.0, -1.0, -0.748, 0.743]
    data['TTT']=[-2.0869999999999997, -2.745, -1.732, -2.349, -2.7439999999999998, -2.7439999999999998, -2.615, 2.1180000000000003, -1.0, -1.0, -2.342, 2.386]
else:
    print("Invalid value of k")

# Gather the name of every property available in the selected table.
property_list.extend(data['Physicochemical properties'])
al = ['all']
# Every requested name must be a known property or the keyword 'all';
# abort on the first one that is neither.
for requested in prop:
    if requested in property_list or requested in al:
        continue
    print("No such property found")
    exit()
    
def allp(k):
    """Return every DNA k-mer of length ``k`` over the alphabet A, T, C, G.

    The k-mers are listed in nested-loop order with the left-most letter
    varying slowest, e.g. ``allp(2)`` starts ``AA, AT, AC, AG, TA, ...``.

    Parameters:
        k: length of the k-mers to enumerate.

    Returns:
        A list of ``4 ** k`` strings, or ``None`` when ``k`` is smaller
        than 1 (kept for backward compatibility with earlier behaviour).
    """
    # Local import so the function does not depend on file-level imports.
    from itertools import product

    if k < 1:
        return None
    n = ['A', 'T', 'C', 'G']
    # product() yields tuples in exactly the order of k nested loops over n,
    # and every tuple is unique, so no duplicate check is needed.
    return [''.join(letters) for letters in product(n, repeat=k)]

def kmer(k, seq):
    """Return all overlapping substrings of length ``k`` found in ``seq``.

    Parameters:
        k: window length.
        seq: sequence string to slide the window over.

    Returns:
        A list of ``len(seq) - k + 1`` substrings in left-to-right order.
        The list is empty when ``seq`` is shorter than ``k`` or when ``k``
        is smaller than 1.
    """
    if k < 1:
        # A window of non-positive width contains no k-mer.
        return []
    # Slicing replaces the character-by-character string concatenation.
    return [seq[start:start + k] for start in range(len(seq) - k + 1)]
# Every k-mer of the chosen length, in the order produced by allp().
rs = allp(k)


sq = []
# Expand the requested property names; 'all' stands for every known property.
phy = []
for requested in prop:
    if requested == 'all':
        phy.extend(property_list)
    else:
        phy.append(requested)

# temp[kmer] holds that k-mer's value for each selected property, in the
# same order as phy.
temp = defaultdict(list)
for p in phy:
    for km in rs:
        for row, name in enumerate(data['Physicochemical properties']):
            if name == p:
                temp[km].append(data[km][row])
            
# An input without a file extension is taken as a literal sequence; anything
# else is taken as the path of a FASTA file.
filename, file_extension = os.path.splitext(f1)

# Start from the existing output file when it is present and non-empty,
# otherwise from an empty frame. Checking the path first avoids the
# FileNotFoundError that opening a not-yet-existing output file would raise.
if os.path.isfile(out) and os.path.getsize(out) > 0:
    cdk = pd.read_csv(out)
else:
    cdk = pd.DataFrame()

if file_extension == "":
    # Literal sequence: upper-case it and reject any non-A/C/G/T character.
    f1 = f1.upper()
    alphabet = ['A', 'C', 'G', 'T']
    for i in f1:
        if i not in alphabet:
            print("Invalid Character found in the given sequence")
            exit()
    sq.append(f1)
    cdk['Sequence'] = sq

else:
    # FASTA file: read everything up front; 'with' guarantees the handle is
    # closed even if reading fails.
    with open(f1, "r") as f:
        b = f.readlines()
    s_id = []
    s = ""
    for i in b:
        if i[0] == '>':
            # Header line: keep it (including '>') without the newline and
            # flush the sequence collected for the previous record.
            s_id.append(i.split("\n")[0])
            if s != "":
                sq.append(s)
                s = ""
        else:
            # Sequence line: keep only A/C/G/T (case-insensitive); every
            # other character, including the newline, is dropped.
            for j in i:
                j = j.upper()
                if j in ('A', 'G', 'C', 'T'):
                    s = s + j
    # Flush the last record, which has no following header.
    if s != "":
        sq.append(s)
    cdk['Sequence_ID'] = s_id


# Moran autocorrelation (MAC) descriptors: one output column per
# (property, lag) pair, one value per input sequence.
mac = defaultdict(list)
for seq in sq:
    mer = kmer(k, seq)
    n_kmers = len(seq) - k + 1          # number of k-mers in the sequence
    for l in range(1, lag + 1):
        n_pairs = n_kmers - l           # number of k-mer pairs 'l' apart
        # The sequence must hold at least one pair at this lag. Testing
        # '< 1' rather than '== 0' also rejects sequences shorter than
        # k + lag, which would otherwise yield meaningless values.
        if n_pairs < 1:
            print("Invalid Sequence Length")
            exit()
        for p in range(len(phy)):
            # Property value of every k-mer along the sequence.
            values = [temp[m][p] for m in mer]

            # Average over the first n_pairs k-mers, scaled by the length of
            # the CURRENT sequence (previously the first sequence's length
            # was used for every record).
            # NOTE(review): the sum covers n_pairs k-mers but is divided by
            # len(seq); kept as in the original - confirm against the
            # intended MAC definition.
            av = 0.0
            for i in range(n_pairs):
                av += values[i]
            av = av / len(seq)

            # Numerator: products of deviations for k-mers 'l' apart.
            su = 0.0
            for i in range(n_pairs):
                su += (values[i] - av) * (values[i + l] - av)

            # Denominator: squared deviations over all k-mers.
            su2 = 0.0
            for i in range(n_kmers):
                su2 += (values[i] - av) * (values[i] - av)

            st = "MAC_" + phy[p] + "_lag" + str(l)
            su = ((1 / n_pairs) * su) / ((1 / n_kmers) * su2)
            mac[st].append(su)

# Attach the descriptor columns and write the table to the output file.
for column, column_values in mac.items():
    cdk[column] = column_values
cdk.to_csv(out, index=False)
