#!/usr/bin/python
# coding: utf-8

# Author: Archit Sharma <archit.py@gmail.com>
# Makes use of data generated by using `perf script`
# ref: http://linux.die.net/man/1/perf-script

import os, sys
import time
import argparse
import pandas as pd
import configparser

class PostProcessor(object):
    '''
    Form a DataFrame from the csv generated by the perf-script
    processor and compute entry/exit deltas.

    Parameters:
        file_path   -- absolute path to the input csv (columns: entry, tstamp)
        result_path -- directory where result csv files are written
        order       -- ordered event names forming one logical loop;
                       the LAST entry marks the end of a loop iteration
        mode        -- 0: loop statistics only;
                       1: additionally write per-event delta csv files
        log         -- open, writable file-like object for the statistics log
    '''
    def __init__(self, file_path='', result_path='', order=(), mode=2, log=None):
        # `order` default is an immutable () (was a mutable []); it is
        # copied into a tuple immediately, so semantics are unchanged.
        self.file_path = file_path
        self.result_path = result_path
        self.mode = mode
        self.loop_order = tuple(order)
        self.df_dict = {}
        self.log_file = log

    def load_data(self):
        '''Read the input csv into self.df; exit with the error on failure.'''
        try:
            self.df = pd.read_csv(self.file_path)
        except Exception as E:
            sys.exit(E)

    def _process(self, x):
        '''
        Clean a metric key into its unique category.
        Example: kvm_entry/kvm_exit both represent the kvm category.

        Encoding (inverted by _process_inverse):
            `_exit_`/`_enter_` -> `__`
            `_exit`/`_entry`   -> `___`
        '''
        return x.replace('_exit_','__').replace('_exit','___')\
                .replace('_entry','___').replace('_enter_','__')

    def _process_inverse(self, entry, alternate=False):
        '''
        Convert a category back to a metric name as found in the
        perf-script processed data.  `alternate=False` yields the
        exit-side name, `alternate=True` the entry/enter-side name.
        (In each chained pair only the first replace can fire: once
        `___`/`__` is substituted, the second replace is a no-op.)
        '''
        if not alternate:
            return entry.replace('___','_exit').replace('___','_entry')\
                        .replace('__','_exit_').replace('__','_enter_')
        else:
            # observe the order of enter/exit is different
            return entry.replace('___','_entry').replace('___','_exit')\
                        .replace('__','_enter_').replace('__','_exit_')

    def _unique_metrics(self):
        """
        Build self.entries, the set of unique metric categories.

        `___` is meant to be replaced by `_entry` and `_exit` later
        -> special case for kvm_entry/exit.
        `__` is meant to be replaced by `_enter_` and `_exit_` later
        -> preserves kvm_entry/exit.
        `_` means no changes -> preserves sched_switch.
        """
        raw = self.df['entry'].unique().tolist()
        # pd.np was removed in pandas >= 2.0; drop NaN via pd.notna instead.
        self.entries = set(self._process(i) for i in raw if pd.notna(i))
        self.log_file.write("\n" + "*"*22)
        self.log_file.write("\nUnique metrics found:\n\t%s\n" % '\n\t'.join(self.entries))

    def prepare_delta(self):
        '''
        Reconstruct sequential event loops from the trace and write
        `loop_diff.csv` (per-loop timestamps + intra-loop deltas) plus
        per-delta statistics to the log.  In mode 1, additionally write
        one delta csv per metric category.
        '''
        self.load_data()
        # prepare list of metric categories
        self._unique_metrics()

        # Explicit check instead of `assert` (asserts vanish under -O):
        # every event named in loop_order must appear in the data.
        if len(set(self.df['entry'].unique().tolist()) &
               set(self.loop_order)) != len(self.loop_order):
            sys.exit("\nERROR: loop_order supplied doesn't exist in DataFrame")

        data = []

        # calculate sequential event loops (list of dicts);
        # `buf_base` holds the last complete set of timestamps,
        # `buf_mirror` accumulates the next set.
        buf_base = dict.fromkeys(self.loop_order[:-1])
        buf_mirror = buf_base.copy()
        self._df = self.df[self.df.entry.isin(self.loop_order)]
        self._df = self._df.reset_index().drop('index', axis=1)
        for i in range(len(self._df)):
            if self._df.entry[i] == self.loop_order[-1]:
                # if last entry is encountered, update data[]
                buf_base[self._df.entry[i]] = self._df.tstamp[i]
                data.append(buf_base.copy())
            else:
                if not buf_mirror[self._df.entry[i]]:
                    # if the entry's current tstamp is set to None; update
                    buf_mirror[self._df.entry[i]] = self._df.tstamp[i]
                else:
                    # find the next higher entry in loop_order order; then,
                    # find out whether current entry's tstamp is less than
                    # higher priority entry's tstamp; if so, update mirror.
                    pat_next_ix = self.loop_order.index(self._df.entry[i]) + 1
                    if buf_mirror.get(self.loop_order[pat_next_ix]):
                        if self._df.tstamp[i] < buf_mirror[self.loop_order[pat_next_ix]]:
                            buf_mirror[self._df.entry[i]] = self._df.tstamp[i]
                    else:
                        # => In loop_order, the next higher priority event's tstamp
                        # is currently None; so we can now righteously update
                        # current entry's tstamp in mirror
                        buf_mirror[self._df.entry[i]] = self._df.tstamp[i]

            if None not in buf_mirror.values():
                # if mirror buffer is found to have latest copies;
                # reflect values in base buffer and empty mirror
                buf_base = buf_mirror.copy()
                buf_mirror = dict.fromkeys(self.loop_order[:-1])

        del self._df

        # form dataframe from calculated loops data
        loops = pd.DataFrame(data, columns=self.loop_order)

        # calculate intra-loop deltas (column "delta:next__prev")
        for i in range(len(self.loop_order)-1):
            current = "delta:%s__%s" % (self.loop_order[i+1],
                self.loop_order[i])
            loops.loc[:,current] = loops.loc[:,self.loop_order[i+1]] - \
                                     loops.loc[:,self.loop_order[i]]
        loops = loops.fillna(0)
        loops.to_csv('%s.csv'%(os.path.join(self.result_path, 'loop_diff')))

        # log std-dev / mean / median for every delta column
        self.log_file.write("\n" + "*"*22)
        for i in range(len(self.loop_order)-1):
            current = "delta:%s__%s" % (self.loop_order[i+1],
                self.loop_order[i])
            self.log_file.write("\n%s stats:" %(current))
            self.log_file.write("\n\tStandard Dev: %s\n\tMean: %s\n\tMedian: %s" % \
                        (loops[current].std(),
                         loops[current].mean(),
                         loops[current].median()))
            self.log_file.write("\n" + "="*22)
        self.log_file.write("\n")

        if self.mode==1:
            # one csv per metric category: rows for both the entry- and
            # exit-side events, differenced row-to-row.
            for entry in self.entries:
                _tmp = self.df[(self.df['entry'] == \
                                self._process_inverse(entry)) |\
                                    (self.df['entry'] == \
                                    self._process_inverse(entry, alternate=True))]
                _tmp.set_index('entry').diff().to_csv('%s.csv' % \
                    (os.path.join(self.result_path, entry)))

        print("\nScript was executed with Mode option %d.\nResults have been stored to: %s"\
            %(self.mode, self.result_path))


if __name__=='__main__':
    # Parse configurations
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, 
    description="""
    Generate delta of entry/exit points for data from `perf script`. 
    This script runs in following modes:

    - `Mode 0`: Produce __loop statistics__[1] with __all events together__.
    - `Mode 1`: breakup result into __per-event calculated delta csv files__.

    *Loop order: default given under 'order=' in delta_processor.conf""")
    parser.add_argument('-i', '--input', type=str, 
        default='perf_data.csv',
        help='Absolute Path to input csv file')
    parser.add_argument('-m', '--mode', type=int, 
        default=0,
        help='specify mode as per above documentation')
    parser.add_argument('-o', '--output', type=str, 
        default='/tmp/pp_results',
        help='Absolute Path to output dir')
    parser.add_argument('-c', '--conf', type=str, 
        default='/etc/delta_processor.conf',
        # help text previously said "output dir" -- copy/paste error
        help='Absolute Path to config file')
    parser.add_argument('-t', '--type', type=int, 
        default=0,
        help='Define type of test: 0: Native | 1: Threads')
    parser.add_argument('-l', '--log', type=str, 
        default='delta_output.log',
        help='Log output to file..')

    # maps --type to the section name in the config file
    test_types = { 0: 'Native', 1: 'Threads'}
    pd.options.mode.chained_assignment = None  # default='warn'

    try:
        args = parser.parse_args()
        # makedirs + exist_ok avoids the exists/mkdir race and creates
        # missing parent directories as well.
        os.makedirs(args.output, exist_ok=True)

        config = configparser.ConfigParser()
        config.read(args.conf)
        # loop order is a '|'-separated list under the chosen section
        order = config.get(test_types[args.type], 'order').split('|')

        # context manager guarantees the log is closed even on error
        with open(args.log, 'w') as log:
            PP = PostProcessor(
                    file_path=args.input, 
                    result_path=args.output,
                    order=order,
                    mode=args.mode,
                    log=log)

            a = time.time()
            PP.prepare_delta()
            b = time.time()

        print("\nTime taken -- prepare_delta() -- %s\n" % (b-a))

    except OSError as E:
        sys.exit(E)
    except Exception as E:
        sys.exit("ERROR: %s\nUnable to execute. Refer to --help. I Quit!"%(E))
