#!/usr/bin/env python3
from teca import *
import sys
import argparse

# parse the command line
#
# Positional arguments: in_file (table to read) and out_file (where the
# result is written). Optional arguments: --in_format and --out_format name
# the file formats, and --select gives a row filter expression.
# ArgumentDefaultsHelpFormatter appends each option's default to its help.
parser = argparse.ArgumentParser(
    formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(
                        prog, max_help_position=4, width=100))

parser.add_argument('in_file', type=str,
                    help='path to table to read')

# NOTE(review): the reader setup later in this script also accepts 'netcdf'
# for --in_format; confirm whether it should be listed here.
parser.add_argument('--in_format', type=str, default='auto',
                    help='The file format to assume on the input. '
                         'One of: auto, bin, csv. (auto)')

parser.add_argument('out_file', type=str,
                    help='path to write result')

parser.add_argument('--out_format', type=str, default='auto',
                    help='The file format to use in the output. '
                         'One of: auto, bin, csv, netcdf. (auto)')

parser.add_argument('--select', type=str, required=False,
                    help='a logical expression on table columns. '
                         'Rows where this evaluates to true are passed '
                         'to the output')

args = parser.parse_args()

# read the table
reader = teca_table_reader.New()
reader.set_file_name(args.in_file)

# Each accepted --in_format value maps to the name of the reader method that
# selects it. The method is looked up only for the requested format.
in_format_setters = {
    'auto': 'set_file_format_auto',
    'bin': 'set_file_format_bin',
    'csv': 'set_file_format_csv',
    'netcdf': 'set_file_format_netcdf',
}

in_format_setter = in_format_setters.get(args.in_format)
if in_format_setter is None:
    raise RuntimeError('Invalid input file format %s' % (args.in_format))

getattr(reader, in_format_setter)()

# optionally remove unselected rows
#
# `tip` names the last stage of the pipeline built so far; the writer is
# connected to it.
if not args.select:
    # no selection requested, the reader feeds the writer directly
    tip = reader
else:
    # the row removal stage is given the negation of the user's expression,
    # so that the rows the user selected are the ones that are kept
    row_filter = teca_table_remove_rows.New()
    row_filter.set_input_connection(reader.get_output_port())
    row_filter.set_mask_expression('!(%s)' % (args.select,))
    tip = row_filter

# write the table back out
writer = teca_table_writer.New()
writer.set_input_connection(tip.get_output_port())
writer.set_file_name(args.out_file)

# The output format is selected by --out_format, independently of
# --in_format, so that the table can be converted from one format to another.
if args.out_format == 'auto':
    writer.set_output_format_auto()
elif args.out_format == 'bin':
    writer.set_output_format_bin()
elif args.out_format == 'csv':
    writer.set_output_format_csv()
elif args.out_format == 'netcdf':
    # NOTE(review): the --out_format help advertises netcdf; this assumes
    # teca_table_writer exposes set_output_format_netcdf -- confirm.
    writer.set_output_format_netcdf()
else:
    raise RuntimeError('Invalid output file format %s' % (args.out_format))

# execute the pipeline
writer.update()
