#!/bin/bash

# Copyright 2019 by Peter Cock, The James Hutton Institute.
# All rights reserved.
# This file is part of the THAPBI Phytophthora ITS1 Classifier Tool (PICT),
# and is released under the "MIT License Agreement". Please see the LICENSE
# file that should have been included as part of this package.

# Strict mode: word-split only on newline/tab, abort on any failing command,
# on use of an unset variable, and on failure of any stage of a pipeline.
IFS=$'\n\t'
set -euo pipefail

# Root of the scratch area; a caller-supplied TMP wins, otherwise use /tmp.
export TMP=${TMP:-/tmp}

echo "Preparing sample data for woody hosts example"

# Recreate the working tree from scratch. All expansions are quoted so that
# a TMP value containing glob characters or IFS characters is used verbatim.
rm -rf -- "$TMP/woody_hosts"
mkdir "$TMP/woody_hosts"
mkdir "$TMP/woody_hosts/intermediate"
mkdir "$TMP/woody_hosts/summary"
mkdir "$TMP/woody_hosts/positive_controls/"

# Symlink every tests/woody_hosts/*.known.tsv file (by absolute path) into
# the positive_controls folder.
for f in tests/woody_hosts/*.known.tsv; do
  ln -s "$PWD/$f" "$TMP/woody_hosts/positive_controls/"
done

# Idea here is to mimic what "thapbi_pict pipeline" would do if we had
# the FASTQ files here:
# thapbi_pict pipeline -i sample_data/raw_data/ \
#	    -s $TMP/woody_hosts/intermediate \
#	    -o $TMP/woody_hosts/summary -r woody-hosts \
#	    -t tests/woody_hosts/site_metadata.tsv \
#            -c 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -x 16 -f 20


echo "=================================="
echo "Decompressing prepare-reads output"
echo "=================================="
# Unpack the bundled archive beneath the working directory. The verbose
# listing from tar is piped to wc, so only the number of lines it printed
# appears in the log.
time tar -jxvf tests/woody_hosts/intermediate.tar.bz2 -C "$TMP/woody_hosts/" | wc -l

echo "============================"
echo "Running woody hosts classify"
echo "============================"
# No -o given: the default should write the output next to the inputs,
# which is what the later steps read from.
time thapbi_pict classify -i "$TMP/woody_hosts/intermediate/"

echo "=================================="
echo "Running woody hosts sample-summary"
echo "=================================="
# First run: no metadata table supplied.
time thapbi_pict sample-summary -i "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/summary/no-metadata.samples.tsv" \
  -r "$TMP/woody_hosts/summary/no-metadata.samples.txt"
ls "$TMP"/woody_hosts/summary/no-metadata.samples.*

# Second run: same inputs, plus the site metadata table and its column
# selection options.
time thapbi_pict sample-summary -i "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/summary/with-metadata.samples.tsv" \
  -r "$TMP/woody_hosts/summary/with-metadata.samples.txt" \
  -t tests/woody_hosts/site_metadata.tsv \
  -c 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -x 16 -f 20
ls "$TMP"/woody_hosts/summary/with-metadata.samples.*

# Check the counts in the human readable report. The grep output is compared
# as a string, so empty output (e.g. the report file is missing) counts as a
# mismatch rather than making the test expression error out and the check
# be skipped.
if [[ "$(grep -c "^Site: " "$TMP/woody_hosts/summary/with-metadata.samples.txt")" != "17" ]]; then
  echo "Wrong site count"
  false
fi
if [[ "$(grep -c "^Sequencing sample: " "$TMP/woody_hosts/summary/with-metadata.samples.txt")" != "122" ]]; then
  echo "Wrong sample count"
  false
fi

# Should be identical apart from row order
diff <(sort "$TMP/woody_hosts/summary/no-metadata.samples.tsv") \
  <(sort "$TMP/woody_hosts/summary/with-metadata.samples.tsv")

echo "================================"
echo "Running woody hosts read-summary"
echo "================================"
# First run: no metadata table supplied. The Excel output uses the
# conventional .xlsx extension.
time thapbi_pict read-summary -i "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/summary/no-metadata.reads.tsv" \
  -e "$TMP/woody_hosts/summary/no-metadata.reads.xlsx"
ls "$TMP"/woody_hosts/summary/no-metadata.reads.*
# Expect 94 + total line. The grep output is compared as a string, so empty
# output (e.g. the TSV file is missing) counts as a mismatch instead of the
# check being skipped.
if [[ "$(grep -c -v "^#" "$TMP/woody_hosts/summary/no-metadata.reads.tsv")" != "95" ]]; then
  echo "Wrong unique sequence count"
  false
fi

# Second run: same inputs, plus the site metadata table and its column
# selection options.
time thapbi_pict read-summary -i "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/summary/with-metadata.reads.tsv" \
  -e "$TMP/woody_hosts/summary/with-metadata.reads.xlsx" \
  -t tests/woody_hosts/site_metadata.tsv \
  -c 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -x 16 -f 20
ls "$TMP"/woody_hosts/summary/with-metadata.reads.*
# Expect 94 + total line
if [[ "$(grep -c -v "^#" "$TMP/woody_hosts/summary/with-metadata.reads.tsv")" != "95" ]]; then
  echo "Wrong unique sequence count"
  false
fi

echo "=============================="
echo "Running woody hosts edit-graph"
echo "=============================="
time thapbi_pict edit-graph -i "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/summary/no-metadata.edit-graph.xgmml"
# Check the number of node and edge elements in the XGMML output. The grep
# output is compared as a string, so empty output (e.g. the file is missing)
# counts as a mismatch instead of the check being skipped.
if [[ "$(grep -c "<node " "$TMP/woody_hosts/summary/no-metadata.edit-graph.xgmml")" != "94" ]]; then
  echo "Wrong node count"
  false
fi
if [[ "$(grep -c "<edge " "$TMP/woody_hosts/summary/no-metadata.edit-graph.xgmml")" != "65" ]]; then
  echo "Wrong edge count"
  false
fi

echo "=========================="
echo "Running woody hosts assess"
echo "=========================="
# Run assess over both the positive_controls and intermediate folders, then
# require the resulting table to match the reference copy kept with the
# tests.
time thapbi_pict assess \
  -i "$TMP/woody_hosts/positive_controls/" "$TMP/woody_hosts/intermediate/" \
  -o "$TMP/woody_hosts/DNA_MIXES.assess.tsv"
diff "$TMP/woody_hosts/DNA_MIXES.assess.tsv" tests/woody_hosts/DNA_MIXES.assess.tsv

# Only reached if every step above succeeded.
echo "$0 - test_woody_hosts.sh passed"
