#include "Debug.h"
#include "Command.h"
#include "CommandDeclarations.h"
#include "Util.h"
#include "Parameters.h"

// Name of the tool as a C string.
const char* tool_name = "MMseqs2";

// Introductory blurb: a two-line summary of the suite, a blank line, then the
// citation request. Written as adjacent string literals, which the compiler
// concatenates into one string; the embedded "\n" escapes are the only line
// breaks that end up in the text.
const char* tool_introduction =
        "MMseqs2 (Many against Many sequence searching) is an open-source software suite for very fast, \n"
        "parallelized protein sequence searches and clustering of huge protein sequence data sets.\n"
        "\n"
        "Please cite: M. Steinegger and J. Soding. MMseqs2 enables sensitive protein sequence searching "
        "for the analysis of massive data sets. Nature Biotechnology, doi:10.1038/nbt.3988 (2017).";

// Main author and contact address.
const char* main_author = "Martin Steinegger (martin.steinegger@mpibpc.mpg.de)";

// Reference bound to the object returned by Parameters::getInstance().
// It is defined ahead of the command table because that table's initializers
// take the addresses of members reached through this reference.
Parameters& par(Parameters::getInstance());

// Table of all sub-commands registered by this file. Each brace-enclosed entry
// lists, in order:
//   1. the command name as a string,
//   2. an identifier declared elsewhere (presumably the function implementing
//      the command — confirm against CommandDeclarations.h),
//   3. the address of the matching member of `par`,
//   4. a COMMAND_* category constant,
//   5. a one-line description,
//   6. a longer description, or NULL when there is none,
//   7. the author(s),
//   8. a usage synopsis (arguments apparently tagged <i:...> for inputs and
//      <o:...> for outputs),
//   9. CITATION_* flags, combined with '|' where several apply.
// NOTE(review): the field names and types come from Command.h, which is not
// visible here — confirm before relying on the descriptions above.
// The entry order is kept exactly as it was; do not reorder without checking
// whether consumers depend on it.
std::vector<struct Command> commands = {
// Main tools  (for non-experts)
        {"createdb",             createdb,             &par.createdb,             COMMAND_MAIN,
                "Convert protein sequence set in a FASTA file to MMseqs sequence DB format",
                "converts a protein sequence flat/gzipped FASTA or FASTQ file to the MMseqs sequence DB format. This format is needed as input to mmseqs search, cluster and many other tools.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:fastaFile1[.gz]> ... <i:fastaFileN[.gz]> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"search",               search,               &par.searchworkflow,       COMMAND_MAIN,
                "Search with query sequence or profile DB (iteratively) through target sequence DB",
                "Searches with the sequences or profiles query DB through the target sequence DB by running the prefilter tool and the align tool for Smith-Waterman alignment. For each query a results file with sequence matches is written as entry into a database of search results (alignmentDB).\nIn iterative profile search mode, the detected sequences satisfying user-specified criteria are aligned to the query MSA, and the resulting query profile is used for the next search iteration. Iterative profile searches are usually much more sensitive than (and at least as sensitive as) searches with single query sequences.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> <i:targetDB> <o:alignmentDB> <tmpDir>",
                CITATION_MMSEQS2},
        {"cluster",              clusteringworkflow,   &par.clusteringWorkflow,   COMMAND_MAIN,
                "Compute clustering of a sequence DB (quadratic time)",
                "Clusters sequences by similarity. It compares all sequences in the sequence DB with each other using mmseqs search, filters alignments according to user-specified criteria (max. E-value, min. coverage,...),   and runs mmseqs clust to group similar sequences together into clusters.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> & Lars von den Driesch",
                "<i:sequenceDB> <o:clusterDB> <tmpDir>",
                CITATION_MMSEQS2|CITATION_MMSEQS1},
        {"createindex",          createindex,          &par.createindex,          COMMAND_MAIN,
                "Precompute index table of sequence DB for faster searches",
                "Precomputes an index table for the sequence DB. Handing over the precomputed index table as input to mmseqs search or mmseqs prefilter eliminates the computational overhead of building the index table on the fly.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:sequenceDB> <o:indexDB> <tmpDir>",
                CITATION_MMSEQS2},
// Utility tools for format conversions
        {"createtsv",            createtsv,            &par.createtsv,        COMMAND_FORMAT_CONVERSION,
                "Create tab-separated flat file from prefilter DB, alignment DB, cluster DB, or taxa DB",
                "Create tab-separated flat file from prefilter DB, alignment DB, cluster DB, or taxa DB. The target database is optional. This is useful for taxa DB, since it does not have a target key.",
                // Fixed: a stray 'r' used to follow the closing '>' of the e-mail address.
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile>",
                CITATION_MMSEQS2},
        {"convertalis",          convertalignments,    &par.convertalignments,    COMMAND_FORMAT_CONVERSION,
                "Convert alignment DB to BLAST-tab format, SAM flat file, or to raw pairwise alignments",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDb> <i:targetDb> <i:alignmentDB> <o:alignmentFile>",
                CITATION_MMSEQS2},
        {"convertprofiledb",     convertprofiledb,     &par.convertprofiledb,     COMMAND_FORMAT_CONVERSION,
                "Convert ffindex DB of HMM/HMMER3/PSSM files to MMseqs profile DB",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:ffindexProfileDB> <o:profileDB>",
                CITATION_MMSEQS2},
        {"convert2fasta",        convert2fasta,        &par.convert2fasta,        COMMAND_FORMAT_CONVERSION,
                "Convert sequence DB to FASTA format",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:sequenceDB> <o:fastaFile>",
                CITATION_MMSEQS2},
        {"result2flat",          result2flat,          &par.result2flat,          COMMAND_FORMAT_CONVERSION,
                "Create a FASTA-like flat file from prefilter DB, alignment DB, or cluster DB",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:fastaDB>",
                CITATION_MMSEQS2},
// Taxonomy
        {"taxonomy",             taxonomy,             &par.taxonomy,             COMMAND_TAXONOMY,
                "Compute taxonomy and lowest common ancestor for each sequence.",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:queryDB> <i:targetDB> <i:targetTaxonMapping> <i:NcbiTaxdmpDir> <o:taxaDB> <tmpDir>",
                CITATION_MMSEQS2},
        {"lca",                  lca,                  &par.lca,                  COMMAND_TAXONOMY,
                "Compute the lowest common ancestor from a set of taxa.",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:taxaDB> <i:NcbiTaxdmpDir> <o:taxaDB>",
                CITATION_MMSEQS2},
// Utility tools for clustering
        {"clusterupdate",        clusterupdate,        &par.clusterUpdate,        COMMAND_CLUSTER,
                "Update clustering of old sequence DB to clustering of new sequence DB",
                NULL,
                "Clovis Galiez & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:oldSequenceDB> <i:newSequenceDB> <i:oldClustResultDB> <o:newMappedSequenceDB> <o:newClustResultDB> <tmpDir>",
                CITATION_MMSEQS2|CITATION_MMSEQS1},
        {"createseqfiledb",      createseqfiledb,      &par.createseqfiledb,      COMMAND_CLUSTER,
                "Create DB of unaligned FASTA files (1 per cluster) from sequence DB and cluster DB",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:sequenceDB> <i:clusterDB> <o:fastaDB>",
                CITATION_MMSEQS2},
        {"mergeclusters",        mergeclusters,        &par.onlyverbosity,        COMMAND_CLUSTER,
                "Merge multiple cluster DBs into single cluster DB",
                NULL,
                "Maria Hauser & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:sequenceDB> <o:clusterDB> <i:clusterDB1> ... <i:clusterDBn>",
                CITATION_MMSEQS2},
// Expert tools (for advanced users)
// NOTE(review): "linclust" (COMMAND_MAIN) and "assemble" (COMMAND_HIDDEN) sit in
// this section although their category constants differ from COMMAND_EXPERT.
        {"prefilter",            prefilter,            &par.prefilter,            COMMAND_EXPERT,
                "Search with query sequence / profile DB through target DB (k-mer matching + ungapped alignment)",
                "Searches with the sequences or profiles in query DB through the target sequence DB in two consecutive stages: a very fast k-mer matching stage (double matches on same diagonal) and a subsequent ungapped alignment stage. For each query a results file with sequence matches is written as entry into the prefilter DB.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> & Maria Hauser",
                "<i:queryDB> <i:targetDB> <o:prefilterDB>",
                CITATION_MMSEQS2},
        {"align",                align,                &par.align,                COMMAND_EXPERT,
                "Compute Smith-Waterman alignments for previous results (e.g. prefilter DB, cluster DB)",
                "Calculates Smith-Waterman alignment scores between all sequences in the query database and the sequences of the target database which passed the prefiltering.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> & Maria Hauser",
                "<i:queryDB> <i:targetDB> <i:prefilterDB> <o:alignmentDB>",
                CITATION_MMSEQS2},
        {"clust",                clust,                &par.clust,                COMMAND_EXPERT,
                "Cluster sequence DB from alignment DB (e.g. created by searching DB against itself)",
                // Fixed: removed the duplicated word in "alignment scores scores".
                "Computes a clustering of a sequence DB based on the alignment DB containing for each query sequence or profile the Smith Waterman alignments generated by mmseqs align. (When given a prefilter DB as input the tool will use the ungapped alignment scores for the clustering.) The tool reads the search results DB,  constructs a similarity graph based on the matched sequences in alignment DB, and applies one of several clustering algorithms. The first, representative sequence of each cluster is connected by an edge to each cluster member. Its names are used as ID in the resulting cluster DB, the entries contain the names of all member sequences.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> & Lars von den Driesch & Maria Hauser",
                "<i:sequenceDB> <i:alignmentDB> <o:clusterDB>",
                CITATION_MMSEQS2|CITATION_MMSEQS1},
        {"linclust",          linclust,          &par.linclustworkflow,            COMMAND_MAIN,
                "Cluster sequences of >30% sequence identity *in linear time*",
                "Detects redundant sequences based on reduced alphabet and k-mer sorting.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> ",
                "<i:sequenceDB> <o:clusterDB> <tmpDir>",
                CITATION_MMSEQS2|CITATION_LINCLUST},
        {"assemble",          assembler,          &par.assemblerworkflow,            COMMAND_HIDDEN,
                "Assemble protein sequences in linear time.",
                "Extends sequence to the left and right using ungapped alignments.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> ",
                "<i:sequenceDB> <o:repSeqDb> <tmpDir>",
                CITATION_MMSEQS2|CITATION_LINCLUST},
        {"kmermatcher",          kmermatcher,          &par.kmermatcher,            COMMAND_EXPERT,
                "Finds exact $k$-mers matches between sequences",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> ",
                "<i:sequenceDB> <o:prefDB>",
                CITATION_MMSEQS2},
        {"clusthash",            clusthash,            &par.clusthash,            COMMAND_EXPERT,
                "Cluster sequences of same length and >90% sequence identity *in linear time*",
                "Detects redundant sequences based on reduced alphabet hashing and hamming distance.",
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> ",
                "<i:sequenceDB> <o:alignmentDB>",
                CITATION_MMSEQS2},
// Utility tools to manipulate DBs
        {"extractorfs",          extractorfs,          &par.extractorfs,          COMMAND_DB,
                "Extract open reading frames from all six frames from nucleotide sequence DB",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:sequenceDB> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"translatenucs",        translatenucs,        &par.translatenucs,        COMMAND_DB,
                "Translate nucleotide sequence DB into protein sequence DB",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:sequenceDB> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"swapresults",          swapresults,          &par.swapresult,          COMMAND_DB,
                "Reformat prefilter/alignment/cluster DB as if target DB had been searched through query DB",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de> & Clovis Galiez",
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"mergedbs",             mergedbs,             &par.mergedbs,             COMMAND_DB,
                "Merge multiple DBs into a single DB, based on IDs (names) of entries",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:sequenceDB> <o:resultDB> <i:resultDB1> ... <i:resultDBn>",
                CITATION_MMSEQS2},
        {"splitdb",              splitdb,              &par.splitdb,              COMMAND_DB,
                "Split a mmseqs DB into multiple DBs",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:sequenceDB> <o:sequenceDB_1..N>",
                CITATION_MMSEQS2},
        {"subtractdbs",          subtractdbs,          &par.subtractdbs,          COMMAND_DB,
                "Generate a DB with entries of first DB not occurring in second DB",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:resultDBLeft> <i:resultDBRight> <o:resultDB>",
                CITATION_MMSEQS2},
        {"filterdb",             filterdb,             &par.filterDb,             COMMAND_DB,
                "Filter a DB by conditioning (regex, numerical, ...) on one of its whitespace-separated columns",
                NULL,
                "Clovis Galiez & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:resultDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"createsubdb",          createsubdb,          &par.onlyverbosity,        COMMAND_DB,
                "Create a subset of a DB from a file of IDs of entries",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:subsetFile or DB> <i:resultDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"result2profile",       result2profile,       &par.result2profile,       COMMAND_DB,
                "Compute profile and consensus DB from a prefilter, alignment or cluster DB",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                // Fixed: the target DB placeholder was missing the "i:" prefix the other DB arguments carry.
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:profileDB>",
                CITATION_MMSEQS2},
        {"result2msa",           result2msa,           &par.result2msa,           COMMAND_DB,
                "Generate MSAs for queries by locally aligning their matched targets in prefilter/alignment/cluster DB",
                NULL,
                "Martin Steinegger (martin.steinegger@mpibpc.mpg.de) & Milot Mirdita <milot@mirdita.de> & Clovis Galiez",
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:msaDB>",
                CITATION_MMSEQS2},
        {"msa2profile",          msa2profile,          &par.msa2profile,          COMMAND_DB,
                "Turns a MSA database into a MMseqs profile database",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:msaDB> <o:profileDB>",
                CITATION_MMSEQS2},
        {"result2stats",         result2stats,         &par.result2stats,         COMMAND_DB,
                "Compute statistics for each entry in a sequence, prefilter, alignment or cluster DB",
                NULL,
                "Clovis Galiez & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:statsDB>",
                CITATION_MMSEQS2},
        {"tsv2db",               tsv2db,               &par.tsv2db,               COMMAND_DB,
                "Turns a TSV file into a MMseqs database",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:tsvFile> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"result2repseq",       result2repseq,      &par.onlythreads,          COMMAND_DB,
                "Get representative sequences for a result database",
                NULL,
                "Milot Mirdita <milot@mirdita.de> & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:sequenceDB> <i:resultDB> <o:reprSeqDB>",
                CITATION_MMSEQS2},
        {"assembleresults",       assembleresult,      &par.onlyverbosity,          COMMAND_HIDDEN,
                "Extending representative sequence to the left and right side using ungapped alignments.",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:sequenceDB> <i:alnResult> <o:reprSeqDB>",
                CITATION_MMSEQS2},
// Special-purpose utilities
        {"rescorediagonal",           rescorediagonal,           &par.rescorediagonal,        COMMAND_SPECIAL,
                "Compute sequence identity for diagonal",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> <i:targetDB> <i:prefilterDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"diffseqdbs",           diffseqdbs,           &par.diff,        COMMAND_SPECIAL,
                "Find IDs of sequences kept, added and removed between two versions of sequence DB",
                // Fixed: "cunjunction" -> "conjunction" and "has been removed" -> "have been removed".
                "It creates 3 filtering files, that can be used in conjunction with \"createsubdb\" tool.\nThe first file contains the keys that have been removed from DBold to DBnew.\nThe second file maps the keys of the kept sequences from DBold to DBnew.\nThe third file contains the keys of the sequences that have been added in DBnew.",
                "Clovis Galiez & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:oldSequenceDB> <i:newSequenceDB> <o:rmSeqKeysFile> <o:keptSeqKeysFile> <o:newSeqKeysFile>",
                CITATION_MMSEQS2},
        {"concatdbs",            concatdbs,            &par.concatdbs,        COMMAND_SPECIAL,
                "Concatenate two DBs, giving new IDs to entries from second input DB",
                NULL,
                "Clovis Galiez & Martin Steinegger (martin.steinegger@mpibpc.mpg.de)",
                "<i:resultDB> <i:resultDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"summarizeresult",      summarizeresult,      &par.summarizeresult,      COMMAND_SPECIAL,
                "Extract annotations from result db",
                NULL,
                "Milot Mirdita <milot@mirdita.de> & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                // Fixed: placeholder typo "<i:resultbDB>".
                "<i:resultDB> <o:summarizedResultDB>",
                CITATION_MMSEQS2|CITATION_UNICLUST},
        {"summarizetabs",        summarizetabs,        &par.summarizetabs,        COMMAND_SPECIAL,
                // Fixed: "BAST-tab" -> "BLAST-tab".
                "Extract annotations from HHblits BLAST-tab-formatted results",
                NULL,
                "Milot Mirdita <milot@mirdita.de> & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:blastTabDB> <i:lengthFile> <o:summarizedBlastTabDB>",
                CITATION_MMSEQS2|CITATION_UNICLUST},
        {"gff2db",               gff2db,               &par.gff2ffindex,          COMMAND_SPECIAL,
                "Turn a gff3 (generic feature format) file into a gff3 DB",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:gff3File> <i:sequenceDB> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"maskbygff",            maskbygff,            &par.gff2ffindex,          COMMAND_SPECIAL,
                "X out sequence regions in a sequence DB by features in a gff3 file",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:gff3File> <i:sequenceDB> <o:sequenceDB>",
                CITATION_MMSEQS2},
        {"prefixid",             prefixid,             &par.prefixid,             COMMAND_SPECIAL,
                "For each entry in a DB prepend the entry ID to the entry itself",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:resultDB> <o:resultDB>",
                CITATION_MMSEQS2},
        {"convertkb",            convertkb,            &par.convertkb,            COMMAND_SPECIAL,
                "Convert UniProt knowledge flat file into knowledge DB for the selected column types",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                // Fixed: added the "i:" / "o:" prefixes used by the other usage strings.
                "<i:uniprotkbFile> <o:uniprotkbDB>",
                CITATION_MMSEQS2},
        {"summarizeheaders",     summarizeheaders,     &par.summarizeheaders,     COMMAND_SPECIAL,
                "Return a new summarized header DB from the UniProt headers of a cluster DB",
                NULL,
                "Milot Mirdita <milot@mirdita.de>",
                "<i:queryHeaderDB> <i:targetHeaderDB> <i:clusterDB> <o:headerDB>",
                CITATION_MMSEQS2|CITATION_UNICLUST},
        {"extractalignedregion", extractalignedregion, &par.extractalignedregion, COMMAND_SPECIAL,
                "Extract aligned sequence region",
                NULL,
                "Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:queryDB> <i:targetDB> <i:resultDB> <o:domainDB>",
                CITATION_MMSEQS2},
        {"extractdomains",       extractdomains,       &par.extractdomains,       COMMAND_SPECIAL,
                "Extract highest scoring alignment region for each sequence from BLAST-tab file",
                NULL,
                "Milot Mirdita <milot@mirdita.de> & Martin Steinegger <martin.steinegger@mpibpc.mpg.de>",
                "<i:domainDB> <i:msaDB> <o:domainDB>",
                CITATION_MMSEQS2|CITATION_UNICLUST},
        // Entry with empty description, author and usage strings.
        {"shellcompletion",      shellcompletion,      &par.empty,                COMMAND_HIDDEN,
                "",
                NULL,
                "",
                "",
                CITATION_MMSEQS2},
};
