#!/usr/bin/env perl
#
# AB-BLASTALL -- mimic the NCBI's blastall command line usage with AB-BLAST
#
# Copyright (C) 1999-2009 Warren R. Gish.  All rights reserved.
# Unlicensed use, reproduction or distribution are prohibited.
#
# THE CONTENTS OF THIS FILE ARE THE CONFIDENTIAL PROPERTY
# OF WARREN R. GISH.
#
# NOTE: this script is provided as a convenience only.  For persons interested
# in benchmarking relative performance, while an effort has been made to produce
# comparable parameter settings in an automated fashion, there are still likely
# to be differences in the parameters chosen here that could affect benchmark
# results.  For this reason, benchmarking should be undertaken with careful
# scrutiny of the parameters used, rather than blindly accepting the ones
# chosen by this script as being equivalent or even as close as could
# reasonably be attained.
#
# significant options not supported or not supported well:
#  -m : partially supported
#  -B : not supported
#  -J : not supported
#  -K : not supported
#  -L : not supported
#  -O : not supported
#  -T : not supported
#  -U : not supported
#  -Y : can't readily convert this to AB-BLAST's Y and Z parameters,
#       so it's faked
#  -X : expected to be expressed in units of bits
#
# Karlin-Dembo-Altschul "K" parameter for BLASTN is often a crude approximation
# to the value actually used but should not produce terribly different E-values.
#
# Interactions between scoring matrices and gap penalties, and various cutoff
# scores are not robust
#
#
$^W=1;
$| = 1;
require 5.004;
use strict;
use POSIX;
use FileHandle;
use File::Basename;
use Getopt::Std;
use vars qw($opt_p $opt_d $opt_i $opt_e $opt_l $opt_m $opt_o $opt_t $opt_y $opt_B $opt_F $opt_G $opt_E $opt_X
		$opt_A $opt_I $opt_q $opt_r $opt_v $opt_b $opt_c $opt_f $opt_g $opt_Q $opt_D $opt_a $opt_O
		$opt_J $opt_M $opt_P $opt_U $opt_W $opt_z $opt_K $opt_L $opt_Y $opt_S $opt_T $opt_Z
		);

my ($rc, $i, $j, @i, @stdopts, @moreopts, $cmd, $program, $db, $query, $out, $e, $cpus, $w, $t, $m, $n, $matrix, $statmethod, $filter, $wordfilter, $v, $b, $x, $gapx, $gapw, $nogaps, $lcfilter, $q, $r, $s2, $gaps2, $c, $dbgcode, $gi, $strands, $y, $z, $hitdist, $karlin_l, $karlin_k, $karlin_h, $gapkarlin_l, $gapkarlin_k, $gapkarlin_h, $sepSmax);

if (!defined $ARGV[0]) {
	my(@foo) = <DATA>;
	print @foo;
	exit 1;
}

getopts('p:d:i:e:l:m:o:t:y:A:E:F:G:I:P:X:q:r:v:b:f:g:Q:D:a:O:J:M:W:z:K:L:Y:S:T:Z:');

!$opt_l || die "Unsupported option:  -l\n";
!$opt_y || die "Unsupported option:  -y\n";
!$opt_B || die "Unsupported option:  -B\n";
!$opt_J || die "Unsupported option:  -J\n";
!$opt_K || die "Unsupported option:  -K\n";
!$opt_L || die "Unsupported option:  -L\n";
!$opt_O || die "Unsupported option:  -O\n";
!$opt_T || die "Unsupported option:  -T\n";
!$opt_Z || die "Unsupported option:  -Z\n";

use constant log2 => 0.69314718055994530941;

#
#########################################################################
# Parameters cut+pasted from the NCBI Toolbox tools/blastkar.c 2000-04-10
#########################################################################
#
my(@blosum90) = (
	[  -1,   -1,    0.3346,    0.190,      0.75],
	[  8.0,  2.0,   0.297,     0.088,      0.50],
	[  7.0,  2.0,   0.285,     0.077,      0.42],
	[  6.0,  2.0,   0.269,     0.072,      0.31],
	[ 11.0,  1.0,   0.304,     0.098,      0.52],
	[ 10.0,  1.0,   0.289,     0.072,      0.42],
	[  9.0,  1.0,   0.263,     0.040,      0.31]
	);

my(@blosum80) = (
	[   -1,   -1,   0.3430,    0.177,      0.66],
    [  8.0,  2.0,   0.308,     0.089,      0.46],
    [  7.0,  2.0,   0.295,     0.077,      0.38],
    [  6.0,  2.0,   0.271,     0.051,      0.28],
    [ 11.0,  1.0,   0.314,     0.096,      0.48],
    [ 10.0,  1.0,   0.300,     0.072,      0.39],
    [  9.0,  1.0,   0.277,     0.046,      0.30]
	);

my(@blosum62) = (
	[   -1,   -1,     0.3176,    0.134,    0.40],
    [ 11.0,  2.0,     0.297,     0.082,    0.27],
    [ 10.0,  2.0,     0.291,     0.075,    0.23],
    [  9.0,  2.0,     0.279,     0.058,    0.19],
    [  8.0,  2.0,     0.264,     0.045,    0.15],
    [  7.0,  2.0,     0.239,     0.027,    0.10],
    [ 13.0,  1.0,     0.292,     0.071,    0.23],
    [ 12.0,  1.0,     0.283,     0.059,    0.19],
    [ 11.0,  1.0,     0.267,     0.041,    0.14],
    [ 10.0,  1.0,     0.243,     0.024,    0.10],
    [  9.0,  1.0,     0.206,     0.010,    0.052]
	);

my(@blosum50) = (
	[  -1,   -1,     0.232,     0.11,       0.34],
    [12.0,  3.0,     0.206,     0.055,      0.23],
    [11.0,  3.0,     0.198,     0.046,      0.20],
    [10.0,  3.0,     0.189,     0.038,      0.17],
    [ 9.0,  3.0,     0.177,     0.030,      0.14],
    [15.0,  2.0,     0.211,     0.062,      0.25],
    [14.0,  2.0,     0.205,     0.053,      0.22],
    [13.0,  2.0,     0.197,     0.043,      0.19],
    [12.0,  2.0,     0.183,     0.028,      0.15],
    [18.0,  1.0,     0.208,     0.055,      0.23],
    [17.0,  1.0,     0.200,     0.042,      0.20],
    [16.0,  1.0,     0.189,     0.030,      0.17],
    [15.0,  1.0,     0.175,     0.020,      0.13]
	);

my(@blosum45) = (
	[  -1,   -1,     0.2291,    0.092,      0.25],
    [13.0,  3.0,     0.209,     0.057,      0.19],
    [12.0,  3.0,     0.203,     0.049,      0.17],
    [11.0,  3.0,     0.193,     0.037,      0.15],
    [10.0,  3.0,     0.182,     0.029,      0.12],
    [15.0,  2.0,     0.206,     0.049,      0.18],
    [14.0,  2.0,     0.199,     0.040,      0.16],
    [13.0,  2.0,     0.190,     0.032,      0.14],
    [12.0,  2.0,     0.177,     0.023,      0.11],
    [19.0,  1.0,     0.209,     0.049,      0.19],
    [18.0,  1.0,     0.202,     0.041,      0.17],
    [17.0,  1.0,     0.195,     0.034,      0.14],
    [16.0,  1.0,     0.183,     0.024,      0.12]
	);


my(@pam30) = (
	[ -1,   -1,     0.340,     0.283,      1.75],
    [5.0,  3.0,     0.301,     0.14,       1.06],
    [4.0,  3.0,     0.286,     0.12,       0.85],
    [3.0,  3.0,     0.259,     0.081,      0.61],
    [7.0,  2.0,     0.306,     0.15,       1.07],
    [6.0,  2.0,     0.292,     0.13,       0.86],
    [5.0,  2.0,     0.263,     0.077,      0.60],
    [10.0, 1.0,     0.309,     0.15,       1.07],
    [9.0,  1.0,     0.295,     0.12,       0.85],
    [8.0,  1.0,     0.270,     0.070,      0.59]
	);

my(@pam70) = (
	[ -1,   -1,     0.3345,    0.229,      1.03],
    [6.0,  3.0,     0.297,     0.11,       0.67],
    [5.0,  3.0,     0.285,     0.10,       0.55],
    [4.0,  3.0,     0.258,     0.062,      0.40],
    [8.0,  2.0,     0.303,     0.13,       0.67],
    [7.0,  2.0,     0.287,     0.095,      0.56],
    [6.0,  2.0,     0.269,     0.079,      0.42],
    [11.0, 1.0,     0.307,     0.13,       0.70],
    [10.0, 1.0,     0.291,     0.089,      0.57],
    [9.0,  1.0,     0.269,     0.058,      0.42]
	);

my(@pam250) = (
	[  -1,   -1,     0.229,     0.09,       0.23],
    [13.0,  3.0,     0.207,     0.051,      0.17],
    [12.0,  3.0,     0.200,     0.043,      0.15],
    [11.0,  3.0,     0.191,     0.034,      0.13],
    [10.0,  3.0,     0.181,     0.028,      0.11],
    [15.0,  2.0,     0.203,     0.043,      0.16],
    [14.0,  2.0,     0.196,     0.036,      0.14],
    [13.0,  2.0,     0.188,     0.030,      0.12],
    [12.0,  2.0,     0.175,     0.022,      0.10],
    [19.0,  1.0,     0.208,     0.049,      0.17],
    [18.0,  1.0,     0.202,     0.040,      0.15],
    [17.0,  1.0,     0.194,     0.034,      0.13],
    [16.0,  1.0,     0.180,     0.021,      0.10]
	);

my($H) = {
	"blosum90" => [@blosum90],
	"blosum80" => [@blosum80],
	"blosum62" => [@blosum62],
	"blosum50" => [@blosum50],
	"blosum45" => [@blosum45],
	"pam30"    => [@pam30],
	"pam70"    => [@pam70],
	"pam250"   => [@pam250]
	};

if (defined $opt_G) {
	if ($opt_G == 0) {
		undef $opt_G;
	}
}
if (defined $opt_E) {
	if ($opt_E == 0) {
		undef $opt_E;
	}
}
if (defined $opt_X) {
	if ($opt_X == 0) {
		undef $opt_X;
	}
}
if (defined $opt_y) {
	if ($opt_y == 0) {
		undef $opt_y;
	}
}
if (defined $opt_Z) {
	if ($opt_Z == 0) {
		undef $opt_Z;
	}
}
if (defined $opt_c) {
	if ($opt_c == 0) {
		undef $opt_c;
	}
}
if (defined $opt_t) {
	if ($opt_t == 0) {
		undef $opt_t;
	}
}
if (defined $opt_f) {
	if ($opt_f == 0) {
		undef $opt_f;
	}
}
if (defined $opt_W) {
	if ($opt_W == 0) {
		undef $opt_W;
	}
}
if (defined $opt_z) {
	if ($opt_z == 0) {
		undef $opt_z;
	}
}
if (defined $opt_Y) {
	if ($opt_Y == 0) {
		undef $opt_Y;
	}
}
if (defined $opt_A) {
	if ($opt_A == 0) {
		undef $opt_A;
	}
}
if (defined $opt_m) {
	if ($opt_m == 0) {
		$opt_m = " mformat=1";
	}
	else {
		if ($opt_m == 8) {
			$opt_m = " mformat=2";
		}
		else {
			if ($opt_m == 9) {
				$opt_m = " mformat=3";
			}
			else {
				if ($opt_m == 7) {
					$opt_m = " mformat=7";
				}
				else {
					die "Unsupported format:  -m$opt_m\n";
				}
			}
		}
	}
}
else {
	$opt_m = "";
}

$program = $opt_p ? $opt_p : die "[ab-blastall] ERROR: Program Name was not given an argument (-p option missing)\n";
$program = "\L$program";
$filter = 'filter=seg';
$wordfilter = '';
if (defined $opt_P) {
	die "[ab-blastall] ERROR: unsupported setting for -P option:  ${opt_P}\n" if ($opt_P != 0 && $opt_P != 1);
}


$statmethod = '';
if ($program eq 'blastn') {
	$statmethod = 'kap';
	$filter = 'filter=dust';
	$w = 'W=11';
	$m = "1";
	$n = "-3";
	$q = '7';
	$r = '2';
	$s2 = 26;
	$gaps2 = 40;
	$x = 20; # bits
	$gapx = 30; # bits
	$hitdist = 0;
	if (defined $opt_A) {
		$hitdist = $opt_A if ($opt_A > 0);
	}
}
else {
	die "[ab-blastall] ERROR: Invalid program name argument to -p option:  $program\n"
		if ($program ne 'blastp' && $program ne 'blastx' && $program ne 'tblastn' && $program ne 'tblastx');

	if (defined $opt_M && $opt_M ne "") {
		$opt_M = &quoteopt($opt_M);
	}
	$opt_M = $opt_M ? $opt_M : "blosum62";
	$w = 'W=3';
	$m='';
	$n='';
	$q = '12';
	$r = '1';
	$s2 = 22;
	$gaps2 = 31;
	$x = 7; # bits
	$gapx = 15; # bits
	$hitdist = 40;
	if ($opt_M =~ /^blosum45$/i) {
		$hitdist = 60;
		$q = 16;
		$r = 2;
	}
	if ($opt_M =~ /^blosum50$/i) {
		$q = 15;
		$r = 2;
	}
	if ($opt_M =~ /^blosum80$/i) {
		$hitdist = 25;
		$q = 11;
		$r = 1;
	}
	if ($opt_M =~ /^blosum90$/i) {
		$q = 11;
		$r = 1;
	}
	if ($opt_M =~ /^pam30$/i) {
		$hitdist = 15;
		$q = 10;
		$r = 1;
	}
	if ($opt_M =~ /^pam70$/i) {
		$hitdist = 20;
		$q = 11;
		$r = 1;
	}
	if ($opt_M =~ /^pam250$/i) {
		$q = 16;
		$r = 2;
	}
	if (defined $opt_A) {
		$hitdist = $opt_A if ($opt_A > 0);
	}
}
if (defined $opt_P) {
	if ($opt_P == 1) {
		$hitdist = 0;
	}
	if ($opt_P == 0 && !defined $opt_A) {
		$hitdist = 40;
	}
}
if ($hitdist > 0) {
	$hitdist = "hitdist=${hitdist}";
}
else {
	$hitdist = '';
}

$lcfilter = '';
$nogaps = '';
if ($program eq 'tblastx') {
	$nogaps = '-nogaps';
}
if ($program eq 'blastp') {
	$statmethod = 'kap';
}

$sepSmax="";
if (defined $opt_t) {
	if ($opt_t == 0) {
		undef $opt_t;
	}
}
if (defined $opt_t) {
	if ($program ne 'tblastn') {
		printf STDERR "\nERROR:  The -t option is not supported by NCBI blastall except in the tblastn search mode.\n";
		printf STDERR "With AB-BLAST, the hspsepqmax and hspsepsmax options may be used in any\n";
		printf STDERR "of the search modes; and these options may be appended to an otherwise typical\n";
		printf STDERR "ab-blastall command line instead of using -t.\n";
		exit 1;
	}
	$sepSmax="hspsepSmax=${opt_t}";
}

if (defined $opt_d && $opt_d ne "") {
	$opt_d = &quoteopt($opt_d);
}
$db = $opt_d ? $opt_d : "nr";

if (defined $opt_i && $opt_i ne "") {
	$opt_i = &quoteopt($opt_i);
}
$query = $opt_i ? $opt_i : "-"; # stdin default

$e = $opt_e ? "E=$opt_e" : '';
$v = '';
if (defined $opt_v) {
	$v = $opt_v ? "V=$opt_v" : 'V=0';
}
$b = '';
if (defined $opt_b) {
	$b = $opt_b ? "B=$opt_b" : 'B=0';
}
$w = $opt_W ? "W=$opt_W" : $w;
if (defined $opt_f && $program ne 'blastn') {
	$t = "T=$opt_f";
}
else {
	$t = "";
}
$m = $opt_r ? "$opt_r" : $m;
$n = $opt_q ? "$opt_q" : $n;
$cpus = $opt_a ? "cpus=$opt_a" : 'cpus=1';
$matrix = $opt_M ? "matrix=$opt_M" : '';
$c = $opt_Q ? "c=$opt_Q" : '';
$dbgcode = $opt_D ? "dbgcode=$opt_D" : '';
$gapx = $opt_X ? $opt_X : $gapx;
$s2 = $opt_c ? "$opt_c" : $s2;
$z = $opt_z ? "z=$opt_z" : '';
$y = '';
if ($opt_Y) {
	if ($opt_z) {
		$y = $opt_Y / $opt_z;
		$y = "y=$y";
	}
	else {
		$z = $opt_Y / 1000.;
		$z = "z=$z";
		$y = "y=1000";
	}
}
$strands = '';
if ($opt_S) {
	if ($opt_S & 1) {
		$strands = 'top';
	}
	if ($opt_S & 2) {
		$strands = 'bottom';
	}
}

#
# Set gap penalities
#
if ($opt_G) {
	if ($opt_E) {
		$r = $opt_E;
	}
	$q = $opt_G + $r;
}
else {
	if ($opt_E) {
		$q = $q - $r + $opt_E;
		$r = $opt_E;
	}
}
$opt_G = $q - $r;
$opt_E = $r;
$q = "Q=$q";
$r = "R=$r";

if ($opt_M) {
	($karlin_l, $karlin_k, $karlin_h) = ($gapkarlin_l, $gapkarlin_k, $gapkarlin_h) = &GetKarlinParms($H, $opt_M);
}
else {
	($karlin_l, $karlin_k, $karlin_h) = ($gapkarlin_l, $gapkarlin_k, $gapkarlin_h) = &ComputeKarlinParms($m, $n);
	$m = "M=$m";
	$n = "N=$n";
}

if ($opt_M) {
	($gapkarlin_l, $gapkarlin_k, $gapkarlin_h) = &GetKarlinGapParms($H, $opt_M, $opt_G, $opt_E);
}

$s2 = int((log2 * $s2 + log($karlin_k)) / $karlin_l);

if ($program eq 'blastn') {
	$s2 += 2;
}
$s2 = $opt_c ? "$opt_c" : $s2;
$s2 = "S2=$s2";
#$gaps2 = int((log2 * $gaps2 + log($gapkarlin_k)) / $gapkarlin_l);
$gaps2 = int((log2 * $gaps2 + log($gapkarlin_k)) / $gapkarlin_l);
$gaps2 = "gapS2=$gaps2";

$x = log2 * $x / $karlin_l;
#sheesh!
if ($program eq "blastn") {
	$x = int($x);
}
else {
	$x = ceil($x);
}
$x = "X=$x";
$gapx = int(log2 * $gapx / $gapkarlin_l);
$i = $gapx - $opt_G - $opt_E;
$i = $gapx if ($i <= 0);
$i = $i / $opt_E;
$i = 5 if ($i < 5);
if ($program eq 'blastn') {
	$i += 1;
	$i = int($i * 2);
}
$gapw = "gapW=$i";
$gapx = "gapX=$gapx";

$gapkarlin_l = $gapkarlin_l ? "gapL=$gapkarlin_l" : '';
$gapkarlin_k = $gapkarlin_k ? "gapK=$gapkarlin_k" : '';
$gapkarlin_h = $gapkarlin_h ? "gapH=$gapkarlin_h" : '';

if ($opt_F) {
	@i = split / /, $opt_F;
	$i = shift @i;
	if ($#i < 0) {
		if ($opt_F =~ /^f/i) {
			$filter = '';
		}
		else {
			if ($opt_F =~ /^t/i) {
			}
			else {
				if ($opt_F =~ /^d/i) {
					$filter = 'filter=dust';
				}
				else {
					if ($opt_F =~ /^s/i) {
						$filter = 'filter=seg';
					}
					else {
						if ($opt_F =~ /^x/i) {
							$filter = 'filter=xnu';
						}
						else {
BadF:
							die "Unrecognized or unsupported filter option:  -F$opt_F\n";
						}
					}
				}
			}
		}
	}
	else {
		$j = shift @i;
		if ($#i >= 0) {
			goto BadF;
		}
		if ($i !~ /^m$/i) {
			goto BadF;
		}
		if ($j =~ /^d/i) {
			$wordfilter = "dust";
		}
		else {
			if ($j =~ /^s/i) {
				$wordfilter = "seg";
			}
			else {
				if ($j =~ /^x/i) {
					$wordfilter = "xnu";
				}
			}
		}
		if ("$wordfilter" eq "") {
			goto BadF;
		}
		$wordfilter = "wordfilter=" . $wordfilter;
	}
}
if ($opt_g && $opt_g =~ /^f/i) {
	$nogaps = '-nogaps';
}
if ($opt_g && $opt_g =~ /^t/i) {
	$nogaps = '';
}
if ($opt_U && $opt_U =~ /^f/i) {
	$lcfilter = '';
}
if ($opt_U && $opt_U =~ /^t/i) {
	$lcfilter = 'lcfilter';
}

$gi = '';
if ($opt_I && $opt_I =~ /^t/i) {
	$gi = '-gi';
}

$out = '';
if (defined $opt_o && $opt_o ne "") {
	$opt_o = &quoteopt($opt_o);
	$out = "-o $opt_o";
}

@stdopts = qw( -novalidctxok -nonnegok -gapall -restest );

@moreopts = "$out $e $q $r $statmethod $cpus $filter $wordfilter $v $b $nogaps $lcfilter $matrix $m $n $w $t $c $dbgcode $s2 $gaps2 $x $gapx $gapw $y $z $hitdist $gi $strands $gapkarlin_l $gapkarlin_k $gapkarlin_h $sepSmax $opt_m";

my($dir) = dirname($0);
if (-x "$dir/$program") {
	$program = $dir . "/" . $program;
}

if (!defined($ENV{'BLASTFILTER'}) && !defined($ENV{'WUBLASTFILTER'}) && !defined($ENV{'ABBLASTFILTER'})) {
	if (-x "$dir/filter") {
		$ENV{'BLASTFILTER'} = $dir . "/filter:.";
	}
}

if (!defined($ENV{'BLASTMAT'}) && !defined($ENV{'WUBLASTMAT'}) && !defined($ENV{'ABBLASTMAT'})) {
	if (-x "$dir/matrix") {
		$ENV{'BLASTMAT'} = "/usr/ncbi/blast/matrix:" . $dir . "/matrix:.";
	}
}

$cmd = "$program $db $query @stdopts @moreopts";

exit system("$cmd @ARGV");



sub GetKarlinParms{
	my($H, $matrix) = @_;

	return ($H->{$matrix}[0][2], $H->{$matrix}[0][3], $H->{$matrix}[0][4]);
}

sub GetKarlinGapParms{
	my($H, $matrix, $gapopen, $gapext) = @_;
	my($i, $j);

	for ($i = 1; $i < 20; ++$i) {
		if (!defined $H->{$matrix}[$i][0]) {
			goto Notfound;
		}
		if ($gapopen == $H->{$matrix}[$i][0]) {
			if ($gapext == $H->{$matrix}[$i][1]) {
				goto Found;
			}
		}
	}
Notfound:
	die "Karlin-Altschul parameters were not found for the specified scoring system.\n";

Found:
	return ($H->{$matrix}[$i][2], $H->{$matrix}[$i][3], $H->{$matrix}[$i][4]);
}


sub ComputeKarlinParms{
	my($m, $n) = @_;
	my($lambda, $k, $h);

	$lambda = ComputeLambda($m, $n);
	$h = LambdaToH($lambda, $m, $n);
	$k = EstimateK($m, $n);

	return ($lambda, $k, $h);
}

#
# valid for DNA alphabet only, with equiprobable 25% ACGT distribution
#
sub ComputeLambda{
	my($m, $n) = @_;
	my($lambda, $sum);
	my($lo, $hi);

	$lo = 0.;
	$hi = 0.5;
	for (;;) {
		$sum = ComputeSum($hi, $m, $n);
		last if ($sum > 1);
		$lo = $hi;
		$hi *= 2;
	}

	if ($lo == 0) {
		$lo = $hi / 2;
		for (;;) {
			$sum = ComputeSum($lo, $m, $n);
			last if ($sum <= 1);
			$hi = $lo;
			$lo /= 2;
		}
	}

	while ($hi / $lo > 1.00001) {
		$lambda = ($lo + $hi) / 2.;
		$sum = ComputeSum($lambda, $m, $n);
		if ($sum > 1) {
			$hi = $lambda;
		}
		else {
			$lo = $lambda;
		}
	}

	if ($lambda > 0.1) {
		$lambda = int($lambda * 10000. + 0.5);
		$lambda = $lambda / 10000.;
	}
	return $lambda;
}

sub ComputeSum{
	my($lambda, $m, $n) = @_;
	my($sum);

	$sum = 0.25 * exp($m * $lambda);
	$sum += 0.75 * exp($n * $lambda);
	return $sum;
}

sub LambdaToH{
	my($lambda, $m, $n) = @_;
	my($sum, $h);

	$sum = 0.25 * $m * exp($lambda * $m);
	$sum += 0.75 * $n * exp($lambda * $n);

	$h = $sum * $lambda;
	if ($h > 0.1) {
		$h =  int($h * 10000. + 0.5);
		$h = $h / 10000.;
	}
}

sub EstimateK{
	my($m, $n) = @_;
	my($ratio, $k, $i, $hi, $lo, $tot);
	my(@ratios) = (
			0.2,	0.333333333,	0.5,
			0.666666667,	0.75,	1.0,
			1.25,	2.,	2.5
			);
	my(@kvals) = (
			0.747,	0.711,	0.621,
			0.418,	0.346,	0.333,
			0.173,	0.0532,	0.0149
			);

	$ratio = abs($m / $n);
	if ($ratio < $ratios[0]) {
		return 0.75;
	}

	for ($i = 1; defined $ratios[$i]; ++$i) {
		if ($ratios[$i] > $ratio && $ratios[$i - 1] <= $ratio) {
			goto Found;
		}
	}
	return $kvals[$i - 1];

Found:
# lousy linear interpolation -- fortunately K need not be very precise
	$lo = $ratio - $ratios[$i - 1];
	if ($lo == 0) {
		return $kvals[$i - 1];
	}
	$hi = $ratios[$i] - $ratio;
	if ($hi == 0) {
		return $kvals[$i];
	}
	$tot = $hi + $lo;

	$k = $kvals[$i - 1] * $hi;
	$k += $kvals[$i] * $lo;
	$k /= $tot;

	if ($k > 0.1) {
		$k = int($k * 10000. + 0.5);
		$k /= 10000.;
	}
	return $k;
}

# quoteopt -- quote a command line argument that needs to be quoted
sub quoteopt {
	my($opt) = @_;

	if ("$opt" =~ /[ 	]/) {
		$opt = "\"" . "$opt" . "\"";
	}

	return $opt;
}


__DATA__
AB-BLASTALL [2009-01-22] -- mimic the command line of the NCBI blastall program

Copyright (C) 2009 Warren R. Gish.  All rights reserved.
Unlicensed use, reproduction or distribution are prohibited.

ab-blastall 2.2.6 arguments:

  -p  Program Name [String]
  -d  Database [String]
    default = nr
  -i  Query File [File In]
    default = stdin
  -e  Expectation value (E) [Real]
    default = 10.0
  -m  alignment view options:
0 = pairwise,
7 = xml, NCBI DTD
8 = tabular,
9 = tabular with comment lines
    default = 0
  -o  BLAST report Output File [File Out]  Optional
    default = stdout
  -F  Filter query sequence (DUST with blastn, SEG with others) [String]
    default = T
  -G  Cost to open a gap (zero invokes default behavior) [Integer]
    default = 0
  -E  Cost to extend a gap (zero invokes default behavior) [Integer]
    default = 0
  -X  X dropoff value for gapped alignment (in bits) (zero invokes default behavior) [Integer]
    default = 0
  -I  Show GI's in deflines [T/F]
    default = F
  -q  Penalty for a nucleotide mismatch (blastn only) [Integer]
    default = -3
  -r  Reward for a nucleotide match (blastn only) [Integer]
    default = 1
  -v  Number of one-line descriptions (V) [Integer]
    default = 500
  -b  Number of alignments to show (B) [Integer]
    default = 250
  -f  Threshold for extending hits, default if zero [Integer]
    default = 0
  -g  Perform gapped alignment (not available with tblastx) [T/F]
    default = T
  -Q  Query Genetic code to use [Integer]
    default = 1
  -D  DB Genetic code (for tblast[nx] only) [Integer]
    default = 1
  -a  Number of processors to use [Integer]
    default = 1
  -M  Matrix [String]
    default = BLOSUM62
  -W  Word size, default if zero [Integer]
    default = 0
  -z  Effective length of the database (use zero for the real size) [Real]
    default = 0
  -P  0 for multiple hits 1-pass, 1 for single hit 1-pass [Integer]
    default = 0
  -Y  Effective length of the search space (use zero for the real size) [Real]
    default = 0
  -S  Query strands to search against database (for blast[nx], and tblastx).  3 is both, 1 is top, 2 is bottom [Integer]
    default = 3
  -U  Use lower case filtering of FASTA sequence [T/F]  Optional
    default = F
  -A  Multiple Hits window size, default if zero (blastn 0, all others 40 [Integer])
    default = 0
  -t  Length of the largest intron allowed in tblastn for linking HSPs (0 disables linking) [Integer]
    default = 0

 For proper command line parsing, boolean options MUST be specified
 with an F or T argument.

WARNING:  THIS PERL SCRIPT IS PROVIDED FOR DEMONSTRATION PURPOSES ONLY.
          IT SHOULD NOT BE USED FOR PRODUCTION OR PUBLICATION QUALITY WORK,
          AS A LITERAL REPLACEMENT FOR THE NCBI blastall PROGRAM, OR FOR
          BENCHMARKING.  FOR INFORMATION ABOUT AB-BLAST USAGE, PLEASE SEE:
          http://blast.advbiocomp.com/doc/parameters.html
