#!/usr/bin/perl
use strict;
use warnings;
# push @INC,'.';
# require "traceback.pl"; # handy for debugging, but I'm not using it now
# This program calculates binomial probabilities (or, optionally, just the binomial coefficient).
# Unlike most binomial calculators, this one goes to considerable effort to avoid overflow and
# underflow problems, so it works correctly when most others fail.
# Copyright 2021, by David A. Burton
# Cary, NC USA
# Mobile / WhatsApp: +1 919-244-3316
# Email: https://sealevel.info/contact.html
# This is free, open source software, but it is copyrighted. You are welcome to use it, in whole
# or in part, for any purpose. However, if you do so, I require that you retain this notice, or
# a substantially equivalent notice, in copies or substantial excerpts of the program.
# TLIB Version Control fills in the version information for us:
our $version_str;
#--=>keyflag<=-- "&(#)%n, version %v, %11d "
$version_str = "&(#)binomcalc.cgi, version 93, 07-May-2025 ";
our $trimmed_version_str;
if ($version_str =~ /, (version [0-9]+, [0-9]*\-[a-zA-Z]{3}\-[0-9]+) /) {
$trimmed_version_str = "$1";
} else {
$trimmed_version_str = '';
}
$| = 1;
# We would need cgi.pm for "user_agent" lookup, and some convenience functions, like header(), param(), etc.
# But I'm not using any of those.
# use CGI ':standard';
# Example of usage:
#
# our $Inf = 2e262144; # Scalar floating-point infinity
our $POS_NORM_SMALLEST = 2.22507385850720138e-308; # smallest possible normalized positive IEEE 64-bit float value
our $POS_DENORM_SMALLEST = 4.94065645841247e-324; # smallest possible denormalized (subnormal) positive IEEE 64-bit float value
use Scalar::Util qw(blessed isdual reftype tainted isvstring looks_like_number set_prototype);
use Time::HiRes qw(time sleep);
# use lib '/root/perl5/lib/perl5/x86_64-linux-thread-multi/Math';
use lib '/root/perl5/lib/perl5'; # for GMP
use Math::GMP; # Do this so it'll fail if GMP's missing (otherwise it falls back to the slow lib). If it fails try installing: "cpan Math::GMP" "cpan Math::BigInt::GMP"
use Math::BigInt lib => 'GMP';
use Math::BigFloat lib => 'GMP';
use Math::BigRat lib => 'GMP';
# use bignum; <--- Do not do this! (My code expects scalars by default.)
# Math::BigFloat->accuracy(500); <--- I'll do this further down (number of digits is now adjustable)
# printf(" Bigfloat version = " . $Math::BigFloat::VERSION . "\n" );
BEGIN {
# Polyfill for missing nparts method prior to BigFloat version 1.999808 (circa 2017)
if (!exists &Math::BigFloat::nparts) {
*Math::BigFloat::nparts = sub {
my $x = shift;
my ($mantis, $exp) = $x->parts(); # both are BigInts
my $len = $mantis->length(); # number of digits in the mantissa
$exp += ($len-1);
# $mantis = Math::BigFloat->new($mantis) / (10**($len-1)); <== this fails for large $len, due to overflow
$mantis = Math::BigFloat->new($mantis) / (Math::BigFloat->new(10)**($len-1));
return ($mantis,$exp);
};
}
}
our $dbg = 0; # default
our $echo_cmdline = 1; # normally we display command line, but v=0 can suppress it
our $vrblx; # temp value used by &dspl()
our $typefunc_avail = 0; # needed by sub dspl(), and set to 1 if we successfully load type() from autobox
our $in_midst_of_generating_acu_table = 0; # if
is being generated, this will be 1 (so we can close the table on a timeout)
# What is the name of this program file?
our $scriptname = $0;
if ($scriptname =~ /([^\/\\\:]+)$/) {
$scriptname = $1; # just the file name, not the directory
}
our $scriptdotpl = $scriptname; # this script name, which could end in .pl or .cgi
$scriptdotpl =~ s/\.cgi$/.pl/; # same, except always ends in .pl
our $parameters1 = $ENV{'QUERY_STRING'}; # when invoked via server-side include
our $parameters2 = join(' ',@ARGV); # when run standaline, at a command line (any OS)
our $cgi_context;
our $parameters;
if ($parameters2) {
$cgi_context = 0;
$parameters = ' ' . $parameters2; # from @ARGV, so it must be at a command prompt
} elsif ($parameters1) {
$cgi_context = 1;
$parameters = ' ' . $parameters1; # from $ENV{'QUERY_STRING'}, so it's probably CGI
} else {
# no parameters, so is it in CGI context, or not?
$cgi_context = ($scriptname =~ /\.cgi$/); # best guess is from the filename extension: .pl or .cgi
$parameters = '';
}
# The parameters will be processed further down, for the most part, but here we
# take a sneek peek at two of them: v={verbosity} and o={outputmode}.
our $p_v = ''; # verbosity (debug level)
if ($parameters =~ /[ \-\&]v=([0-9\-]+)/) {
$p_v = $1;
$dbg = 0 + $p_v;
if ($dbg <= 0) {
$echo_cmdline = 0; # explicit v=0 supresses echoing the command line
}
}
# The o=... parameter is optional, for overriding the output mode:
our $p_o = '';
# h = html / cgi, t = plain text, p = block
if ($parameters =~ /[ \-\&]o=([htp])/) {
$p_o = $1;
$cgi_context = ('h' eq $1);
}
# In a cgi context, we have to emit this magic line, before we can write anything else.
if ($cgi_context || ('p' eq $p_o)) {
print "Content-Type: text/html\n\n"; # in case it's running as a .cgi script
}
if ($dbg>0) {
&wprint( "$scriptname $trimmed_version_str\n" );
&wprint( "OS = '$^O'\n" ); # What OS is this?
&wprint( "Perl version " . $] . " = " . $^V . "\n" );
&wprint( 'Math::BigFloat->VERSION = ' . Math::BigFloat->VERSION . "\n" );
&wprint( 'Math::BigInt::GMP->VERSION = ' . Math::BigInt::GMP->VERSION . "\n" );
} elsif ($cgi_context) {
print( "\n\n" );
}
# Because binomial probability calculations can be very computationally intensive,
# we need to ensure that we don't end up with too many instances running, lest it
# monopolize my little web server. (I don't bother with this if running on the
# desktop.)
our $num_server_instances = 0;
if (($^O =~ /linux/i) && ($scriptname =~ /\.cgi$/)) {
$num_server_instances = 1;
# my $tmp = `pgrep -a binomcalc\\.cgi`;
# &wprint("PGREP output:\n$tmp\n-----------\n");
my $tmpcount = `pgrep -c binomcalc\\.cgi`;
$num_server_instances = 0 + $tmpcount;
if ($dbg>0) {
&wprint("Instance count = '$num_server_instances'\n");
}
}
if ($num_server_instances > 2) {
# The usual case is one instance running (this process).
# My server has four cores, so I allow at most two instances, to avoid bogging it down too much.
print "ERROR: Server busy. Wait a few minutes and try again.
\n" .
'(Or download binomcalc.pl,' .
" and run it on your own computer.)
\n";
exit 1;
}
if ((!$parameters) && !$cgi_context) {
# no parameters specified -- print help message and quit
&givehelp("** ERROR: no parameters were specified to $scriptname");
exit 1;
}
if ($dbg >= 1) {
&wprintf("dbg: QUERY_STRING='%s', ARGV='%s'\n", (defined $parameters1) ? $parameters1 : 'UNDEF', (defined $parameters2) ? $parameters2 : 'UNDEF' );
}
# define type() for debugging:
if ($dbg>0) {
# use autobox::universal qw(type);
if (eval {require autobox::universal;}) {
autobox::universal->import('type');
$typefunc_avail = 1;
} else {
if ($cgi_context) {
print 'Warning: Cannot locate autoboxx/universal.pm in @INC. You may need to install autoboxx, like this:' . "
\n" .
"cpan autobox::universal\n
\n";
} else {
print 'Warning: Cannot locate autoboxx/universal.pm in @INC (you may need to install autoboxx)' . "\n" .
"In Strawberry Perl you can get autobox like this:\n" .
" 1. from a Windows Administrator (elevated) command prompt...\n" .
" 2. c:\n" .
" 3. cd \\strawberry\\perl\\bin\n" .
" 4. cpan autobox::universal\n\n";
}
eval("sub type { return '?';}");
}
}
# for benchmarking; the only parameter is an arbitrary name
our %runtimes = ();
our %runtime_cnt = ();
our %interval_started_at = ();
# Call this at start of timing interval. (If it's called more than once, the 2nd call just increments the runtime_cnt.)
sub timeit_start {
my $name = shift;
if (defined $interval_started_at{$name}) {
$runtime_cnt{$name} += 1;
} else {
$interval_started_at{$name} = time();
if (!exists $runtime_cnt{$name}) {
$runtime_cnt{$name} = 0;
$runtimes{$name} = 0.0;
}
}
}
# Call this at the end of the timing interval.
sub timeit_end {
my $name = shift;
if (!exists $interval_started_at{$name}) {
die "ERR: timeit_end('$name'), but timeit_start wasn't called.";
}
my $time_increment = time() - $interval_started_at{$name};
delete $interval_started_at{$name};
$runtimes{$name} += $time_increment;
$runtime_cnt{$name} += 1;
}
# Call this to reset timer (forget previous timings).
sub timeit_reset {
my $name = shift;
delete $interval_started_at{$name};
delete $runtimes{$name};
delete $runtime_cnt{$name};
}
# If a name is specified, display the timing stats for that name;
# otherwise, display them all.
sub timeit_display {
my $name;
if ($#_ >= 0) {
$name = shift;
if (exists $interval_started_at{$name}) {
&timeit_end($name);
}
&wprintf("Time %s, %f / %d = %f each\n", $name, $runtimes{$name}, $runtime_cnt{$name}, ($runtimes{$name} / $runtime_cnt{$name}) );
} else {
foreach $name (sort keys %runtimes) {
&timeit_display($name);
}
}
}
# return a description of the input paramter (for my debug prints)
sub dspl {
local($vrblx) = $_[0];
my( $result, $tp, $rp );
if (!defined $vrblx) {
$result = 'UNDEF';
if ($dbg > 2) {
&wprint( "dbg: dspl(UNDEF)" );
}
} else {
$tp = '';
$rp = '';
if ($typefunc_avail) {
$tp = &type($vrblx);
if (('STRING' eq $tp) and looks_like_number($vrblx)) {
if ($vrblx =~ /^(\-|)[0-9]+$/) {
$tp = 'Integer';
} else {
$tp = 'Float';
}
}
}
if (('' eq $tp) or ('HASH' eq $tp)) {
$rp = ref($vrblx);
}
if ($dbg > 2) {
&wprint( "dbg: dspl(), type=$tp, ref=$rp,\n" );
}
if (('' eq $rp) && ('' eq $tp)) {
$result = "$vrblx";
} elsif ('' ne $rp) {
$result = "$rp $vrblx";
} elsif ('STRING' eq $tp) {
$result = "$tp '$vrblx'";
} else {
$result = "$tp $vrblx";
}
}
if ($dbg > 2) {
&wprint( " result='$result'\n" );
}
return $result;
}
# v= and o= have already been handled; here we process the rest of the parameters.
our ($p_n, $p_k, $p_p, $p_m, $p_a, $p_t, $p_d, $p_b);
$p_p = "0.5"; # default
$p_m = 'b'; # default
$p_a = 'q'; # default
our $accuracy = 500; # default is 500 digits except for a=q, which is about 14 digits (note: BigFloat default is 40)
our $effective_accuracy = $accuracy;
sub process1param {
my ($opt,$val) = @_;
# &wprintf(" opt='%s' val='%s'\n", $opt, $val);
if ('n' eq $opt) {
if ($val =~ /^([1-9][0-9]*)$/) {
$p_n = $1;
} else {
&wprintf("ERROR: 'n=%s' (should be a positive integer)\n", $val);
exit 1;
}
} elsif ('k' eq $opt) {
if ($val =~ /^([0-9]+)$/) {
$p_k = $1;
} else {
&wprintf("ERROR: 'k=%s' (should be 0..n)\n", $val);
exit 1;
}
} elsif ('p' eq $opt) {
if ($val =~ /^([0-9\/\.eE\-\+]+)$/) {
# if it contains a / then it's a rational number; the e, E, -, and + are so that it can be in scientific notation
$p_p = $1;
} else {
&wprintf("ERROR: 'p=%s' (should be 0..1)\n", $val);
exit 1;
}
} elsif ('m' eq $opt) {
# the m=... parameter must be 'b' (default Binomial probability), 'cu' (CUmulative probability), 'acu' (All CUmulative probabilities), or 'co' (binomial COefficient)
if ($val =~ /^(b|cu|acu|co)$/i) {
$p_m = lc $1;
} else {
&wprint("ERROR: 'm=$val' (mode) parameter to $scriptname may only be b, cu, acu, or co.\n");
exit 1;
}
} elsif ('a' eq $opt) {
# the a=... parameter must be 's' (Slow = GMP BigRat), 'q' (Quick = scalar float = default), 'h' (Hybrid), 'x' (eXperimental = BigFloat), or 'xx' (also eXperimental)
if ($val =~ /^(x(x|)|s|q|h)$/) {
$p_a = lc $1;
} else {
&wprintf("ERROR: 'a=$val' (accuracy) parameter may only be q, h, s, x or xx.\n");
exit 1;
}
} elsif ('t' eq $opt) {
# the t=... (timeout) parameter is optional
if ($val =~ /^([0-9\.]*)$/) {
$p_t = $1;
} else {
&wprint("ERROR: 't=$val' (timeout) parameter must be numberic (max number of seconds).\n");
exit 1;
}
} elsif ('d' eq $opt) {
# the d=... (digits) parameter is optional (but ignored for a=q)
if ($val =~ /^([0-9]+)$/) {
$p_d = $1;
$effective_accuracy = $accuracy = 0 + $p_d;
} else {
&wprint("ERROR: 'd=$val' (digits of precision) parameter must be a non-negative integer.\n" .
"(Default is 500 digits, except for a=q, which is about 15 digits.)\n");
exit 1;
}
} elsif ('b' eq $opt) {
# the b=1 (benchmark) parameter is optional
if ($val =~ /^([01])$/) {
$p_b = $1;
} else {
&wprint("ERROR: 'b=$val' (benchmark) parameter must be 0 or 1.\n");
exit 1;
}
} elsif (('v' eq $opt) || ('o' eq $opt)) {
# handled above
} else {
&wprintf("ERROR: unrecognized parameter to $scriptname: '%s=%s'\n", $opt, $val);
&wprint("parameters='$parameters'\n");
exit 1;
}
}
my $parm;
for $parm (split(/\s+\-|\s+|\&/, $parameters)) {
if ($dbg > 1) {
&wprint( "dbg: parm='$parm'\n");
}
if ('' ne $parm) {
if ($parm =~ /^([a-zA-Z]+)=(.+)$/) {
my $opt = lc $1;
my $val = $2;
&process1param( $opt, $val );
} else {
&wprintf("ERROR: unrecognized parameter to $scriptname: '%s'\n", $parm);
&wprint("Parameters='$parameters'\n");
exit 1;
}
}
}
# the k=... parameter is reqired
# the n=... parameter is reqired
# the p=... parameter defaults to 0.5
# the m=... parameter defaults to b
# the a=... parameter defaults to q
our $p; # Note: $p will be set waaaay down in the program
if ((!defined $p_n) && !defined $p_k) {
&givehelp('');
exit 1;
}
if ((!defined $p_n) || !defined $p_k) {
&givehelp("ERROR: the n and k parameters are required.\n");
exit 1;
}
if ($accuracy < 14) {
if ($accuracy < 1) {
$effective_accuracy = $accuracy = 1;
}
&wprintf("Warning: 'd=%d' digits is less than the accuracy of the 'a=q' default 'quick' setting (about 14 digits). That's probably a mistake.\n", $accuracy );
} elsif ($dbg && ('q' ne $p_a) && (('h' ne $p_a) || ('b' ne $p_m))) {
&wprintf("Computing with 'd=%d' digits of accuracy.\n", $accuracy );
}
Math::BigRat->accuracy($accuracy);
Math::BigFloat->accuracy($accuracy);
if (($effective_accuracy > 13) && (('q' eq $p_a) || ('h' eq $p_a))) {
# in a=q or a=h mode we calculate with 64-bit scalar floats, so effective accuracy is only about 14 digits
$effective_accuracy = 13; # I'm being conservative
}
if ($echo_cmdline) {
# echo the equivalent Perl command-line
my $cmdline = 'perl ' . $scriptdotpl;
if ($p_v) { $cmdline .= " v=$p_v"; }
# if ($p_t ne '') { $cmdline .= " t=$p_t"; }
if ($p_m) { $cmdline .= " m=$p_m"; }
if ($p_a) { $cmdline .= " a=$p_a"; }
if ($p_d) { $cmdline .= " d=$p_d"; }
if ($p_b) { $cmdline .= " b=$p_b"; }
if ($p_n) { $cmdline .= " n=$p_n"; }
if ($p_k ne '') { $cmdline .= " k=$p_k"; } # note: k can be zero
if ($p_p) { $cmdline .= " p=$p_p"; }
if ($cgi_context) {
print "$cmdline
\n"
} else {
print "\n$cmdline\n";
}
}
sub givehelp {
my $ermsg = shift;
# First, reformat the TLIB version info, for prettier display
# e.g., $version_str = "&(#)binomcalc.pl, version 42, 19-Dec-2020 ";
my $vstr = $trimmed_version_str;
if ('' ne $vstr) {
$vstr = " ($vstr)";
}
if (!defined $ermsg) {
$ermsg = '';
} elsif ('' ne $ermsg) {
$ermsg .= "\n\n";
}
# Now display the Help message, including version number
&wprint( "\n" . $ermsg .
"$scriptdotpl -- extreme precision binomial calculator\n" .
"by Dave Burton$vstr\n" .
"\n" .
"Note: you can rename this program to binomcalc.cgi and use it from a web page, via a\n" .
"\"server-side include,\" as in this example:\n" .
"\n" .
" \n" .
"\n" .
"That does the same calculation as this command:\n" .
"\n" .
" c:\\> perl -f binomcalc.pl n=12 k=3 p=1/2\n" .
"\n" .
"The following command-line parameters are supported:\n" .
"\n" .
" n={integer} (REQUIRED: the n of 'n choose k')\n" .
" k={integer} (REQUIRED: the k of 'n choose k')\n" .
" p={probability} (decimal value 0..1, e.g., 0.5, or a fraction, e.g., 1/3)\n" .
" m={b, cu, acu, co} (b = binomial probability, cu = cumulative probabilities,\n" .
" acu = all cumulative probabilities, co = coefficient only)\n" .
" a={q, s, h, x, xx} (q = quick/scalar = default, s is slower w/ extreme precision,\n" .
" h = hybrid, x and xx are experimental [don't use them])\n" .
" o={h, t, p} (output: h = html, t = text, p = , default determined by context)\n" .
" t=nnn (timeout limit, nnn seconds; default = 0 for no timeout)\n" .
" b=1 (benchmark it; show timings)\n" .
" d={integer} (digits of accuracy: default for a=s is 500; for a=q it's about 13)\n" .
" v={integer} (verbosity; 0 = no debug prints, 1 = some, 2 = lots)\n" .
"\n" .
"Other notes:\n" .
"1. Default mode is m=b (calculate binomial probability).\n" .
"2. Default accuracy setting is a=q, which is fastest. For higher accuracy use a=s.\n" .
" The x and xx (experimental) settings are usually slower and not as good.\n"
);
# And then exit, with errorlevel = 1
exit 1;
}
# wrap a printable string in , add
s, etc., if running in a CGI context
sub wrap {
my $str = shift;
if ($cgi_context) {
$str =~ s/\n$//;
$str =~ s/\n /\n /g;
$str =~ s/\n /\n /g;
$str =~ s/ / /g;
$str =~ s/</g;
$str =~ s/\n/
\n/g;
$str = "$str
\n";
}
return $str;
}
# Ensure that argument is padded to at least n characters, and roughly centered
sub center {
my ($s,$n) = @_;
my $result = $s;
my $l = length($s);
if ($l <= $n) {
my $pr = int(($n-$l)/2);
my $pl = ($n-$l) - $pr;
$result = (' ' x $pl) . $s . (' ' x $pr);
# if there's a leading '<' or '>' then we might move the result one space to the left, to line up better with other m=acu results:
if ($pl > $pr) {
if ($result =~ / ([<>][^ ]*) /) {
$result = substr($result,1) . ' ';
}
}
}
return $result;
}
# wrapped print -- does something reasonable either at command-line or in CGI context
sub wprint {
my $str = shift;
print &wrap($str);
}
# wrapped printf -- does something reasonable either at command-line or in CGI context
sub wprintf {
my $fmt = shift;
my $str = sprintf( $fmt, @_ );
&wprint($str);
}
# insert commas into an integer if it's > 4 digits long
sub commafy {
my $number = shift;
my @pieces = ();
$number .= '';
# if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
if ((length($number) > 4) && ($number =~ /^(\-|)[0-9]+$/)) {
while (length($number) > 0) {
unshift(@pieces,substr($number,-3));
substr($number,-3) = '';
# if ($dbg>0) {
# $tm1 = join(',',@pieces);
# &wprint( "dbg: number='$number', pieces='$tm1'\n" );
# }
}
$number = join(',',@pieces);
}
# if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
return $number;
}#comafy
# Format a float or BigFloat into normalized scientific notation.
# This is like the ->bnstr() method for BigFloats, but with mantissa rounded to N significant digits.
# Input parameters are $x (either scalar float or BigFloat), and $n (number of significant digits).
sub bnstrN {
my ($x,$n) = @_;
my $x2 = Math::BigFloat->new($x); # in case $x is scalar
my $result;
my ($mantis,$exp) = $x2->nparts();
# print "dbg: bnstrN:\n exp=" . &dspl($exp) . "\n";
$exp->bfround(0); # otherwise exponent might display as "-6.00000000000000000000000000000" instead of "-6"
my $rounded_mantis = $mantis->copy();
$rounded_mantis->bfround(-($n-1));
if ($rounded_mantis =~ /^10\.([0-9]*)$/) {
# Sometimes rounding "almost 1" results in "10.000000000e-1", this changes it to "1.0000000000"
$rounded_mantis = '1.0' . $1;
$exp++;
}
if ($exp) {
if (ref($exp)) {
# convert BigFloat to scalar, lest we get something like "1.00e-12.0000000000000"
$exp = $exp->numify();
}
$result = $rounded_mantis . 'e' . $exp;
} else {
$result = $rounded_mantis;
}
# print "dbg: bnstrN:\n exp=$exp\n mantis=$rounded_mantis\n result=$result\n";
return $result;
}
# if input is 0.005 then result is 2 (there are two zeros after the decimal point)
sub number_of_zeros_after_the_decimal_point {
my $x = shift;
my $result;
if (ref $x) {
# BigFloat or BigRat
if ($x <= 0) {
# negative is only possible due to accumulated rounding errors
$result = $effective_accuracy - 1;
} else {
my $result2 = int(abs(log10($x)));
# print "dbg: x=" . &dspl($x) . " result2=" . &dspl($result2) . "\n";
$result = $result2->numify();
}
} else {
# scaler 64-bit float
if ($x <= 0) {
$result = $effective_accuracy - 1;
if ($dbg>0) { print "dbg: x.LE.0, x=" . &dspl($x) . " result=" . &dspl($result) . "\n"; }
} else {
$result = int(abs(log10($x)));
}
}
if ($dbg>0) { print "dbg: number_of_zeros_after_the_decimal_point(" . &dspl($x) . ") = " . &dspl($result) . "\n"; }
return $result;
}
# Depending on how tiny the result is, we might display it in both floating point and scientific notation, or just in one or the other.
# First parameter ($prob) should be a scalar float or BigFloat in [0..1].
# (Due to accumulated rounding errors, $prob can sometimes be slightly < 0 or slightly > 1. We handle that gracefully.)
# Optional second parameter ($verbosity) can be 1 (to display only "5.450654626823856e-368") or 2 (for "<0.000000000000001 = 5.450654626823856e-368").
# Third parameter ($digits) is the maximum number of sighificant digits with which to display the result (default 15).
# (Note: do not confuse $digits with global $effective_accuracy, the number of digits ued in calculations.)
# Forth parameter ($max_addend) is an optional parameter for effectively reducing the number of signifcant digits when $prob is a small number calculated from
# sums & differences of larger-magnitude addends. For example, if digits is 6, then 1.000000 - 0.999990 = 0.000010 has only two significant digits, not six.
# $max_addend should be the largest of the addends/subtrahends/minuends which were included in the calculation.
# Fifth parameter ($definitely_not_exactly_0or1) can be passed as True to tell this subroutine that "zero" is really slightly larger than zero,
# and "one" is always really slightly less than one.
sub display_prob {
&timeit_start('display_prob');
my ($prob, $verbosity, $digits, $max_addend, $definitely_not_exactly_0or1) = @_;
my $prob_is_scalar = !(ref $prob); # true for IEEE 64-bit, false for BigRat or BigFloat
if (!defined $verbosity) {
$verbosity = 2;
}
if (!defined $digits) {
$digits = 15;
if ($dbg>0) { &wprint( "dbg: digits defaults to $digits\n" ); }
} else {
if ($dbg>0) {
&wprint( "dbg: digits passed in as $digits\n" );
# &traceback("display_prob");
}
}
if (!defined $definitely_not_exactly_0or1) {
$definitely_not_exactly_0or1 = 0;
}
if ($dbg>0) { printf( "dbg: display_prob( $prob, $verbosity, $digits, %s, $definitely_not_exactly_0or1 )\n", (defined $max_addend) ? $max_addend : '{undef}' ); }
# first, we calcluate a plain floating point representation
my $fp_prob;
my $digits_fp = $digits; # floating point format digits (as opposed to scientific notation digits)
my $fp_prob1;
my $fp_prob1_digits_exceeded = 0;
my $fp_prob1_is_tiny = 0;
my $smallest_possible_fp = substr('0.00000000000000000000',0,$digits_fp+1) . "1";
my $all_nines_fp = substr('0.99999999999999999999',0,$digits_fp+1);
if ($prob <= $smallest_possible_fp) {
if (($prob <= 0) && !$definitely_not_exactly_0or1) {
# exactly 0 and exactly 1 get special-cased
$fp_prob = $fp_prob1 = substr('0.00000000000000000000',0,$digits_fp+2);
} else {
$fp_prob1 = $smallest_possible_fp;
if ($cgi_context) {
$fp_prob = '<' . $smallest_possible_fp;
} else {
$fp_prob = '<' . $smallest_possible_fp;
}
$fp_prob1_digits_exceeded = $fp_prob1_is_tiny = 1;
}
} elsif ($prob > $all_nines_fp) {
if (($prob >= 1) && !$definitely_not_exactly_0or1) {
$fp_prob = $fp_prob1 = substr('1.00000000000000000000',0,$digits_fp+2);
} else {
$fp_prob1 = $all_nines_fp;
if ($cgi_context) {
$fp_prob = '>' . $all_nines_fp;
} else {
$fp_prob = '>' . $all_nines_fp;
}
$fp_prob1_digits_exceeded = 1;
}
} else {
my $fmt_f;
if ($prob < 0.1) {
$fmt_f = '%.' . ($digits_fp+1) . 'f'; # default "%.16f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
# Problem: 0.099999999999999 would print as 0.1, resulting in one too many digits; this is the hacky fix:
if ($fp_prob =~ /^0.[1-9]/) {
$fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
}
} else {
$fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
}
# print "dbg: fmt_f = '$fmt_f'\n";
}
my $len0 = length($fp_prob1);
if ($dbg>0) { &wprintf("dbg: fp_prob1='$fp_prob1', fp_prob='$fp_prob'\n"); }
# scientific notation is trickier...
my $prob2_is_tiny = ($prob <= 0);
my $prob_zeros = 0;
my $max_addend_zeros = 0;
if ($prob < 0.01) {
$prob_zeros = number_of_zeros_after_the_decimal_point($prob);
# ...but we only bother with scientific notation for probabilities less than 0.01
if (!defined $max_addend) {
if ($prob_is_scalar) {
$max_addend_zeros = 308;
if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=1, so max_addend_zeros=$max_addend_zeros\n" ); }
} else {
# $max_addend = $prob;
$max_addend_zeros = $prob_zeros;
if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=0, set max_addend_zeros=prob_zeros=$max_addend_zeros (I probably need to fix this!)\n" ); }
}
} else {
# we might not have as many digits of accuracy as we'd hoped.
$max_addend_zeros = number_of_zeros_after_the_decimal_point($max_addend);
if ($definitely_not_exactly_0or1 && $prob2_is_tiny & ($dbg>1)) { print "dbg: prob2_is_tiny, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
if ($dbg>0) { print "dbg: prob2_is_tiny=$prob2_is_tiny, definitely_not_exactly_0or1=$definitely_not_exactly_0or1, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
if ($prob_zeros > $max_addend_zeros) {
if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < ($digits + 3)) {
if ($dbg>0) {
print "dbg: effective_accuracy = " . dspl($effective_accuracy) . "\n";
print "dbg: prob_zeros = " . dspl($prob_zeros) . "\n";
print "dbg: max_addend_zeros = " . dspl($max_addend_zeros) . "\n";
print "dbg: digits = " . dspl($digits) . "\n";
}
$digits = ($effective_accuracy - ($prob_zeros - $max_addend_zeros)) - 3;
if ($dbg>0) { print "dbg: new digits = " . dspl($digits) . "\n"; }
if ($digits < 2) {
if ($digits <= 0) {
$digits = 1;
$prob2_is_tiny = 1;
} else {
$digits = 2;
}
if ($dbg>0) { print " new digits = " . dspl($digits) . "\n"; }
}
}
}
}
if ($prob_is_scalar && ($prob > 0) && ($prob < $POS_NORM_SMALLEST)) {
if ($dbg>0) { &wprintf( "dbg: scalar prob='%.16g' < POS_NORM_SMALLEST='%.16g'\n", $prob, $POS_NORM_SMALLEST ); }
# result is so tiny that we lost digits of precision due to denormalized 64-bit scalar representation
my $tmp = sprintf("%.1g", $prob);
if ($tmp =~ /e(\-[0-9]+)$/i) {
my $tmp_expon = $1; # should be -308..-324 because POS_NORM_SMALLEST = 2.22507385850720138e-308 and POS_DENORM_SMALLEST = 4.94065645841247e-324
if ($dbg>0) { &wprint( "dbg: scalar subnormal prob is approx. $tmp, digits was='$digits', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny'\n" ); }
if( ($tmp_expon > -308) || ($tmp_expon < -324)) {
&wprintf( "ERR: prob='%.16g' tmp='$tmp' exp='$tmp_expon' (outside expected range)\n", $prob );
exit 1;
}
if ($tmp_expon < -309) {
my $tmp_sav_digits = $digits;
$digits += ($tmp_expon + 309); # reduce number of digits
if ($dbg>0) { &wprint( "dbg: digits reduced from $tmp_sav_digits to $digits\n" ); }
if ($digits <= 1) {
$digits = 1;
$prob2_is_tiny = 1;
}
}
if ($dbg>0) { &wprint( "dbg: prob='$prob', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny', new denorm scalar digits = " . dspl($digits) . "\n" ); }
}
}
# print "dbg: prob2_is_tiny = '$prob2_is_tiny'\n";
if ($prob2_is_tiny) {
my $tmp_expon = ($max_addend_zeros + $effective_accuracy - 2);
if ($dbg>0) {print "dbg: prob2_is_tiny = True, expon='$tmp_expon', max_addend_zeros=$max_addend_zeros, effective_accuracy=$effective_accuracy, (defined max_addend)='" . (defined $max_addend) . "'\n"; }
if ($prob_is_scalar && ($tmp_expon > 317)) {
if ($dbg>0) {print "dbg: expon='$tmp_expon' -> 117 because max_addend=undef\n";}
$tmp_expon = 317;
}
if (defined $max_addend) {
# more conservative for cummulative sums, since they tend to accumulate errors
$tmp_expon -= 1;
}
# if (0 == $prob) {
# $fp_prob = '(underflow)';
# } else {
$fp_prob = '1.0e-' . $tmp_expon;
if ($cgi_context) {
$fp_prob = '<' . $fp_prob;
} else {
$fp_prob = '<' . $fp_prob;
}
# }
} else {
if ($dbg>0) { &wprintf( "prob_zeros=$prob_zeros, max_addend_zeros=$max_addend_zeros, (prob_zeros > max_addend_zeros)=%d\n", ($prob_zeros > $max_addend_zeros) ); }
if ($prob_zeros > $max_addend_zeros) {
if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < 1) {
$prob2_is_tiny = 1;
if (!$fp_prob1_is_tiny) {
&wprint( "ERR: prob2 tiny but not prob1! Please note the exact parameter you used, and tell Dave!\n"
. "prob='$prob'\n"
. "prob1='$fp_prob1'\n"
. "prob_zeros=$prob_zeros\n"
. "max_addend_zeros=$max_addend_zeros\n"
. "effective_accuracy=$effective_accuracy\n" );
}
}
}
my $fp_prob2;
# Here we generate what I call the %g version (but I don't really use %g anymore)
#if ($prob_is_scalar) {
# $fp_prob2 = sprintf( ('%#.' . ($digits-1) . 'e' ), $prob ); # '#' flag avoids removal of trailing decimal point
# if ($dbg>0) { &wprint( "dbg: scalar fp_prob2='$fp_prob2' vs. '" . bnstrN($prob,$digits) . "'\n" ); }
#} else {
$fp_prob2 = bnstrN($prob,$digits); # scientific notation (do this because sprintf %g doesn't work for BigFloats)
# }
if ($dbg>0) { &wprint( "dbg: prob='$prob', digits=$digits, initial fp_prob2='$fp_prob2'\n" ); }
if ($fp_prob2 =~ /^(\-|)([1-9])\.([0-9]*)e\-1$/) {
# change "8.187225652655315e-1" to "0.8187225652655315"
if ($dbg>0) { &wprint( "dbg: fp2 fixup: was '$fp_prob2'\n"); }
$fp_prob2 = $1 . '0.' . $2 . $3;
if ($dbg>0) { &wprint( " now '$fp_prob2'\n"); }
}
my $len2 = length($fp_prob2);
if ($dbg>0) { &wprint( "dbg:\n fb0 = '$fp_prob'\n fb2 = '$fp_prob2'\n" ); }
if (($fp_prob1 ne $fp_prob2) && ($fp_prob2 ne substr($fp_prob1,0,$len2))) {
# the %f and %g versions are different, and the %g version isn't just %f version missing a digit or two at the end
if ($prob2_is_tiny) {
if ($fp_prob2 =~ /^([^e]*)e(.*)$/) {
my $tmp = '1.0e' . (1+$2);
# print "dbg: fp_prob2 is tiny = '$fp_prob2' -> '<$tmp'\n";
if ($cgi_context) {
$fp_prob = '<' . $tmp;
} else {
$fp_prob = '<' . $tmp;
}
} else {
&wprint("ERR: fp_prob2_is_tiny, fp_prob2='$fp_prob2', please tell Dave!\n");
}
} elsif ($verbosity <= 1) {
$fp_prob = $fp_prob2; # just use the scientific notation version
} elsif ((!$fp_prob1_digits_exceeded) && ($len0 < $len2) && ($fp_prob2 !~ /e/i)) {
# the %g version is in plain floating point notation, but has more digits than the %f version, so use the %g version
$fp_prob = $fp_prob2; # I'm unsure whether this can really happen
} else {
# display both versions
if ($cgi_context) {
$fp_prob = $fp_prob . ' = ' . $fp_prob2;
} else {
$fp_prob = $fp_prob . ' = ' . $fp_prob2;
}
}
}
}
}
# printf("dbg: display_prob:\n tmp3='%s'\n fp_prob='%s'\n", $tmp1, $tmp3, $fp_prob );
if ($cgi_context) {
$fp_prob =~ s/-/−/g; # Use minus sign instead of hyphen, to avoid inappropriate line-break; or we could use ‑ non-breaking hyphen
}
&timeit_end('display_prob');
return $fp_prob;
}#display_prob
# a=s
# Given n, k & p, calculate and return P(X=k)
# The "R" suffix means this is the BigRat version
# n & k should be scalar integers, p is a BigRat.
# Result ($probability) will be a BigFloat.
sub calc_binom_probR {
my ($n,$k,$p) = @_;
$p = Math::BigRat->new($p);
my $probability = Math::BigRat->new(0);
if ($dbg >= 1) {
&wprint( "dbg: calc_binom_probR( n=" . dspl($n) . ", k=" . dspl($k) . ", p=" . dspl($p) . " )...\n" );
}
&timeit_start('bnok');
my $nchoosek = Math::BigInt->new($n);
$nchoosek = $nchoosek -> bnok( $k );
&timeit_end('bnok');
if ($dbg >= 1) {
&wprint( "dbg: $n -> bnok($k) = nchoosek = " . dspl($nchoosek) . "\n" );
}
if ($dbg >= 2) {
my $commafied_nchoosek = commafy($nchoosek);
if ($cgi_context) {
print "
\n";
print "
\n";
print "( | $n | ) | = $commafied_nchoosek |
\n";
print "$k |
\n";
print "
\n";
print "
\n";
} else {
print "$n choose $k = $commafied_nchoosek\n";
}
}
my $one = Math::BigRat->new(1);
my $one_minus_p = $one - $p;
if ($dbg >= 1) {
&wprint( "p = $p\n1-p = $one_minus_p\n" );
}
my $tmpa = $p -> copy();
if ($dbg >= 3) {
&wprint( "initial tmpa = $tmpa\n" );
}
my $tmpb = $one_minus_p -> copy();
if ($dbg >= 3) {
&wprint( "initial tmpb = $tmpb\n" );
}
&timeit_start('bpow');
if (0 == $k) {
# On Linux, the GMP BigRat gives X^0 = X instead of 1; this works around that bug
$tmpa = $one;
} else {
$tmpa = $tmpa->bpow($k);
}
&timeit_end('bpow');
&timeit_start('bpow');
if ($n == $k) {
$tmpb = $one;
} else {
$tmpb = $tmpb->bpow($n-$k);
}
&timeit_end('bpow');
if ($dbg >= 3) {
&wprint( "dbg[3]: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" );
}
$probability = $tmpa * $tmpb * $nchoosek;
# $probability = $probability->numify(); # believe it or not, if p is a ratio, then numufy() can be 70% of the runtime
# Convert from BigRat to BigFloat
# $probability = $probability->as_float(4000);
$probability = Math::BigFloat->new($probability->numerator()) / $probability->denominator();
if ($dbg >= 2) {
&wprint( "calc_binom_probR( $n, $k, $p ) = $probability\n" );
}
# if ($dbg >= 2) { &wprint( "dbg: calc_binom_probR() returning, result = " . &dspl($probability) . "\n" ); }
return $probability;
}
# a=x
# Given n, k & p, calculate and return P(X=k)
# The "F" suffix means this is the BigFloat (slow) version (which turns
# out to be superior to the BigRat version, most of the time).
# n & k should be scalar integers, p is a BigFloat.
# Result ($probability) will be a scalar float.
sub calc_binom_probF {
my ($n,$k,$p) = @_;
my $probability = Math::BigFloat->new(0);
&timeit_start('nchoosek');
my $nchoosek = Math::BigFloat->new($n);
$nchoosek = $nchoosek -> bnok( $k );
&timeit_end('nchoosek');
if ($dbg >= 2) {
my $commafied_nchoosek = commafy($nchoosek);
if ($cgi_context) {
print "
\n";
print "\n";
print "( | $n | ) | = $commafied_nchoosek |
\n";
print "$k |
\n";
print "
\n";
print "
\n";
} else {
print "$n choose $k = $commafied_nchoosek\n";
}
}
&timeit_start('ones');
my $one = Math::BigFloat->new(1);
my $one_minus_p = $one - $p;
&timeit_end('ones');
if ($dbg >= 2) {
&wprint( "p = $p\n1-p = $one_minus_p\n" );
}
&timeit_start('bpows');
my $tmpa = $p -> copy();
my $tmpb = $one_minus_p -> copy();
if (0 == $k) {
# On Linux, the GMP BigRat gives X^0 = X instead of 1. I've not seen that bug for BigFloat,
# but this makes sure.
$tmpa = $one;
} else {
$tmpa = $tmpa->bpow($k);
}
&timeit_start('bpows');
if ($n == $k) {
$tmpb = $one;
} else {
$tmpb = $tmpb->bpow($n-$k);
}
&timeit_end('bpows');
if ($dbg >= 2) {
&wprint( "dbg: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" );
}
&timeit_start('prob');
$probability = $tmpa * $tmpb * $nchoosek;
&timeit_end('prob');
if ($dbg >= 2) {
&wprint( "The probability of $n choose $k (w/ p=$p) is $probability\n" );
}
# $probability = $probability->numify();
return $probability;
}
# Calculate binomial coefficient, "n choose k". Equivalent to .bnok() but for scalars:
# n! / ((k!) * ((n-k)!))
sub n_choose_k {
my ($n, $k) = @_;
my $result = 1;
if ((!defined $k) || ($k < 0) || ($k > $n)) {
die "ERROR: Bad parameters to n_choose_k($n,$k), stopped\n";
} else {
my $z = $n - $k;
if ($z > $k) {
($z, $k) = ($k, $z); # swap 'em, so that $k >= $z
}
# At this point we know that $n >= $k >= $z
# n! is a multiple of k!, so we'll skip those multiplications & divisions.
# Additionally, we know that n-k = z, so counting k up to n will take the
# same number of loops as counting z down to 1, which means we only need
# to test one of the two counters.
my $x = $k+1;
while ($x <= $n) {
# print "dbg: x = $x, z = $z\n";
$result *= $x;
$x += 1;
$result /= $z;
$z -= 1;
}
}
if ($dbg>0) {
&wprint( "dbg: ($n choose $_[1]) = $result\n" );
}
return $result;
}
# Given n, k & p, calculate and return P(X=k)
# The "S" suffix means this is the short / scalar ("quick") version.
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# No bignums are used.
# The only problem with this simple version is that it's vulnerable to
# overflow of "n choose k" and underflow of "p^k" and "(1-p)^(n-k)".
# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub calc_binom_probS {
my ($n,$k,$p) = @_;
my $probability1 = 0;
if ($dbg>0) {
my $n_dspl = dspl($n);
my $k_dspl = dspl($k);
&wprint("dbg: calc_binom_probS( n=$n_dspl, k=$k_dspl )\n");
}
my $nchoosek1 = $n;
$nchoosek1 = &n_choose_k($n,$k);
if ($dbg>0) {
&wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek1) . "\n");
}
if ($dbg >= 2) {
my $commafied_nchoosek = commafy($nchoosek1);
if ($cgi_context) {
print "
\n";
print "\n";
print "( | $n | ) | = $commafied_nchoosek |
\n";
print "$k |
\n";
print "
\n";
print "
\n";
} else {
print "$n choose $k = $commafied_nchoosek\n";
}
}
my $one_minus_p1 = 1 - $p;
if ($dbg >= 2) {
&wprint( "p = $p\n1-p = $one_minus_p1\n" );
}
my $tmpa1 = $p;
my $tmpb1 = $one_minus_p1;
$tmpa1 = $tmpa1 ** $k;
$tmpb1 = $tmpb1 ** ($n-$k);
if ($dbg >= 2) {
&wprint( "dbg: a=$tmpa1, b=$tmpb1, nchoosek=$nchoosek1\n" );
}
$probability1 = $tmpa1 * $nchoosek1 * $tmpb1;
if ($dbg >= 2) {
&wprint( "The probability of $n choose $k (w/ p=$p) is $probability1\n" );
}
return $probability1;
}
# Given n, k & p, calculate and return P(X=k)
# The "S2" suffix means this is the 2nd short / scalar version.
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# No bignums are used.
# This version is more robust than the "S" version. It fixes the possible
# overflow of "n choose k" but not the underflow of "p^k" and "(1-p)^(n-k)".
# Here's a test case that this version fixes:
# perl binomcalc.pl -n=2000 -k=890 -p=0.46 -d=2
# result = 0.007243291418493
# (Berman gets that one right; Vassarstats gets it wrong in the 3rd digit.)
# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub calc_binom_probS2 {
my ($n,$k1,$p) = @_;
my $k = $k1;
my $probability = 0;
my $result = 1;
my $cntr = 1;
my $tmpmsg; # for debug prints
if ($dbg>0) {
my $n_dspl = dspl($n);
my $k_dspl = dspl($k);
&wprint("dbg: calc_binom_probS2( n=$n_dspl, k=$k_dspl, p=$p )\n");
}
# my $nchoosek = &n_choose_k($n,$k);
# if ($dbg>0) {
# &wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek) . "\n");
# }
my $one_minus_p = 1 - $p;
if ($dbg >= 2) {
&wprint( "p = $p\n1-p = $one_minus_p\n" );
}
my $tmpa = $p;
my $tmpb = $one_minus_p;
$tmpa = $tmpa ** $k;
$tmpb = $tmpb ** ($n-$k);
if ($dbg >= 2) {
&wprint( "dbg: a=$tmpa, b=$tmpb\n" );
}
# $probability = $tmpa * $nchoosek * $tmpb;
# Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
# n! / ((k!) * ((n-k)!))
my $z = $n - $k;
if ($z > $k) {
($z, $k) = ($k, $z); # swap 'em, so that $k >= $z
}
# At this point we know that $n >= $k >= $z
my $mul_by_a_done = 0;
my $mul_by_b_done = 0;
# The result we need is (n choose k) * $tmpa * $tmpb.
# (n choose k) is often very, very large. $tmpa and $tmpb are often very, very tiny.
# Calculate (n choose k).
#
# n! is a multiple of k!, so we'll skip those multiplications & divisions.
# Additionally, we know that n-k = z, so counting k up to n will take the
# same number of loops as counting z down to 1, which means we only need
# to test one of the two counters.
#
# To avoid loss of precision in extreme cases, if the (n choose k) value begins to approach
# the limit of what can be represented in a IEEE 754 double float (a little over 1e308) then
# we do the multiplications by $tmpa and perhaps $tmpb in the midst of the calculation of
# (n choose k).
my $x = $k+1;
while ($x <= $n) {
# print "dbg: x = $x, z = $z\n";
$result *= $x;
$x += 1;
$result /= $z;
$z -= 1;
if ($result > 1e290) {
if (!$mul_by_a_done) {
if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=$tmpa,"; }
$result *= $tmpa;
$mul_by_a_done = 1;
if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
if ($result > 1e290) {
if (!$mul_by_b_done) {
if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, b=$tmpb,"; }
$result *= $tmpb;
$mul_by_b_done = 1;
if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
}
}
$cntr++;
}
if (!$mul_by_a_done) {
if ($dbg>0) { $tmpmsg = "dbg: late mul by a, X before = $result, a=$tmpa,"; }
$result *= $tmpa;
if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
}
if (!$mul_by_b_done) {
if ($dbg>0) { $tmpmsg = "dbg: late mul by b, X before = $result, b=$tmpb,"; }
$result *= $tmpb;
if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
}
$probability = $result;
if ($dbg >= 2) {
&wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" );
}
return $probability;
}
# base 10 logarithm
sub log10 {
my $n = shift;
if ($n <= 0) {
return -10000;
}
# if (($n < -1.911e-12) && ($n > -1.913e-12)) {
# &traceback('log10');
# }
return log($n)/log(10);
}
# Input is small (perhaps very tiny!) positive number, $p, between 0 and 1.
# This function determines the maximum power to which $p can be raised without
# the result having an exponent less than -100.
# (This is used as part of an underflow avoidance strategy.)
# E.g., if $p = 1E-5 then the result is 20. if $p = 1E-20 then the result is 10.
sub max_nonunderflowing_power1 {
my ($p) = @_;
my $result;
my $ap = abs($p);
if ($ap <= 1e-100) {
$result = 1;
} elsif ($ap > 0.99977) {
$result = 1000000; # arbitrary maximum of one million
} else {
my $tmp = abs(log10($ap)); # if p=1E-5 then this is 5
$result = int(100/$tmp); # if p=1E-5 then this is 20
}
return $result;
}
# one-digit log10: 1 2 3 4 5 6 7 8 9 9.5
our @quiklog = (0, 0, 0.30103, 0.47712, 0.60206, 0.69897, 0.77815, 0.84510, 0.90309, 0.95424, 0.97772 );
# Roughly equivalent to max_nonunderflowing_power1(), calculated a different way.
# For scalars, this is slower, but for BigFloats it might be faster?
sub max_nonunderflowing_power2 {
my ($p) = @_;
my $result;
my $ap = abs($p);
if ($ap <= 1e-100) {
$result = 1;
} elsif ($ap > 0.99977) {
$result = 1000000;
} else {
$ap = Math::BigFloat->new($ap);
# my $e = $p->exponent(); this doesn't work (it's apparently not normalized)
my ($m,$e) = $ap->nparts();
my $substr = int($m+0.5);
my $tmp = abs($e) - $quiklog[$substr]; # if p=1E-5 then abs($e) is 5
$result = int(100/$tmp); # if p=1E-5 then this is 20
# printf("dbg: m=$m, e=$e, substr=$substr, qlog[$substr]=%f, tmp=$tmp, q=%f, r=$result\n", $quiklog[$substr], 100/$tmp);
}
return $result;
}
# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub max_nonunderflowing_power {
my $p = shift;
&timeit_start('max_nonunderflowing_power1');
my $result1 = max_nonunderflowing_power1($p);
&timeit_end('max_nonunderflowing_power1');
&timeit_start('max_nonunderflowing_power2');
my $result2 = max_nonunderflowing_power2($p);
&timeit_end('max_nonunderflowing_power2');
if ($result1 != $result2) {
print "dbg: max_nonunderflowing_power($p), $result1, $result2\n";
}
return $result1;
}
# my $xp = 2.9E-5;
# my $ex = 55;
# my $maxpower = max_nonunderflowing_power($xp);
# printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex );
# $maxpower++;
# printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex );
# a=q
# Given n, k & p, calculate and return P(X=k)
# The "S3" suffix means this is the 3rd scalar / short version.
# This is the version we currently use for Quick (normal) precision. Sorry about the potential confusion
# confusion between "S" for "scalar" (this one) and "S" for "slow" (which uses calc_binom_probR).
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# Result is the product of three factors:
# "n choose k" -- which is at risk of overflow
# "p^k" -- which is at risk of underflow
# "(1-p)^(n-k)" -- which is also at risk of underflow
# No bignums are used.
# This version is more robust than simpler versions. It fixes the possible
# overflow of "n choose k" and the possible underflows of "p^k" and "(1-p)^(n-k)".
# Here's a test case that breaks calc_binom_probS2 but this version fixes:
# perl binomcalc.pl -n=2000 -k=890 -p=0.2
# a=q result is 2.48298479667e-135
# (a=s result is 2.482984796673508e-135)
# (Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135")
# Here's another test case that breaks calc_binom_probS2 but this version fixes:
# perl binomcalc.pl -n=3000 -k=1400 -p=0.45
# a=q result is 0.00272066277 = 2.72066276956e-3
# (a=s result is 0.002720662769557 = 2.720662769556848e-3)
# (Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066")
sub calc_binom_probS3 {
my ($n,$k1,$p) = @_;
my $k = $k1;
my $probability = 0;
my $result = 1;
my $cntr = 1;
my $tmpmsg; # for debug prints
if ($dbg>0) {
my $n_dspl = dspl($n);
my $k_dspl = dspl($k);
&wprint("dbg: calc_binom_probS3( n=$n_dspl, k=$k_dspl, p=$p )\n");
}
my $one_minus_p = 1 - $p;
my $n_minus_k = $n - $k;
# my $maxpower_p = max_nonunderflowing_power($p);
my $maxpower_p = max_nonunderflowing_power1($p);
my $remaining_k = $k;
if ($maxpower_p > $k) {
$maxpower_p = $k; # p^k won't underflow anyhow
}
# my $maxpower_1mp = max_nonunderflowing_power($one_minus_p);
my $maxpower_1mp = max_nonunderflowing_power1($one_minus_p);
my $remaining_n_minus_k = $n_minus_k;
if ($maxpower_1mp > ($n_minus_k)) {
$maxpower_1mp = $n_minus_k; # (1-p)^(n-k) won't underflow anyhow
}
if ($dbg >= 2) {
&wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" );
}
# my $tmpa = $p;
# my $tmpb = $one_minus_p;
# $tmpa = $tmpa ** $k;
# $tmpb = $tmpb ** ($n_minus_k);
# if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); }
# $probability = $tmpa * $nchoosek * $tmpb;
# Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
# n! / ((k!) * ((n-k)!))
my $z = $n_minus_k;
if ($z > $k) {
($z, $k) = ($k, $z); # swap 'em, so that $k >= $z
}
# At this point we know that $n >= $k >= $z
# Note that "k" here is no longer necessarily the same "k" used in the p^k
# and (1-p)^(n-k) exponents. (I probably should have used new variable names.)
my $mul_by_a_done = 0;
my $mul_by_b_done = 0;
# The result we need is (n choose k) * p^k * (1-p)^(n-k).
# (n choose k) is often very, very large.
# p^k and (1-p)^(n-k) are often very, very tiny.
# This is our (n choose k) calculation loop, mostly.
#
# When there's no threat of overflow, the following loop just calculates
# (n choose k). But when overflow threatens we get trickier.
#
# n! is a multiple of k!, so we'll skip those multiplications & divisions.
# Additionally, we know that n-k = z, so counting k up to n will take the
# same number of loops as counting z down to 1, which means we only need to
# test one of the two counters.
#
# To avoid loss of precision in extreme cases, if the intermediate value begins
# to approach the limit of what can be represented in a IEEE 754 double float
# (a little over 1e308) then we insert the multiplications by the tiny
# exponentiated probabilities in the midst of the calculation of (n choose k).
my $x = $k+1;
while ($x <= $n) {
# print "dbg: x = $x, z = $z\n";
$result *= $x;
$x += 1;
$result /= $z;
$z -= 1;
while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) {
if ($remaining_k > 0) {
if ($dbg>2) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; }
$result *= ($p ** $maxpower_p);
$remaining_k -= $maxpower_p;
if ($remaining_k < $maxpower_p) {
$maxpower_p = $remaining_k;
}
if ($dbg>2) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
if ($result > 1e200) {
if ($remaining_n_minus_k > 0) {
if ($dbg>2) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; }
$result *= ($one_minus_p ** $maxpower_1mp);
$remaining_n_minus_k -= $maxpower_1mp;
if ($remaining_n_minus_k < $maxpower_1mp) {
$maxpower_1mp = $remaining_n_minus_k;
}
if ($dbg>2) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
}
}
$cntr++;
}
while ($remaining_k > 0) {
if ($dbg>2) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; }
$result *= ($p ** $maxpower_p);
$remaining_k -= $maxpower_p;
if ($remaining_k < $maxpower_p) {
$maxpower_p = $remaining_k;
}
if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); }
}
while ($remaining_n_minus_k > 0) {
if ($dbg>2) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; }
$result *= ($one_minus_p ** $maxpower_1mp);
$remaining_n_minus_k -= $maxpower_1mp;
if ($remaining_n_minus_k < $maxpower_1mp) {
$maxpower_1mp = $remaining_n_minus_k;
}
if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); }
}
$probability = $result;
if ($dbg >= 2) {
&wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" );
}
return $probability;
}
# a=xx
# Given n, k & p, calculate and return P(X=k)
# The "F" suffix means this is the BigFloat (slow) version.
# n & k should be scalar integers, p can be a BigFloat or a scalar float, 0..1.
# Result ($probability) will be a BigFloat.
# Result is the product of three parts:
# "n choose k" -- which is at risk of overflow
# "p^k" -- which is at risk of underflow
# "(1-p)^(n-k)" -- which is also at risk of underflow
# Here's a test case that breaks calc_binom_probS2 but this version fixes:
# perl binomcalc.pl -n=2000 -k=890 -p=0.2
# a=s result is 2.482984796673508e-135
# (a=q result is 2.48298479667e-135; Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135")
# Here's another test case that breaks calc_binom_probS2 but this version fixes:
# perl binomcalc.pl -n=3000 -k=1400 -p=0.45
# a=s result is 0.002720662769557 = 2.720662769556848e-3
# (a=q result is 0.00272066277 = 2.72066276956e-3; Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066")
sub calc_binom_probF3 {
my ($n,$k1,$p1) = @_;
my $k = $k1;
my $p = Math::BigFloat->new($p1);
my $scalar_p = $p->numify();
my $probability = Math::BigFloat->new(0);
my $result = Math::BigFloat->new(1);
my $cntr = 1;
my $tmpmsg; # for debug prints
if ($dbg>0) {
my $tst1 = 1.2345E-20;
my $tst2 = Math::BigFloat->new($tst1);
my $tst3 = Math::BigFloat->new($tst2);
&wprintf("dbg: tst1=%s, tst2=%s, tst3=%s\n", dspl($tst1), dspl($tst2), dspl($tst3) );
my $n_dspl = dspl($n);
my $k_dspl = dspl($k);
my $p_dspl = dspl($p);
my $p1_dspl = dspl($p1);
if ($p1_dspl ne $p_dspl) {
$p_dspl .= "=$p1_dspl";
}
&wprint("dbg: calc_binom_probF3( n=$n_dspl, k=$k_dspl, p=$p_dspl )\n");
}
my $one_minus_p = 1 - $p;
my $n_minus_k = $n - $k;
my $maxpower_p = max_nonunderflowing_power2($p);
my $remaining_k = $k;
if ($maxpower_p > $k) {
$maxpower_p = $k; # p^k won't underflow anyhow
}
my $maxpower_1mp = max_nonunderflowing_power2($one_minus_p);
my $remaining_n_minus_k = $n_minus_k;
if ($maxpower_1mp > ($n_minus_k)) {
$maxpower_1mp = $n_minus_k; # (1-p)^(n-k) won't underflow anyhow
}
if ($dbg >= 2) {
&wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" );
}
# my $tmpa = $p;
# my $tmpb = $one_minus_p;
# $tmpa = $tmpa ** $k;
# $tmpb = $tmpb ** ($n_minus_k);
# if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); }
# $probability = $tmpa * $nchoosek * $tmpb;
# Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
# n! / ((k!) * ((n-k)!))
my $z = $n_minus_k;
if ($z > $k) {
($z, $k) = ($k, $z); # swap 'em, so that $k >= $z
}
# At this point we know that $n >= $k >= $z
# Note that "k" here is no longer necessarily the same "k" used in the p^k
# and (1-p)^(n-k) exponents. (I probably should have used new variable names.)
my $mul_by_a_done = 0;
my $mul_by_b_done = 0;
# The result we need is (n choose k) * p^k * (1-p)^(n-k).
# (n choose k) is often very, very large.
# p^k and (1-p)^(n-k) are often very, very tiny.
# This is our (n choose k) calculation loop, mostly.
#
# When there's no threat of overflow, the following loop just calculates
# (n choose k). But when overflow threatens we get trickier.
#
# n! is a multiple of k!, so we'll skip those multiplications & divisions.
# Additionally, we know that n-k = z, so counting k up to n will take the
# same number of loops as counting z down to 1, which means we only need to
# test one of the two counters.
#
# To avoid loss of precision in extreme cases, if the intermediate value begins
# to approach the limit of what can be represented in a IEEE 754 double float
# (a little over 1e308) then we insert the multiplications by the tiny
# exponentiated probabilities in the midst of the calculation of (n choose k).
my $x = $k+1;
while ($x <= $n) {
# print "dbg: x = $x, z = $z\n";
$result *= $x;
$x += 1;
$result /= $z;
$z -= 1;
while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) {
if ($remaining_k > 0) {
if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; }
$result *= ($p ** $maxpower_p);
$remaining_k -= $maxpower_p;
if ($remaining_k < $maxpower_p) {
$maxpower_p = $remaining_k;
}
if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
if ($result > 1e200) {
if ($remaining_n_minus_k > 0) {
if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; }
$result *= ($one_minus_p ** $maxpower_1mp);
$remaining_n_minus_k -= $maxpower_1mp;
if ($remaining_n_minus_k < $maxpower_1mp) {
$maxpower_1mp = $remaining_n_minus_k;
}
if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); }
}
}
}
$cntr++;
}
while ($remaining_k > 0) {
if ($dbg>0) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; }
$result *= ($p ** $maxpower_p);
$remaining_k -= $maxpower_p;
if ($remaining_k < $maxpower_p) {
$maxpower_p = $remaining_k;
}
if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
}
while ($remaining_n_minus_k > 0) {
if ($dbg>0) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; }
$result *= ($one_minus_p ** $maxpower_1mp);
$remaining_n_minus_k -= $maxpower_1mp;
if ($remaining_n_minus_k < $maxpower_1mp) {
$maxpower_1mp = $remaining_n_minus_k;
}
if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
}
$probability = $result;
if ($dbg >= 2) {
&wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" );
}
# $probability = $probability->numify();
return $probability;
}
if ($dbg>0) {
&wprint( "Parameters passed to $scriptname = '$parameters'\n" );
&wprint( "dbg=$dbg\n" );
}
# For a='q' and a='h' we use the scalar version (the default)
our $calc_binom_prob = \&calc_binom_probS3; # calc_binom_probS3 is more robust than calc_binom_probS or calc_binom_probS2
if ('s' eq $p_a) { #
# GMP BigRat version is surprisingly fast!
# Before I switched to GMP, I used the BigFloat version calc_binom_probF here.
$calc_binom_prob = \&calc_binom_probR;
} elsif ('x' eq $p_a) {
# BigFloat eXperimental version. Slow.
$calc_binom_prob = \&calc_binom_probF;
} elsif ('xx' eq $p_a) {
# another eXperimental version. Slow.
$calc_binom_prob = \&calc_binom_probF3;
}
our $k0 = $p_k + 0; # scalar
# $k = Math::BigInt->new($p_k); # BigInt
# $k0 = $k->numify(); # (unnecessary, since I don't "use bignum")
if ($dbg>0) {&wprint( "p_k='$p_k', k0=" . dspl($k0) . "\n" );}
our $n0 = $p_n + 0; # scalar
# $n = Math::BigInt->new($p_n); # BigInt
# $n0 = $n->numify();
if ($dbg>0) {&wprint( "p_n='$p_n', n0=" . dspl($n0) . "\n" );}
# &wprint("dbg: about to evaluate p_p...\n");
our( $p0, $p1, $p2 ); # scalar, BigRat, BigFloat, respectively
if ($p_p =~ /\//) {
# &wprint("dbg: p_p is rational...\n");
$p1 = Math::BigRat->new($p_p);
$p2 = Math::BigFloat->new($p1->numerator()) / $p1->denominator();
$p0 = $p2->numify();
# &wprint("dbg: evaluated rational, p0=$p0, p1=" . dspl($p1) . ", p2=" . dspl($p2) . "\n");
} else {
# &wprint("dbg: p_p is float '$p_p' ...\n");
# if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p='$p_p'\n"); }
$p2 = Math::BigFloat->new($p_p);
# if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p=p2=" . dspl($p2) . "\n");}
$p1 = $p2;
$p0 = 0.0 + $p_p; # scalar float
}
if ('s' eq $p_a) {
$p = $p1; # BigRat
} elsif (('x' eq $p_a) || ('xx' eq $p_a)) {
$p = $p2; # BigFloat
} elsif (('q' eq $p_a) || ('h' eq $p_a)) {
$p = $p0;
}
if ($dbg>0) {&wprint("dbg: p_p='$p_p', p=" . dspl($p) . "\n");}
if ($dbg>0) {
&wprint( "n=$n0, k=$k0, p=$p\n");
}
if ($n0) {
if ($k0 > $n0) {
&givehelp("** ERROR: parameter k=$k0 to $scriptname cannot be greater than n=$n0");
}
if ($p0 > 1) {
&givehelp("** ERROR: parameter p=$p0 to $scriptname must be between 0 and 1");
}
}
# m=co calculate & display binomial coefficient, only
sub do_m_co {
my $nchoosek;
if ('q' eq $p_a) {
$nchoosek = &n_choose_k($n0, $k0);
} else {
$nchoosek = Math::BigInt->new( $p_n );
$nchoosek = $nchoosek -> bnok( $p_k );
$nchoosek = "$nchoosek";
}
if ('inf' eq $nchoosek) {
$nchoosek = 'overflow';
if ($cgi_context) {
print "n=$n0, k=$k0, (n choose k) = overflow";
if ('q' eq $p_a) {
print "
\nTry again, in 'Slow' mode.
\n";
} else {
print "\n";
}
} else {
print "n=$n0, k=$k0, (n choose k) = $nchoosek\n";
if ('q' eq $p_a) {
print "Try again, in 'Slow' mode.\n";
}
}
} else {
$nchoosek = commafy($nchoosek);
&wprint( "n=$n0, k=$k0, (n choose k) = $nchoosek" );
}
}#do_m_co
# $probability is calculated probability of x=k, $dd is the number of digits of precision to display,
# the arrays are for calculating cumulative probabilities
our ( $probability, $dd, @saved_probs, @cummu_0_to_i ); # we no longer use @cummu_i_to_k
# m=b calculate & display binomial probability of x=k, leaving the result in $probability as a side-effect
sub do_m_b {
if ($dbg>0) { &wprint( "dbg: sub do_m_b(), dd=$dd\n" ); }
if ($p_p =~ /\//) {
&wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" );
} else {
&wprint( "Probability of success on a single trial (p): $p0\n" );
}
&wprint( "Number of trials (n): $n0\n" );
&wprint( "Number of successes (k): $k0\n" );
# In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p
if (('q' eq $p_a) || ('h' eq $p_a)) {
$p1 = undef;
$p2 = undef;
}
# First calculate the binomial probability P(X=k)
&timeit_start('calc_bprob');
$probability = &$calc_binom_prob( $n0, $k0, $p );
&timeit_end('calc_bprob');
my $fp_prob = &display_prob($probability,2,$dd);
&wprint( "Binomial probability P(X = k): $fp_prob\n" );
}#do_m_b
# m=cu calculate and display probability of x=k, and cumulative probability of x <= k
sub do_m_cu {
# Sum the binomial probabilities for lower values of k (down to zero)
my $cummu_0_to_im1 = 0;
my $max_addend = 0; # we keep track of the maximum addend for estimating the true number of significant digits
if ($p_p =~ /\//) {
&wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" );
} else {
&wprint( "Probability of success on a single trial (p): $p0\n" );
}
&wprint( "Number of trials (n): $n0\n" );
&wprint( "Number of successes (k): $k0\n" );
# In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p
if (('q' eq $p_a) || ('h' eq $p_a)) {
$p1 = undef;
$p2 = undef;
}
# First calculate the binomial probability P(X=k)
&timeit_start('calc_bprob');
$probability = &$calc_binom_prob( $n0, $k0, $p );
&timeit_end('calc_bprob');
my $fp_prob = &display_prob($probability, 2, $dd, undef, ($p > 0) && ($p < 1));
if ($cgi_context) {
print( "Probabilities:
\n"
. " P( X = k ): $fp_prob
\n" );
} else {
&wprint( "Probabilities:\n"
. " P( X = k ): $fp_prob\n" );
}
if ($k0 > 0) {
for (my $i0=0; $i0 < $k0; $i0++) {
&timeit_start('cumu_sum');
my $probability_tmp = &$calc_binom_prob( $n0, $i0, $p );
if ('h' eq $p_a) {
# "hybrid" mode: convert from 64-bit floating point to BigFloat for summing
$probability_tmp = Math::BigFloat->new($probability_tmp);
}
$cummu_0_to_im1 += $probability_tmp;
if ($probability_tmp > $max_addend) {
$max_addend = $probability_tmp;
}
&timeit_end('cumu_sum');
}
}
# Then display the cumulative probabilities
my $cprob1;
&timeit_start('cumu_sum');
if ($n0 == $k0) {
$cprob1 = 1;
} else {
my $probability_tmp = $probability;
if ('h' eq $p_a) {
# in "hybrid" mode we calculate probabilities in 64-bit floating point, but sum them as BigFloats
$probability_tmp = Math::BigFloat->new($probability_tmp);
}
$cprob1 = $cummu_0_to_im1 + $probability_tmp;
if ($probability_tmp > $max_addend) {
$max_addend = $probability_tmp;
}
}
my $cprob2 = 1.0 - $cprob1;
my $cprob3 = 1.0 - $cummu_0_to_im1;
&timeit_end('cumu_sum');
if ($dbg>1) { print "dbg: fp_cprob0 -- p='$p', k0='$k0'\n"; }
my $fp_cprob0 = &display_prob( $cummu_0_to_im1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );
if ($dbg>1) { print "dbg: fp_cprob1 -- p='$p', k0='$k0', n0='$n0'\n"; }
my $fp_cprob1 = &display_prob( $cprob1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
if ($dbg>1) { print "dbg: fp_cprob2 -- p='$p', k0='$k0', n0='$n0'\n"; }
my $fp_cprob2 = &display_prob( $cprob2, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
if ($dbg>1) { print "dbg: fp_cprob3 -- p='$p', k0='$k0'\n"; }
my $fp_cprob3 = &display_prob( $cprob3, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );
if ($cgi_context) {
print( " P( X < k ): $fp_cprob0
\n"
. " P( X ≤ k ): $fp_cprob1
\n"
. " P( X > k ): $fp_cprob2
\n"
. " P( X ≥ k ): $fp_cprob3
\n" );
} else {
&wprint( " P( X < k ): $fp_cprob0\n"
. " P( X <= k ): $fp_cprob1\n"
. " P( X > k ): $fp_cprob2\n"
. " P( X >= k ): $fp_cprob3\n" );
}
}#do_m_cu
# m=acu calculate and display probability of x=k, and cumulative probabilities of x in 0..k
sub do_m_acu {
my $max_addend = 0; # we keep track of the maximum addend for estimating the true number of significant digits
# Calculate all the binomial probabilities for lower values of k (down to zero)
&wprint( "Cumulative probability:\n" );
if ($dbg>0) { &wprint( "dbg: sub do_m_acu(), dd=$dd\n" ); }
@saved_probs = ();
# we already calculated the binomial probability for x=k, so save it:
if ('h' eq $p_a) {
# "hybrid" mode: convert from 64-bit floating point to BigFloat for summing
$saved_probs[$k0] = Math::BigFloat->new($probability);
} else {
$saved_probs[$k0] = $probability;
}
@cummu_0_to_i = (); # cumulative probabilities
# Then display the cumulative probabilities
if ($cgi_context) {
$in_midst_of_generating_acu_table = 1;
print "\n";
print "k | P(X=k) | " .
"range | " .
"P(X in range) | 1 - P(X in range) |
\n";
for (my $i0=0; $i0 <= $k0; $i0++) {
my $prob;
if ($i0 < $k0) {
&timeit_start('calc_bprob');
$prob = &$calc_binom_prob( $n0, $i0, $p );
if ('h' eq $p_a) {
$prob = Math::BigFloat->new($prob);
}
$saved_probs[$i0] = $prob;
&timeit_end('calc_bprob');
} else {
$prob = $saved_probs[$i0];
}
my $cummu_to_i;
if (0 == $i0) {
$cummu_to_i = $prob;
} else {
&timeit_start('cumu_sum');
$cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob);
&timeit_end('cumu_sum');
}
$cummu_0_to_i[$i0] = $cummu_to_i;
if ($prob > $max_addend) {
$max_addend = $prob;
}
&timeit_start('cumu_sum');
my $one_minus_cummu_to_i = 1.0 - $cummu_to_i;
&timeit_end('cumu_sum');
# my $cummu_from_i = $cummu_i_to_k[$i0];
# &timeit_start('cumu_sum');
# my $one_minus_cummu_from_i = 1.0 - $cummu_from_i;
# &timeit_end('cumu_sum');
my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) );
# sprintf("%#.15g", $prob);
my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $cummu_to_i);
my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $one_minus_cummu_to_i);
# my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd); # sprintf("%#.15g", $cummu_from_i);
# my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd); # sprintf("%#.15g", $one_minus_cummu_from_i);
print "$i0 | $fp_prob | " .
"0..$i0 | " .
"$fp_cummu_to_i | $fp_one_minus_cummu_to_i |
\n";
}
print "
\n";
$in_midst_of_generating_acu_table = 0;
} else {
print "|" . center('k',8) . "|" . center('P(X=k)',7+$dd) .
"|" . center('range',11) .
"|" . center('P(X in range)',7+$dd) . "|" . center('1 - P(X in range)',7+$dd) . "|\n";
for (my $i0=0; $i0 <= $k0; $i0++) {
my $prob;
if ($i0 < $k0) {
&timeit_start('calc_bprob');
$prob = &$calc_binom_prob( $n0, $i0, $p );
if ('h' eq $p_a) {
$prob = Math::BigFloat->new($prob);
}
$saved_probs[$i0] = $prob;
&timeit_end('calc_bprob');
} else {
$prob = $saved_probs[$i0];
}
my $cummu_to_i;
if (0 == $i0) {
$cummu_to_i = $prob;
} else {
&timeit_start('cumu_sum');
$cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob);
&timeit_end('cumu_sum');
}
$cummu_0_to_i[$i0] = $cummu_to_i;
if ($prob > $max_addend) {
$max_addend = $prob;
}
&timeit_start('cumu_sum');
my $one_minus_cummu_to_i = 1.0 - $cummu_to_i;
&timeit_end('cumu_sum');
# my $cummu_from_i = $cummu_i_to_k[$i0];
# &timeit_start('cumu_sum');
# my $one_minus_cummu_from_i = 1.0 - $cummu_from_i;
# &timeit_end('cumu_sum');
if ($dbg>1) { print "dbg: fp_prob -- p='$p', k0='$k0', n0='$n0'\n"; }
my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) ); # sprintf("%#.15g", $prob);
if ($dbg>1) { print "dbg: fp_cummu_to_i -- p='$p', i0='$i0', n0='$n0'\n"; }
my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $cummu_to_i);
if ($dbg>1) { printf( "dbg: fp_one_minus_cummu_to_i -- p='$p', i0='$i0', n0='$n0' %d %d %d %d \n", ($p > 0), ($p < 1), ($i0 < $n0), (($p > 0) && ($p < 1) && ($i0 < $n0)) ); }
my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $one_minus_cummu_to_i);
# my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd); # sprintf("%#.15g", $cummu_from_i);
# my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd); # sprintf("%#.15g", $one_minus_cummu_from_i);
print "|" . center($i0,8) . "|" . center($fp_prob,7+$dd) .
"|" . center("0..$i0",11) .
"|" . center($fp_cummu_to_i,7+$dd) . "|" . center($fp_one_minus_cummu_to_i,7+$dd) . "|\n";
}
}
}#do_m_acu
sub main {
if (('q' eq $p_a) || ('h' eq $p_a)) {
$dd = 11; # 11 digits is conservative for IEEE 754 64-bit floating point, but if we set it to 12 then the last digit is often off by one
} else {
$dd = 16; # no. of display digits = 16 for extreme precision settings
}
if ($dd > ($accuracy - 2)) {
$dd = $accuracy - 2; # we can set d= even to a smaller number than the default a=q accuracy (that's really only useful for testing purposes)
if ($dd < 2) {
$dd = 2;
}
}
if (!$n0) {
&wprint( "(Nothing to do.)\n" );
} elsif ('co' eq $p_m) {
&do_m_co();
} elsif ('b' eq $p_m) {
&do_m_b();
} elsif ('cu' eq $p_m) {
&do_m_cu();
} elsif ('acu' eq $p_m) {
&do_m_b();
&do_m_acu();
}
}#main
sub timed_out {
if ($in_midst_of_generating_acu_table) {
print "\n\n";
}
print "\n\nERROR: Server timed out.
\n" .
"The calculation was too computationally intensive to run on my server. But if you " .
'download binomcalc.pl ' .
"and run it on your own computer, you can let it run for as long as necessary.
\n";
exit 1;
}
&timeit_start('main');
if ($p_t) {
eval {
local $SIG{ALRM} = 'timed_out';
alarm 0+$p_t; # start timout timer
&main();
alarm 0; # cancel timeout timer
}
} else {
&main();
}
&timeit_end('main');
# if b=... unspecified, then default to b=0 if runtime was less than 1 seconds, or b=1 otherwise
if (!defined $p_b) {
#if ($runtimes{'main'} >= 1.0) {
# $p_b = '1';
#} else {
$p_b = '0'; # default is b=0
#}
}
if ('0' ne $p_b) {
# benchmark = 1
&timeit_display;
}
&wprint("\n");
__END__