#!/usr/bin/perl use strict; use warnings; # push @INC,'.'; # require "traceback.pl"; # handy for debugging, but I'm not using it now # This program calculates binomial probabilities (or, optionally, just the binomial coefficient). # Unlike most binomial calculators, this one goes to considerable effort to avoid overflow and # underflow problems, so it works correctly when most others fail. # Copyright 2021, by David A. Burton # Cary, NC USA # Mobile / WhatsApp: +1 919-244-3316 # Email: https://sealevel.info/contact.html # This is free, open source software, but it is copyrighted. You are welcome to use it, in whole # or in part, for any purpose. However, if you do so, I require that you retain this notice, or # a substantially equivalent notice, in copies or substantial excerpts of the program. # TLIB Version Control fills in the version information for us: our $version_str; #--=>keyflag<=-- "&(#)%n, version %v, %11d " $version_str = "&(#)binomcalc.cgi, version 93, 07-May-2025 "; our $trimmed_version_str; if ($version_str =~ /, (version [0-9]+, [0-9]*\-[a-zA-Z]{3}\-[0-9]+) /) { $trimmed_version_str = "$1"; } else { $trimmed_version_str = ''; } $| = 1; # We would need cgi.pm for "user_agent" lookup, and some convenience functions, like header(), param(), etc. # But I'm not using any of those. # use CGI ':standard'; # Example of usage: # # our $Inf = 2e262144; # Scalar floating-point infinity our $POS_NORM_SMALLEST = 2.22507385850720138e-308; # smallest possible normalized positive IEEE 64-bit float value our $POS_DENORM_SMALLEST = 4.94065645841247e-324; # smallest possible denormalized (subnormal) positive IEEE 64-bit float value use Scalar::Util qw(blessed isdual reftype tainted isvstring looks_like_number set_prototype); use Time::HiRes qw(time sleep); # use lib '/root/perl5/lib/perl5/x86_64-linux-thread-multi/Math'; use lib '/root/perl5/lib/perl5'; # for GMP use Math::GMP; # Do this so it'll fail if GMP's missing (otherwise it falls back to the slow lib). If it fails try installing: "cpan Math::GMP" "cpan Math::BigInt::GMP" use Math::BigInt lib => 'GMP'; use Math::BigFloat lib => 'GMP'; use Math::BigRat lib => 'GMP'; # use bignum; <--- Do not do this! (My code expects scalars by default.) # Math::BigFloat->accuracy(500); <--- I'll do this further down (number of digits is now adjustable) # printf(" Bigfloat version = " . $Math::BigFloat::VERSION . "\n" ); BEGIN { # Polyfill for missing nparts method prior to BigFloat version 1.999808 (circa 2017) if (!exists &Math::BigFloat::nparts) { *Math::BigFloat::nparts = sub { my $x = shift; my ($mantis, $exp) = $x->parts(); # both are BigInts my $len = $mantis->length(); # number of digits in the mantissa $exp += ($len-1); # $mantis = Math::BigFloat->new($mantis) / (10**($len-1)); <== this fails for large $len, due to overflow $mantis = Math::BigFloat->new($mantis) / (Math::BigFloat->new(10)**($len-1)); return ($mantis,$exp); }; } } our $dbg = 0; # default our $echo_cmdline = 1; # normally we display command line, but v=0 can suppress it our $vrblx; # temp value used by &dspl() our $typefunc_avail = 0; # needed by sub dspl(), and set to 1 if we successfully load type() from autobox our $in_midst_of_generating_acu_table = 0; # if
is being generated, this will be 1 (so we can close the table on a timeout) # What is the name of this program file? our $scriptname = $0; if ($scriptname =~ /([^\/\\\:]+)$/) { $scriptname = $1; # just the file name, not the directory } our $scriptdotpl = $scriptname; # this script name, which could end in .pl or .cgi $scriptdotpl =~ s/\.cgi$/.pl/; # same, except always ends in .pl our $parameters1 = $ENV{'QUERY_STRING'}; # when invoked via server-side include our $parameters2 = join(' ',@ARGV); # when run standaline, at a command line (any OS) our $cgi_context; our $parameters; if ($parameters2) { $cgi_context = 0; $parameters = ' ' . $parameters2; # from @ARGV, so it must be at a command prompt } elsif ($parameters1) { $cgi_context = 1; $parameters = ' ' . $parameters1; # from $ENV{'QUERY_STRING'}, so it's probably CGI } else { # no parameters, so is it in CGI context, or not? $cgi_context = ($scriptname =~ /\.cgi$/); # best guess is from the filename extension: .pl or .cgi $parameters = ''; } # The parameters will be processed further down, for the most part, but here we # take a sneek peek at two of them: v={verbosity} and o={outputmode}. our $p_v = ''; # verbosity (debug level) if ($parameters =~ /[ \-\&]v=([0-9\-]+)/) { $p_v = $1; $dbg = 0 + $p_v; if ($dbg <= 0) { $echo_cmdline = 0; # explicit v=0 supresses echoing the command line } } # The o=... parameter is optional, for overriding the output mode: our $p_o = ''; # h = html / cgi, t = plain text, p =
 block
if ($parameters =~ /[ \-\&]o=([htp])/) {
   $p_o = $1;
   $cgi_context = ('h' eq $1);
}

# In a cgi context, we have to emit this magic line, before we can write anything else.
if ($cgi_context || ('p' eq $p_o)) {
   print "Content-Type: text/html\n\n";  # in case it's running as a .cgi script
}

if ($dbg>0) {
   &wprint( "$scriptname $trimmed_version_str\n" );
   &wprint( "OS = '$^O'\n" );  # What OS is this?
   &wprint( "Perl version " . $] . " = " . $^V . "\n" );
   &wprint( 'Math::BigFloat->VERSION = ' . Math::BigFloat->VERSION . "\n" );
   &wprint( 'Math::BigInt::GMP->VERSION = ' . Math::BigInt::GMP->VERSION . "\n" );
} elsif ($cgi_context) {
   print( "\n\n" );
}

# Because binomial probability calculations can be very computationally intensive,
# we need to ensure that we don't end up with too many instances running, lest it
# monopolize my little web server. (I don't bother with this if running on the
# desktop.)

our $num_server_instances = 0;
if (($^O =~ /linux/i) && ($scriptname =~ /\.cgi$/)) {
   $num_server_instances = 1;
   # my $tmp = `pgrep -a binomcalc\\.cgi`;
   # &wprint("PGREP output:\n$tmp\n-----------\n");
   my $tmpcount = `pgrep -c binomcalc\\.cgi`;
   $num_server_instances = 0 + $tmpcount;
   if ($dbg>0) {
      &wprint("Instance count = '$num_server_instances'\n");
   }
}
if ($num_server_instances > 2) {
   # The usual case is one instance running (this process).
   # My server has four cores, so I allow at most two instances, to avoid bogging it down too much.
   print "ERROR: Server busy. Wait a few minutes and try again.
\n" .
         '(Or download binomcalc.pl,' .
         " and run it on your own computer.)\n";
   exit 1;
}

if ((!$parameters) && !$cgi_context) {
   # no parameters specified -- print help message and quit
   &givehelp("** ERROR: no parameters were specified to $scriptname");
   exit 1;
}

if ($dbg >= 1) {
   &wprintf("dbg: QUERY_STRING='%s', ARGV='%s'\n", (defined $parameters1) ? $parameters1 : 'UNDEF', (defined $parameters2) ? $parameters2 : 'UNDEF' );
}


# define type() for debugging:
if ($dbg>0) {
   # use autobox::universal qw(type);
   if (eval {require autobox::universal;}) {
      autobox::universal->import('type');
      $typefunc_avail = 1;
   } else {
      if ($cgi_context) {
         print 'Warning: Cannot locate autoboxx/universal.pm in @INC. You may need to install autoboxx, like this:' . "
\n" .
                "cpan autobox::universal\n\n";
      } else {
         print 'Warning: Cannot locate autoboxx/universal.pm in @INC (you may need to install autoboxx)' . "\n" .
                "In Strawberry Perl you can get autobox like this:\n" .
                "  1. from a Windows Administrator (elevated) command prompt...\n" .
                "  2. c:\n" .
                "  3. cd \\strawberry\\perl\\bin\n" .
                "  4. cpan autobox::universal\n\n";
      }
      eval("sub type { return '?';}");
   }
}

# for benchmarking; the only parameter is an arbitrary name
our %runtimes = ();
our %runtime_cnt = ();
our %interval_started_at = ();

# Call this at start of timing interval. (If it's called more than once, the 2nd call just increments the runtime_cnt.)
sub timeit_start {
  my $name = shift;
  if (defined $interval_started_at{$name}) {
     $runtime_cnt{$name} += 1;
  } else {
     $interval_started_at{$name} = time();
     if (!exists $runtime_cnt{$name}) {
        $runtime_cnt{$name} = 0;
        $runtimes{$name} = 0.0;
     }
  }
}

# Call this at the end of the timing interval.
sub timeit_end {
  my $name = shift;
  if (!exists $interval_started_at{$name}) {
    die "ERR: timeit_end('$name'), but timeit_start wasn't called.";
  }
  my $time_increment = time() - $interval_started_at{$name};
  delete $interval_started_at{$name};
  $runtimes{$name} += $time_increment;
  $runtime_cnt{$name} += 1;
}

# Call this to reset timer (forget previous timings).
sub timeit_reset {
  my $name = shift;
  delete $interval_started_at{$name};
  delete $runtimes{$name};
  delete $runtime_cnt{$name};
}

# If a name is specified, display the timing stats for that name;
# otherwise, display them all.
sub timeit_display {
  my $name;
  if ($#_ >= 0) {
     $name = shift;
     if (exists $interval_started_at{$name}) {
        &timeit_end($name);
     }
     &wprintf("Time %s, %f / %d = %f each\n", $name, $runtimes{$name}, $runtime_cnt{$name}, ($runtimes{$name} / $runtime_cnt{$name}) );
  } else {
    foreach $name (sort keys %runtimes) {
       &timeit_display($name);
    }
  }
}

# return a description of the input paramter (for my debug prints)
sub dspl {
   local($vrblx) = $_[0];
   my( $result, $tp, $rp );
   if (!defined $vrblx) {
      $result = 'UNDEF';
      if ($dbg > 2) {
         &wprint( "dbg: dspl(UNDEF)" );
      }
   } else {
      $tp = '';
      $rp = '';
      if ($typefunc_avail) {
         $tp = &type($vrblx);
         if (('STRING' eq $tp) and looks_like_number($vrblx)) {
            if ($vrblx =~ /^(\-|)[0-9]+$/) {
               $tp = 'Integer';
            } else {
               $tp = 'Float';
            }
         }
      }
      if (('' eq $tp) or ('HASH' eq $tp)) {
         $rp = ref($vrblx);
      }
      if ($dbg > 2) {
         &wprint( "dbg: dspl(), type=$tp, ref=$rp,\n" );
      }
      if (('' eq $rp) && ('' eq $tp)) {
         $result = "$vrblx";
      } elsif ('' ne $rp) {
         $result = "$rp $vrblx";
      } elsif ('STRING' eq $tp) {
         $result = "$tp '$vrblx'";
      } else {
         $result = "$tp $vrblx";
      }
   }
   if ($dbg > 2) {
      &wprint( "  result='$result'\n" );
   }
   return $result;
}


# v= and o= have already been handled; here we process the rest of the parameters.
our ($p_n, $p_k, $p_p, $p_m, $p_a, $p_t, $p_d, $p_b);
$p_p = "0.5";  # default
$p_m = 'b';  # default
$p_a = 'q';  # default
our $accuracy = 500;  # default is 500 digits except for a=q, which is about 14 digits (note: BigFloat default is 40)
our $effective_accuracy = $accuracy;

sub process1param {
   my ($opt,$val) = @_;
   # &wprintf("  opt='%s' val='%s'\n", $opt, $val);

   if ('n' eq $opt) {
      if ($val =~ /^([1-9][0-9]*)$/) {
         $p_n = $1;
      } else {
         &wprintf("ERROR: 'n=%s' (should be a positive integer)\n", $val);
         exit 1;
      }

   } elsif ('k' eq $opt) {
      if ($val =~ /^([0-9]+)$/) {
         $p_k = $1;
      } else {
         &wprintf("ERROR: 'k=%s' (should be 0..n)\n", $val);
         exit 1;
      }

   } elsif ('p' eq $opt) {
      if ($val =~ /^([0-9\/\.eE\-\+]+)$/) {
         # if it contains a / then it's a rational number; the e, E, -, and + are so that it can be in scientific notation
         $p_p = $1;
      } else {
         &wprintf("ERROR: 'p=%s' (should be 0..1)\n", $val);
         exit 1;
      }

   } elsif ('m' eq $opt) {
      # the m=... parameter must be 'b' (default Binomial probability), 'cu' (CUmulative probability), 'acu' (All CUmulative probabilities), or 'co' (binomial COefficient)
      if ($val =~ /^(b|cu|acu|co)$/i) {
         $p_m = lc $1;
      } else {
         &wprint("ERROR: 'm=$val' (mode) parameter to $scriptname may only be b, cu, acu, or co.\n");
         exit 1;
      }

   } elsif ('a' eq $opt) {
      # the a=... parameter must be 's' (Slow = GMP BigRat), 'q' (Quick = scalar float = default), 'h' (Hybrid), 'x' (eXperimental = BigFloat), or 'xx' (also eXperimental)
      if ($val =~ /^(x(x|)|s|q|h)$/) {
         $p_a = lc $1;
      } else {
         &wprintf("ERROR: 'a=$val' (accuracy) parameter may only be q, h, s, x or xx.\n");
         exit 1;
      }

   } elsif ('t' eq $opt) {
      # the t=... (timeout) parameter is optional
      if ($val =~ /^([0-9\.]*)$/) {
         $p_t = $1;
      } else {
         &wprint("ERROR: 't=$val' (timeout) parameter must be numberic (max number of seconds).\n");
         exit 1;
      }

   } elsif ('d' eq $opt) {
      # the d=... (digits) parameter is optional (but ignored for a=q)
      if ($val =~ /^([0-9]+)$/) {
         $p_d = $1;
         $effective_accuracy = $accuracy = 0 + $p_d;
      } else {
         &wprint("ERROR: 'd=$val' (digits of precision) parameter must be a non-negative integer.\n" .
                 "(Default is 500 digits, except for a=q, which is about 15 digits.)\n");
         exit 1;
      }

   } elsif ('b' eq $opt) {
      # the b=1 (benchmark) parameter is optional
      if ($val =~ /^([01])$/) {
         $p_b = $1;
      } else {
         &wprint("ERROR: 'b=$val' (benchmark) parameter must be 0 or 1.\n");
         exit 1;
      }

   } elsif (('v' eq $opt) || ('o' eq $opt)) {
      # handled above
   } else {
      &wprintf("ERROR: unrecognized parameter to $scriptname: '%s=%s'\n", $opt, $val);
      &wprint("parameters='$parameters'\n");
      exit 1;
   }
}

my $parm;
for $parm (split(/\s+\-|\s+|\&/, $parameters)) {
   if ($dbg > 1) {
      &wprint( "dbg: parm='$parm'\n");
   }
   if ('' ne $parm) {
      if ($parm =~ /^([a-zA-Z]+)=(.+)$/) {
         my $opt = lc $1;
         my $val = $2;
         &process1param( $opt, $val );
      } else {
         &wprintf("ERROR: unrecognized parameter to $scriptname: '%s'\n", $parm);
         &wprint("Parameters='$parameters'\n");
         exit 1;
      }
   }
}

# the k=... parameter is reqired
# the n=... parameter is reqired
# the p=... parameter defaults to 0.5
# the m=... parameter defaults to b
# the a=... parameter defaults to q
our $p;  # Note: $p will be set waaaay down in the program

if ((!defined $p_n) && !defined $p_k) {
   &givehelp('');
   exit 1;
}
if ((!defined $p_n) || !defined $p_k) {
   &givehelp("ERROR: the n and k parameters are required.\n");
   exit 1;
}

if ($accuracy < 14) {
   if ($accuracy < 1) {
      $effective_accuracy = $accuracy = 1;
   }
   &wprintf("Warning: 'd=%d' digits is less than the accuracy of the 'a=q' default 'quick' setting (about 14 digits). That's probably a mistake.\n", $accuracy );
} elsif ($dbg && ('q' ne $p_a) && (('h' ne $p_a) || ('b' ne $p_m))) {
   &wprintf("Computing with 'd=%d' digits of accuracy.\n", $accuracy );
}
Math::BigRat->accuracy($accuracy);
Math::BigFloat->accuracy($accuracy);
if (($effective_accuracy > 13) && (('q' eq $p_a) || ('h' eq $p_a))) {
   # in a=q or a=h mode we calculate with 64-bit scalar floats, so effective accuracy is only about 14 digits
   $effective_accuracy = 13;  # I'm being conservative
}

if ($echo_cmdline) {
   # echo the equivalent Perl command-line
   my $cmdline = 'perl ' . $scriptdotpl;
   if ($p_v) { $cmdline .= " v=$p_v"; }
   # if ($p_t ne '') { $cmdline .= " t=$p_t"; }
   if ($p_m) { $cmdline .= " m=$p_m"; }
   if ($p_a) { $cmdline .= " a=$p_a"; }
   if ($p_d) { $cmdline .= " d=$p_d"; }
   if ($p_b) { $cmdline .= " b=$p_b"; }
   if ($p_n) { $cmdline .= " n=$p_n"; }
   if ($p_k ne '') { $cmdline .= " k=$p_k"; }  # note: k can be zero
   if ($p_p) { $cmdline .= " p=$p_p"; }
   if ($cgi_context) {
      print "$cmdline \n"
   } else {
      print "\n$cmdline\n";
   }
}

sub givehelp {
   my $ermsg = shift;
   # First, reformat the TLIB version info, for prettier display
   # e.g., $version_str = "&(#)binomcalc.pl, version 42, 19-Dec-2020 ";
   my $vstr = $trimmed_version_str;
   if ('' ne $vstr) {
      $vstr = "  ($vstr)";
   }
   if (!defined $ermsg) {
      $ermsg = '';
   } elsif ('' ne $ermsg) {
      $ermsg .= "\n\n";
   }
   # Now display the Help message, including version number
   &wprint( "\n" . $ermsg .
         "$scriptdotpl -- extreme precision binomial calculator\n" .
         "by Dave Burton$vstr\n" .
         "\n" .
         "Note: you can rename this program to binomcalc.cgi and use it from a web page, via a\n" .
         "\"server-side include,\" as in this example:\n" .
         "\n" .
         "  \n" .
         "\n" .
         "That does the same calculation as this command:\n" .
         "\n" .
         "  c:\\> perl -f binomcalc.pl n=12 k=3 p=1/2\n" .
         "\n" .
         "The following command-line parameters are supported:\n" .
         "\n" .
         " n={integer}   (REQUIRED: the n of 'n choose k')\n" .
         " k={integer}   (REQUIRED: the k of 'n choose k')\n" .
         " p={probability} (decimal value 0..1, e.g., 0.5, or a fraction, e.g., 1/3)\n" .
         " m={b, cu, acu, co} (b = binomial probability, cu = cumulative probabilities,\n" .
         "                acu = all cumulative probabilities, co = coefficient only)\n" .
         " a={q, s, h, x, xx} (q = quick/scalar = default, s is slower w/ extreme precision,\n" .
         "                h = hybrid, x and xx are experimental [don't use them])\n" .
         " o={h, t, p}   (output: h = html, t = text, p = , default determined by context)\n" .
         " t=nnn         (timeout limit, nnn seconds; default = 0 for no timeout)\n" .
         " b=1           (benchmark it; show timings)\n" .
         " d={integer}   (digits of accuracy: default for a=s is 500; for a=q it's about 13)\n" .
         " v={integer}   (verbosity; 0 = no debug prints, 1 = some, 2 = lots)\n" .
         "\n" .
         "Other notes:\n" .
         "1. Default mode is m=b (calculate binomial probability).\n" .
         "2. Default accuracy setting is a=q, which is fastest. For higher accuracy use a=s.\n" .
         "   The x and xx (experimental) settings are usually slower and not as good.\n"
         );
   # And then exit, with errorlevel = 1
   exit 1;
}

# wrap a printable string in 
, add 
s, etc., if running in a CGI context
sub wrap {
   my $str = shift;
   if ($cgi_context) {
      $str =~ s/\n$//;
      $str =~ s/\n  /\n  /g;
      $str =~ s/\n /\n /g;
      $str =~ s/  /  /g;
      $str =~ s/\n/g;
      $str = "$str\n";
   }
   return $str;
}

# Ensure that argument is padded to at least n characters, and roughly centered
sub center {
   my ($s,$n) = @_;
   my $result = $s;
   my $l = length($s);
   if ($l <= $n) {
      my $pr = int(($n-$l)/2);
      my $pl = ($n-$l) - $pr;
      $result = (' ' x $pl) . $s . (' ' x $pr);
      # if there's a leading '<' or '>' then we might move the result one space to the left, to line up better with other m=acu results:
      if ($pl > $pr) {
         if ($result =~ /  ([<>][^ ]*) /) {
            $result = substr($result,1) . ' ';
         }
      }
   }
   return $result;
}

# wrapped print -- does something reasonable either at command-line or in CGI context
sub wprint {
   my $str = shift;
   print &wrap($str);
}

# wrapped printf -- does something reasonable either at command-line or in CGI context
sub wprintf {
   my $fmt = shift;
   my $str = sprintf( $fmt, @_ );
   &wprint($str);
}

# insert commas into an integer if it's > 4 digits long
sub commafy {
   my $number = shift;
   my @pieces = ();
   $number .= '';
   # if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
   if ((length($number) > 4) && ($number =~ /^(\-|)[0-9]+$/)) {
      while (length($number) > 0) {
         unshift(@pieces,substr($number,-3));
         substr($number,-3) = '';
         # if ($dbg>0) {
         #    $tm1 = join(',',@pieces);
         #    &wprint( "dbg: number='$number', pieces='$tm1'\n" );
         # }
      }
      $number = join(',',@pieces);
   }
   # if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
   return $number;
}#comafy

# Format a float or BigFloat into normalized scientific notation.
# This is like the ->bnstr() method for BigFloats, but with mantissa rounded to N significant digits.
# Input parameters are $x (either scalar float or BigFloat), and $n (number of significant digits).
sub bnstrN {
   my ($x,$n) = @_;
   my $x2 = Math::BigFloat->new($x);  # in case $x is scalar
   my $result;
   my ($mantis,$exp) = $x2->nparts();
   # print "dbg: bnstrN:\n exp=" . &dspl($exp) . "\n";
   $exp->bfround(0);  # otherwise exponent might display as "-6.00000000000000000000000000000" instead of "-6"
   my $rounded_mantis = $mantis->copy();
   $rounded_mantis->bfround(-($n-1));
   if ($rounded_mantis =~ /^10\.([0-9]*)$/) {
      # Sometimes rounding "almost 1" results in "10.000000000e-1", this changes it to "1.0000000000"
      $rounded_mantis = '1.0' . $1;
      $exp++;
   }
   if ($exp) {
      if (ref($exp)) {
         # convert BigFloat to scalar, lest we get something like "1.00e-12.0000000000000"
         $exp = $exp->numify();
      }
      $result = $rounded_mantis . 'e' . $exp;
   } else {
      $result = $rounded_mantis;
   }
   # print "dbg: bnstrN:\n exp=$exp\n mantis=$rounded_mantis\n result=$result\n";
   return $result;
}


# if input is 0.005 then result is 2 (there are two zeros after the decimal point)
sub number_of_zeros_after_the_decimal_point {
  my $x = shift;
  my $result;
  if (ref $x) {
     # BigFloat or BigRat
     if ($x <= 0) {
        # negative is only possible due to accumulated rounding errors
        $result = $effective_accuracy - 1;
     } else {
        my $result2 = int(abs(log10($x)));
        # print "dbg: x=" . &dspl($x) . "   result2=" . &dspl($result2) . "\n";
        $result = $result2->numify();
     }
  } else {
     # scaler 64-bit float
     if ($x <= 0) {
        $result = $effective_accuracy - 1;
        if ($dbg>0) { print "dbg: x.LE.0, x=" . &dspl($x) . "   result=" . &dspl($result) . "\n"; }
     } else {
        $result = int(abs(log10($x)));
     }
  }
  if ($dbg>0) { print "dbg: number_of_zeros_after_the_decimal_point(" . &dspl($x) . ") = " . &dspl($result) . "\n"; }
  return $result;
}


# Depending on how tiny the result is, we might display it in both floating point and scientific notation, or just in one or the other.
# First parameter ($prob) should be a scalar float or BigFloat in [0..1].
# (Due to accumulated rounding errors, $prob can sometimes be slightly < 0 or slightly > 1. We handle that gracefully.)
# Optional second parameter ($verbosity) can be 1 (to display only "5.450654626823856e-368") or 2 (for "<0.000000000000001 = 5.450654626823856e-368").
# Third parameter ($digits) is the maximum number of sighificant digits with which to display the result (default 15).
# (Note: do not confuse $digits with global $effective_accuracy, the number of digits ued in calculations.)
# Forth parameter ($max_addend) is an optional parameter for effectively reducing the number of signifcant digits when $prob is a small number calculated from
# sums & differences of larger-magnitude addends. For example, if digits is 6, then 1.000000 - 0.999990 = 0.000010 has only two significant digits, not six.
# $max_addend should be the largest of the addends/subtrahends/minuends which were included in the calculation.
# Fifth parameter ($definitely_not_exactly_0or1) can be passed as True to tell this subroutine that "zero" is really slightly larger than zero,
# and "one" is always really slightly less than one.
sub display_prob {
   &timeit_start('display_prob');
   my ($prob, $verbosity, $digits, $max_addend, $definitely_not_exactly_0or1) = @_;
   my $prob_is_scalar = !(ref $prob);  # true for IEEE 64-bit, false for BigRat or BigFloat
   if (!defined $verbosity) {
      $verbosity = 2;
   }
   if (!defined $digits) {
      $digits = 15;
      if ($dbg>0) { &wprint( "dbg: digits defaults to $digits\n" ); }
   } else {
      if ($dbg>0) {
         &wprint( "dbg: digits passed in as $digits\n" );
         # &traceback("display_prob");
      }
   }
   if (!defined $definitely_not_exactly_0or1) {
      $definitely_not_exactly_0or1 = 0;
   }
   if ($dbg>0) { printf( "dbg: display_prob( $prob, $verbosity, $digits, %s, $definitely_not_exactly_0or1 )\n", (defined $max_addend) ? $max_addend : '{undef}' ); }

   # first, we calcluate a plain floating point representation

   my $fp_prob;

   my $digits_fp = $digits;  # floating point format digits (as opposed to scientific notation digits)
   my $fp_prob1;
   my $fp_prob1_digits_exceeded = 0;
   my $fp_prob1_is_tiny = 0;
   my $smallest_possible_fp = substr('0.00000000000000000000',0,$digits_fp+1) . "1";
   my $all_nines_fp = substr('0.99999999999999999999',0,$digits_fp+1);
   if ($prob <= $smallest_possible_fp) {
      if (($prob <= 0)  && !$definitely_not_exactly_0or1) {
         # exactly 0 and exactly 1 get special-cased
         $fp_prob = $fp_prob1 = substr('0.00000000000000000000',0,$digits_fp+2);
      } else {
         $fp_prob1 = $smallest_possible_fp;
         if ($cgi_context) {
            $fp_prob = '<' . $smallest_possible_fp;
         } else {
            $fp_prob = '<' . $smallest_possible_fp;
         }
         $fp_prob1_digits_exceeded = $fp_prob1_is_tiny = 1;
      }
   } elsif ($prob > $all_nines_fp) {
      if (($prob >= 1) && !$definitely_not_exactly_0or1) {
         $fp_prob = $fp_prob1 = substr('1.00000000000000000000',0,$digits_fp+2);
      } else {
         $fp_prob1 = $all_nines_fp;
         if ($cgi_context) {
            $fp_prob = '>' . $all_nines_fp;
         } else {
            $fp_prob = '>' . $all_nines_fp;
         }
         $fp_prob1_digits_exceeded = 1;
      }
   } else {
      my $fmt_f;
      if ($prob < 0.1) {
         $fmt_f = '%.' . ($digits_fp+1) . 'f';  # default "%.16f"
         $fp_prob = $fp_prob1 =  sprintf($fmt_f, $prob);
         # Problem: 0.099999999999999 would print as 0.1, resulting in one too many digits; this is the hacky fix:
         if ($fp_prob =~ /^0.[1-9]/) {
            $fmt_f = '%.' . $digits_fp . 'f';  # default "%.15f"
            $fp_prob = $fp_prob1 =  sprintf($fmt_f, $prob);
         }
      } else {
         $fmt_f = '%.' . $digits_fp . 'f';  # default "%.15f"
         $fp_prob = $fp_prob1 =  sprintf($fmt_f, $prob);
      }
      # print "dbg: fmt_f = '$fmt_f'\n";
   }
   my $len0 = length($fp_prob1);
   if ($dbg>0) { &wprintf("dbg: fp_prob1='$fp_prob1', fp_prob='$fp_prob'\n"); }

   # scientific notation is trickier...
   my $prob2_is_tiny = ($prob <= 0);
   my $prob_zeros = 0;
   my $max_addend_zeros = 0;
   if ($prob < 0.01) {
      $prob_zeros = number_of_zeros_after_the_decimal_point($prob);
      # ...but we only bother with scientific notation for probabilities less than 0.01
      if (!defined $max_addend) {
         if ($prob_is_scalar) {
            $max_addend_zeros = 308;
            if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=1, so max_addend_zeros=$max_addend_zeros\n" ); }
         } else {
            # $max_addend = $prob;
            $max_addend_zeros = $prob_zeros;
            if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=0, set max_addend_zeros=prob_zeros=$max_addend_zeros (I probably need to fix this!)\n" ); }
         }
      } else {
         # we might not have as many digits of accuracy as we'd hoped.
         $max_addend_zeros = number_of_zeros_after_the_decimal_point($max_addend);
         if ($definitely_not_exactly_0or1 && $prob2_is_tiny & ($dbg>1)) { print "dbg: prob2_is_tiny, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
         if ($dbg>0) { print "dbg: prob2_is_tiny=$prob2_is_tiny, definitely_not_exactly_0or1=$definitely_not_exactly_0or1, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
         if ($prob_zeros > $max_addend_zeros) {
            if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < ($digits + 3)) {
               if ($dbg>0) {
                  print "dbg: effective_accuracy = " . dspl($effective_accuracy) . "\n";
                  print "dbg: prob_zeros = " . dspl($prob_zeros) . "\n";
                  print "dbg: max_addend_zeros = " . dspl($max_addend_zeros) . "\n";
                  print "dbg: digits = " . dspl($digits) . "\n";
               }
               $digits = ($effective_accuracy - ($prob_zeros - $max_addend_zeros)) - 3;
               if ($dbg>0) { print "dbg: new digits = " . dspl($digits) . "\n"; }
               if ($digits < 2) {
                  if ($digits <= 0) {
                     $digits = 1;
                     $prob2_is_tiny = 1;
                  } else {
                     $digits = 2;
                  }
                  if ($dbg>0) { print " new digits = " . dspl($digits) . "\n"; }
               }
            }
         }
      }

      if ($prob_is_scalar && ($prob > 0) && ($prob < $POS_NORM_SMALLEST)) {
         if ($dbg>0) { &wprintf( "dbg: scalar prob='%.16g' < POS_NORM_SMALLEST='%.16g'\n", $prob, $POS_NORM_SMALLEST ); }
         # result is so tiny that we lost digits of precision due to denormalized 64-bit scalar representation
         my $tmp = sprintf("%.1g", $prob);
         if ($tmp =~ /e(\-[0-9]+)$/i) {
            my $tmp_expon = $1;  # should be -308..-324 because POS_NORM_SMALLEST = 2.22507385850720138e-308 and POS_DENORM_SMALLEST = 4.94065645841247e-324
            if ($dbg>0) { &wprint( "dbg: scalar subnormal prob is approx. $tmp, digits was='$digits', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny'\n" ); }
            if( ($tmp_expon > -308) || ($tmp_expon < -324)) {
               &wprintf( "ERR: prob='%.16g' tmp='$tmp' exp='$tmp_expon' (outside expected range)\n", $prob );
               exit 1;
            }
            if ($tmp_expon < -309) {
               my $tmp_sav_digits = $digits;
               $digits += ($tmp_expon + 309);  # reduce number of digits
               if ($dbg>0) { &wprint( "dbg: digits reduced from $tmp_sav_digits to $digits\n" ); }
               if ($digits <= 1) {
                  $digits = 1;
                  $prob2_is_tiny = 1;
               }
            }
            if ($dbg>0) { &wprint( "dbg: prob='$prob', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny', new denorm scalar digits = " . dspl($digits) . "\n" ); }
         }
      }

      # print "dbg: prob2_is_tiny = '$prob2_is_tiny'\n";

      if ($prob2_is_tiny) {
         my $tmp_expon = ($max_addend_zeros + $effective_accuracy - 2);
         if ($dbg>0) {print "dbg: prob2_is_tiny = True, expon='$tmp_expon', max_addend_zeros=$max_addend_zeros, effective_accuracy=$effective_accuracy, (defined max_addend)='" . (defined $max_addend) . "'\n"; }
         if ($prob_is_scalar && ($tmp_expon > 317)) {
            if ($dbg>0) {print "dbg: expon='$tmp_expon' -> 117 because max_addend=undef\n";}
            $tmp_expon = 317;
         }
         if (defined $max_addend) {
            # more conservative for cummulative sums, since they tend to accumulate errors
            $tmp_expon -= 1;
         }
         # if (0 == $prob) {
         #    $fp_prob = '(underflow)';
         # } else {
            $fp_prob = '1.0e-' . $tmp_expon;
            if ($cgi_context) {
               $fp_prob = '<' . $fp_prob;
            } else {
               $fp_prob = '<' . $fp_prob;
            }
         # }
      } else {
         if ($dbg>0) { &wprintf( "prob_zeros=$prob_zeros, max_addend_zeros=$max_addend_zeros, (prob_zeros > max_addend_zeros)=%d\n", ($prob_zeros > $max_addend_zeros) ); }
         if ($prob_zeros > $max_addend_zeros) {
            if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < 1) {
               $prob2_is_tiny = 1;
               if (!$fp_prob1_is_tiny) {
                  &wprint(  "ERR: prob2 tiny but not prob1! Please note the exact parameter you used, and tell Dave!\n"
                          . "prob='$prob'\n"
                          . "prob1='$fp_prob1'\n"
                          . "prob_zeros=$prob_zeros\n"
                          . "max_addend_zeros=$max_addend_zeros\n"
                          . "effective_accuracy=$effective_accuracy\n" );
               }
            }
         }

         my $fp_prob2;

         # Here we generate what I call the %g version (but I don't really use %g anymore)
         #if ($prob_is_scalar) {
         #   $fp_prob2 = sprintf( ('%#.' . ($digits-1) . 'e' ), $prob );  # '#' flag avoids removal of trailing decimal point
         #   if ($dbg>0) { &wprint( "dbg: scalar fp_prob2='$fp_prob2' vs. '" . bnstrN($prob,$digits) . "'\n" ); }
         #} else {
            $fp_prob2 = bnstrN($prob,$digits);  # scientific notation (do this because sprintf %g doesn't work for BigFloats)
         # }
         if ($dbg>0) { &wprint( "dbg: prob='$prob', digits=$digits, initial fp_prob2='$fp_prob2'\n" ); }
         if ($fp_prob2 =~ /^(\-|)([1-9])\.([0-9]*)e\-1$/) {
            # change "8.187225652655315e-1" to "0.8187225652655315"
            if ($dbg>0) { &wprint( "dbg: fp2 fixup: was '$fp_prob2'\n"); }
            $fp_prob2 = $1 . '0.' . $2 . $3;
            if ($dbg>0) { &wprint( "                now '$fp_prob2'\n"); }
         }
         my $len2 = length($fp_prob2);
         if ($dbg>0) { &wprint( "dbg:\n fb0 = '$fp_prob'\n fb2 = '$fp_prob2'\n" ); }

         if (($fp_prob1 ne $fp_prob2) && ($fp_prob2 ne substr($fp_prob1,0,$len2))) {
            # the %f and %g versions are different, and the %g version isn't just %f version missing a digit or two at the end
            if ($prob2_is_tiny) {
               if ($fp_prob2 =~ /^([^e]*)e(.*)$/) {
                  my $tmp = '1.0e' . (1+$2);
                  # print "dbg: fp_prob2 is tiny = '$fp_prob2' -> '<$tmp'\n";
                  if ($cgi_context) {
                     $fp_prob = '<' . $tmp;
                  } else {
                     $fp_prob = '<' . $tmp;
                  }
               } else {
                  &wprint("ERR: fp_prob2_is_tiny, fp_prob2='$fp_prob2', please tell Dave!\n");
               }
            } elsif ($verbosity <= 1) {
               $fp_prob = $fp_prob2;  # just use the scientific notation version
            } elsif ((!$fp_prob1_digits_exceeded) && ($len0 < $len2) && ($fp_prob2 !~ /e/i)) {
               # the %g version is in plain floating point notation, but has more digits than the %f version, so use the %g version
               $fp_prob = $fp_prob2;  # I'm unsure whether this can really happen
            } else {
               # display both versions
               if ($cgi_context) {
                  $fp_prob = $fp_prob . ' = ' . $fp_prob2;
               } else {
                  $fp_prob = $fp_prob . ' = ' . $fp_prob2;
               }
            }
         }
      }
   }

   # printf("dbg: display_prob:\n tmp3='%s'\n fp_prob='%s'\n", $tmp1, $tmp3, $fp_prob );
   if ($cgi_context) {
      $fp_prob =~ s/-/−/g;  # Use minus sign instead of hyphen, to avoid inappropriate line-break; or we could use ‑ non-breaking hyphen
   }
   &timeit_end('display_prob');
   return $fp_prob;
}#display_prob

# a=s
# Given n, k & p, calculate and return P(X=k)
# The "R" suffix means this is the BigRat version
# n & k should be scalar integers, p is a BigRat.
# Result ($probability) will be a BigFloat.
sub calc_binom_probR {
   my ($n,$k,$p) = @_;
   $p = Math::BigRat->new($p);
   my $probability = Math::BigRat->new(0);

   if ($dbg >= 1) {
      &wprint( "dbg: calc_binom_probR( n=" . dspl($n) . ", k=" . dspl($k) . ", p=" . dspl($p) . " )...\n" );
   }

   &timeit_start('bnok');
   my $nchoosek = Math::BigInt->new($n);
   $nchoosek = $nchoosek -> bnok( $k );
   &timeit_end('bnok');

   if ($dbg >= 1) {
      &wprint( "dbg: $n -> bnok($k) = nchoosek = " . dspl($nchoosek) . "\n" );
   }

   if ($dbg >= 2) {
      my $commafied_nchoosek = commafy($nchoosek);
      if ($cgi_context) {
         print "\n";
         print "\n";
         print "\n";
         print "\n";
         print "( $n )  = $commafied_nchoosek
$k\n";
         print "\n";
      } else {
         print "$n choose $k = $commafied_nchoosek\n";
      }
   }

   my $one = Math::BigRat->new(1);
   my $one_minus_p = $one - $p;

   if ($dbg >= 1) {
      &wprint( "p = $p\n1-p = $one_minus_p\n" );
   }

   my $tmpa = $p -> copy();
   if ($dbg >= 3) {
      &wprint( "initial tmpa = $tmpa\n" );
   }
   my $tmpb = $one_minus_p -> copy();
   if ($dbg >= 3) {
      &wprint( "initial tmpb = $tmpb\n" );
   }

   &timeit_start('bpow');
   if (0 == $k) {
      # On Linux, the GMP BigRat gives X^0 = X instead of 1; this works around that bug
      $tmpa = $one;
   } else {
      $tmpa = $tmpa->bpow($k);
   }
   &timeit_end('bpow');

   &timeit_start('bpow');
   if ($n == $k) {
      $tmpb = $one;
   } else {
      $tmpb = $tmpb->bpow($n-$k);
   }
   &timeit_end('bpow');

   if ($dbg >= 3) {
      &wprint( "dbg[3]: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" );
   }

   $probability = $tmpa * $tmpb * $nchoosek;

   # $probability = $probability->numify();  # believe it or not, if p is a ratio, then numufy() can be 70% of the runtime

   # Convert from BigRat to BigFloat
   # $probability = $probability->as_float(4000);
   $probability =  Math::BigFloat->new($probability->numerator()) / $probability->denominator();

   if ($dbg >= 2) {
      &wprint( "calc_binom_probR( $n, $k, $p ) = $probability\n" );
   }

   # if ($dbg >= 2) { &wprint( "dbg: calc_binom_probR() returning, result = " . &dspl($probability) . "\n" ); }
   return $probability;
}

# a=x
# Given n, k & p, calculate and return P(X=k)
# The "F" suffix means this is the BigFloat (slow) version (which turns
# out to be superior to the BigRat version, most of the time).
# n & k should be scalar integers, p is a BigFloat.
# Result ($probability) will be a scalar float.
sub calc_binom_probF {
   my ($n,$k,$p) = @_;
   my $probability = Math::BigFloat->new(0);

   &timeit_start('nchoosek');
   my $nchoosek = Math::BigFloat->new($n);
   $nchoosek = $nchoosek -> bnok( $k );
   &timeit_end('nchoosek');

   if ($dbg >= 2) {
      my $commafied_nchoosek = commafy($nchoosek);
      if ($cgi_context) {
         print "\n";
         print "\n";
         print "\n";
         print "\n";
         print "( $n )  = $commafied_nchoosek
$k\n";
         print "\n";
      } else {
         print "$n choose $k = $commafied_nchoosek\n";
      }
   }

   &timeit_start('ones');
   my $one = Math::BigFloat->new(1);
   my $one_minus_p = $one - $p;
   &timeit_end('ones');

   if ($dbg >= 2) {
      &wprint( "p = $p\n1-p = $one_minus_p\n" );
   }

   &timeit_start('bpows');
   my $tmpa = $p -> copy();
   my $tmpb = $one_minus_p -> copy();
   if (0 == $k) {
      # On Linux, the GMP BigRat gives X^0 = X instead of 1. I've not seen that bug for BigFloat,
      # but this makes sure.
      $tmpa = $one;
   } else {
      $tmpa = $tmpa->bpow($k);
   }
   &timeit_start('bpows');
   if ($n == $k) {
      $tmpb = $one;
   } else {
      $tmpb = $tmpb->bpow($n-$k);
   }
   &timeit_end('bpows');

   if ($dbg >= 2) {
      &wprint( "dbg: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" );
   }

   &timeit_start('prob');
   $probability = $tmpa * $tmpb * $nchoosek;
   &timeit_end('prob');
   if ($dbg >= 2) {
      &wprint( "The probability of $n choose $k (w/ p=$p) is $probability\n" );
   }
   # $probability = $probability->numify();
   return $probability;
}

# Calculate binomial coefficient, "n choose k".  Equivalent to .bnok() but for scalars:
# n! / ((k!) * ((n-k)!))
sub n_choose_k {
   my ($n, $k) = @_;
   my $result = 1;
   if ((!defined $k) || ($k < 0) || ($k > $n)) {
      die "ERROR: Bad parameters to n_choose_k($n,$k), stopped\n";
   } else {
      my $z = $n - $k;
      if ($z > $k) {
         ($z, $k) = ($k, $z);  # swap 'em, so that $k >= $z
      }
      # At this point we know that $n >= $k >= $z
      # n! is a multiple of k!, so we'll skip those multiplications & divisions.
      # Additionally, we know that n-k = z, so counting k up to n will take the
      # same number of loops as counting z down to 1, which means we only need
      # to test one of the two counters.
      my $x = $k+1;
      while ($x <= $n) {
         # print "dbg: x = $x, z = $z\n";
         $result *= $x;
         $x += 1;
         $result /= $z;
         $z -= 1;
      }
   }
   if ($dbg>0) {
      &wprint( "dbg: ($n choose $_[1]) = $result\n" );
   }
   return $result;
}

# Given n, k & p, calculate and return P(X=k)
# The "S" suffix means this is the short / scalar ("quick") version.
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# No bignums are used.
# The only problem with this simple version is that it's vulnerable to
# overflow of "n choose k" and underflow of "p^k" and "(1-p)^(n-k)".
# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub calc_binom_probS {
   my ($n,$k,$p) = @_;
   my $probability1 = 0;

   if ($dbg>0) {
      my $n_dspl = dspl($n);
      my $k_dspl = dspl($k);
      &wprint("dbg: calc_binom_probS( n=$n_dspl, k=$k_dspl )\n");
   }

   my $nchoosek1 = $n;
   $nchoosek1 = &n_choose_k($n,$k);
   if ($dbg>0) {
      &wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek1) . "\n");
   }

   if ($dbg >= 2) {
      my $commafied_nchoosek = commafy($nchoosek1);
      if ($cgi_context) {
         print "\n";
         print "\n";
         print "\n";
         print "\n";
         print "( $n )  = $commafied_nchoosek
$k\n";
         print "\n";
      } else {
         print "$n choose $k = $commafied_nchoosek\n";
      }
   }

   my $one_minus_p1 = 1 - $p;

   if ($dbg >= 2) {
      &wprint( "p = $p\n1-p = $one_minus_p1\n" );
   }

   my $tmpa1 = $p;
   my $tmpb1 = $one_minus_p1;
   $tmpa1 = $tmpa1 ** $k;
   $tmpb1 = $tmpb1 ** ($n-$k);

   if ($dbg >= 2) {
      &wprint( "dbg: a=$tmpa1, b=$tmpb1, nchoosek=$nchoosek1\n" );
   }

   $probability1 = $tmpa1 * $nchoosek1 * $tmpb1;
   if ($dbg >= 2) {
      &wprint( "The probability of $n choose $k (w/ p=$p) is $probability1\n" );
   }
   return $probability1;
}

# Given n, k & p, calculate and return P(X=k)
# The "S2" suffix means this is the 2nd short / scalar version.
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# No bignums are used.
# This version is more robust than the "S" version. It fixes the possible
# overflow of "n choose k" but not the underflow of "p^k" and "(1-p)^(n-k)".
# Here's a test case that this version fixes:
#   perl binomcalc.pl -n=2000 -k=890 -p=0.46 -d=2
# result = 0.007243291418493
# (Berman gets that one right; Vassarstats gets it wrong in the 3rd digit.)
# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub calc_binom_probS2 {
   my ($n,$k1,$p) = @_;
   my $k = $k1;
   my $probability = 0;
   my $result = 1;
   my $cntr = 1;
   my $tmpmsg;  # for debug prints

   if ($dbg>0) {
      my $n_dspl = dspl($n);
      my $k_dspl = dspl($k);
      &wprint("dbg: calc_binom_probS2( n=$n_dspl, k=$k_dspl, p=$p )\n");
   }

   # my $nchoosek = &n_choose_k($n,$k);
   # if ($dbg>0) {
   #    &wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek) . "\n");
   # }

   my $one_minus_p = 1 - $p;

   if ($dbg >= 2) {
      &wprint( "p = $p\n1-p = $one_minus_p\n" );
   }

   my $tmpa = $p;
   my $tmpb = $one_minus_p;
   $tmpa = $tmpa ** $k;
   $tmpb = $tmpb ** ($n-$k);

   if ($dbg >= 2) {
      &wprint( "dbg: a=$tmpa, b=$tmpb\n" );
   }

   # $probability = $tmpa * $nchoosek * $tmpb;

   # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
   # n! / ((k!) * ((n-k)!))

   my $z = $n - $k;
   if ($z > $k) {
      ($z, $k) = ($k, $z);  # swap 'em, so that $k >= $z
   }
   # At this point we know that $n >= $k >= $z

   my $mul_by_a_done = 0;
   my $mul_by_b_done = 0;

   # The result we need is (n choose k) * $tmpa * $tmpb.
   # (n choose k) is often very, very large.  $tmpa and $tmpb are often very, very tiny.

   # Calculate (n choose k).
   #
   # n! is a multiple of k!, so we'll skip those multiplications & divisions.
   # Additionally, we know that n-k = z, so counting k up to n will take the
   # same number of loops as counting z down to 1, which means we only need
   # to test one of the two counters.
   #
   # To avoid loss of precision in extreme cases, if the (n choose k) value begins to approach
   # the limit of what can be represented in a IEEE 754 double float (a little over 1e308) then
   # we do the multiplications by $tmpa and perhaps $tmpb in the midst of the calculation of
   # (n choose k).
   my $x = $k+1;
   while ($x <= $n) {
      # print "dbg: x = $x, z = $z\n";
      $result *= $x;
      $x += 1;
      $result /= $z;
      $z -= 1;
      if ($result > 1e290) {
         if (!$mul_by_a_done) {
            if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=$tmpa,"; }
            $result *= $tmpa;
            $mul_by_a_done = 1;
            if ($dbg>0) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
         }
         if ($result > 1e290) {
            if (!$mul_by_b_done) {
               if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, b=$tmpb,"; }
               $result *= $tmpb;
               $mul_by_b_done = 1;
               if ($dbg>0) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
            }
         }
      }
      $cntr++;
   }

   if (!$mul_by_a_done) {
      if ($dbg>0) { $tmpmsg = "dbg: late mul by a, X before = $result, a=$tmpa,"; }
      $result *= $tmpa;
      if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
   }
   if (!$mul_by_b_done) {
      if ($dbg>0) { $tmpmsg = "dbg: late mul by b, X before = $result, b=$tmpb,"; }
      $result *= $tmpb;
      if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
   }
   $probability = $result;

   if ($dbg >= 2) {
      &wprint( "The probability of $n choose $k (w/ p=$p) is $probability  ($cntr loops)\n" );
   }
   return $probability;
}

# base 10 logarithm
sub log10 {
   my $n = shift;
   if ($n <= 0) {
      return -10000;
   }
   # if (($n < -1.911e-12) && ($n > -1.913e-12)) {
   #    &traceback('log10');
   # }
   return log($n)/log(10);
}

# Input is small (perhaps very tiny!) positive number, $p, between 0 and 1.
# This function determines the maximum power to which $p can be raised without
# the result having an exponent less than -100.
# (This is used as part of an underflow avoidance strategy.)
# E.g., if $p = 1E-5 then the result is 20.  if $p = 1E-20 then the result is 10.
sub max_nonunderflowing_power1 {
   my ($p) = @_;
   my $result;
   my $ap = abs($p);
   if ($ap <= 1e-100) {
      $result = 1;
   } elsif ($ap > 0.99977) {
      $result = 1000000;  # arbitrary maximum of one million
   } else {
      my $tmp = abs(log10($ap));  # if p=1E-5 then this is 5
      $result = int(100/$tmp);  # if p=1E-5 then this is 20
   }
   return $result;
}

# one-digit log10: 1     2         3        4        5        6        7        8        9       9.5
our @quiklog = (0, 0, 0.30103, 0.47712, 0.60206, 0.69897, 0.77815, 0.84510, 0.90309, 0.95424, 0.97772 );

# Roughly equivalent to max_nonunderflowing_power1(), calculated a different way.
# For scalars, this is slower, but for BigFloats it might be faster?
sub max_nonunderflowing_power2 {
   my ($p) = @_;
   my $result;
   my $ap = abs($p);
   if ($ap <= 1e-100) {
      $result = 1;
   } elsif ($ap > 0.99977) {
      $result = 1000000;
   } else {
      $ap = Math::BigFloat->new($ap);
      # my $e = $p->exponent();  this doesn't work (it's apparently not normalized)
      my ($m,$e) = $ap->nparts();
      my $substr = int($m+0.5);
      my $tmp = abs($e) - $quiklog[$substr];  # if p=1E-5 then abs($e) is 5
      $result = int(100/$tmp);  # if p=1E-5 then this is 20
      # printf("dbg: m=$m, e=$e, substr=$substr, qlog[$substr]=%f, tmp=$tmp, q=%f, r=$result\n", $quiklog[$substr], 100/$tmp);
   }
   return $result;
}

# *** THIS SUBROUTINE IS NOT CURRENTLY USED ***
sub max_nonunderflowing_power {
   my $p = shift;
   &timeit_start('max_nonunderflowing_power1');
   my $result1 = max_nonunderflowing_power1($p);
   &timeit_end('max_nonunderflowing_power1');
   &timeit_start('max_nonunderflowing_power2');
   my $result2 = max_nonunderflowing_power2($p);
   &timeit_end('max_nonunderflowing_power2');
   if ($result1 != $result2) {
      print "dbg: max_nonunderflowing_power($p), $result1, $result2\n";
   }
   return $result1;
}

# my $xp = 2.9E-5;
# my $ex = 55;
# my $maxpower = max_nonunderflowing_power($xp);
# printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex );
# $maxpower++;
# printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex );


# a=q
# Given n, k & p, calculate and return P(X=k)
# The "S3" suffix means this is the 3rd scalar / short version.
# This is the version we currently use for Quick (normal) precision. Sorry about the potential confusion
# confusion between "S" for "scalar" (this one) and "S" for "slow" (which uses calc_binom_probR).
# Inputs are assumed to be scalars (m=int, k=int, p=float).
# Result is the product of three factors:
#   "n choose k"  -- which is at risk of overflow
#   "p^k"         -- which is at risk of underflow
#   "(1-p)^(n-k)" -- which is also at risk of underflow
# No bignums are used.
# This version is more robust than simpler versions. It fixes the possible
# overflow of "n choose k" and the possible underflows of "p^k" and "(1-p)^(n-k)".
# Here's a test case that breaks calc_binom_probS2 but this version fixes:
#   perl binomcalc.pl -n=2000 -k=890 -p=0.2
#   a=q result is 2.48298479667e-135
#   (a=s result is 2.482984796673508e-135)
#   (Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135")
# Here's another test case that breaks calc_binom_probS2 but this version fixes:
#   perl binomcalc.pl -n=3000 -k=1400 -p=0.45
#   a=q result is 0.00272066277     = 2.72066276956e-3
#   (a=s result is 0.002720662769557 = 2.720662769556848e-3)
#   (Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066")
sub calc_binom_probS3 {
   my ($n,$k1,$p) = @_;
   my $k = $k1;
   my $probability = 0;
   my $result = 1;
   my $cntr = 1;
   my $tmpmsg;  # for debug prints

   if ($dbg>0) {
      my $n_dspl = dspl($n);
      my $k_dspl = dspl($k);
      &wprint("dbg: calc_binom_probS3( n=$n_dspl, k=$k_dspl, p=$p )\n");
   }

   my $one_minus_p = 1 - $p;
   my $n_minus_k = $n - $k;

   # my $maxpower_p = max_nonunderflowing_power($p);
   my $maxpower_p = max_nonunderflowing_power1($p);
   my $remaining_k = $k;
   if ($maxpower_p > $k) {
      $maxpower_p = $k;  # p^k won't underflow anyhow
   }

   # my $maxpower_1mp = max_nonunderflowing_power($one_minus_p);
   my $maxpower_1mp = max_nonunderflowing_power1($one_minus_p);
   my $remaining_n_minus_k = $n_minus_k;
   if ($maxpower_1mp > ($n_minus_k)) {
      $maxpower_1mp = $n_minus_k;  # (1-p)^(n-k) won't underflow anyhow
   }

   if ($dbg >= 2) {
      &wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" );
   }

   # my $tmpa = $p;
   # my $tmpb = $one_minus_p;
   # $tmpa = $tmpa ** $k;
   # $tmpb = $tmpb ** ($n_minus_k);
   # if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); }

   # $probability = $tmpa * $nchoosek * $tmpb;

   # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
   # n! / ((k!) * ((n-k)!))

   my $z = $n_minus_k;
   if ($z > $k) {
      ($z, $k) = ($k, $z);  # swap 'em, so that $k >= $z
   }
   # At this point we know that $n >= $k >= $z
   # Note that "k" here is no longer necessarily the same "k" used in the  p^k
   # and (1-p)^(n-k) exponents. (I probably should have used new variable names.)

   my $mul_by_a_done = 0;
   my $mul_by_b_done = 0;

   # The result we need is  (n choose k)  *  p^k  *  (1-p)^(n-k).
   # (n choose k) is often very, very large.
   # p^k and (1-p)^(n-k) are often very, very tiny.

   # This is our (n choose k) calculation loop, mostly.
   #
   # When there's no threat of overflow, the following loop just calculates
   # (n choose k). But when overflow threatens we get trickier.
   #
   # n! is a multiple of k!, so we'll skip those multiplications & divisions.
   # Additionally, we know that n-k = z, so counting k up to n will take the
   # same number of loops as counting z down to 1, which means we only need to
   # test one of the two counters.
   #
   # To avoid loss of precision in extreme cases, if the intermediate value begins
   # to approach the limit of what can be represented in a IEEE 754 double float
   # (a little over 1e308) then we insert the multiplications by the tiny
   # exponentiated probabilities in the midst of the calculation of (n choose k).
   my $x = $k+1;
   while ($x <= $n) {
      # print "dbg: x = $x, z = $z\n";
      $result *= $x;
      $x += 1;
      $result /= $z;
      $z -= 1;
      while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) {
         if ($remaining_k > 0) {
            if ($dbg>2) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; }
            $result *= ($p ** $maxpower_p);
            $remaining_k -= $maxpower_p;
            if ($remaining_k < $maxpower_p) {
               $maxpower_p = $remaining_k;
            }
            if ($dbg>2) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
         }
         if ($result > 1e200) {
            if ($remaining_n_minus_k > 0) {
               if ($dbg>2) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; }
               $result *= ($one_minus_p ** $maxpower_1mp);
               $remaining_n_minus_k -= $maxpower_1mp;
               if ($remaining_n_minus_k < $maxpower_1mp) {
                  $maxpower_1mp = $remaining_n_minus_k;
               }
               if ($dbg>2) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
            }
         }
      }
      $cntr++;
   }

   while ($remaining_k > 0) {
      if ($dbg>2) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; }
      $result *= ($p ** $maxpower_p);
      $remaining_k -= $maxpower_p;
      if ($remaining_k < $maxpower_p) {
         $maxpower_p = $remaining_k;
      }
      if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); }
   }
   while ($remaining_n_minus_k > 0) {
      if ($dbg>2) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; }
      $result *= ($one_minus_p ** $maxpower_1mp);
      $remaining_n_minus_k -= $maxpower_1mp;
      if ($remaining_n_minus_k < $maxpower_1mp) {
         $maxpower_1mp = $remaining_n_minus_k;
      }
      if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); }
   }

   $probability = $result;

   if ($dbg >= 2) {
      &wprint( "The probability of $n choose $k (w/ p=$p) is $probability  ($cntr loops)\n" );
   }
   return $probability;
}

# a=xx
# Given n, k & p, calculate and return P(X=k)
# The "F" suffix means this is the BigFloat (slow) version.
# n & k should be scalar integers, p can be a BigFloat or a scalar float, 0..1.
# Result ($probability) will be a BigFloat.
# Result is the product of three parts:
#   "n choose k"  -- which is at risk of overflow
#   "p^k"         -- which is at risk of underflow
#   "(1-p)^(n-k)" -- which is also at risk of underflow
# Here's a test case that breaks calc_binom_probS2 but this version fixes:
#   perl binomcalc.pl -n=2000 -k=890 -p=0.2
#   a=s result is 2.482984796673508e-135
#   (a=q result is 2.48298479667e-135; Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135")
# Here's another test case that breaks calc_binom_probS2 but this version fixes:
#   perl binomcalc.pl -n=3000 -k=1400 -p=0.45
#   a=s result is 0.002720662769557 = 2.720662769556848e-3
#   (a=q result is 0.00272066277 = 2.72066276956e-3; Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066")
sub calc_binom_probF3 {
   my ($n,$k1,$p1) = @_;
   my $k = $k1;
   my $p = Math::BigFloat->new($p1);
   my $scalar_p = $p->numify();
   my $probability = Math::BigFloat->new(0);
   my $result = Math::BigFloat->new(1);
   my $cntr = 1;
   my $tmpmsg;  # for debug prints

   if ($dbg>0) {
      my $tst1 = 1.2345E-20;
      my $tst2 = Math::BigFloat->new($tst1);
      my $tst3 = Math::BigFloat->new($tst2);
      &wprintf("dbg: tst1=%s, tst2=%s, tst3=%s\n", dspl($tst1), dspl($tst2), dspl($tst3) );

      my $n_dspl = dspl($n);
      my $k_dspl = dspl($k);
      my $p_dspl = dspl($p);
      my $p1_dspl = dspl($p1);
      if ($p1_dspl ne $p_dspl) {
         $p_dspl .= "=$p1_dspl";
      }
      &wprint("dbg: calc_binom_probF3( n=$n_dspl, k=$k_dspl, p=$p_dspl )\n");
   }

   my $one_minus_p = 1 - $p;
   my $n_minus_k = $n - $k;

   my $maxpower_p = max_nonunderflowing_power2($p);
   my $remaining_k = $k;
   if ($maxpower_p > $k) {
      $maxpower_p = $k;  # p^k won't underflow anyhow
   }

   my $maxpower_1mp = max_nonunderflowing_power2($one_minus_p);
   my $remaining_n_minus_k = $n_minus_k;
   if ($maxpower_1mp > ($n_minus_k)) {
      $maxpower_1mp = $n_minus_k;  # (1-p)^(n-k) won't underflow anyhow
   }

   if ($dbg >= 2) {
      &wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" );
   }

   # my $tmpa = $p;
   # my $tmpb = $one_minus_p;
   # $tmpa = $tmpa ** $k;
   # $tmpb = $tmpb ** ($n_minus_k);
   # if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); }

   # $probability = $tmpa * $nchoosek * $tmpb;

   # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars:
   # n! / ((k!) * ((n-k)!))

   my $z = $n_minus_k;
   if ($z > $k) {
      ($z, $k) = ($k, $z);  # swap 'em, so that $k >= $z
   }
   # At this point we know that $n >= $k >= $z
   # Note that "k" here is no longer necessarily the same "k" used in the  p^k
   # and (1-p)^(n-k) exponents. (I probably should have used new variable names.)

   my $mul_by_a_done = 0;
   my $mul_by_b_done = 0;

   # The result we need is  (n choose k)  *  p^k  *  (1-p)^(n-k).
   # (n choose k) is often very, very large.
   # p^k and (1-p)^(n-k) are often very, very tiny.

   # This is our (n choose k) calculation loop, mostly.
   #
   # When there's no threat of overflow, the following loop just calculates
   # (n choose k). But when overflow threatens we get trickier.
   #
   # n! is a multiple of k!, so we'll skip those multiplications & divisions.
   # Additionally, we know that n-k = z, so counting k up to n will take the
   # same number of loops as counting z down to 1, which means we only need to
   # test one of the two counters.
   #
   # To avoid loss of precision in extreme cases, if the intermediate value begins
   # to approach the limit of what can be represented in a IEEE 754 double float
   # (a little over 1e308) then we insert the multiplications by the tiny
   # exponentiated probabilities in the midst of the calculation of (n choose k).
   my $x = $k+1;
   while ($x <= $n) {
      # print "dbg: x = $x, z = $z\n";
      $result *= $x;
      $x += 1;
      $result /= $z;
      $z -= 1;
      while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) {
         if ($remaining_k > 0) {
            if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; }
            $result *= ($p ** $maxpower_p);
            $remaining_k -= $maxpower_p;
            if ($remaining_k < $maxpower_p) {
               $maxpower_p = $remaining_k;
            }
            if ($dbg>0) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
         }
         if ($result > 1e200) {
            if ($remaining_n_minus_k > 0) {
               if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; }
               $result *= ($one_minus_p ** $maxpower_1mp);
               $remaining_n_minus_k -= $maxpower_1mp;
               if ($remaining_n_minus_k < $maxpower_1mp) {
                  $maxpower_1mp = $remaining_n_minus_k;
               }
               if ($dbg>0) { &wprint( "$tmpmsg X after = $result  (loop cntr=$cntr)\n" ); }
            }
         }
      }
      $cntr++;
   }

   while ($remaining_k > 0) {
      if ($dbg>0) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; }
      $result *= ($p ** $maxpower_p);
      $remaining_k -= $maxpower_p;
      if ($remaining_k < $maxpower_p) {
         $maxpower_p = $remaining_k;
      }
      if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
   }
   while ($remaining_n_minus_k > 0) {
      if ($dbg>0) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; }
      $result *= ($one_minus_p ** $maxpower_1mp);
      $remaining_n_minus_k -= $maxpower_1mp;
      if ($remaining_n_minus_k < $maxpower_1mp) {
         $maxpower_1mp = $remaining_n_minus_k;
      }
      if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); }
   }

   $probability = $result;

   if ($dbg >= 2) {
      &wprint( "The probability of $n choose $k (w/ p=$p) is $probability  ($cntr loops)\n" );
   }

   # $probability = $probability->numify();
   return $probability;
}


if ($dbg>0) {
   &wprint( "Parameters passed to $scriptname = '$parameters'\n" );
   &wprint( "dbg=$dbg\n" );
}

# For a='q' and a='h' we use the scalar version (the default)
our $calc_binom_prob = \&calc_binom_probS3;  # calc_binom_probS3 is more robust than calc_binom_probS or calc_binom_probS2

if ('s' eq $p_a) {  #
   # GMP BigRat version is surprisingly fast!
   # Before I switched to GMP, I used the BigFloat version calc_binom_probF here.
   $calc_binom_prob = \&calc_binom_probR;
} elsif ('x' eq $p_a) {
   # BigFloat eXperimental version. Slow.
   $calc_binom_prob = \&calc_binom_probF;
} elsif ('xx' eq $p_a) {
   # another eXperimental version. Slow.
   $calc_binom_prob = \&calc_binom_probF3;
}

our $k0 = $p_k + 0;  # scalar
# $k = Math::BigInt->new($p_k);  # BigInt
# $k0 = $k->numify();  # (unnecessary, since I don't "use bignum")
if ($dbg>0) {&wprint( "p_k='$p_k',  k0=" . dspl($k0) . "\n" );}

our $n0 = $p_n + 0;  # scalar
# $n = Math::BigInt->new($p_n);  # BigInt
# $n0 = $n->numify();
if ($dbg>0) {&wprint( "p_n='$p_n',  n0=" . dspl($n0) . "\n" );}

# &wprint("dbg: about to evaluate p_p...\n");

our( $p0, $p1, $p2 );  # scalar, BigRat, BigFloat, respectively
if ($p_p =~ /\//) {
   # &wprint("dbg: p_p is rational...\n");
   $p1 = Math::BigRat->new($p_p);
   $p2 = Math::BigFloat->new($p1->numerator()) / $p1->denominator();
   $p0 = $p2->numify();
   # &wprint("dbg: evaluated rational, p0=$p0, p1=" . dspl($p1) . ", p2=" . dspl($p2) . "\n");
} else {
   # &wprint("dbg: p_p is float '$p_p' ...\n");
   # if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p='$p_p'\n"); }
   $p2 = Math::BigFloat->new($p_p);
   # if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p=p2=" . dspl($p2) . "\n");}
   $p1 = $p2;
   $p0 = 0.0 + $p_p;  # scalar float
}
if ('s' eq $p_a) {
   $p = $p1;  # BigRat
} elsif (('x' eq $p_a) || ('xx' eq $p_a)) {
   $p = $p2;  # BigFloat
} elsif (('q' eq $p_a) || ('h' eq $p_a)) {
   $p = $p0;
}

if ($dbg>0) {&wprint("dbg: p_p='$p_p', p=" . dspl($p) . "\n");}

if ($dbg>0) {
   &wprint( "n=$n0, k=$k0, p=$p\n");
}

if ($n0) {
   if ($k0 > $n0) {
      &givehelp("** ERROR: parameter k=$k0 to $scriptname cannot be greater than n=$n0");
   }
   if ($p0 > 1) {
      &givehelp("** ERROR: parameter p=$p0 to $scriptname must be between 0 and 1");
   }
}


# m=co  calculate & display binomial coefficient, only
sub do_m_co {
   my $nchoosek;
   if ('q' eq $p_a) {
      $nchoosek = &n_choose_k($n0, $k0);
   } else {
      $nchoosek = Math::BigInt->new( $p_n );
      $nchoosek = $nchoosek -> bnok( $p_k );
      $nchoosek = "$nchoosek";
   }
   if ('inf' eq $nchoosek) {
      $nchoosek = 'overflow';
      if ($cgi_context) {
         print "n=$n0, k=$k0, (n choose k) = overflow";
         if ('q' eq $p_a) {
            print "
\nTry again, in 'Slow' mode.\n";
         } else {
            print "\n";
         }
      } else {
         print "n=$n0, k=$k0, (n choose k) = $nchoosek\n";
         if ('q' eq $p_a) {
            print "Try again, in 'Slow' mode.\n";
         }
      }
   } else {
      $nchoosek = commafy($nchoosek);
      &wprint( "n=$n0, k=$k0, (n choose k) = $nchoosek" );
   }
}#do_m_co


# $probability is calculated probability of x=k, $dd is the number of digits of precision to display,
# the arrays are for calculating cumulative probabilities
our ( $probability, $dd, @saved_probs, @cummu_0_to_i );  # we no longer use @cummu_i_to_k


# m=b   calculate & display binomial probability of x=k, leaving the result in $probability as a side-effect
sub do_m_b {
   if ($dbg>0) { &wprint( "dbg: sub do_m_b(), dd=$dd\n" ); }

   if ($p_p =~ /\//) {
     &wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" );
   } else {
     &wprint( "Probability of success on a single trial (p): $p0\n" );
   }
   &wprint( "Number of trials (n): $n0\n" );
   &wprint( "Number of successes (k): $k0\n" );

   # In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p
   if (('q' eq $p_a) || ('h' eq $p_a)) {
      $p1 = undef;
      $p2 = undef;
   }

   # First calculate the binomial probability P(X=k)

   &timeit_start('calc_bprob');
   $probability = &$calc_binom_prob( $n0, $k0, $p );
   &timeit_end('calc_bprob');

   my $fp_prob = &display_prob($probability,2,$dd);
   &wprint( "Binomial probability P(X = k): $fp_prob\n" );
}#do_m_b


# m=cu  calculate and display probability of x=k, and cumulative probability of x <= k
sub do_m_cu {
   # Sum the binomial probabilities for lower values of k (down to zero)
   my $cummu_0_to_im1 = 0;
   my $max_addend = 0;  # we keep track of the maximum addend for estimating the true number of significant digits


   if ($p_p =~ /\//) {
     &wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" );
   } else {
     &wprint( "Probability of success on a single trial (p): $p0\n" );
   }
   &wprint( "Number of trials (n): $n0\n" );
   &wprint( "Number of successes (k): $k0\n" );

   # In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p
   if (('q' eq $p_a) || ('h' eq $p_a)) {
      $p1 = undef;
      $p2 = undef;
   }

   # First calculate the binomial probability P(X=k)

   &timeit_start('calc_bprob');
   $probability = &$calc_binom_prob( $n0, $k0, $p );
   &timeit_end('calc_bprob');

   my $fp_prob = &display_prob($probability, 2, $dd, undef, ($p > 0) && ($p < 1));
   if ($cgi_context) {
      print( "Probabilities:
\n"
            . "  P( X = k ):  $fp_prob
\n" );
   } else {
      &wprint( "Probabilities:\n"
              . "  P( X = k ):  $fp_prob\n" );
   }

   if ($k0 > 0) {
      for (my $i0=0; $i0 < $k0; $i0++) {
         &timeit_start('cumu_sum');
         my $probability_tmp = &$calc_binom_prob( $n0, $i0, $p );
         if ('h' eq $p_a) {
            # "hybrid" mode: convert from 64-bit floating point to BigFloat for summing
            $probability_tmp = Math::BigFloat->new($probability_tmp);
         }
         $cummu_0_to_im1 += $probability_tmp;
         if ($probability_tmp > $max_addend) {
            $max_addend = $probability_tmp;
         }
         &timeit_end('cumu_sum');
      }
   }

   # Then display the cumulative probabilities

   my $cprob1;
   &timeit_start('cumu_sum');
   if ($n0 == $k0) {
      $cprob1 = 1;
   } else {
      my $probability_tmp = $probability;
      if ('h' eq $p_a) {
         # in "hybrid" mode we calculate probabilities in 64-bit floating point, but sum them as BigFloats
         $probability_tmp = Math::BigFloat->new($probability_tmp);
      }
      $cprob1 = $cummu_0_to_im1 + $probability_tmp;
      if ($probability_tmp > $max_addend) {
         $max_addend = $probability_tmp;
      }
   }
   my $cprob2 = 1.0 - $cprob1;
   my $cprob3 = 1.0 - $cummu_0_to_im1;
   &timeit_end('cumu_sum');

   if ($dbg>1) { print "dbg: fp_cprob0 -- p='$p', k0='$k0'\n"; }
   my $fp_cprob0 = &display_prob( $cummu_0_to_im1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );
   if ($dbg>1) { print "dbg: fp_cprob1 -- p='$p', k0='$k0', n0='$n0'\n"; }
   my $fp_cprob1 = &display_prob( $cprob1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
   if ($dbg>1) { print "dbg: fp_cprob2 -- p='$p', k0='$k0', n0='$n0'\n"; }
   my $fp_cprob2 = &display_prob( $cprob2, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
   if ($dbg>1) { print "dbg: fp_cprob3 -- p='$p', k0='$k0'\n"; }
   my $fp_cprob3 = &display_prob( $cprob3, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );

   if ($cgi_context) {
      print(  "   P( X < k ):  $fp_cprob0
\n"
            . "   P( X ≤ k ):  $fp_cprob1
\n"
            . "   P( X > k ):  $fp_cprob2
\n"
            . "   P( X ≥ k ):  $fp_cprob3\n" );
   } else {
      &wprint(  "  P( X < k ):  $fp_cprob0\n"
              . "  P( X <= k ): $fp_cprob1\n"
              . "  P( X > k ):  $fp_cprob2\n"
              . "  P( X >= k ): $fp_cprob3\n" );
   }
}#do_m_cu


# m=acu  calculate and display probability of x=k, and cumulative probabilities of x in 0..k
sub do_m_acu {
   my $max_addend = 0;  # we keep track of the maximum addend for estimating the true number of significant digits
   # Calculate all the binomial probabilities for lower values of k (down to zero)

   &wprint( "Cumulative probability:\n" );
   if ($dbg>0) { &wprint( "dbg: sub do_m_acu(), dd=$dd\n" ); }

   @saved_probs = ();
   # we already calculated the binomial probability for x=k, so save it:
   if ('h' eq $p_a) {
      # "hybrid" mode: convert from 64-bit floating point to BigFloat for summing
      $saved_probs[$k0] = Math::BigFloat->new($probability);
   } else {
      $saved_probs[$k0] = $probability;
   }

   @cummu_0_to_i = ();  # cumulative probabilities

   # Then display the cumulative probabilities

   if ($cgi_context) {
      $in_midst_of_generating_acu_table = 1;
      print "\n";
      print " " .
            " " .
            "\n";
      for (my $i0=0; $i0 <= $k0; $i0++) {
         my $prob;
         if ($i0 < $k0) {
            &timeit_start('calc_bprob');
            $prob = &$calc_binom_prob( $n0, $i0, $p );
            if ('h' eq $p_a) {
               $prob = Math::BigFloat->new($prob);
            }
            $saved_probs[$i0] = $prob;
            &timeit_end('calc_bprob');
         } else {
            $prob = $saved_probs[$i0];
         }
         my $cummu_to_i;
         if (0 == $i0) {
            $cummu_to_i = $prob;
         } else {
            &timeit_start('cumu_sum');
            $cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob);
            &timeit_end('cumu_sum');
         }
         $cummu_0_to_i[$i0] = $cummu_to_i;
         if ($prob > $max_addend) {
            $max_addend = $prob;
         }

         &timeit_start('cumu_sum');
         my $one_minus_cummu_to_i = 1.0 - $cummu_to_i;
         &timeit_end('cumu_sum');
         # my $cummu_from_i = $cummu_i_to_k[$i0];
         # &timeit_start('cumu_sum');
         # my $one_minus_cummu_from_i = 1.0 - $cummu_from_i;
         # &timeit_end('cumu_sum');
         my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) );
         # sprintf("%#.15g", $prob);
         my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) );  # sprintf("%#.15g", $cummu_to_i);
         my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) );  # sprintf("%#.15g", $one_minus_cummu_to_i);
         # my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd);  # sprintf("%#.15g", $cummu_from_i);
         # my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd);  # sprintf("%#.15g", $one_minus_cummu_from_i);
         print " " .
               " " .
               "\n";
      }
      print "k P(X=k) range P(X in range)  1 - P(X in range)
$i0  $fp_prob 0..$i0 $fp_cummu_to_i  $fp_one_minus_cummu_to_i\n";
      $in_midst_of_generating_acu_table = 0;
   } else {
      print "|" . center('k',8) . "|" . center('P(X=k)',7+$dd) .
            "|" . center('range',11) .
            "|" . center('P(X in range)',7+$dd) . "|" . center('1 - P(X in range)',7+$dd) . "|\n";
      for (my $i0=0; $i0 <= $k0; $i0++) {
         my $prob;
         if ($i0 < $k0) {
            &timeit_start('calc_bprob');
            $prob = &$calc_binom_prob( $n0, $i0, $p );
            if ('h' eq $p_a) {
               $prob = Math::BigFloat->new($prob);
            }
            $saved_probs[$i0] = $prob;
            &timeit_end('calc_bprob');
         } else {
            $prob = $saved_probs[$i0];
         }
         my $cummu_to_i;
         if (0 == $i0) {
            $cummu_to_i = $prob;
         } else {
            &timeit_start('cumu_sum');
            $cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob);
            &timeit_end('cumu_sum');
         }
         $cummu_0_to_i[$i0] = $cummu_to_i;
         if ($prob > $max_addend) {
            $max_addend = $prob;
         }

         &timeit_start('cumu_sum');
         my $one_minus_cummu_to_i = 1.0 - $cummu_to_i;
         &timeit_end('cumu_sum');
         # my $cummu_from_i = $cummu_i_to_k[$i0];
         # &timeit_start('cumu_sum');
         # my $one_minus_cummu_from_i = 1.0 - $cummu_from_i;
         # &timeit_end('cumu_sum');
         if ($dbg>1) { print "dbg: fp_prob -- p='$p', k0='$k0', n0='$n0'\n"; }
         my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) );  # sprintf("%#.15g", $prob);
         if ($dbg>1) { print "dbg: fp_cummu_to_i -- p='$p', i0='$i0', n0='$n0'\n"; }
         my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) );  # sprintf("%#.15g", $cummu_to_i);
         if ($dbg>1) { printf( "dbg: fp_one_minus_cummu_to_i -- p='$p', i0='$i0', n0='$n0' %d %d %d %d \n", ($p > 0), ($p < 1), ($i0 < $n0), (($p > 0) && ($p < 1) && ($i0 < $n0)) ); }
         my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) );  # sprintf("%#.15g", $one_minus_cummu_to_i);
         # my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd);  # sprintf("%#.15g", $cummu_from_i);
         # my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd);  # sprintf("%#.15g", $one_minus_cummu_from_i);
         print "|" . center($i0,8) . "|" . center($fp_prob,7+$dd) .
               "|" . center("0..$i0",11) .
               "|" . center($fp_cummu_to_i,7+$dd) . "|" . center($fp_one_minus_cummu_to_i,7+$dd) . "|\n";
      }
   }
}#do_m_acu


sub main {
   if (('q' eq $p_a) || ('h' eq $p_a)) {
      $dd = 11;  # 11 digits is conservative for IEEE 754 64-bit floating point, but if we set it to 12 then the last digit is often off by one
   } else {
      $dd = 16;  # no. of display digits = 16 for extreme precision settings
   }
   if ($dd > ($accuracy - 2)) {
      $dd = $accuracy - 2;  # we can set d= even to a smaller number than the default a=q accuracy (that's really only useful for testing purposes)
      if ($dd < 2) {
         $dd = 2;
      }
   }

   if (!$n0) {
      &wprint( "(Nothing to do.)\n" );
   } elsif ('co' eq $p_m) {
      &do_m_co();
   } elsif ('b' eq $p_m) {
      &do_m_b();
   } elsif ('cu' eq $p_m) {
      &do_m_cu();
   } elsif ('acu' eq $p_m) {
      &do_m_b();
      &do_m_acu();
   }
}#main

sub timed_out {
   if ($in_midst_of_generating_acu_table) {
      print "\n\n";
   }
   print "\n\nERROR: Server timed out.
\n" .
         "The calculation was too computationally intensive to run on my server. But if you " .
         'download binomcalc.pl ' .
         "and run it on your own computer, you can let it run for as long as necessary.\n";
   exit 1;
}

&timeit_start('main');
if ($p_t) {
   eval {
      local $SIG{ALRM} = 'timed_out';
      alarm 0+$p_t;  # start timout timer
      &main();
      alarm 0;  # cancel timeout timer
   }
} else {
   &main();
}
&timeit_end('main');

# if b=... unspecified, then default to b=0 if runtime was less than 1 seconds, or b=1 otherwise
if (!defined $p_b) {
   #if ($runtimes{'main'} >= 1.0) {
   #   $p_b = '1';
   #} else {
      $p_b = '0';  # default is b=0
   #}
}
if ('0' ne $p_b) {
   # benchmark = 1
   &timeit_display;
}

&wprint("\n");

__END__