mosco · Finesim97 · Jan 29, 2024
diff --git a/Makefile b/Makefile
@@ -19,7 +19,7 @@ LDFLAGS = -march=native -g -lfftw3
 
 LD = $(CXX)
 
-CROSSPROB_OBJECTS = build/crossprob.o build/ecdf1_mns2016.o build/ecdf1_new.o build/ecdf2.o build/fftwconvolver.o build/string_utils.o build/read_boundaries_file.o build/poisson_pmf.o build/common.o
+CROSSPROB_OBJECTS = build/crossprob.o build/ecdf1_mns2016.o build/ecdf1_m2023.o build/ecdf2.o build/fftwconvolver.o build/string_utils.o build/read_boundaries_file.o build/poisson_pmf.o build/common.o
 
 CROSSPROB_MC_OBJECTS = build/crossprob_mc.o build/string_utils.o build/read_boundaries_file.o build/tinymt64.o build/common.o
 
@@ -65,13 +65,13 @@ depend:
 
 src/common.o: src/common.hh
 src/crossprob.o: src/common.hh src/read_boundaries_file.hh
-src/crossprob.o: src/string_utils.hh src/ecdf1_mns2016.hh src/ecdf1_new.hh
+src/crossprob.o: src/string_utils.hh src/ecdf1_mns2016.hh src/ecdf1_m2023.hh
 src/crossprob.o: src/ecdf2.hh
 src/crossprob_mc.o: src/string_utils.hh src/read_boundaries_file.hh
 src/crossprob_mc.o: src/tinymt64.h
 src/ecdf1_mns2016.o: src/ecdf1_mns2016.hh src/common.hh
-src/ecdf1_new.o: src/ecdf1_new.hh src/common.hh src/poisson_pmf.hh
-src/ecdf1_new.o: src/fftwconvolver.hh src/aligned_mem.hh src/string_utils.hh
+src/ecdf1_m2023.o: src/ecdf1_m2023.hh src/common.hh src/poisson_pmf.hh
+src/ecdf1_m2023.o: src/fftwconvolver.hh src/aligned_mem.hh src/string_utils.hh
 src/ecdf2.o: src/ecdf2.hh src/fftwconvolver.hh src/aligned_mem.hh
 src/ecdf2.o: src/common.hh src/poisson_pmf.hh src/string_utils.hh
 src/ecdf2.o: src/read_boundaries_file.hh

diff --git a/benchmarks/exactbj.py b/benchmarks/exactbj.py
@@ -56,7 +56,7 @@ def Mn_plus_bounds(n, Mn_plus_value):
 def Mn_plus_distribution(n, x):
     """Probability of having Mn_plus < x under the null hypothesis that X_1,...X_n ~ U[0,1]"""
     b_bounds = Mn_plus_bounds(n, x)
-    return 1.0 - crossprob.ecdf1_new_b(b_bounds)
+    return 1.0 - crossprob.ecdf1_m2023_b(b_bounds)
     #return 1.0 - crossprob.ecdf2(b_bounds, [1]*len(b_bounds), True)
 
 N_BINARY_SEARCH_STEPS_MAX = 110 
@@ -96,7 +96,7 @@ def inverse_Mn_plus(n, alpha, debug_prints):
     assert i < N_BINARY_SEARCH_STEPS_MAX-1, "Reached N_BINARY_SEARCH_STEPS_MAX => did not converge!"
     result = mid
     alpha_bounds = Mn_plus_bounds(n, result)
-    nocross_probability = 1.0 - crossprob.ecdf1_new_b(alpha_bounds)
+    nocross_probability = 1.0 - crossprob.ecdf1_m2023_b(alpha_bounds)
     relative_error = absolute(alpha - nocross_probability)/alpha 
     if relative_error > EPSILON:
         print('Large relative error!', relative_error)
@@ -184,7 +184,7 @@ def precalc_timings_Mn_plus(alpha, max_ks2001_n, max_mns2016_n, num_reps):
             timings_mn2017[i] = min(timings_mn2017[i], t.elapsed)
 
             with Timer('NEW') as t:
-                results_new[i] = 1.0-crossprob.ecdf1_new_b(bounds)
+                results_new[i] = 1.0-crossprob.ecdf1_m2023_b(bounds)
             timings_new[i] = min(timings_new[i], t.elapsed)
 
             pickler.dump(f'Mn_plus_timings_{alpha}',
@@ -214,7 +214,7 @@ def precalc_timings_Mn_plus_largescale(threshold, n_range):
             bounds = Mn_plus_bounds(n, threshold)
 
         with Timer('NEW') as t:
-            results_new[i] = 1.0-crossprob.ecdf1_new_b(bounds)
+            results_new[i] = 1.0-crossprob.ecdf1_m2023_b(bounds)
         timings_new[i] = min(timings_new[i], t.elapsed)
 
         with Timer('MN2017') as t:

diff --git a/python_extension/crossprob.cc b/python_extension/crossprob.cc
@@ -4699,7 +4699,7 @@ SWIGINTERN void std_vector_Sl_double_Sg__insert__SWIG_1(std::vector< double > *s
 
 #include "../src/ecdf2.hh"
 #include "../src/ecdf1_mns2016.hh"
-#include "../src/ecdf1_new.hh"
+#include "../src/ecdf1_m2023.hh"
 
 
 SWIGINTERN int
@@ -7500,7 +7500,7 @@ SWIGINTERN PyObject *_wrap_ecdf1_mns2016_b(PyObject *SWIGUNUSEDPARM(self), PyObj
 }
 
 
-SWIGINTERN PyObject *_wrap_ecdf1_new_B(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+SWIGINTERN PyObject *_wrap_ecdf1_m2023_B(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   std::vector< double,std::allocator< double > > *arg1 = 0 ;
   int res1 = SWIG_OLDOBJ ;
@@ -7513,16 +7513,16 @@ SWIGINTERN PyObject *_wrap_ecdf1_new_B(PyObject *SWIGUNUSEDPARM(self), PyObject
     std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0;
     res1 = swig::asptr(swig_obj[0], &ptr);
     if (!SWIG_IsOK(res1)) {
-      SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ecdf1_new_B" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
+      SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ecdf1_m2023_B" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
     }
     if (!ptr) {
-      SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ecdf1_new_B" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
+      SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ecdf1_m2023_B" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
     }
     arg1 = ptr;
   }
   {
     try {
-      result = (double)ecdf1_new_B((std::vector< double,std::allocator< double > > const &)*arg1);
+      result = (double)ecdf1_m2023_B((std::vector< double,std::allocator< double > > const &)*arg1);
     } catch(std::runtime_error& e) {
       SWIG_exception(SWIG_RuntimeError, e.what());
     } catch(std::exception& e) {
@@ -7540,7 +7540,7 @@ SWIGINTERN PyObject *_wrap_ecdf1_new_B(PyObject *SWIGUNUSEDPARM(self), PyObject
 }
 
 
-SWIGINTERN PyObject *_wrap_ecdf1_new_b(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+SWIGINTERN PyObject *_wrap_ecdf1_m2023_b(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   std::vector< double,std::allocator< double > > *arg1 = 0 ;
   int res1 = SWIG_OLDOBJ ;
@@ -7553,16 +7553,16 @@ SWIGINTERN PyObject *_wrap_ecdf1_new_b(PyObject *SWIGUNUSEDPARM(self), PyObject
     std::vector< double,std::allocator< double > > *ptr = (std::vector< double,std::allocator< double > > *)0;
     res1 = swig::asptr(swig_obj[0], &ptr);
     if (!SWIG_IsOK(res1)) {
-      SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ecdf1_new_b" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
+      SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ecdf1_m2023_b" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
     }
     if (!ptr) {
-      SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ecdf1_new_b" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
+      SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "ecdf1_m2023_b" "', argument " "1"" of type '" "std::vector< double,std::allocator< double > > const &""'"); 
     }
     arg1 = ptr;
   }
   {
     try {
-      result = (double)ecdf1_new_b((std::vector< double,std::allocator< double > > const &)*arg1);
+      result = (double)ecdf1_m2023_b((std::vector< double,std::allocator< double > > const &)*arg1);
     } catch(std::runtime_error& e) {
       SWIG_exception(SWIG_RuntimeError, e.what());
     } catch(std::exception& e) {
@@ -7638,8 +7638,8 @@ static PyMethodDef SwigMethods[] = {
 	 { "ecdf2", _wrap_ecdf2, METH_VARARGS, "ecdf2(VectorDouble b, VectorDouble B, bool use_fft) -> double"},
 	 { "ecdf1_mns2016_B", _wrap_ecdf1_mns2016_B, METH_O, "ecdf1_mns2016_B(VectorDouble B) -> double"},
 	 { "ecdf1_mns2016_b", _wrap_ecdf1_mns2016_b, METH_O, "ecdf1_mns2016_b(VectorDouble b) -> double"},
-	 { "ecdf1_new_B", _wrap_ecdf1_new_B, METH_O, "ecdf1_new_B(VectorDouble B) -> double"},
-	 { "ecdf1_new_b", _wrap_ecdf1_new_b, METH_O, "ecdf1_new_b(VectorDouble b) -> double"},
+	 { "ecdf1_m2023_B", _wrap_ecdf1_m2023_B, METH_O, "ecdf1_m2023_B(VectorDouble B) -> double"},
+	 { "ecdf1_m2023_b", _wrap_ecdf1_m2023_b, METH_O, "ecdf1_m2023_b(VectorDouble b) -> double"},
 	 { NULL, NULL, 0, NULL }
 };
 

diff --git a/python_extension/crossprob.py b/python_extension/crossprob.py
@@ -27,16 +27,16 @@
              otherwise the O(n^3) algorithm of [KS2001]
 
 Faster functions are available for the special case of a single boundary:
-    ecdf1_new_b(b)
+    ecdf1_m2023_b(b)
         Implements a new O(n^2) algorithm. B_i are implicitly assumed to be 1. 
-    ecdf1_new_B(B)
+    ecdf1_m2023_B(B)
         Implements the O(n^2) algorithm. b_i are implicitly assumed to be 0. 
     ecdf1_mns2016_b(b)
         Implements the O(n^2) algorithm of [MNS2016]. B_i are implicitly assumed to be 1. 
-        Generally slower and less numerically stable than ecdf1_new_b()
+        Generally slower and less numerically stable than ecdf1_m2023_b()
     ecdf1_mns2016_B(B)
         Implements the O(n^2) algorithm of [MNS2016]. b_i are implicitly assumed to be 0. 
-        Generally slower and less numerically stable than ecdf1_new_B()
+        Generally slower and less numerically stable than ecdf1_m2023_B()
 
 EXAMPLES
     For a sample X_1, X_2, X_3 with order statistics X_(1) <= X_(2) <= X(3), the probability
@@ -294,12 +294,12 @@ def ecdf1_mns2016_b(b):
     r"""ecdf1_mns2016_b(VectorDouble b) -> double"""
     return _crossprob.ecdf1_mns2016_b(b)
 
-def ecdf1_new_B(B):
-    r"""ecdf1_new_B(VectorDouble B) -> double"""
-    return _crossprob.ecdf1_new_B(B)
+def ecdf1_m2023_B(B):
+    r"""ecdf1_m2023_B(VectorDouble B) -> double"""
+    return _crossprob.ecdf1_m2023_B(B)
 
-def ecdf1_new_b(b):
-    r"""ecdf1_new_b(VectorDouble b) -> double"""
-    return _crossprob.ecdf1_new_b(b)
+def ecdf1_m2023_b(b):
+    r"""ecdf1_m2023_b(VectorDouble b) -> double"""
+    return _crossprob.ecdf1_m2023_b(b)
 
 
diff --git a/setup.py b/setup.py
@@ -9,7 +9,7 @@
         'src/poisson_pmf.cc',
         'src/fftwconvolver.cc',
         'src/ecdf1_mns2016.cc',
-        'src/ecdf1_new.cc',
+        'src/ecdf1_m2023.cc',
         'src/ecdf2.cc',
         'python_extension/crossprob.cc'
     ],
@@ -45,16 +45,16 @@
              otherwise the O(n^3) algorithm of [KS2001]
 
 Faster functions are available for the special case of a single boundary:
-    ecdf1_new_b(b)
+    ecdf1_m2023_b(b)
         Implements a new O(n^2) algorithm [NEW]. B_i are implicitly assumed to be 1. 
-    ecdf1_new_B(B)
+    ecdf1_m2023_B(B)
         Implements the O(n^2) algorithm [NEW]. b_i are implicitly assumed to be 0. 
     ecdf1_mns2016_b(b)
         Implements the O(n^2) algorithm of [MNS2016]. B_i are implicitly assumed to be 1. 
-        Generally slower and less numerically stable than ecdf1_new_b()
+        Generally slower and less numerically stable than ecdf1_m2023_b()
     ecdf1_mns2016_B(B)
         Implements the O(n^2) algorithm of [MNS2016]. b_i are implicitly assumed to be 0. 
-        Generally slower and less numerically stable than ecdf1_new_B()
+        Generally slower and less numerically stable than ecdf1_m2023_B()
 
 EXAMPLES
     For a sample X_1, X_2, X_3 with order statistics X_(1) <= X_(2) <= X(3), the probability

diff --git a/src/common.cc b/src/common.cc
@@ -2,6 +2,7 @@
 
 #include <stdexcept>
 #include <sstream>
+#include <limits>
 
 using namespace std;
 

diff --git a/src/common.hh b/src/common.hh
@@ -3,6 +3,7 @@
 
 #include <vector>
 #include <string>
+#include <iterator>
 
 void check_boundary_vector(std::string name, int n, const std::vector<double>& v);
 

diff --git a/src/crossprob.cc b/src/crossprob.cc
@@ -4,13 +4,14 @@
 #include <stdexcept>
 #include <cassert>
 #include <algorithm>
+#include <iterator>
 
 #include "common.hh"
 #include "read_boundaries_file.hh"
 #include "string_utils.hh"
 
 #include "ecdf1_mns2016.hh"
-#include "ecdf1_new.hh"
+#include "ecdf1_m2023.hh"
 #include "ecdf2.hh"
 
 using namespace std;
@@ -39,7 +40,7 @@ static void print_usage()
     cout << "        ecdf2-ks2001: an O(n^3) algorithm for two-sided boundaries. [KS2001]\n";
     cout << "        ecdf2-mn2017: an O(n^2 log n) method for two-sided boundaries. [MN2017]\n";
     cout << "        ecdf1-mns2016: an O(n^2) method for one-sided boundaries. [MNS2016]\n";
-    cout << "        ecdf1-new: New O(n^2) method, typically faster than ecdf1-mns2016. [NEW]\n";
+    cout << "        ecdf1-m2023: New O(n^2) method, typically faster than ecdf1-mns2016. [M2023]\n";
     cout << "\n";            
     cout << "    <one-or-two-sided-boundaries-filename>\n";
     cout << "        This text file contains the two lines of comma-separater numbers:\n";
@@ -63,7 +64,7 @@ static void print_usage()
     cout << "\n";
     cout << "    To compute a one-sided crossing probability for two samples\n";
     cout << "    that X_(1) <= 0.5 and X_(2) <= 0.7, we can run\n";
-    cout << "        ./bin/crossprob ecdf1-new bounds1.txt\n";
+    cout << "        ./bin/crossprob ecdf1-m2023 bounds1.txt\n";
     cout << "    where bounds1.txt is the following (first line is empty):\n";
     cout << "            \n";
     cout << "    0.5, 0.7\n";
@@ -75,7 +76,8 @@ static void print_usage()
     cout << "             p-value calculation. Electronic Journal of Statistics. https://doi.org/10.1214/16-EJS1172\n";
     cout << "    [MN2017] Amit Moscovich, Boaz Nadler (2017). Fast calculation of boundary crossing probabilities for Poisson processes.\n";
     cout << "             Statistics & Probability Letters. https://doi.org/10.1016/j.spl.2016.11.027\n";
-    cout << "    [NEW]    Amit Moscovich (2020). Fast calculation of p-values for one-sided Kolmogorov-Smirnov type statistics. Preprint. https://arxiv.org/abs/2009.04954\n";
+    cout << "    [M2023]  Amit Moscovich (2023). Fast calculation of p-values for one-sided Kolmogorov-Smirnov type statistics.\n";
+    cout << "             Computational Statistics & Data Analysis. https://doi.org/10.1016/j.csda.2023.107769\n";
 }
 
 double calculate_ecdf1_mns2016(const vector<double>& b, const vector<double>& B)
@@ -90,12 +92,12 @@ double calculate_ecdf1_mns2016(const vector<double>& b, const vector<double>& B)
     }
 }
 
-double calculate_ecdf1_new(const vector<double>& b, const vector<double>& B)
+double calculate_ecdf1_m2023(const vector<double>& b, const vector<double>& B)
 {
     if ((b.size() > 0) && (B.size() == 0)) {
-        return ecdf1_new_b(b);
+        return ecdf1_m2023_b(b);
     } else if ((b.size() == 0) && (B.size() > 0)) {
-        return ecdf1_new_B(B);
+        return ecdf1_m2023_B(B);
     } else {
         print_usage();
         throw runtime_error("Expecting EITHER a lower or an upper boundary function when using the 'ecdf1-m2020' command for computing a one-sided boundary crossing.\n");
@@ -156,16 +158,16 @@ static int handle_command_line_arguments(int argc, char* argv[])
     double result;
     if (command == "ecdf1-mns2016") {
         result = calculate_ecdf1_mns2016(b, B);
-    } else if (command == "ecdf1-new") {
-        result = calculate_ecdf1_new(b, B);
+    } else if (command == "ecdf1-m2023") {
+        result = calculate_ecdf1_m2023(b, B);
     } else if (command == "ecdf2-ks2001") {
         result = calculate_ecdf2_ks2001(b, B);
     } else if (command == "ecdf2-mn2017") {
 
         result = calculate_ecdf2_mn2017(b, B);
     } else {
         print_usage();
-        throw runtime_error("Second command line argument must be one of: 'ecdf1-mns2016', 'ecdf1-new', 'ecdf2-ks2001', 'ecdf2-mn2017'.");
+        throw runtime_error("Second command line argument must be one of: 'ecdf1-mns2016', 'ecdf1-m2023', 'ecdf2-ks2001', 'ecdf2-mn2017'.");
     }
 
     cout << result << endl;

diff --git a/src/crossprob.i b/src/crossprob.i
@@ -25,16 +25,16 @@ Its parameters are:
              otherwise the O(n^3) algorithm of [KS2001]
 
 Faster functions are available for the special case of a single boundary:
-    ecdf1_new_b(b)
+    ecdf1_m2023_b(b)
         Implements a new O(n^2) algorithm. B_i are implicitly assumed to be 1. 
-    ecdf1_new_B(B)
+    ecdf1_m2023_B(B)
         Implements the O(n^2) algorithm. b_i are implicitly assumed to be 0. 
     ecdf1_mns2016_b(b)
         Implements the O(n^2) algorithm of [MNS2016]. B_i are implicitly assumed to be 1. 
-        Generally slower and less numerically stable than ecdf1_new_b()
+        Generally slower and less numerically stable than ecdf1_m2023_b()
     ecdf1_mns2016_B(B)
         Implements the O(n^2) algorithm of [MNS2016]. b_i are implicitly assumed to be 0. 
-        Generally slower and less numerically stable than ecdf1_new_B()
+        Generally slower and less numerically stable than ecdf1_m2023_B()
 
 EXAMPLES
     For a sample X_1, X_2, X_3 with order statistics X_(1) <= X_(2) <= X(3), the probability
@@ -78,11 +78,11 @@ namespace std {
 %{
 #include "../src/ecdf2.hh"
 #include "../src/ecdf1_mns2016.hh"
-#include "../src/ecdf1_new.hh"
+#include "../src/ecdf1_m2023.hh"
 %}
 
 %feature("autodoc", "1");
 %include "../src/ecdf2.hh"
 %include "../src/ecdf1_mns2016.hh"
-%include "../src/ecdf1_new.hh"
+%include "../src/ecdf1_m2023.hh"
 
diff --git a/src/ecdf1_new.cc → src/ecdf1_m2023.cc b/src/ecdf1_new.cc → src/ecdf1_m2023.cc
@@ -3,7 +3,7 @@
 #include <stdexcept>
 #include <algorithm>
 
-#include "ecdf1_new.hh"
+#include "ecdf1_m2023.hh"
 #include "common.hh"
 #include "poisson_pmf.hh"
 #include "fftwconvolver.hh"
@@ -111,9 +111,9 @@ vector<double> poisson_B_noncrossing_probability_n2(int n, double intensity, con
     return buffers.get_dest();
 }
 
-double ecdf1_new_B(const vector<double>& B)
+double ecdf1_m2023_B(const vector<double>& B)
 {
-    //cout << "Called ecdf1_new_B()\n";
+    //cout << "Called ecdf1_m2023_B()\n";
     int n = B.size();
     check_boundary_vector("B", n, B);
 
@@ -128,9 +128,9 @@ double ecdf1_new_B(const vector<double>& B)
 }
 // For n=10000, best results k=400...600
 
-double ecdf1_new_b(const vector<double>& b)
+double ecdf1_m2023_b(const vector<double>& b)
 {
-    //cout << "Called ecdf1_new_b()\n";
+    //cout << "Called ecdf1_m2023_b()\n";
     int n = b.size();
     check_boundary_vector("b", n, b);
 
@@ -139,5 +139,5 @@ double ecdf1_new_b(const vector<double>& b)
         symmetric_steps[i] = 1.0 - b[b.size() - 1 - i];
     }
 
-    return ecdf1_new_B(symmetric_steps);
+    return ecdf1_m2023_B(symmetric_steps);
 }
diff --git a/src/ecdf1_m2023.hh b/src/ecdf1_m2023.hh
@@ -0,0 +1,10 @@
+
+#ifndef __ecdf1_m2023_hh__
+#define __ecdf1_m2023_hh__
+
+#include <vector>
+
+double ecdf1_m2023_B(const std::vector<double>& B);
+double ecdf1_m2023_b(const std::vector<double>& b);
+
+#endif
diff --git a/src/ecdf1_mns2016.cc b/src/ecdf1_mns2016.cc
@@ -4,6 +4,7 @@
 #include <sstream>
 #include <cstring>
 #include <cassert>
+#include <iterator>
 #include "mm_malloc.h"
 
 #include "ecdf1_mns2016.hh"