|
1 | 1 | import numpy as np
|
2 | 2 | from sklearn.ensemble.forest import _generate_sample_indices
|
| 3 | +from .due import due, BibTeX |
3 | 4 |
|
# Public API of this module.
# NOTE(review): the underscore-prefixed helpers are deliberately exported
# here despite their "private" names — presumably so downstream test or
# benchmark code can reach the bias-correction internals; confirm intent.
__all__ = ["calc_inbag", "random_forest_error", "_bias_correction",
           "_core_computation"]
|
6 | 7 |
|
# Register the Wager, Hastie & Efron (2014) citation with duecredit so the
# reference is reported whenever the 'sklforestci' code path is used.
#
# Fix: `description` was previously a 2-tuple — the trailing comma after the
# first string literal defeated the intended implicit string concatenation.
# Removing the comma (and adding the missing separator space) yields the
# single description string that due.cite expects.
due.cite(BibTeX("""
@ARTICLE{Wager2014-wn,
  title = "Confidence Intervals for Random Forests: The Jackknife and the
           Infinitesimal Jackknife",
  author = "Wager, Stefan and Hastie, Trevor and Efron, Bradley",
  affiliation = "Department of Statistics, Stanford University, Stanford, CA
                 94305, USA. Department of Statistics, Stanford University,
                 Stanford, CA 94305, USA. Department of Statistics, Stanford
                 University, Stanford, CA 94305, USA.",
  abstract = "We study the variability of predictions made by bagged
              learners and random forests, and show how to estimate standard
              errors for these methods. Our work builds on variance
              estimates for bagging proposed by Efron (1992, 2013) that are
              based on the jackknife and the infinitesimal jackknife (IJ).
              In practice, bagged predictors are computed using a finite
              number B of bootstrap replicates, and working with a large B
              can be computationally expensive. Direct applications of
              jackknife and IJ estimators to bagging require B = $\\Theta$(n
              (1.5)) bootstrap replicates to converge, where n is the size
              of the training set. We propose improved versions that only
              require B = $\\Theta$(n) replicates. Moreover, we show that the
              IJ estimator requires 1.7 times less bootstrap replicates than
              the jackknife to achieve a given accuracy. Finally, we study
              the sampling distributions of the jackknife and IJ variance
              estimates themselves. We illustrate our findings with multiple
              experiments and simulation studies.",
  journal = "J. Mach. Learn. Res.",
  volume = 15,
  number = 1,
  pages = "1625--1651",
  month = jan,
  year = 2014,
  keywords = "Monte Carlo noise; bagging; jackknife methods; variance
              estimation"}
 """),
         description=("Confidence Intervals for Random Forests: "
                      "The Jackknife and the Infinitesimal Jackknife"),
         path='sklforestci')
7 | 47 |
|
8 | 48 | def calc_inbag(n_samples, forest):
|
9 | 49 | """
|
|
0 commit comments