Skip to content

Commit 448dd8d

Browse files
committed
Used duecredit to point to the original publication.
1 parent 0738c42 commit 448dd8d

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

sklforestci/due.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# emacs: at the end of the file
2+
# ex: set sts=4 ts=4 sw=4 et:
3+
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
4+
"""
5+
6+
Stub file for a guaranteed safe import of duecredit constructs: if duecredit
is not available, inert no-op stand-ins are used instead.
8+
9+
To use it, place it into your project codebase to be imported, e.g. copy as
10+
11+
cp stub.py /path/to/module/due.py
12+
13+
Note that it might be better to avoid naming it duecredit.py to avoid shadowing
14+
installed duecredit.
15+
16+
Then use in your code as
17+
18+
from .due import due, Doi, BibTeX
19+
20+
See https://github.com/duecredit/duecredit/blob/master/README.md for examples.
21+
22+
Origin: Originally a part of the duecredit
23+
Copyright: 2015-2016 DueCredit developers
24+
License: BSD-2
25+
"""
26+
27+
__version__ = '0.0.5'
28+
29+
30+
class InactiveDueCreditCollector(object):
    """Inert stand-in for duecredit's collector.

    Exposes the same entry points (``cite``, ``dcite``, ``load``, ``add``)
    so importing code runs unchanged when duecredit is not installed.
    """

    def _donothing(self, *args, **kwargs):
        """Accept any arguments and do nothing."""

    def dcite(self, *args, **kwargs):
        """Return a decorator that hands the function back untouched."""
        def passthrough(func):
            return func
        return passthrough

    # Every citation-related entry point shares the same no-op.
    cite = load = add = _donothing

    def __repr__(self):
        return self.__class__.__name__ + '()'
46+
47+
48+
def _donothing_func(*args, **kwargs):
49+
"""Perform no good and no bad"""
50+
pass
51+
52+
try:
    from duecredit import due, BibTeX, Doi, Url
    # A broken or partial duecredit install may import but lack the API
    # this module relies on -- treat that as a failure too.
    if 'due' in locals() and not hasattr(due, 'cite'):
        raise RuntimeError(
            "Imported due lacks .cite. DueCredit is now disabled")
except Exception as e:
    # A missing duecredit is the expected, silent case.  BUG FIX: the
    # original compared type(e).__name__ to the string 'ImportError',
    # which misses subclasses -- on Python 3.6+ a missing package raises
    # ModuleNotFoundError, so the original logged a spurious error on
    # every import without duecredit.  isinstance covers subclasses.
    if not isinstance(e, ImportError):
        import logging
        logging.getLogger("duecredit").error(
            "Failed to import duecredit due to %s" % str(e))
    # Fall back to inert stand-ins so callers can use due/BibTeX/Doi/Url
    # unconditionally.
    due = InactiveDueCreditCollector()
    BibTeX = Doi = Url = _donothing_func
65+
66+
# Emacs mode definitions
67+
# Local Variables:
68+
# mode: python
69+
# py-indent-offset: 4
70+
# tab-width: 4
71+
# indent-tabs-mode: nil
72+
# End:

sklforestci/sklforestci.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,49 @@
11
import numpy as np
22
from sklearn.ensemble.forest import _generate_sample_indices
3+
from .due import due, BibTeX
34

45
__all__ = ["calc_inbag", "random_forest_error", "_bias_correction",
           "_core_computation"]

# Cite the publication this implementation is based on whenever the module
# is imported (a no-op when duecredit is not installed; see .due).
due.cite(BibTeX("""
@ARTICLE{Wager2014-wn,
  title = "Confidence Intervals for Random Forests: The Jackknife and the
           Infinitesimal Jackknife",
  author = "Wager, Stefan and Hastie, Trevor and Efron, Bradley",
  affiliation = "Department of Statistics, Stanford University, Stanford, CA
                 94305, USA. Department of Statistics, Stanford University,
                 Stanford, CA 94305, USA. Department of Statistics, Stanford
                 University, Stanford, CA 94305, USA.",
  abstract = "We study the variability of predictions made by bagged
              learners and random forests, and show how to estimate standard
              errors for these methods. Our work builds on variance
              estimates for bagging proposed by Efron (1992, 2013) that are
              based on the jackknife and the infinitesimal jackknife (IJ).
              In practice, bagged predictors are computed using a finite
              number B of bootstrap replicates, and working with a large B
              can be computationally expensive. Direct applications of
              jackknife and IJ estimators to bagging require B = $\\Theta$(n
              (1.5)) bootstrap replicates to converge, where n is the size
              of the training set. We propose improved versions that only
              require B = $\\Theta$(n) replicates. Moreover, we show that the
              IJ estimator requires 1.7 times less bootstrap replicates than
              the jackknife to achieve a given accuracy. Finally, we study
              the sampling distributions of the jackknife and IJ variance
              estimates themselves. We illustrate our findings with multiple
              experiments and simulation studies.",
  journal = "J. Mach. Learn. Res.",
  volume = 15,
  number = 1,
  pages = "1625--1651",
  month = jan,
  year = 2014,
  keywords = "Monte Carlo noise; bagging; jackknife methods; variance
              estimation"}
"""),
         # BUG FIX: the original passed description=("...:", "...") -- a
         # stray comma turned the intended implicit string concatenation
         # into a 2-tuple.  duecredit expects a single string here.
         description=("Confidence Intervals for Random Forests: "
                      "The Jackknife and the Infinitesimal Jackknife"),
         path='sklforestci')
46+
747

848
def calc_inbag(n_samples, forest):
949
"""

0 commit comments

Comments
 (0)