|
1 | 1 | import numpy as np
|
2 | 2 | from sklearn.ensemble.forest import _generate_sample_indices
|
| 3 | +from .due import due, BibTeX |
3 | 4 |
|
# Public API of this module.
# NOTE(review): the underscore-prefixed helpers are deliberately exported
# here despite their "private" names — presumably so downstream test or
# benchmark code can reach the bias-correction internals; confirm intent.
__all__ = ["calc_inbag", "random_forest_error", "_bias_correction",
           "_core_computation"]
|
6 | 7 |
|
# Register the Wager, Hastie & Efron (2014) citation with duecredit so the
# reference is reported whenever the 'sklforestci' code path is used.
#
# Fix: `description` was previously a 2-tuple — the trailing comma after the
# first string literal defeated the intended implicit string concatenation.
# Removing the comma (and adding the missing separator space) yields the
# single description string that due.cite expects.
due.cite(BibTeX("""
@ARTICLE{Wager2014-wn,
  title = "Confidence Intervals for Random Forests: The Jackknife and the
           Infinitesimal Jackknife",
  author = "Wager, Stefan and Hastie, Trevor and Efron, Bradley",
  affiliation = "Department of Statistics, Stanford University, Stanford, CA
                 94305, USA. Department of Statistics, Stanford University,
                 Stanford, CA 94305, USA. Department of Statistics, Stanford
                 University, Stanford, CA 94305, USA.",
  abstract = "We study the variability of predictions made by bagged
              learners and random forests, and show how to estimate standard
              errors for these methods. Our work builds on variance
              estimates for bagging proposed by Efron (1992, 2013) that are
              based on the jackknife and the infinitesimal jackknife (IJ).
              In practice, bagged predictors are computed using a finite
              number B of bootstrap replicates, and working with a large B
              can be computationally expensive. Direct applications of
              jackknife and IJ estimators to bagging require B = $\\Theta$(n
              (1.5)) bootstrap replicates to converge, where n is the size
              of the training set. We propose improved versions that only
              require B = $\\Theta$(n) replicates. Moreover, we show that the
              IJ estimator requires 1.7 times less bootstrap replicates than
              the jackknife to achieve a given accuracy. Finally, we study
              the sampling distributions of the jackknife and IJ variance
              estimates themselves. We illustrate our findings with multiple
              experiments and simulation studies.",
  journal = "J. Mach. Learn. Res.",
  volume = 15,
  number = 1,
  pages = "1625--1651",
  month = jan,
  year = 2014,
  keywords = "Monte Carlo noise; bagging; jackknife methods; variance
              estimation"}
 """),
         description=("Confidence Intervals for Random Forests: "
                      "The Jackknife and the Infinitesimal Jackknife"),
         path='sklforestci')
7 | 47 |
|
8 | 48 | def calc_inbag(n_samples, forest):
|
9 | 49 | """
|
|
0 commit comments