@@ -80,7 +80,7 @@ The ordered splits create "bins" and the maximum number of such
80
80
bins can be specified using the `maxBins` parameter.
81
81
82
82
Note that the number of bins cannot be greater than the number of instances `$N$` (a rare scenario
83
- since the default ` maxBins ` value is 100 ). The tree algorithm automatically reduces the number of
83
+ since the default `maxBins` value is 32). The tree algorithm automatically reduces the number of
84
84
bins if the condition is not satisfied.
85
85
86
86
** Categorical features**
@@ -117,7 +117,7 @@ all nodes at each level of the tree. This could lead to high memory requirements
117
117
of the tree, potentially leading to memory overflow errors. To alleviate this problem, a `maxMemoryInMB`
118
118
training parameter specifies the maximum amount of memory at the workers (twice as much at the
119
119
master) to be allocated to the histogram computation. The default value is conservatively chosen to
120
- be 128 MB to allow the decision algorithm to work in most scenarios. Once the memory requirements
120
+ be 256 MB to allow the decision algorithm to work in most scenarios. Once the memory requirements
121
121
for a level-wise computation cross the `maxMemoryInMB` threshold, the node training tasks at each
122
122
subsequent level are split into smaller tasks.
123
123
@@ -167,7 +167,7 @@ val numClasses = 2
167
167
val categoricalFeaturesInfo = Map[ Int, Int] ( )
168
168
val impurity = "gini"
169
169
val maxDepth = 5
170
- val maxBins = 100
170
+ val maxBins = 32
171
171
172
172
val model = DecisionTree.trainClassifier(data, numClasses, categoricalFeaturesInfo, impurity,
173
173
maxDepth, maxBins)
@@ -213,7 +213,7 @@ Integer numClasses = 2;
213
213
HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<Integer, Integer>();
214
214
String impurity = "gini";
215
215
Integer maxDepth = 5;
216
- Integer maxBins = 100 ;
216
+ Integer maxBins = 32;
217
217
218
218
// Train a DecisionTree model for classification.
219
219
final DecisionTreeModel model = DecisionTree.trainClassifier(data, numClasses,
@@ -250,7 +250,7 @@ data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt').cache()
250
250
# Train a DecisionTree model.
251
251
# Empty categoricalFeaturesInfo indicates all features are continuous.
252
252
model = DecisionTree.trainClassifier(data, numClasses=2, categoricalFeaturesInfo={},
253
- impurity='gini', maxDepth=5, maxBins=100 )
253
+ impurity='gini', maxDepth=5, maxBins=32)
254
254
255
255
# Evaluate model on training instances and compute training error
256
256
predictions = model.predict(data.map(lambda x: x.features))
@@ -293,7 +293,7 @@ val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").cache
293
293
val categoricalFeaturesInfo = Map[ Int, Int] ( )
294
294
val impurity = "variance"
295
295
val maxDepth = 5
296
- val maxBins = 100
296
+ val maxBins = 32
297
297
298
298
val model = DecisionTree.trainRegressor(data, categoricalFeaturesInfo, impurity,
299
299
maxDepth, maxBins)
@@ -338,7 +338,7 @@ JavaSparkContext sc = new JavaSparkContext(sparkConf);
338
338
HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<Integer, Integer>();
339
339
String impurity = "variance";
340
340
Integer maxDepth = 5;
341
- Integer maxBins = 100 ;
341
+ Integer maxBins = 32;
342
342
343
343
// Train a DecisionTree model.
344
344
final DecisionTreeModel model = DecisionTree.trainRegressor(data,
@@ -380,7 +380,7 @@ data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt').cache()
380
380
# Train a DecisionTree model.
381
381
# Empty categoricalFeaturesInfo indicates all features are continuous.
382
382
model = DecisionTree.trainRegressor(data, categoricalFeaturesInfo={},
383
- impurity='variance', maxDepth=5, maxBins=100 )
383
+ impurity='variance', maxDepth=5, maxBins=32)
384
384
385
385
# Evaluate model on training instances and compute training error
386
386
predictions = model.predict(data.map(lambda x: x.features))
0 commit comments