@@ -299,7 +299,7 @@ def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees,
299299 1 internal node + 2 leaf nodes. (default: 4)
300300 :param maxBins: maximum number of bins used for splitting
301301 features
302- (default: 100 )
302+ (default: 32 )
303303 :param seed: Random seed for bootstrapping and choosing feature
304304 subsets.
305305 :return: RandomForestModel that can be used for prediction
@@ -377,7 +377,7 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetSt
377377 1 leaf node; depth 1 means 1 internal node + 2 leaf
378378 nodes. (default: 4)
379379 :param maxBins: maximum number of bins used for splitting
380- features (default: 100 )
380+ features (default: 32 )
381381 :param seed: Random seed for bootstrapping and choosing feature
382382 subsets.
383383 :return: RandomForestModel that can be used for prediction
@@ -435,16 +435,17 @@ class GradientBoostedTrees(object):
435435
436436 @classmethod
437437 def _train (cls , data , algo , categoricalFeaturesInfo ,
438- loss , numIterations , learningRate , maxDepth ):
438+ loss , numIterations , learningRate , maxDepth , maxBins ):
439439 first = data .first ()
440440 assert isinstance (first , LabeledPoint ), "the data should be RDD of LabeledPoint"
441441 model = callMLlibFunc ("trainGradientBoostedTreesModel" , data , algo , categoricalFeaturesInfo ,
442- loss , numIterations , learningRate , maxDepth )
442+ loss , numIterations , learningRate , maxDepth , maxBins )
443443 return GradientBoostedTreesModel (model )
444444
445445 @classmethod
446446 def trainClassifier (cls , data , categoricalFeaturesInfo ,
447- loss = "logLoss" , numIterations = 100 , learningRate = 0.1 , maxDepth = 3 ):
447+ loss = "logLoss" , numIterations = 100 , learningRate = 0.1 , maxDepth = 3 ,
448+ maxBins = 32 ):
448449 """
449450 Method to train a gradient-boosted trees model for
450451 classification.
@@ -467,6 +468,8 @@ def trainClassifier(cls, data, categoricalFeaturesInfo,
467468 :param maxDepth: Maximum depth of the tree. E.g., depth 0 means
468469 1 leaf node; depth 1 means 1 internal node + 2 leaf
469470 nodes. (default: 3)
471+ :param maxBins: maximum number of bins used for splitting
472+ features. DecisionTree requires maxBins >= max categories. (default: 32)
470473 :return: GradientBoostedTreesModel that can be used for
471474 prediction
472475
@@ -499,11 +502,12 @@ def trainClassifier(cls, data, categoricalFeaturesInfo,
499502 [1.0, 0.0]
500503 """
501504 return cls ._train (data , "classification" , categoricalFeaturesInfo ,
502- loss , numIterations , learningRate , maxDepth )
505+ loss , numIterations , learningRate , maxDepth , maxBins )
503506
504507 @classmethod
505508 def trainRegressor (cls , data , categoricalFeaturesInfo ,
506- loss = "leastSquaresError" , numIterations = 100 , learningRate = 0.1 , maxDepth = 3 ):
509+ loss = "leastSquaresError" , numIterations = 100 , learningRate = 0.1 , maxDepth = 3 ,
510+ maxBins = 32 ):
507511 """
508512 Method to train a gradient-boosted trees model for regression.
509513
@@ -522,6 +526,8 @@ def trainRegressor(cls, data, categoricalFeaturesInfo,
522526 contribution of each estimator. The learning rate
523527 should be in the interval (0, 1].
524528 (default: 0.1)
529+ :param maxBins: maximum number of bins used for splitting
530+ features. DecisionTree requires maxBins >= max categories. (default: 32)
525531 :param maxDepth: Maximum depth of the tree. E.g., depth 0 means
526532 1 leaf node; depth 1 means 1 internal node + 2 leaf
527533 nodes. (default: 3)
@@ -556,7 +562,7 @@ def trainRegressor(cls, data, categoricalFeaturesInfo,
556562 [1.0, 0.0]
557563 """
558564 return cls ._train (data , "regression" , categoricalFeaturesInfo ,
559- loss , numIterations , learningRate , maxDepth )
565+ loss , numIterations , learningRate , maxDepth , maxBins )
560566
561567
562568def _test ():
0 commit comments