Skip to content

[SPARK-30630][ML] Remove numTrees in GBT in 3.0.0 #27330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,6 @@ class GBTClassificationModel private[ml](
}
}

- /** Number of trees in ensemble */
- val numTrees: Int = trees.length

@Since("1.4.0")
override def copy(extra: ParamMap): GBTClassificationModel = {
copyValues(new GBTClassificationModel(uid, _trees, _treeWeights, numFeatures, numClasses),
Expand All @@ -353,7 +350,7 @@ class GBTClassificationModel private[ml](

@Since("1.4.0")
override def toString: String = {
- s"GBTClassificationModel: uid = $uid, numTrees=$numTrees, numClasses=$numClasses, " +
+ s"GBTClassificationModel: uid = $uid, numTrees=$getNumTrees, numClasses=$numClasses, " +
s"numFeatures=$numFeatures"
}

Expand All @@ -374,7 +371,7 @@ class GBTClassificationModel private[ml](
/** Raw prediction for the positive class. */
private def margin(features: Vector): Double = {
val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
- blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
+ blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
}

/** (private[ml]) Convert to a model in the old API */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,9 @@ class GBTRegressionModel private[ml](
// TODO: When we add a generic Boosting class, handle transform there? SPARK-7129
// Classifies by thresholding sum of weighted tree predictions
val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
- blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
+ blas.ddot(getNumTrees, treePredictions, 1, _treeWeights, 1)
}

- /** Number of trees in ensemble */
- val numTrees: Int = trees.length

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressionModel = {
copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
Expand All @@ -314,7 +311,7 @@ class GBTRegressionModel private[ml](

@Since("1.4.0")
override def toString: String = {
- s"GBTRegressionModel: uid=$uid, numTrees=$numTrees, numFeatures=$numFeatures"
+ s"GBTRegressionModel: uid=$uid, numTrees=$getNumTrees, numFeatures=$numFeatures"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
assert(raw.size === 2)
// check that raw prediction is tree predictions dot tree weights
val treePredictions = gbtModel.trees.map(_.rootNode.predictImpl(features).prediction)
- val prediction = blas.ddot(gbtModel.numTrees, treePredictions, 1, gbtModel.treeWeights, 1)
+ val prediction = blas.ddot(gbtModel.getNumTrees, treePredictions, 1,
+ gbtModel.treeWeights, 1)
assert(raw ~== Vectors.dense(-prediction, prediction) relTol eps)

// Compare rawPrediction with probability
Expand Down Expand Up @@ -436,9 +437,9 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
gbt.setValidationIndicatorCol(validationIndicatorCol)
val modelWithValidation = gbt.fit(trainDF.union(validationDF))

- assert(modelWithoutValidation.numTrees === numIter)
+ assert(modelWithoutValidation.getNumTrees === numIter)
// early stop
- assert(modelWithValidation.numTrees < numIter)
+ assert(modelWithValidation.getNumTrees < numIter)

val (errorWithoutValidation, errorWithValidation) = {
val remappedRdd = validationData.map {
Expand All @@ -457,10 +458,10 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
modelWithoutValidation.treeWeights, modelWithoutValidation.getOldLossType,
OldAlgo.Classification)
assert(evaluationArray.length === numIter)
- assert(evaluationArray(modelWithValidation.numTrees) >
- evaluationArray(modelWithValidation.numTrees - 1))
+ assert(evaluationArray(modelWithValidation.getNumTrees) >
+ evaluationArray(modelWithValidation.getNumTrees - 1))
var i = 1
- while (i < modelWithValidation.numTrees) {
+ while (i < modelWithValidation.getNumTrees) {
assert(evaluationArray(i) <= evaluationArray(i - 1))
i += 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,9 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest {
gbt.setValidationIndicatorCol(validationIndicatorCol)
val modelWithValidation = gbt.fit(trainDF.union(validationDF))

- assert(modelWithoutValidation.numTrees === numIter)
+ assert(modelWithoutValidation.getNumTrees === numIter)
// early stop
- assert(modelWithValidation.numTrees < numIter)
+ assert(modelWithValidation.getNumTrees < numIter)

val errorWithoutValidation = GradientBoostedTrees.computeWeightedError(
validationData.map(_.toInstance),
Expand All @@ -294,10 +294,10 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest {
modelWithoutValidation.treeWeights, modelWithoutValidation.getOldLossType,
OldAlgo.Regression)
assert(evaluationArray.length === numIter)
- assert(evaluationArray(modelWithValidation.numTrees) >
- evaluationArray(modelWithValidation.numTrees - 1))
+ assert(evaluationArray(modelWithValidation.getNumTrees) >
+ evaluationArray(modelWithValidation.getNumTrees - 1))
var i = 1
- while (i < modelWithValidation.numTrees) {
+ while (i < modelWithValidation.getNumTrees) {
assert(evaluationArray(i) <= evaluationArray(i - 1))
i += 1
}
Expand Down
4 changes: 4 additions & 0 deletions project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,10 @@ object MimaExcludes {
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel.layers"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel.this"),

+ // [SPARK-30630][ML] Remove numTrees in GBT
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.numTrees"),
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.numTrees"),

// Data Source V2 API changes
(problem: Problem) => problem match {
case MissingClassProblem(cls) =>
Expand Down