tdas
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala
Lines changed: 2 additions & 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
Lines changed: 1 addition & 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
Lines changed: 26 additions & 21 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala
Lines changed: 5 additions & 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
Lines changed: 1 addition & 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
Lines changed: 72 additions & 61 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
Lines changed: 1 addition & 1 deletion
diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaDecisionTreeClassifierSuite.java
Lines changed: 0 additions & 2 deletions
@@ -30,11 +30,13 @@ import org.apache.spark.ml.param.ParamMap
 abstract class Model[M <: Model[M]] extends Transformer {
   /**
    * The parent estimator that produced this model.
+   * Note: For ensembles' component Models, this value can be null.
    */
   val parent: Estimator[M]
 
   /**
    * Fitting parameters, such that parent.fit(..., fittingParamMap) could reproduce the model.
+   * Note: For ensembles' component Models, this value can be null.
    */
   val fittingParamMap: ParamMap
 }
@@ -113,7 +113,7 @@ final class DecisionTreeClassificationModel private[ml] (
   require(rootNode != null,
     "DecisionTreeClassificationModel given null rootNode, but it requires a non-null rootNode.")
 
-  override protected def predict(features: Vector): Double = {
+  override private[ml] def predict(features: Vector): Double = {
     rootNode.predict(features)
   }
 
 
@@ -19,11 +19,10 @@ package org.apache.spark.ml.classification
 
 import scala.collection.mutable
 
-import org.apache.spark.SparkContext
 import org.apache.spark.annotation.AlphaComponent
 import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor}
 import org.apache.spark.ml.impl.tree._
-import org.apache.spark.ml.param.ParamMap
+import org.apache.spark.ml.param.{Params, ParamMap}
 import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel}
 import org.apache.spark.ml.util.MetadataUtils
 import org.apache.spark.mllib.linalg.Vector
@@ -100,11 +99,10 @@ final class RandomForestClassifier
   }
 
   /** (private[ml]) Create a Strategy instance to use with the old API. */
-  override private[ml] def getOldStrategy(
+  private[ml] def getOldStrategy(
       categoricalFeatures: Map[Int, Int],
       numClasses: Int): OldStrategy = {
-    super.getOldStrategy(categoricalFeatures, numClasses, OldAlgo.Classification, getOldImpurity,
-      getSubsamplingRate)
+    super.getOldStrategy(categoricalFeatures, numClasses, OldAlgo.Classification, getOldImpurity)
   }
 }
 
@@ -123,10 +121,11 @@ object RandomForestClassifier {
  * It supports both binary and multiclass labels, as well as both continuous and categorical
  * features.
  * @param trees  Decision trees in the ensemble.
+ *               Warning: These have null parents.
  */
 @AlphaComponent
 final class RandomForestClassificationModel private[ml] (
-    override val parent: DecisionTreeClassifier,
+    override val parent: RandomForestClassifier,
     override val fittingParamMap: ParamMap,
     val trees: Array[DecisionTreeClassificationModel])
   extends PredictionModel[Vector, RandomForestClassificationModel]
@@ -140,6 +139,8 @@ final class RandomForestClassificationModel private[ml] (
   override lazy val getTreeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)
 
   override def predict(features: Vector): Double = {
+    // TODO: Override transform() to broadcast model.
+    // TODO: When we add a generic Bagging class, handle transform there. Skip single-Row predict.
     // Classifies using majority votes.
     // Ignore the weights since all are 1.0 for now.
     val votes = mutable.Map.empty[Int, Double]
@@ -150,33 +151,37 @@ final class RandomForestClassificationModel private[ml] (
     votes.maxBy(_._2)._1
   }
 
-  override def toString: String = {
-    s"RandomForestClassificationModel with $numTrees trees"
+  override protected def copy(): RandomForestClassificationModel = {
+    val m = new RandomForestClassificationModel(parent, fittingParamMap, trees)
+    Params.inheritValues(this.extractParamMap(), this, m)
+    m
   }
 
-  override def save(sc: SparkContext, path: String): Unit = {
-    this.toOld.save(sc, path)
+  override def toString: String = {
+    s"RandomForestClassificationModel with $numTrees trees"
   }
 
-  override protected def formatVersion: String = OldRandomForestModel.formatVersion
-
-  /** Convert to a model in the old API */
+  /** (private[ml]) Convert to a model in the old API */
   private[ml] def toOld: OldRandomForestModel = {
     new OldRandomForestModel(OldAlgo.Classification, trees.map(_.toOld))
   }
 }
 
-object RandomForestClassificationModel
-  extends Loader[RandomForestClassificationModel] {
-
-  override def load(sc: SparkContext, path: String): RandomForestClassificationModel = {
-    RandomForestClassificationModel.fromOld(OldRandomForestModel.load(sc, path))
-  }
+private[ml] object RandomForestClassificationModel {
 
-  private[ml] def fromOld(oldModel: OldRandomForestModel): RandomForestClassificationModel = {
+  /** (private[ml]) Convert a model from the old API */
+  def fromOld(
+      oldModel: OldRandomForestModel,
+      parent: RandomForestClassifier,
+      fittingParamMap: ParamMap,
+      categoricalFeatures: Map[Int, Int]): RandomForestClassificationModel = {
     require(oldModel.algo == OldAlgo.Classification,
       s"Cannot convert non-classification RandomForestModel (old API) to" +
         s" RandomForestClassificationModel (new API).  Algo is: ${oldModel.algo}")
-    new RandomForestClassificationModel(oldModel.trees.map(DecisionTreeClassificationModel.fromOld))
+    val trees = oldModel.trees.map { tree =>
+      // parent and fittingParamMap are null for each tree since there is no good way to set them.
+      DecisionTreeClassificationModel.fromOld(tree, null, null, categoricalFeatures)
+    }
+    new RandomForestClassificationModel(parent, fittingParamMap, trees)
   }
 }
@@ -352,12 +352,12 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams {
    * Create a Strategy instance to use with the old API.
    * NOTE: The caller should set impurity and seed.
    */
-  override private[ml] def getOldStrategy(
+  private[ml] def getOldStrategy(
       categoricalFeatures: Map[Int, Int],
-      numClasses: Int): OldStrategy = {
-    val strategy = super.getOldStrategy(categoricalFeatures, numClasses)
-    strategy.setSubsamplingRate(getSubsamplingRate)
-    strategy
+      numClasses: Int,
+      oldAlgo: OldAlgo.Algo,
+      oldImpurity: OldImpurity): OldStrategy = {
+    super.getOldStrategy(categoricalFeatures, numClasses, oldAlgo, oldImpurity, getSubsamplingRate)
   }
 }
 
 
@@ -104,7 +104,7 @@ final class DecisionTreeRegressionModel private[ml] (
   require(rootNode != null,
     "DecisionTreeClassificationModel given null rootNode, but it requires a non-null rootNode.")
 
-  override protected def predict(features: Vector): Double = {
+  override private[ml] def predict(features: Vector): Double = {
     rootNode.predict(features)
   }
 
 
@@ -15,90 +15,86 @@
  * limitations under the License.
  */
 
-package org.apache.spark.mllib.regression
-
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.impl.tree._
+package org.apache.spark.ml.regression
+
+import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor}
+import org.apache.spark.ml.impl.tree.{RandomForestParams, TreeRegressorParams}
+import org.apache.spark.ml.param.{Params, ParamMap}
+import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel}
+import org.apache.spark.ml.util.MetadataUtils
 import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{RandomForest => OldRandomForest}
 import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy}
 import org.apache.spark.mllib.tree.model.{RandomForestModel => OldRandomForestModel}
-import org.apache.spark.mllib.util.{Loader, Saveable}
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.DataFrame
 
 
 /**
+ * :: AlphaComponent ::
+ *
  * [[http://en.wikipedia.org/wiki/Random_forest  Random Forest]] learning algorithm for regression.
  * It supports both continuous and categorical features.
  */
-class RandomForestRegressor
-  extends TreeRegressor[RandomForestRegressionModel]
-  with RandomForestParams[RandomForestRegressor]
-  with TreeRegressorParams[RandomForestRegressor] {
+@AlphaComponent
+final class RandomForestRegressor
+  extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
+  with RandomForestParams with TreeRegressorParams {
 
   // Override parameter setters from parent trait for Java API compatibility.
 
   // Parameters from TreeRegressorParams:
 
-  override def setMaxDepth(maxDepth: Int): RandomForestRegressor = super.setMaxDepth(maxDepth)
+  override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
 
-  override def setMaxBins(maxBins: Int): RandomForestRegressor = super.setMaxBins(maxBins)
+  override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
 
-  override def setMinInstancesPerNode(minInstancesPerNode: Int): RandomForestRegressor =
-    super.setMinInstancesPerNode(minInstancesPerNode)
+  override def setMinInstancesPerNode(value: Int): this.type =
+    super.setMinInstancesPerNode(value)
 
-  override def setMinInfoGain(minInfoGain: Double): RandomForestRegressor =
-    super.setMinInfoGain(minInfoGain)
+  override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
 
-  override def setMaxMemoryInMB(maxMemoryInMB: Int): RandomForestRegressor =
-    super.setMaxMemoryInMB(maxMemoryInMB)
+  override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
 
-  override def setCacheNodeIds(cacheNodeIds: Boolean): RandomForestRegressor =
-    super.setCacheNodeIds(cacheNodeIds)
+  override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
 
-  override def setCheckpointInterval(checkpointInterval: Int): RandomForestRegressor =
-    super.setCheckpointInterval(checkpointInterval)
+  override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
 
-  override def setImpurity(impurity: String): RandomForestRegressor =
-    super.setImpurity(impurity)
+  override def setImpurity(value: String): this.type = super.setImpurity(value)
 
   // Parameters from TreeEnsembleParams:
 
-  override def setSubsamplingRate(subsamplingRate: Double): RandomForestRegressor =
-    super.setSubsamplingRate(subsamplingRate)
+  override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
 
-  override def setSeed(seed: Long): RandomForestRegressor = super.setSeed(seed)
+  override def setSeed(value: Long): this.type = super.setSeed(value)
 
   // Parameters from RandomForestParams:
 
-  override def setNumTrees(numTrees: Int): RandomForestRegressor = super.setNumTrees(numTrees)
+  override def setNumTrees(value: Int): this.type = super.setNumTrees(value)
 
-  override def setFeaturesPerNode(featuresPerNode: String): RandomForestRegressor =
-    super.setFeaturesPerNode(featuresPerNode)
+  override def setFeaturesPerNode(value: String): this.type = super.setFeaturesPerNode(value)
 
-  override def run(
-      input: RDD[LabeledPoint],
-      categoricalFeatures: Map[Int, Int]): RandomForestRegressionModel = {
+  override protected def train(
+      dataset: DataFrame,
+      paramMap: ParamMap): RandomForestRegressionModel = {
+    val categoricalFeatures: Map[Int, Int] =
+      MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol)))
+    val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap)
     val strategy = getOldStrategy(categoricalFeatures)
     val oldModel = OldRandomForest.trainRegressor(
-      input, strategy, getNumTrees, getFeaturesPerNodeStr, getSeed.toInt)
-    RandomForestRegressionModel.fromOld(oldModel)
+      oldDataset, strategy, getNumTrees, getFeaturesPerNodeStr, getSeed.toInt)
+    RandomForestRegressionModel.fromOld(oldModel, this, paramMap, categoricalFeatures)
   }
 
-  /**
-   * Create a Strategy instance to use with the old API.
-   * TODO: Make this protected once we deprecate the old API.
-   */
-  private[mllib] def getOldStrategy(categoricalFeatures: Map[Int, Int]): OldStrategy = {
-    val strategy = super.getOldStrategy(categoricalFeatures, numClasses = 0)
-    strategy.algo = OldAlgo.Regression
-    strategy.impurity = getOldImpurity
-    strategy
+  /** (private[ml]) Create a Strategy instance to use with the old API. */
+  private[ml] def getOldStrategy(categoricalFeatures: Map[Int, Int]): OldStrategy = {
+    super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity)
   }
 }
 
 object RandomForestRegressor {
-
   /** Accessor for supported impurity settings */
   final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
 
@@ -107,51 +103,66 @@ object RandomForestRegressor {
 }
 
 /**
+ * :: AlphaComponent ::
+ *
  * [[http://en.wikipedia.org/wiki/Random_forest  Random Forest]] model for regression.
  * It supports both continuous and categorical features.
  * @param trees  Decision trees in the ensemble.
  */
-class RandomForestRegressionModel(val trees: Array[DecisionTreeRegressionModel])
-  extends TreeEnsembleModel with Serializable with Saveable {
+@AlphaComponent
+final class RandomForestRegressionModel private[ml] (
+    override val parent: RandomForestRegressor,
+    override val fittingParamMap: ParamMap,
+    val trees: Array[DecisionTreeRegressionModel])
+  extends PredictionModel[Vector, RandomForestRegressionModel]
+  with TreeEnsembleModel with Serializable {
 
   require(numTrees > 0, "RandomForestRegressionModel requires at least 1 tree.")
 
   override def getTrees: Array[DecisionTreeModel] = trees.asInstanceOf[Array[DecisionTreeModel]]
 
+  // Note: We may add support for weights (based on tree performance) later on.
   override lazy val getTreeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)
 
   override def predict(features: Vector): Double = {
+    // TODO: Override transform() to broadcast model.
+    // TODO: When we add a generic Bagging class, handle transform there. Skip single-Row predict.
     // Predict average of tree predictions.
     // Ignore the weights since all are 1.0 for now.
     trees.map(_.predict(features)).sum / numTrees
   }
 
-  override def toString: String = {
-    s"RandomForestRegressionModel with $numTrees trees"
+  override protected def copy(): RandomForestRegressionModel = {
+    val m = new RandomForestRegressionModel(parent, fittingParamMap, trees)
+    Params.inheritValues(this.extractParamMap(), this, m)
+    m
   }
 
-  override def save(sc: SparkContext, path: String): Unit = {
-    this.toOld.save(sc, path)
+  override def toString: String = {
+    s"RandomForestRegressionModel with $numTrees trees"
   }
 
-  override protected def formatVersion: String = OldRandomForestModel.formatVersion
-
-  /** Convert to a model in the old API */
-  private[mllib] def toOld: OldRandomForestModel = {
+  /** (private[ml]) Convert to a model in the old API */
+  private[ml] def toOld: OldRandomForestModel = {
     new OldRandomForestModel(OldAlgo.Regression, trees.map(_.toOld))
   }
 }
 
-object RandomForestRegressionModel extends Loader[RandomForestRegressionModel] {
+private[ml] object RandomForestRegressionModel {
 
-  override def load(sc: SparkContext, path: String): RandomForestRegressionModel = {
-    RandomForestRegressionModel.fromOld(OldRandomForestModel.load(sc, path))
-  }
-
-  private[mllib] def fromOld(oldModel: OldRandomForestModel): RandomForestRegressionModel = {
+  /** (private[ml]) Convert a model from the old API */
+  def fromOld(
+      oldModel: OldRandomForestModel,
+      parent: RandomForestRegressor,
+      fittingParamMap: ParamMap,
+      categoricalFeatures: Map[Int, Int]): RandomForestRegressionModel = {
     require(oldModel.algo == OldAlgo.Regression,
       s"Cannot convert non-regression RandomForestModel (old API) to" +
         s" RandomForestRegressionModel (new API).  Algo is: ${oldModel.algo}")
-    new RandomForestRegressionModel(oldModel.trees.map(DecisionTreeRegressionModel.fromOld))
+    val trees = oldModel.trees.map { tree =>
+      // parent and fittingParamMap are null for each tree since there is no good way to set them.
+      DecisionTreeRegressionModel.fromOld(tree, null, null, categoricalFeatures)
+    }
+    new RandomForestRegressionModel(parent, fittingParamMap, trees)
   }
 }
@@ -72,7 +72,7 @@ trait TreeEnsembleModel {
   // Note: We use getTrees since subclasses of TreeEnsembleModel will store subclasses of
   //       DecisionTreeModel.
 
-  /** Trees in this ensemble */
+  /** Trees in this ensemble. Warning: These have null parent Estimators. */
   def getTrees: Array[DecisionTreeModel]
 
   /** Weights for each tree, zippable with [[getTrees]] */
 
@@ -17,7 +17,6 @@
 
 package org.apache.spark.ml.classification;
 
-import java.io.File;
 import java.io.Serializable;
 import java.util.HashMap;
 import java.util.Map;
@@ -32,7 +31,6 @@
 import org.apache.spark.mllib.classification.LogisticRegressionSuite;
 import org.apache.spark.mllib.regression.LabeledPoint;
 import org.apache.spark.sql.DataFrame;
-import org.apache.spark.util.Utils;
 
 
 public class JavaDecisionTreeClassifierSuite implements Serializable {
Original file line number	Diff line number	Diff line change
`@@ -113,7 +113,7 @@ final class DecisionTreeClassificationModel private[ml] (`
`113`	`113`	`require(rootNode != null,`
`114`	`114`	`"DecisionTreeClassificationModel given null rootNode, but it requires a non-null rootNode.")`
`115`	`115`
`116`		`- override protected def predict(features: Vector): Double = {`
	`116`	`+ override private[ml] def predict(features: Vector): Double = {`
`117`	`117`	`rootNode.predict(features)`
`118`	`118`	`}`
`119`	`119`
Original file line number	Diff line number	Diff line change
`@@ -104,7 +104,7 @@ final class DecisionTreeRegressionModel private[ml] (`
`104`	`104`	`require(rootNode != null,`
`105`	`105`	`"DecisionTreeClassificationModel given null rootNode, but it requires a non-null rootNode.")`
`106`	`106`
`107`		`- override protected def predict(features: Vector): Double = {`
	`107`	`+ override private[ml] def predict(features: Vector): Double = {`
`108`	`108`	`rootNode.predict(features)`
`109`	`109`	`}`
`110`	`110`