
Commit c7265dc

Merge branch 'master' of git://git.apache.org/spark into SPARK-4949

2 parents 42ca528 + e79a7a6
48 files changed: +527 additions, -319 deletions

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 22 additions & 6 deletions
@@ -98,7 +98,13 @@ class DAGScheduler(
 
   private[scheduler] val activeJobs = new HashSet[ActiveJob]
 
-  // Contains the locations that each RDD's partitions are cached on
+  /**
+   * Contains the locations that each RDD's partitions are cached on. This map's keys are RDD ids
+   * and its values are arrays indexed by partition numbers. Each array value is the set of
+   * locations where that RDD partition is cached.
+   *
+   * All accesses to this map should be guarded by synchronizing on it (see SPARK-4454).
+   */
   private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]]
 
   // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with
@@ -183,7 +189,8 @@ class DAGScheduler(
     eventProcessLoop.post(TaskSetFailed(taskSet, reason))
   }
 
-  private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = {
+  private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized {
+    // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times
     if (!cacheLocs.contains(rdd.id)) {
       val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId]
       val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster)
@@ -194,7 +201,7 @@ class DAGScheduler(
     cacheLocs(rdd.id)
   }
 
-  private def clearCacheLocs() {
+  private def clearCacheLocs(): Unit = cacheLocs.synchronized {
     cacheLocs.clear()
   }
 
@@ -1276,17 +1283,26 @@ class DAGScheduler(
   }
 
   /**
-   * Synchronized method that might be called from other threads.
+   * Gets the locality information associated with a partition of a particular RDD.
+   *
+   * This method is thread-safe and is called from both DAGScheduler and SparkContext.
+   *
   * @param rdd whose partitions are to be looked at
   * @param partition to lookup locality information for
   * @return list of machines that are preferred by the partition
   */
  private[spark]
-  def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = synchronized {
+  def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = {
    getPreferredLocsInternal(rdd, partition, new HashSet)
  }
 
-  /** Recursive implementation for getPreferredLocs. */
+  /**
+   * Recursive implementation for getPreferredLocs.
+   *
+   * This method is thread-safe because it only accesses DAGScheduler state through thread-safe
+   * methods (getCacheLocs()); please be careful when modifying this method, because any new
+   * DAGScheduler state accessed by it may require additional synchronization.
+   */
  private def getPreferredLocsInternal(
      rdd: RDD[_],
      partition: Int,

docs/mllib-ensembles.md

Lines changed: 1 addition & 1 deletion
@@ -458,7 +458,7 @@ val (trainingData, testData) = (splits(0), splits(1))
 // The defaultParams for Classification use LogLoss by default.
 val boostingStrategy = BoostingStrategy.defaultParams("Classification")
 boostingStrategy.numIterations = 3 // Note: Use more iterations in practice.
-boostingStrategy.treeStrategy.numClassesForClassification = 2
+boostingStrategy.treeStrategy.numClasses = 2
 boostingStrategy.treeStrategy.maxDepth = 5
 // Empty categoricalFeaturesInfo indicates all features are continuous.
 boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]()
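
The documentation fix tracks a rename in MLlib's tree API: the per-tree strategy field is now `numClasses` rather than `numClassesForClassification`. A minimal sketch of the corrected configuration in use, assuming `trainingData: RDD[LabeledPoint]` is already loaded and `sc` is an existing SparkContext:

import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy

val boostingStrategy = BoostingStrategy.defaultParams("Classification")
boostingStrategy.numIterations = 3            // use more iterations in practice
boostingStrategy.treeStrategy.numClasses = 2  // renamed from numClassesForClassification
boostingStrategy.treeStrategy.maxDepth = 5

val model = GradientBoostedTrees.train(trainingData, boostingStrategy)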

examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ object CrossValidatorExample {
     crossval.setNumFolds(2) // Use 3+ in practice
 
     // Run cross-validation, and choose the best set of parameters.
-    val cvModel = crossval.fit(training.toDF)
+    val cvModel = crossval.fit(training.toDF())
 
     // Prepare test documents, which are unlabeled.
     val test = sc.parallelize(Seq(
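
This and the following example files make the same mechanical change: the implicit RDD-to-DataFrame conversion is now invoked as `toDF()` with explicit parentheses. The conversion also has an overload taking column names, and the examples standardize on the explicit empty-parens call. A minimal sketch of both forms, assuming an existing SparkContext `sc`; the `Record` case class and column names are illustrative only:

import org.apache.spark.sql.SQLContext

case class Record(id: Int, value: String)

val sqlContext = new SQLContext(sc)
import sqlContext.implicits._

val rdd = sc.parallelize((1 to 3).map(i => Record(i, s"val_$i")))
val df1 = rdd.toDF()                             // column names taken from the case class
val df2 = rdd.toDF("record_id", "record_value")  // or supplied explicitly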

examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@ object DeveloperApiExample {
     lr.setMaxIter(10)
 
     // Learn a LogisticRegression model. This uses the parameters stored in lr.
-    val model = lr.fit(training.toDF)
+    val model = lr.fit(training.toDF())
 
     // Prepare test data.
     val test = sc.parallelize(Seq(
@@ -67,7 +67,7 @@ object DeveloperApiExample {
       LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
 
     // Make predictions on test data.
-    val sumPredictions: Double = model.transform(test.toDF)
+    val sumPredictions: Double = model.transform(test.toDF())
       .select("features", "label", "prediction")
       .collect()
       .map { case Row(features: Vector, label: Double, prediction: Double) =>

examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala

Lines changed: 3 additions & 3 deletions
@@ -137,9 +137,9 @@ object MovieLensALS {
       .setRegParam(params.regParam)
       .setNumBlocks(params.numBlocks)
 
-    val model = als.fit(training.toDF)
+    val model = als.fit(training.toDF())
 
-    val predictions = model.transform(test.toDF).cache()
+    val predictions = model.transform(test.toDF()).cache()
 
     // Evaluate the model.
     // TODO: Create an evaluator to compute RMSE.
@@ -158,7 +158,7 @@ object MovieLensALS {
 
     // Inspect false positives.
     predictions.registerTempTable("prediction")
-    sc.textFile(params.movies).map(Movie.parseMovie).toDF.registerTempTable("movie")
+    sc.textFile(params.movies).map(Movie.parseMovie).toDF().registerTempTable("movie")
     sqlContext.sql(
       """
         |SELECT userId, prediction.movieId, title, rating, prediction

examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala

Lines changed: 3 additions & 3 deletions
@@ -58,7 +58,7 @@ object SimpleParamsExample {
       .setRegParam(0.01)
 
     // Learn a LogisticRegression model. This uses the parameters stored in lr.
-    val model1 = lr.fit(training.toDF)
+    val model1 = lr.fit(training.toDF())
     // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
     // we can view the parameters it used during fit().
     // This prints the parameter (name: value) pairs, where names are unique IDs for this
@@ -77,7 +77,7 @@ object SimpleParamsExample {
 
     // Now learn a new model using the paramMapCombined parameters.
     // paramMapCombined overrides all parameters set earlier via lr.set* methods.
-    val model2 = lr.fit(training.toDF, paramMapCombined)
+    val model2 = lr.fit(training.toDF(), paramMapCombined)
     println("Model 2 was fit using parameters: " + model2.fittingParamMap)
 
     // Prepare test data.
@@ -90,7 +90,7 @@ object SimpleParamsExample {
     // LogisticRegression.transform will only use the 'features' column.
     // Note that model2.transform() outputs a 'myProbability' column instead of the usual
     // 'probability' column since we renamed the lr.probabilityCol parameter previously.
-    model2.transform(test.toDF)
+    model2.transform(test.toDF())
       .select("features", "label", "myProbability", "prediction")
       .collect()
       .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>

examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala

Lines changed: 2 additions & 2 deletions
@@ -69,7 +69,7 @@ object SimpleTextClassificationPipeline {
       .setStages(Array(tokenizer, hashingTF, lr))
 
     // Fit the pipeline to training documents.
-    val model = pipeline.fit(training.toDF)
+    val model = pipeline.fit(training.toDF())
 
     // Prepare test documents, which are unlabeled.
     val test = sc.parallelize(Seq(
@@ -79,7 +79,7 @@ object SimpleTextClassificationPipeline {
       Document(7L, "apache hadoop")))
 
     // Make predictions on test documents.
-    model.transform(test.toDF)
+    model.transform(test.toDF())
       .select("id", "text", "probability", "prediction")
       .collect()
       .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>

examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ object DatasetExample {
     println(s"Loaded ${origData.count()} instances from file: ${params.input}")
 
     // Convert input data to DataFrame explicitly.
-    val df: DataFrame = origData.toDF
+    val df: DataFrame = origData.toDF()
     println(s"Inferred schema:\n${df.schema.prettyJson}")
     println(s"Converted to DataFrame with ${df.count()} records")
 

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ object RDDRelation {
     // Importing the SQL context gives access to all the SQL functions and implicit conversions.
     import sqlContext.implicits._
 
-    val df = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF
+    val df = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF()
     // Any RDD containing case classes can be registered as a table. The schema of the table is
     // automatically inferred using scala reflection.
     df.registerTempTable("records")

examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ object HiveFromSpark {
 
     // You can also register RDDs as temporary tables within a HiveContext.
     val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i")))
-    rdd.toDF.registerTempTable("records")
+    rdd.toDF().registerTempTable("records")
 
     // Queries can then join RDD data with data stored in Hive.
     println("Result of SELECT *:")

external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala

Lines changed: 16 additions & 12 deletions
@@ -20,20 +20,21 @@ package org.apache.spark.streaming.kafka
 import java.io.File
 
 import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
 import scala.concurrent.duration._
 import scala.language.postfixOps
 
+import kafka.common.TopicAndPartition
+import kafka.message.MessageAndMetadata
 import kafka.serializer.StringDecoder
 import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll}
-import org.scalatest.concurrent.{Eventually, Timeouts}
+import org.scalatest.concurrent.Eventually
 
-import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.streaming.{Milliseconds, StreamingContext, Time}
-import org.apache.spark.streaming.dstream.{DStream, InputDStream}
+import org.apache.spark.streaming.dstream.DStream
 import org.apache.spark.util.Utils
-import kafka.common.TopicAndPartition
-import kafka.message.MessageAndMetadata
 
 class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
   with BeforeAndAfter with BeforeAndAfterAll with Eventually {
@@ -67,13 +68,14 @@ class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
   }
 
 
-  ignore("basic stream receiving with multiple topics and smallest starting offset") {
+  test("basic stream receiving with multiple topics and smallest starting offset") {
     val topics = Set("basic1", "basic2", "basic3")
     val data = Map("a" -> 7, "b" -> 9)
     topics.foreach { t =>
       createTopic(t)
       sendMessages(t, data)
     }
+    val totalSent = data.values.sum * topics.size
     val kafkaParams = Map(
       "metadata.broker.list" -> s"$brokerAddress",
       "auto.offset.reset" -> "smallest"
@@ -84,7 +86,8 @@ class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
       KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
         ssc, kafkaParams, topics)
     }
-    var total = 0L
+
+    val allReceived = new ArrayBuffer[(String, String)]
 
     stream.foreachRDD { rdd =>
       // Get the offset ranges in the RDD
@@ -104,16 +107,17 @@ class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
       collected.foreach { case (partSize, rangeSize) =>
         assert(partSize === rangeSize, "offset ranges are wrong")
       }
-      total += collected.size // Add up all the collected items
     }
+    stream.foreachRDD { rdd => allReceived ++= rdd.collect() }
     ssc.start()
     eventually(timeout(20000.milliseconds), interval(200.milliseconds)) {
-      assert(total === data.values.sum * topics.size, "didn't get all messages")
+      assert(allReceived.size === totalSent,
+        "didn't get expected number of messages, messages:\n" + allReceived.mkString("\n"))
     }
     ssc.stop()
   }
 
-  ignore("receiving from largest starting offset") {
+  test("receiving from largest starting offset") {
     val topic = "largest"
     val topicPartition = TopicAndPartition(topic, 0)
     val data = Map("a" -> 10)
@@ -158,7 +162,7 @@ class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
   }
 
 
-  ignore("creating stream by offset") {
+  test("creating stream by offset") {
     val topic = "offset"
     val topicPartition = TopicAndPartition(topic, 0)
     val data = Map("a" -> 10)
@@ -204,7 +208,7 @@ class DirectKafkaStreamSuite extends KafkaStreamSuiteBase
   }
 
   // Test to verify the offset ranges can be recovered from the checkpoints
-  ignore("offset recovery") {
+  test("offset recovery") {
     val topic = "recovery"
     createTopic(topic)
     testDir = Utils.createTempDir()
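
For readers unfamiliar with the direct Kafka API, the offset-range check these tests perform relies on the fact that RDDs produced by `createDirectStream` carry the Kafka offsets they cover via `HasOffsetRanges`. A minimal sketch of that access pattern, assuming a `stream` created as in the test above (not a standalone program):

import org.apache.spark.streaming.kafka.{HasOffsetRanges, OffsetRange}

stream.foreachRDD { rdd =>
  // Each RDD from the direct stream exposes the offset ranges it was built from.
  val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  offsetRanges.foreach { range =>
    println(s"${range.topic}-${range.partition}: ${range.fromOffset} -> ${range.untilOffset}")
  }
}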

mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
       sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path))
 
       // Create Parquet data.
-      val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF
+      val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
       dataRDD.saveAsParquetFile(dataPath(path))
     }
 
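
This and the next few MLlib files share the same save path: wrap the model's data in a case class, convert the one-element RDD to a DataFrame, and write it out with `saveAsParquetFile`. A minimal sketch of that pattern outside MLlib; the `ModelData` case class and the output path are illustrative, not from this commit, and `sc` is assumed to be an existing SparkContext:

import org.apache.spark.sql.SQLContext

case class ModelData(weights: Array[Double], intercept: Double)

val sqlContext = new SQLContext(sc)
import sqlContext.implicits._

// One row holding the model's parameters, written as a Parquet file.
val data = ModelData(Array(0.1, 0.2, 0.3), intercept = 0.0)
sc.parallelize(Seq(data), 1).toDF().saveAsParquetFile("/tmp/model-data.parquet")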

mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ private[classification] object GLMClassificationModel {
 
       // Create Parquet data.
       val data = Data(weights, intercept, threshold)
-      sc.parallelize(Seq(data), 1).toDF.saveAsParquetFile(Loader.dataPath(path))
+      sc.parallelize(Seq(data), 1).toDF().saveAsParquetFile(Loader.dataPath(path))
     }
 
     /**

mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ private[regression] object GLMRegressionModel {
 
       // Create Parquet data.
       val data = Data(weights, intercept)
-      val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF
+      val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
       // TODO: repartition with 1 partition after SPARK-5532 gets fixed
       dataRDD.saveAsParquetFile(Loader.dataPath(path))
     }

mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala

Lines changed: 1 addition & 1 deletion
@@ -197,7 +197,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] {
     val nodes = model.topNode.subtreeIterator.toSeq
     val dataRDD: DataFrame = sc.parallelize(nodes)
       .map(NodeData.apply(0, _))
-      .toDF
+      .toDF()
     dataRDD.saveAsParquetFile(Loader.dataPath(path))
   }
 
mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala

Lines changed: 1 addition & 1 deletion
@@ -289,7 +289,7 @@ private[tree] object TreeEnsembleModel {
     // Create Parquet data.
     val dataRDD = sc.parallelize(model.trees.zipWithIndex).flatMap { case (tree, treeId) =>
       tree.topNode.subtreeIterator.toSeq.map(node => NodeData(treeId, node))
-    }.toDF
+    }.toDF()
     dataRDD.saveAsParquetFile(Loader.dataPath(path))
   }
 
mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -358,8 +358,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging {
       .setNumUserBlocks(numUserBlocks)
       .setNumItemBlocks(numItemBlocks)
     val alpha = als.getAlpha
-    val model = als.fit(training.toDF)
-    val predictions = model.transform(test.toDF)
+    val model = als.fit(training.toDF())
+    val predictions = model.transform(test.toDF())
       .select("rating", "prediction")
       .map { case Row(rating: Float, prediction: Float) =>
         (rating.toDouble, prediction.toDouble)

python/pyspark/sql/dataframe.py

Lines changed: 7 additions & 1 deletion
@@ -434,12 +434,18 @@ def unpersist(self, blocking=True):
     def repartition(self, numPartitions):
         """ Return a new :class:`DataFrame` that has exactly `numPartitions`
         partitions.
+
+        >>> df.repartition(10).rdd.getNumPartitions()
+        10
         """
-        return DataFrame(self._jdf.repartition(numPartitions, None), self.sql_ctx)
+        return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
 
     def distinct(self):
         """
         Return a new :class:`DataFrame` containing the distinct rows in this DataFrame.
+
+        >>> df.distinct().count()
+        2L
         """
         return DataFrame(self._jdf.distinct(), self.sql_ctx)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala

Lines changed: 10 additions & 0 deletions
@@ -23,6 +23,16 @@ import org.apache.spark.sql.types._
 
 case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extends UnaryNode {
   def output = projectList.map(_.toAttribute)
+
+  override lazy val resolved: Boolean = {
+    val containsAggregatesOrGenerators = projectList.exists ( _.collect {
+        case agg: AggregateExpression => agg
+        case generator: Generator => generator
+      }.nonEmpty
+    )
+
+    !expressions.exists(!_.resolved) && childrenResolved && !containsAggregatesOrGenerators
+  }
 }
 
 /**
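
One way to read the Catalyst change above: a `Project` whose project list still contains an aggregate or a generator does not report itself as resolved, which leaves the analyzer room to rewrite the plan before later rules treat it as final. A hypothetical query that exercises that path; the `sqlContext`, table name, and column are assumed for illustration and are not part of this commit:

// The sum(...) in the select list must end up under an aggregation operator
// in the analyzed plan rather than staying inside a plain Project.
val totals = sqlContext.sql("SELECT sum(amount) AS total FROM records")
totals.explain(true)   // prints the analyzed and physical plans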
