Skip to content

Commit b0b4b9e

Browse files
committed
OOM
1 parent 21be94b commit b0b4b9e

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
530530
case s: String => JString(s)
531531
case u: UUID => JString(u.toString)
532532
case dt: DataType => dt.jsonValue
533-
case m: Metadata => m.jsonValue
533+
// SPARK-17356: When used by MLlib, Metadata may store a huge vector of data; transforming
534+
// it to JSON may trigger OutOfMemoryError.
535+
case m: Metadata => Metadata.empty.jsonValue
534536
case s: StorageLevel =>
535537
("useDisk" -> s.useDisk) ~ ("useMemory" -> s.useMemory) ~ ("useOffHeap" -> s.useOffHeap) ~
536538
("deserialized" -> s.deserialized) ~ ("replication" -> s.replication)

sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions._
3030
import org.apache.spark.sql.execution.columnar.InMemoryRelation
3131
import org.apache.spark.sql.execution.datasources.LogicalRelation
3232
import org.apache.spark.sql.execution.{LogicalRDD, Queryable}
33+
import org.apache.spark.sql.types.Metadata
3334

3435
abstract class QueryTest extends PlanTest {
3536

@@ -224,6 +225,13 @@ abstract class QueryTest extends PlanTest {
224225
val normalized1 = logicalPlan.transformAllExpressions {
225226
case udf: ScalaUDF => udf.copy(function = null)
226227
case gen: UserDefinedGenerator => gen.copy(function = null)
228+
// After SPARK-17356: the JSON representation no longer has the Metadata. We need to remove
229+
// the Metadata from the normalized plan so that we can compare this plan with the
230+
// JSON-deserialized plan.
231+
case a @ Alias(child, name) if a.explicitMetadata.isDefined =>
232+
Alias(child, name)(a.exprId, a.qualifiers, Some(Metadata.empty))
233+
case a: AttributeReference if a.metadata != Metadata.empty =>
234+
AttributeReference(a.name, a.dataType, a.nullable, Metadata.empty)(a.exprId, a.qualifiers)
227235
}
228236

229237
// RDDs/data are not serializable to JSON, so we need to collect LogicalPlans that contains

0 commit comments

Comments
 (0)