
Commit 43b2a63

pierre-borckmans authored and rxin committed
[SPARK-12477][SQL] - Tungsten projection fails for null values in array fields
Accessing null elements in an array field fails when Tungsten is enabled. It works in Spark 1.3.1, and in Spark > 1.5 with Tungsten disabled. This PR fixes the problem by checking, in the generated code, whether the accessed element of the array field is null.

Example:

```
// Array of String
case class AS( as: Seq[String] )
val dfAS = sc.parallelize( Seq( AS ( Seq("a", null, "b") ) ) ).toDF
dfAS.registerTempTable("T_AS")
for (i <- 0 to 2) { println(i + " = " + sqlContext.sql(s"select as[$i] from T_AS").collect.mkString(",")) }
```

With Tungsten disabled:

```
0 = [a]
1 = [null]
2 = [b]
```

With Tungsten enabled:

```
0 = [a]
15/12/22 09:32:50 ERROR Executor: Exception in task 7.0 in stage 1.0 (TID 15)
java.lang.NullPointerException
	at org.apache.spark.sql.catalyst.expressions.UnsafeRowWriters$UTF8StringWriter.getSize(UnsafeRowWriters.java:90)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
	at org.apache.spark.sql.execution.TungstenProject$$anonfun$3$$anonfun$apply$3.apply(basicOperators.scala:90)
	at org.apache.spark.sql.execution.TungstenProject$$anonfun$3$$anonfun$apply$3.apply(basicOperators.scala:88)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
	at scala.collection.Iterator$class.foreach(Iterator.scala:727)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
```

Author: pierre-borckmans <pierre.borckmans@realimpactanalytics.com>

Closes apache#10429 from pierre-borckmans/SPARK-12477_Tungsten-Projection-Null-Element-In-Array.
1 parent 50301c0 commit 43b2a63

File tree

2 files changed: 10 additions & 1 deletion


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -227,7 +227,7 @@ case class GetArrayItem(child: Expression, ordinal: Expression)
     nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
       s"""
         final int index = (int) $eval2;
-        if (index >= $eval1.numElements() || index < 0) {
+        if (index >= $eval1.numElements() || index < 0 || $eval1.isNullAt(index)) {
           ${ev.isNull} = true;
         } else {
           ${ev.value} = ${ctx.getValue(eval1, dataType, "index")};
```
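For readers who don't want to parse the generated Java string, here is a minimal Scala sketch of the guard the codegen now emits for `GetArrayItem`: an access is treated as null when the index is out of bounds or when the array slot itself is null. The helper name `getArrayItemOrNull` is hypothetical; this is an illustration against Spark's `ArrayData` API, not the actual generated class.

```scala
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types.DataType

// Illustration only: mirrors the bounds-and-null guard emitted by
// GetArrayItem's nullSafeCodeGen after this change.
def getArrayItemOrNull(arr: ArrayData, index: Int, dataType: DataType): Any = {
  if (index >= arr.numElements() || index < 0 || arr.isNullAt(index)) {
    null // corresponds to ev.isNull = true in the generated code
  } else {
    arr.get(index, dataType) // corresponds to ctx.getValue(...) in the generated code
  }
}
```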

sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala

Lines changed: 9 additions & 0 deletions
```diff
@@ -43,4 +43,13 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSQLContext {
     val df = sparkContext.parallelize(Seq((1, 1))).toDF("a", "b")
     df.select(array($"a").as("s")).select(f(expr("s[0]"))).collect()
   }
+
+  test("SPARK-12477 accessing null element in array field") {
+    val df = sparkContext.parallelize(Seq((Seq("val1", null, "val2"),
+      Seq(Some(1), None, Some(2))))).toDF("s", "i")
+    val nullStringRow = df.selectExpr("s[1]").collect()(0)
+    assert(nullStringRow == org.apache.spark.sql.Row(null))
+    val nullIntRow = df.selectExpr("i[1]").collect()(0)
+    assert(nullIntRow == org.apache.spark.sql.Row(null))
+  }
 }
```
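As a sanity check, the reproduction from the commit message can be rerun end to end; with this fix the Tungsten-enabled path should match the Tungsten-disabled output (`0 = [a]`, `1 = [null]`, `2 = [b]`). The sketch below assumes a Spark 1.x spark-shell session with its standard `sc` and `sqlContext` bindings, as in the example above.

```scala
// Run in a Spark 1.x spark-shell; sc and sqlContext are the shell's
// predefined bindings, and toDF needs the sqlContext implicits.
import sqlContext.implicits._

case class AS(as: Seq[String])

val dfAS = sc.parallelize(Seq(AS(Seq("a", null, "b")))).toDF
dfAS.registerTempTable("T_AS")

// Expected with the fix (same as the Tungsten-disabled output):
//   0 = [a]
//   1 = [null]
//   2 = [b]
for (i <- 0 to 2) {
  println(i + " = " + sqlContext.sql(s"select as[$i] from T_AS").collect.mkString(","))
}
```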
