-
Notifications
You must be signed in to change notification settings - Fork 28.6k
[SPARK-32002][SQL]Support ExtractValue from nested ArrayStruct #28860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4339b81
da9a3d5
b8f4a69
400ad7d
b6e92c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,23 @@ object ExtractValue { | |
GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), | ||
ordinal, fields.length, containsNull) | ||
|
||
case (ExtractNestedArray(StructType(fields), containsNull, containsNullSeq), | ||
NonNullLiteral(v, StringType)) => | ||
child match { | ||
case ExtractGetArrayStructField(_, num) if num == containsNullSeq.size => | ||
val fieldName = v.toString | ||
val ordinal = findField(fields, fieldName, resolver) | ||
val row = (0 until num).foldRight(child) { (_, e) => | ||
GetArrayItem(e, Literal(0)) | ||
} | ||
val innerArray = GetArrayStructFields(row, fields(ordinal).copy(name = fieldName), | ||
ordinal, fields.length, containsNull) | ||
containsNullSeq.foldRight(innerArray: Expression) { (_, expr) => | ||
new CreateArray(Seq(expr)) | ||
} | ||
case _ => GetArrayItem(child, extraction) | ||
} | ||
|
||
case (_: ArrayType, _) => GetArrayItem(child, extraction) | ||
|
||
case (MapType(kt, _, _), _) => GetMapValue(child, extraction) | ||
|
@@ -95,6 +112,50 @@ object ExtractValue { | |
|
||
trait ExtractValue extends Expression | ||
|
||
object ExtractNestedArray { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add documentation here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's also name it something like |
||
|
||
type ReturnType = Option[(DataType, Boolean, Seq[Boolean])] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's also add some comments for what this type means. |
||
|
||
def unapply(dataType: DataType): ReturnType = { | ||
extractArrayType(dataType) | ||
} | ||
|
||
def extractArrayType(dataType: DataType): ReturnType = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we combine this and |
||
dataType match { | ||
case ArrayType(dt, containsNull) => | ||
extractArrayType(dt) match { | ||
case Some((d, cn, seq)) => Some(d, cn, containsNull +: seq) | ||
case None => Some(dt, containsNull, Seq.empty[Boolean]) | ||
} | ||
case _ => None | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Extract GetArrayStructField from Expression | ||
*/ | ||
object ExtractGetArrayStructField { | ||
|
||
type ReturnType = Option[(Expression, Int)] | ||
|
||
def unapply(expr: Expression): ReturnType = { | ||
extractArrayStruct(expr) | ||
} | ||
|
||
def extractArrayStruct(expr: Expression): ReturnType = { | ||
expr match { | ||
case gas @ GetArrayStructFields(child, _, _, _, _) => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's avoid arguments matching. This is actually an anti pattern - https://github.com/databricks/scala-style-guide#pattern-matching |
||
extractArrayStruct(child) match { | ||
case Some((e, deep)) => Some(e, deep + 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
case None => Some(child, 1) | ||
} | ||
case _ => None | ||
} | ||
} | ||
} | ||
|
||
|
||
/** | ||
* Returns the value of fields in the Struct `child`. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,8 @@ import java.net.{MalformedURLException, URL} | |
import java.sql.{Date, Timestamp} | ||
import java.util.concurrent.atomic.AtomicBoolean | ||
|
||
import scala.collection.mutable.ArrayBuffer | ||
|
||
import org.apache.spark.{AccumulatorSuite, SparkException} | ||
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} | ||
import org.apache.spark.sql.catalyst.expressions.GenericRow | ||
|
@@ -3521,6 +3523,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark | |
|""".stripMargin), Row(1)) | ||
} | ||
} | ||
|
||
test("SPARK-32002: Support Extract value from nested ArrayStruct") { | ||
withTempView("rows") { | ||
val df = spark.read | ||
.json(Seq( | ||
"""{"a": [{"b": [{"c": [1,2]}]}]}""", | ||
"""{"a": [{"b": [{"c": [1]}, {"c": [2]}]}]}""", | ||
"""{"a":[{}]}""").toDS()) | ||
df.createOrReplaceTempView("nest") | ||
|
||
checkAnswer(sql( | ||
""" | ||
|SELECT a.b.c FROM nest | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add deeper cases? Also, you can simplify the test cases, for example, as below: val df = spark.range(10).select(array(struct(array(struct("id")).alias("col1"))).alias("col0"))
df.selectExpr("col0.col1.id") |
||
""".stripMargin), | ||
Row(ArrayBuffer(ArrayBuffer(ArrayBuffer(1, 2)))) :: | ||
Row(ArrayBuffer(ArrayBuffer(ArrayBuffer(1), ArrayBuffer(2)))) :: | ||
Row(ArrayBuffer(null)) :: Nil) | ||
} | ||
} | ||
} | ||
|
||
case class Foo(bar: Option[String]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's also update the documentation and table above.