Skip to content

Commit 0ba8f4b

Browse files
dongjoon-hyun authored and cloud-fan committed
[SPARK-21787][SQL] Support for pushing down filters for DateType in native OrcFileFormat
## What changes were proposed in this pull request?

This PR adds support for pushing down filters for DateType in ORC.

## How was this patch tested?

Passes Jenkins with newly added and updated test cases.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #18995 from dongjoon-hyun/SPARK-21787.
1 parent aa1764b commit 0ba8f4b

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@ private[orc] object OrcFilters {
8282
* Both CharType and VarcharType are cleaned at AstBuilder.
8383
*/
8484
private def isSearchableType(dataType: DataType) = dataType match {
85-
// TODO: SPARK-21787 Support for pushing down filters for DateType in ORC
86-
case BinaryType | DateType => false
85+
case BinaryType => false
8786
case _: AtomicType => true
8887
case _ => false
8988
}

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,30 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
316316
}
317317
}
318318

319+
test("filter pushdown - date") {
320+
val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day =>
321+
Date.valueOf(day)
322+
}
323+
withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
324+
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
325+
326+
checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS)
327+
checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
328+
329+
checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN)
330+
checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
331+
checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
332+
checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN)
333+
334+
checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS)
335+
checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
336+
checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
337+
checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
338+
checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
339+
checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
340+
}
341+
}
342+
319343
test("no filter pushdown - non-supported types") {
320344
implicit class IntToBinary(int: Int) {
321345
def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
@@ -328,11 +352,6 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
328352
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
329353
checkNoFilterPredicate('_1 <=> 1.b)
330354
}
331-
// DateType
332-
val stringDate = "2015-01-01"
333-
withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df =>
334-
checkNoFilterPredicate('_1 === Date.valueOf(stringDate))
335-
}
336355
// MapType
337356
withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
338357
checkNoFilterPredicate('_1.isNotNull)

0 commit comments

Comments
 (0)