[SPARK-8186] [SPARK-8187] [SQL] datetime function: date_add, date_sub #6782

Closed
wants to merge 7 commits
Changes from 1 commit
remove datediff because datediff uses UTC time; do that in another PR
adrian-wang committed Jun 24, 2015
commit a6243f76862355b8088f76addae3d21318b8eb43
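For context, a sketch of the pitfall the commit message refers to (my illustration, not code from this PR): converting the same instant to a day count gives a different calendar day depending on the time zone applied, so a datediff built on a fixed UTC conversion can disagree with the session time zone by a day at day boundaries.

import java.util.TimeZone

// 2015-06-23 23:00:00 UTC, in epoch milliseconds
val millis = 1435100400000L

// Day count since the Unix epoch, as seen from a given time zone
def daysSinceEpoch(ms: Long, tz: TimeZone): Long =
  (ms + tz.getOffset(ms)) / (24L * 60 * 60 * 1000)

daysSinceEpoch(millis, TimeZone.getTimeZone("UTC"))        // 16609 (Jun 23)
daysSinceEpoch(millis, TimeZone.getTimeZone("Asia/Tokyo")) // 16610 (Jun 24)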
@@ -156,8 +156,7 @@ object FunctionRegistry {
 
     // datetime functions
     expression[DateAdd]("date_add"),
-    expression[DateSub]("date_sub"),
-    expression[DateDiff]("datediff")
+    expression[DateSub]("date_sub")
   )
 
   val builtin: FunctionRegistry = {
@@ -17,65 +17,99 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.types._
+import java.util.{Calendar, TimeZone}
 
-case class DateAdd(left: Expression, right: Expression)
-  extends BinaryExpression with ExpectsInputTypes {
-
-  override def children: Seq[Expression] = left :: right :: Nil
-  override def expectedChildTypes: Seq[DataType] = DateType :: IntegerType :: Nil
-
-  override def dataType: DataType = DateType
-
-  override def toString: String = s"DateAdd($left, $right)"
-
-  override def eval(input: Row): Any = {
-    val startDate = left.eval(input)
-    val days = right.eval(input)
-    if (startDate == null || days == null) {
-      null
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.util.DateUtils
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+case class DateAdd(startDate: Expression, days: Expression) extends Expression {
Contributor comment: I think it'd be helpful to define the semantics here (basically what eval is doing).
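For illustration, a doc comment along these lines would pin down what eval below actually does (a sketch in my words, not text from the PR):

  /**
   * Adds a number of days to startDate, returning the result as a
   * "yyyy-MM-dd" date string.
   *
   * startDate may be a string, a timestamp, or a date; eval normalizes it
   * to a count of days since the Unix epoch, adds the (possibly negative)
   * days offset, and renders the sum back as a date string. If either
   * child evaluates to null, the result is null.
   */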

+  override def children: Seq[Expression] = startDate :: days :: Nil
+
+  override def foldable: Boolean = startDate.foldable && days.foldable
+  override def nullable: Boolean = startDate.nullable || days.nullable
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    val supportedLeftType = Seq(StringType, DateType, TimestampType, NullType)
+    if (!supportedLeftType.contains(startDate.dataType)) {
+      TypeCheckResult.TypeCheckFailure(
+        s"type of startdate expression in DateAdd should be string/timestamp/date," +
+          s" not ${startDate.dataType}")
+    } else if (days.dataType != IntegerType && days.dataType != NullType) {
Contributor comment: Could we also support integral types here for days? For example, we always infer integer from Python and Json as LongType.
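A minimal sketch of that relaxation, assuming the check simply enumerates the integral types (my illustration, not code from this commit; eval would then need to widen via Number#intValue rather than asInstanceOf[Int]):

    } else if (!Seq(ByteType, ShortType, IntegerType, LongType, NullType)
        .contains(days.dataType)) {
      // hypothetical: accept any integral type for the days offset
      TypeCheckResult.TypeCheckFailure(
        s"type of days expression in DateAdd should be an integral type," +
          s" not ${days.dataType}.")
    } else {
      TypeCheckResult.TypeCheckSuccess
    }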

+      TypeCheckResult.TypeCheckFailure(
+        s"type of days expression in DateAdd should be int, not ${days.dataType}.")
     } else {
-      startDate.asInstanceOf[Int] + days.asInstanceOf[Int]
+      TypeCheckResult.TypeCheckSuccess
     }
   }
-}
 
-case class DateSub(left: Expression, right: Expression)
-  extends BinaryExpression with ExpectsInputTypes {
-
-  override def expectedChildTypes: Seq[DataType] = DateType :: IntegerType :: Nil
+  override def dataType: DataType = StringType
 
-  override def dataType: DataType = DateType
-
-  override def toString: String = s"DateSub($left, $right)"
+  override def toString: String = s"DateAdd($startDate, $days)"
 
   override def eval(input: Row): Any = {
-    val startDate = left.eval(input)
-    val days = right.eval(input)
-    if (startDate == null || days == null) {
+    val start = startDate.eval(input)
+    val d = days.eval(input)
+    if (start == null || d == null) {
       null
     } else {
-      startDate.asInstanceOf[Int] - days.asInstanceOf[Int]
+      val offset = d.asInstanceOf[Int]
+      val resultDays = startDate.dataType match {
+        case StringType =>
+          DateUtils.millisToDays(DateUtils.stringToTime(
+            start.asInstanceOf[UTF8String].toString).getTime) + offset
+        case TimestampType =>
+          DateUtils.millisToDays(DateUtils.toJavaTimestamp(
+            start.asInstanceOf[Long]).getTime) + offset
+        case DateType => start.asInstanceOf[Int] + offset
+      }
+      UTF8String.fromString(DateUtils.toString(resultDays))
     }
   }
 }
 
-case class DateDiff(left: Expression, right: Expression)
-  extends BinaryExpression with ExpectsInputTypes {
-
-  override def expectedChildTypes: Seq[DataType] = DateType :: DateType :: Nil
+case class DateSub(startDate: Expression, days: Expression) extends Expression {
+  override def children: Seq[Expression] = startDate :: days :: Nil
+
+  override def foldable: Boolean = startDate.foldable && days.foldable
+  override def nullable: Boolean = startDate.nullable || days.nullable
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    val supportedLeftType = Seq(StringType, DateType, TimestampType, NullType)
+    if (!supportedLeftType.contains(startDate.dataType)) {
+      TypeCheckResult.TypeCheckFailure(
+        s"type of startdate expression in DateSub should be string/timestamp/date," +
+          s" not ${startDate.dataType}")
+    } else if (days.dataType != IntegerType && days.dataType != NullType) {
+      TypeCheckResult.TypeCheckFailure(
+        s"type of days expression in DateSub should be int, not ${days.dataType}.")
+    } else {
+      TypeCheckResult.TypeCheckSuccess
+    }
+  }
 
-  override def dataType: DataType = IntegerType
+  override def dataType: DataType = StringType
 
-  override def toString: String = s"DateDiff($left, $right)"
+  override def toString: String = s"DateSub($startDate, $days)"
 
   override def eval(input: Row): Any = {
-    val startDate = left.eval(input)
-    val endDate = right.eval(input)
-    if (startDate == null || endDate == null) {
+    val start = startDate.eval(input)
+    val d = days.eval(input)
+    if (start == null || d == null) {
       null
     } else {
-      startDate.asInstanceOf[Int] - endDate.asInstanceOf[Int]
+      val offset = d.asInstanceOf[Int]
+      val resultDays = startDate.dataType match {
+        case StringType =>
+          DateUtils.millisToDays(DateUtils.stringToTime(
+            start.asInstanceOf[UTF8String].toString).getTime) - offset
+        case TimestampType =>
+          DateUtils.millisToDays(DateUtils.toJavaTimestamp(
+            start.asInstanceOf[Long]).getTime) - offset
+        case DateType => start.asInstanceOf[Int] - offset
+      }
+      UTF8String.fromString(DateUtils.toString(resultDays))
     }
   }
 }
@@ -17,43 +17,39 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import java.sql.Date
+import java.sql.{Date, Timestamp}
 
 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.util.DateUtils
+import org.apache.spark.sql.types.IntegerType
 
 class DatetimeFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("date_add") {
     checkEvaluation(
       DateAdd(Literal(Date.valueOf("2016-02-28")), Literal(1)),
-      DateUtils.fromJavaDate(Date.valueOf("2016-02-29")), create_row(null))
+      "2016-02-29", Row.empty)
     checkEvaluation(
-      DateAdd(Literal(Date.valueOf("2016-03-01")), Literal(-1)),
-      DateUtils.fromJavaDate(Date.valueOf("2016-02-29")), create_row(null))
-    checkEvaluation(DateAdd(Literal(null), Literal(-1)), null, create_row(null))
+      DateAdd(Literal("2016-03-01"), Literal(-1)), "2016-02-29", Row.empty)
+    checkEvaluation(
+      DateAdd(Literal(Timestamp.valueOf("2016-03-01 23:59:59")), Literal(-2)),
+      "2016-02-28", Row.empty)
+    checkEvaluation(
+      DateAdd(Literal("2016-03-01 23:59:59"), Literal(-3)),
+      "2016-02-27", Row.empty)
+    checkEvaluation(DateAdd(Literal(null), Literal(-1)), null, Row.empty)
   }
 
   test("date_sub") {
     checkEvaluation(
-      DateSub(Literal(Date.valueOf("2015-01-01")), Literal(1)),
-      DateUtils.fromJavaDate(Date.valueOf("2014-12-31")), create_row(null))
+      DateSub(Literal("2015-01-01"), Literal(1)), "2014-12-31", Row.empty)
     checkEvaluation(
-      DateSub(Literal(Date.valueOf("2015-01-01")), Literal(-1)),
-      DateUtils.fromJavaDate(Date.valueOf("2015-01-02")), create_row(null))
+      DateSub(Literal(Date.valueOf("2015-01-01")), Literal(-1)), "2015-01-02", Row.empty)
     checkEvaluation(
-      DateSub(Literal(Date.valueOf("2015-01-01")), Literal(null)), null, create_row(null))
-  }
-
-  test("date_diff") {
+      DateSub(Literal(Timestamp.valueOf("2015-01-01 01:00:00")), Literal(-1)),
+      "2015-01-02", Row.empty)
     checkEvaluation(
-      DateDiff(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-01-01"))),
-      0, create_row(null))
+      DateSub(Literal("2015-01-01 01:00:00"), Literal(0)), "2015-01-01", Row.empty)
     checkEvaluation(
-      DateDiff(Literal(Date.valueOf("2015-06-01")), Literal(Date.valueOf("2015-06-12"))),
-      -11, create_row(null))
-    checkEvaluation(
-      DateDiff(Literal(Date.valueOf("2015-06-01")), Literal(Date.valueOf("2015-05-31"))),
-      1, create_row(null))
-    checkEvaluation(DateDiff(Literal(null), Literal(null)), null, create_row(null))
+      DateSub(Literal("2015-01-01"), Literal.create(null, IntegerType)), null, Row.empty)
   }
 
 }
18 changes: 0 additions & 18 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1021,24 +1021,6 @@ object functions {
    */
   def date_sub(startDate: String, days: String): Column = date_sub(Column(startDate), Column(days))
 
-  /**
-   * Returns the number of days from startDate to endDate, given values for startDate and days.
-   *
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def datediff(startDate: Column, endDate: Column): Column = DateDiff(startDate.expr, endDate.expr)
-
-  /**
-   * Returns the number of days from startDate to endDate, given column names for startDate and
-   * days.
-   *
-   * @group datetime_funcs
-   * @since 1.5.0
-   */
-  def datediff(startDate: String, endDate: String): Column =
-    datediff(Column(startDate), Column(endDate))
-
   /**
    * Returns the double value that is closer than any other to e, the base of the natural
    * logarithms.
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql
 
-import java.sql.Date
+import java.sql.{Date, Timestamp}
 
 import org.apache.spark.sql.functions._
 import org.scalatest.BeforeAndAfterAll
@@ -39,40 +39,29 @@ class DatetimeExpressionsSuite extends QueryTest with BeforeAndAfterAll {
   test("function date_add") {
     checkAnswer(
       df.select(date_add("day", "num")),
-      Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-05"))))
+      Seq(Row("2015-06-02"), Row("2015-06-05")))
     checkAnswer(
      df.select(date_add(column("day"), lit(null))).limit(1), Row(null))
 
-    checkAnswer(ctx.sql("""SELECT DATE_ADD("2015-06-12", -1)"""), Row(Date.valueOf("2015-06-11")))
+    checkAnswer(ctx.sql("""SELECT DATE_ADD("2015-06-12", -1)"""), Row("2015-06-11"))
     checkAnswer(ctx.sql("SELECT DATE_ADD(null, 1)"), Row(null))
     checkAnswer(
       ctx.sql("""SELECT DATE_ADD(day, 11) FROM dttable LIMIT 1"""),
-      Row(Date.valueOf("2015-06-12")))
+      Row("2015-06-12"))
   }
 
   test("function date_sub") {
     checkAnswer(
       df.select(date_sub("day", "num")),
-      Seq(Row(Date.valueOf("2015-05-31")), Row(Date.valueOf("2015-05-30"))))
+      Seq(Row("2015-05-31"), Row("2015-05-30")))
     checkAnswer(
       df.select(date_sub(lit(null), column("num"))).limit(1), Row(null))
 
-    checkAnswer(ctx.sql("""SELECT DATE_SUB("2015-06-12", 31)"""), Row(Date.valueOf("2015-05-12")))
+    checkAnswer(ctx.sql("""SELECT DATE_SUB("2015-06-12 14:00:00", 31)"""), Row("2015-05-12"))
    checkAnswer(ctx.sql("""SELECT DATE_SUB("2015-06-12", null)"""), Row(null))
     checkAnswer(
       ctx.sql("""SELECT DATE_SUB(day, num) FROM dttable LIMIT 1"""),
-      Row(Date.valueOf("2015-05-31")))
+      Row("2015-05-31"))
   }
-
-  test("function datediff") {
-    checkAnswer(
-      df.select(datediff(lit("2015-06-12"), lit("2011-06-12"))).limit(1), Row(1461))
-    checkAnswer(df.select(datediff(lit(null), lit(null))).limit(1), Row(null))
-
-    checkAnswer(ctx.sql("""SELECT DATEDIFF("2015-06-12", "2014-06-12")"""), Row(365))
-    checkAnswer(ctx.sql("""SELECT DATEDIFF("2015-06-12", null)"""), Row(null))
-    checkAnswer(
-      ctx.sql("""SELECT DATEDIFF(day, "2015-06-01") FROM dttable LIMIT 1"""),
-      Row(0))
-  }
 }
}