[SPARK-31892][SQL][FOLLOWUP][test-java11] Improve test coverage for datetime pattern with formatter functions #28718

Closed · wants to merge 11 commits

Changes from all commits
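The diff below removes the default values of the isParsing flag from DateFormatter, TimestampFormatter and DateTimeFormatterHelper, so every call site must state whether a pattern is used for parsing or for formatting. As a minimal sketch of the resulting call sites, assuming access to Spark's internal org.apache.spark.sql.catalyst.util classes (not a public API; the object name below is made up for illustration):

import java.time.ZoneId

import org.apache.spark.sql.catalyst.util.TimestampFormatter

// Hypothetical demo object; it mirrors the call-site shape used throughout this PR.
object IsParsingCallSites {
  def main(args: Array[String]): Unit = {
    val utc = ZoneId.of("UTC")

    // Formatting path: the pattern is converted with isParsing = false.
    val formatter = TimestampFormatter("yyyy-MM-dd HH:mm:ss", utc, isParsing = false)
    println(formatter.format(0L)) // expected: 1970-01-01 00:00:00

    // Parsing path: the pattern is converted with isParsing = true.
    val parser = TimestampFormatter("yyyy-MM-dd HH:mm:ss", utc, isParsing = true)
    println(parser.parse("1970-01-01 00:00:01")) // expected: 1000000 (microseconds since the epoch)
  }
}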
@@ -46,7 +46,7 @@ class Iso8601DateFormatter(
extends DateFormatter with DateTimeFormatterHelper {

@transient
private lazy val formatter = getOrCreateFormatter(pattern, locale)
private lazy val formatter = getOrCreateFormatter(pattern, locale, isParsing)

@transient
private lazy val legacyFormatter = DateFormatter.getLegacyFormatter(
@@ -132,7 +132,7 @@ object DateFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = true): DateFormatter = {
isParsing: Boolean): DateFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
@@ -166,10 +166,10 @@ object DateFormatter {
}

def apply(format: String, zoneId: ZoneId): DateFormatter = {
getFormatter(Some(format), zoneId)
getFormatter(Some(format), zoneId, isParsing = false)
}

def apply(zoneId: ZoneId): DateFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}
}
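For reference, a minimal sketch of what the DateFormatter entry points above now imply for callers. It assumes code compiled alongside the internal org.apache.spark.sql.catalyst.util package, and the object name is hypothetical:

import java.time.ZoneId

import org.apache.spark.sql.catalyst.util.{DateFormatter, LegacyDateFormats}

object DateFormatterEntryPoints {
  def main(args: Array[String]): Unit = {
    val utc = ZoneId.of("UTC")

    // The two-argument apply now hard-codes isParsing = false instead of
    // relying on a default argument, so it is formatting-oriented.
    val forFormatting = DateFormatter("yyyy-MM-dd", utc)
    println(forFormatting.format(0)) // expected: 1970-01-01

    // Callers that parse have to say so explicitly, as the new tests below do.
    val forParsing = DateFormatter(
      "yyyy-MM-dd",
      utc,
      DateFormatter.defaultLocale,
      LegacyDateFormats.SIMPLE_DATE_FORMAT,
      isParsing = true)
    println(forParsing.parse("1970-01-02")) // expected: 1 (days since the epoch)
  }
}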
@@ -97,7 +97,7 @@ trait DateTimeFormatterHelper {
protected def getOrCreateFormatter(
pattern: String,
locale: Locale,
isParsing: Boolean = false): DateTimeFormatter = {
isParsing: Boolean): DateTimeFormatter = {
val newPattern = convertIncompatiblePattern(pattern, isParsing)
val useVarLen = isParsing && newPattern.contains('S')
val key = (newPattern, locale, useVarLen)
@@ -260,7 +260,7 @@ private object DateTimeFormatterHelper {
* @param pattern The input pattern.
* @return The pattern for new parser
*/
def convertIncompatiblePattern(pattern: String, isParsing: Boolean = false): String = {
def convertIncompatiblePattern(pattern: String, isParsing: Boolean): String = {
val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
case (patternPart, index) =>
// Text can be quoted using single quotes, we only check the non-quote parts.
@@ -62,11 +62,11 @@ class Iso8601TimestampFormatter(
zoneId: ZoneId,
locale: Locale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
needVarLengthSecondFraction: Boolean)
isParsing: Boolean)
extends TimestampFormatter with DateTimeFormatterHelper {
@transient
protected lazy val formatter: DateTimeFormatter =
getOrCreateFormatter(pattern, locale, needVarLengthSecondFraction)
getOrCreateFormatter(pattern, locale, isParsing)

@transient
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
@@ -122,7 +122,7 @@ class FractionTimestampFormatter(zoneId: ZoneId)
zoneId,
TimestampFormatter.defaultLocale,
LegacyDateFormats.FAST_DATE_FORMAT,
needVarLengthSecondFraction = false) {
isParsing = false) {

@transient
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter
@@ -293,7 +293,7 @@ object TimestampFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
@@ -340,12 +340,12 @@ object TimestampFormatter {
def apply(
format: String,
zoneId: ZoneId,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
getFormatter(Some(format), zoneId, isParsing = isParsing)
}

def apply(zoneId: ZoneId): TimestampFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}

def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {
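The rename of needVarLengthSecondFraction to isParsing above works because the helper derives the variable-length behaviour itself via useVarLen = isParsing && newPattern.contains('S'). A hedged sketch of the observable difference, reusing the pattern from the suites that follow (the printed values are what the tests suggest, not re-verified here; the object name is made up):

import java.time.ZoneId

import org.apache.spark.sql.catalyst.util.TimestampFormatter

object VarLenFractionSketch {
  def main(args: Array[String]): Unit = {
    val utc = ZoneId.of("UTC")
    val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"

    // Parsing: 'S' in the pattern plus isParsing = true enables a
    // variable-length second fraction, so both inputs should parse.
    val parser = TimestampFormatter(pattern, utc, isParsing = true)
    println(parser.parse("2019-10-14T09:39:07.1"))
    println(parser.parse("2019-10-14T09:39:07.123456"))

    // Formatting: the fraction keeps the fixed width written in the pattern.
    val formatter = TimestampFormatter(pattern, utc, isParsing = false)
    println(formatter.format(parser.parse("2019-10-14T09:39:07.123456")))
    // expected: 2019-10-14T09:39:07.123456
  }
}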
@@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
private val JST_OPT = Option(JST.getId)

def toMillis(timestamp: String): Long = {
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC)
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = false)
DateTimeUtils.microsToMillis(tf.parse(timestamp))
}
val date = "2015-04-08 13:10:15"
@@ -22,6 +22,10 @@ import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._

class DateTimeFormatterHelperSuite extends SparkFunSuite {

private def convertIncompatiblePattern(pattern: String): String = {
DateTimeFormatterHelper.convertIncompatiblePattern(pattern, isParsing = false)
}

test("check incompatible pattern") {
assert(convertIncompatiblePattern("MM-DD-u") === "MM-DD-e")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz")
@@ -42,7 +46,7 @@ class DateTimeFormatterHelperSuite extends SparkFunSuite {
}
unsupportedLettersForParsing.foreach { l =>
val e = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"$l", isParsing = true)
DateTimeFormatterHelper.convertIncompatiblePattern(s"$l", isParsing = true)
}
assert(e.getMessage === s"Illegal pattern character: $l")
assert(convertIncompatiblePattern(s"$l").nonEmpty)
@@ -199,4 +199,50 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
// SparkUpgradeException here.
intercept[SparkUpgradeException](formatter.parse("02-29"))
}

test("Disable week-based date fields and quarter fields for parsing") {

def checkSparkUpgrade(c: Char): Unit = {
intercept[SparkUpgradeException] {
DateFormatter(
c.toString,
UTC,
DateFormatter.defaultLocale,
LegacyDateFormats.SIMPLE_DATE_FORMAT,
isParsing = true)
}
assert(DateFormatter(
c.toString,
UTC,
DateFormatter.defaultLocale,
LegacyDateFormats.SIMPLE_DATE_FORMAT,
isParsing = false).format(0).nonEmpty)
}

def checkIllegalArg(c: Char): Unit = {
intercept[IllegalArgumentException] {
DateFormatter(
c.toString,
UTC,
DateFormatter.defaultLocale,
LegacyDateFormats.SIMPLE_DATE_FORMAT,
isParsing = true)
}

assert(DateFormatter(
c.toString,
UTC,
DateFormatter.defaultLocale,
LegacyDateFormats.SIMPLE_DATE_FORMAT,
isParsing = false).format(0).nonEmpty)
}

Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l =>
checkSparkUpgrade(l)
}

Seq('q', 'Q').foreach { l =>
checkIllegalArg(l)
}
}
}
@@ -96,7 +96,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
2177456523456789L,
11858049903010203L).foreach { micros =>
outstandingZoneIds.foreach { zoneId =>
val timestamp = TimestampFormatter(pattern, zoneId).format(micros)
val timestamp = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
val parsed = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
assert(micros === parsed)
@@ -120,14 +120,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
val micros = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
val formatted = TimestampFormatter(pattern, zoneId).format(micros)
val formatted = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
assert(timestamp === formatted)
}
}
}

test("case insensitive parsing of am and pm") {
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC)
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC, isParsing = false)
val micros = formatter.parse("2009 Mar 20 11:30:01 am")
assert(micros === date(2009, 3, 20, 11, 30, 1))
}
@@ -157,8 +157,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00")
assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00")
withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros))
=== "0100-01-01 00:00:00 BC")
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC, isParsing = false)
.format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC")
}
}

@@ -209,7 +209,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
"2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1")

try {
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, true)
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, isParsing = true)
.parse("2019/11/14 20#25#30.123456")
fail("Expected to throw an exception for the invalid input")
} catch {
@@ -222,7 +222,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
test("formatting timestamp strings up to microsecond precision") {
outstandingZoneIds.foreach { zoneId =>
def check(pattern: String, input: String, expected: String): Unit = {
val formatter = TimestampFormatter(pattern, zoneId)
val formatter = TimestampFormatter(pattern, zoneId, isParsing = false)
val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get
val actual = formatter.format(timestamp)
assert(actual === expected)
@@ -259,7 +259,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

test("SPARK-30958: parse timestamp with negative year") {
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22))

def assertParsingError(f: => Unit): Unit = {
@@ -272,7 +272,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

// "yyyy" with "G" can't parse negative year or year 0000.
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22"))
assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22"))

@@ -318,7 +318,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
test("parsing hour with various patterns") {
def createFormatter(pattern: String): TimestampFormatter = {
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range.
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
}

withClue("HH") {
@@ -377,41 +377,42 @@
}

test("missing date fields") {
val formatter = TimestampFormatter("HH:mm:ss", UTC)
val formatter = TimestampFormatter("HH:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("11:30:01")
assert(micros === date(1970, 1, 1, 11, 30, 1))
}

test("missing year field with invalid date") {
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date.
val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
val formatter =
TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29")))
}

test("missing am/pm field") {
Seq("HH", "hh", "KK", "kk").foreach { hour =>
val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("2009 11:30:01")
assert(micros === date(2009, 1, 1, 11, 30, 1))
}
}

test("missing time fields") {
val formatter = TimestampFormatter("yyyy HH", UTC)
val formatter = TimestampFormatter("yyyy HH", UTC, isParsing = true)
val micros = formatter.parse("2009 11")
assert(micros === date(2009, 1, 1, 11))
}

test("missing hour field") {
val f1 = TimestampFormatter("mm:ss a", UTC)
val f1 = TimestampFormatter("mm:ss a", UTC, isParsing = true)
val t1 = f1.parse("30:01 PM")
assert(t1 === date(1970, 1, 1, 12, 30, 1))
val t2 = f1.parse("30:01 AM")
assert(t2 === date(1970, 1, 1, 0, 30, 1))
val f2 = TimestampFormatter("mm:ss", UTC)
val f2 = TimestampFormatter("mm:ss", UTC, isParsing = true)
val t3 = f2.parse("30:01")
assert(t3 === date(1970, 1, 1, 0, 30, 1))
val f3 = TimestampFormatter("a", UTC)
val f3 = TimestampFormatter("a", UTC, isParsing = true)
val t4 = f3.parse("PM")
assert(t4 === date(1970, 1, 1, 12))
val t5 = f3.parse("AM")
@@ -421,12 +422,41 @@
test("explicitly forbidden datetime patterns") {
// not supported by the legacy formatter either
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC).format(0))
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC, isParsing = false)
.format(0))
}
// supported by the legacy formatter, so we raise SparkUpgradeException to point users at the legacy policy
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "y" * 11)
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "Y" * 11)
.foreach { pattern =>
intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0))
intercept[SparkUpgradeException] {
TimestampFormatter(pattern, UTC, isParsing = false).format(0)
}
}
}

test("Disable week-based date fields and quarter fields for parsing") {

def checkSparkUpgrade(c: Char): Unit = {
intercept[SparkUpgradeException] {
TimestampFormatter(c.toString, UTC, isParsing = true)
}
assert(TimestampFormatter(c.toString, UTC, isParsing = false).format(0).nonEmpty)
}

def checkIllegalArg(c: Char): Unit = {
intercept[IllegalArgumentException] {
TimestampFormatter(c.toString, UTC, isParsing = true)
}

assert(TimestampFormatter(c.toString, UTC, isParsing = false).format(0).nonEmpty)
}

Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l =>
checkSparkUpgrade(l)
}

Seq('q', 'Q').foreach { l =>
checkIllegalArg(l)
}
}
}
@@ -0,0 +1,36 @@
--- TESTS FOR DATETIME FORMATTING FUNCTIONS WITH INVALID PATTERNS ---

-- separating this from datetime-formatting.sql, because the text form
-- of patterns with 5 letters in SimpleDateFormat varies across JDKs
select date_format('2018-11-17 13:33:33.333', 'GGGGG');
-- pattern letter count cannot be greater than 10
select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy');
select date_format('2018-11-17 13:33:33.333', 'YYYYYYYYYYY');
-- q/L in JDK 8 will fail when the count is more than 2
select date_format('2018-11-17 13:33:33.333', 'qqqqq');
select date_format('2018-11-17 13:33:33.333', 'QQQQQ');
select date_format('2018-11-17 13:33:33.333', 'MMMMM');
select date_format('2018-11-17 13:33:33.333', 'LLLLL');
select date_format('2018-11-17 13:33:33.333', 'www');
select date_format('2018-11-17 13:33:33.333', 'WW');
select date_format('2018-11-17 13:33:33.333', 'uuuuu');
select date_format('2018-11-17 13:33:33.333', 'EEEEE');
select date_format('2018-11-17 13:33:33.333', 'FF');
select date_format('2018-11-17 13:33:33.333', 'ddd');
-- DD is invalid if the day-of-year exceeds 100, but it becomes valid in Java 11
-- select date_format('2018-11-17 13:33:33.333', 'DD');
select date_format('2018-11-17 13:33:33.333', 'DDDD');
select date_format('2018-11-17 13:33:33.333', 'HHH');
select date_format('2018-11-17 13:33:33.333', 'hhh');
select date_format('2018-11-17 13:33:33.333', 'kkk');
select date_format('2018-11-17 13:33:33.333', 'KKK');
select date_format('2018-11-17 13:33:33.333', 'mmm');
select date_format('2018-11-17 13:33:33.333', 'sss');
select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS');
select date_format('2018-11-17 13:33:33.333', 'aa');
select date_format('2018-11-17 13:33:33.333', 'V');
select date_format('2018-11-17 13:33:33.333', 'zzzzz');
select date_format('2018-11-17 13:33:33.333', 'XXXXXX');
select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ');
select date_format('2018-11-17 13:33:33.333', 'OO');
select date_format('2018-11-17 13:33:33.333', 'xxxxxx');
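These queries run under the SQL golden-file framework. As a rough standalone illustration of what they exercise (hypothetical object name; the exact exception type and the point where it surfaces can differ across Spark versions and JDKs):

import scala.util.Try

import org.apache.spark.sql.SparkSession

object InvalidPatternSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("datetime-pattern-sketch")
      .getOrCreate()
    try {
      // A plainly valid pattern formats as usual.
      spark.sql("select date_format('2018-11-17 13:33:33.333', 'yyyy-MM-dd HH:mm:ss')").show(false)

      // 'www' exceeds the letter count the new formatter accepts for 'w',
      // so the query is expected to fail rather than silently fall back.
      val result = Try(spark.sql("select date_format('2018-11-17 13:33:33.333', 'www')").collect())
      println(s"'www' accepted: ${result.isSuccess}") // expected: false
    } finally {
      spark.stop()
    }
  }
}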
@@ -0,0 +1,2 @@
--SET spark.sql.legacy.timeParserPolicy=LEGACY
--IMPORT datetime-formatting.sql