Skip to content

Commit 7e59d65

Browse files
committed
[SPARK-31892][SQL][FOLLOWUP] Improve test coverage for valid pattern formatting
1 parent afe95bd commit 7e59d65

File tree

10 files changed

+437
-32
lines changed

10 files changed

+437
-32
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class Iso8601DateFormatter(
4646
extends DateFormatter with DateTimeFormatterHelper {
4747

4848
@transient
49-
private lazy val formatter = getOrCreateFormatter(pattern, locale)
49+
private lazy val formatter = getOrCreateFormatter(pattern, locale, isParsing)
5050

5151
@transient
5252
private lazy val legacyFormatter = DateFormatter.getLegacyFormatter(
@@ -132,7 +132,7 @@ object DateFormatter {
132132
zoneId: ZoneId,
133133
locale: Locale = defaultLocale,
134134
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
135-
isParsing: Boolean = true): DateFormatter = {
135+
isParsing: Boolean): DateFormatter = {
136136
val pattern = format.getOrElse(defaultPattern)
137137
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
138138
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
@@ -166,10 +166,10 @@ object DateFormatter {
166166
}
167167

168168
def apply(format: String, zoneId: ZoneId): DateFormatter = {
169-
getFormatter(Some(format), zoneId)
169+
getFormatter(Some(format), zoneId, isParsing = false)
170170
}
171171

172172
def apply(zoneId: ZoneId): DateFormatter = {
173-
getFormatter(None, zoneId)
173+
getFormatter(None, zoneId, isParsing = false)
174174
}
175175
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ trait DateTimeFormatterHelper {
8989
protected def getOrCreateFormatter(
9090
pattern: String,
9191
locale: Locale,
92-
isParsing: Boolean = false): DateTimeFormatter = {
92+
isParsing: Boolean): DateTimeFormatter = {
9393
val newPattern = convertIncompatiblePattern(pattern, isParsing)
9494
val useVarLen = isParsing && newPattern.contains('S')
9595
val key = (newPattern, locale, useVarLen)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ class Iso8601TimestampFormatter(
6262
zoneId: ZoneId,
6363
locale: Locale,
6464
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
65-
needVarLengthSecondFraction: Boolean)
65+
isParsing: Boolean)
6666
extends TimestampFormatter with DateTimeFormatterHelper {
6767
@transient
6868
protected lazy val formatter: DateTimeFormatter =
69-
getOrCreateFormatter(pattern, locale, needVarLengthSecondFraction)
69+
getOrCreateFormatter(pattern, locale, isParsing)
7070

7171
@transient
7272
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
@@ -122,7 +122,7 @@ class FractionTimestampFormatter(zoneId: ZoneId)
122122
zoneId,
123123
TimestampFormatter.defaultLocale,
124124
LegacyDateFormats.FAST_DATE_FORMAT,
125-
needVarLengthSecondFraction = false) {
125+
isParsing = false) {
126126

127127
@transient
128128
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter
@@ -293,7 +293,7 @@ object TimestampFormatter {
293293
zoneId: ZoneId,
294294
locale: Locale = defaultLocale,
295295
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
296-
isParsing: Boolean = false): TimestampFormatter = {
296+
isParsing: Boolean): TimestampFormatter = {
297297
val pattern = format.getOrElse(defaultPattern)
298298
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
299299
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
@@ -340,12 +340,12 @@ object TimestampFormatter {
340340
def apply(
341341
format: String,
342342
zoneId: ZoneId,
343-
isParsing: Boolean = false): TimestampFormatter = {
343+
isParsing: Boolean): TimestampFormatter = {
344344
getFormatter(Some(format), zoneId, isParsing = isParsing)
345345
}
346346

347347
def apply(zoneId: ZoneId): TimestampFormatter = {
348-
getFormatter(None, zoneId)
348+
getFormatter(None, zoneId, isParsing = false)
349349
}
350350

351351
def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
4141
private val JST_OPT = Option(JST.getId)
4242

4343
def toMillis(timestamp: String): Long = {
44-
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC)
44+
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = false)
4545
DateTimeUtils.microsToMillis(tf.parse(timestamp))
4646
}
4747
val date = "2015-04-08 13:10:15"

sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,50 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
199199
// SparkUpgradeException here.
200200
intercept[SparkUpgradeException](formatter.parse("02-29"))
201201
}
202+
203+
test("Disable week-based date fields and quarter fields for parsing") {
204+
205+
def checkSparkUpgrade(c: Char): Unit = {
206+
intercept[SparkUpgradeException] {
207+
DateFormatter(
208+
c.toString,
209+
UTC,
210+
DateFormatter.defaultLocale,
211+
LegacyDateFormats.SIMPLE_DATE_FORMAT,
212+
isParsing = true)
213+
}
214+
assert(DateFormatter(
215+
c.toString,
216+
UTC,
217+
DateFormatter.defaultLocale,
218+
LegacyDateFormats.SIMPLE_DATE_FORMAT,
219+
isParsing = false).format(0).nonEmpty)
220+
}
221+
222+
def checkIllegalArg(c: Char): Unit = {
223+
intercept[IllegalArgumentException] {
224+
DateFormatter(
225+
c.toString,
226+
UTC,
227+
DateFormatter.defaultLocale,
228+
LegacyDateFormats.SIMPLE_DATE_FORMAT,
229+
isParsing = true)
230+
}
231+
232+
assert(DateFormatter(
233+
c.toString,
234+
UTC,
235+
DateFormatter.defaultLocale,
236+
LegacyDateFormats.SIMPLE_DATE_FORMAT,
237+
isParsing = false).format(0).nonEmpty)
238+
}
239+
240+
Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l =>
241+
checkSparkUpgrade(l)
242+
}
243+
244+
Seq('q', 'Q').foreach { l =>
245+
checkIllegalArg(l)
246+
}
247+
}
202248
}

sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
9696
2177456523456789L,
9797
11858049903010203L).foreach { micros =>
9898
outstandingZoneIds.foreach { zoneId =>
99-
val timestamp = TimestampFormatter(pattern, zoneId).format(micros)
99+
val timestamp = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
100100
val parsed = TimestampFormatter(
101101
pattern, zoneId, isParsing = true).parse(timestamp)
102102
assert(micros === parsed)
@@ -120,14 +120,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
120120
val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
121121
val micros = TimestampFormatter(
122122
pattern, zoneId, isParsing = true).parse(timestamp)
123-
val formatted = TimestampFormatter(pattern, zoneId).format(micros)
123+
val formatted = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
124124
assert(timestamp === formatted)
125125
}
126126
}
127127
}
128128

129129
test("case insensitive parsing of am and pm") {
130-
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC)
130+
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC, isParsing = false)
131131
val micros = formatter.parse("2009 Mar 20 11:30:01 am")
132132
assert(micros === date(2009, 3, 20, 11, 30, 1))
133133
}
@@ -157,8 +157,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
157157
assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00")
158158
assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00")
159159
withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone
160-
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros))
161-
=== "0100-01-01 00:00:00 BC")
160+
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC, isParsing = false)
161+
.format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC")
162162
}
163163
}
164164

@@ -209,7 +209,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
209209
"2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1")
210210

211211
try {
212-
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, true)
212+
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, isParsing = true)
213213
.parse("2019/11/14 20#25#30.123456")
214214
fail("Expected to throw an exception for the invalid input")
215215
} catch {
@@ -222,7 +222,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
222222
test("formatting timestamp strings up to microsecond precision") {
223223
outstandingZoneIds.foreach { zoneId =>
224224
def check(pattern: String, input: String, expected: String): Unit = {
225-
val formatter = TimestampFormatter(pattern, zoneId)
225+
val formatter = TimestampFormatter(pattern, zoneId, isParsing = false)
226226
val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get
227227
val actual = formatter.format(timestamp)
228228
assert(actual === expected)
@@ -259,7 +259,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
259259
}
260260

261261
test("SPARK-30958: parse timestamp with negative year") {
262-
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true)
262+
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
263263
assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22))
264264

265265
def assertParsingError(f: => Unit): Unit = {
@@ -272,7 +272,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
272272
}
273273

274274
// "yyyy" with "G" can't parse negative year or year 0000.
275-
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, true)
275+
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
276276
assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22"))
277277
assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22"))
278278

@@ -318,7 +318,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
318318
test("parsing hour with various patterns") {
319319
def createFormatter(pattern: String): TimestampFormatter = {
320320
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range.
321-
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
321+
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
322322
}
323323

324324
withClue("HH") {
@@ -377,38 +377,68 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
377377
}
378378

379379
test("missing date fields") {
380-
val formatter = TimestampFormatter("HH:mm:ss", UTC)
380+
val formatter = TimestampFormatter("HH:mm:ss", UTC, isParsing = true)
381381
val micros = formatter.parse("11:30:01")
382382
assert(micros === date(1970, 1, 1, 11, 30, 1))
383383
}
384384

385385
test("missing year field with invalid date") {
386386
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date.
387-
val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
387+
val formatter =
388+
TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
388389
withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29")))
389390
}
390391

391392
test("missing am/pm field") {
392-
val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC)
393+
val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC, isParsing = true)
393394
val micros = formatter.parse("2009 11:30:01")
394395
assert(micros === date(2009, 1, 1, 11, 30, 1))
395396
}
396397

397398
test("missing time fields") {
398-
val formatter = TimestampFormatter("yyyy HH", UTC)
399+
val formatter = TimestampFormatter("yyyy HH", UTC, isParsing = true)
399400
val micros = formatter.parse("2009 11")
400401
assert(micros === date(2009, 1, 1, 11))
401402
}
402403

403404
test("explicitly forbidden datetime patterns") {
404405
// not support by the legacy one too
405406
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>
406-
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC).format(0))
407+
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC, isParsing = false)
408+
.format(0))
407409
}
408410
// supported by the legacy one, then we will suggest users with SparkUpgradeException
409-
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "y" * 11)
411+
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "Y" * 11)
410412
.foreach { pattern =>
411-
intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0))
413+
intercept[SparkUpgradeException] {
414+
TimestampFormatter(pattern, UTC, isParsing = false).format(0)
415+
}
416+
}
417+
}
418+
419+
test("Disable week-based date fields and quarter fields for parsing") {
420+
421+
def checkSparkUpgrade(c: Char): Unit = {
422+
intercept[SparkUpgradeException] {
423+
TimestampFormatter(c.toString, UTC, isParsing = true)
424+
}
425+
assert(TimestampFormatter(c.toString, UTC, isParsing = false).format(0).nonEmpty)
426+
}
427+
428+
def checkIllegalArg(c: Char): Unit = {
429+
intercept[IllegalArgumentException] {
430+
TimestampFormatter(c.toString, UTC, isParsing = true)
431+
}
432+
433+
assert(TimestampFormatter(c.toString, UTC, isParsing = false).format(0).nonEmpty)
434+
}
435+
436+
Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l =>
437+
checkSparkUpgrade(l)
438+
}
439+
440+
Seq('q', 'Q').foreach { l =>
441+
checkIllegalArg(l)
412442
}
413443
}
414444
}

sql/core/src/test/resources/sql-tests/inputs/datetime.sql

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,21 @@ select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd');
168168
-- SPARK-31879: the first day of week
169169
select date_format('2020-01-01', 'YYYY-MM-dd uu');
170170
select date_format('2020-01-01', 'YYYY-MM-dd uuuu');
171+
172+
-- valid formatter pattern check
173+
create temporary view ttt as select t from VALUES
174+
(timestamp '1582-06-01 11:33:33.123UTC+080000'),
175+
(timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
176+
(timestamp '1970-01-01 00:00:00.000Europe/Paris'),
177+
(timestamp '1970-12-31 23:59:59.999America/Los_Angeles'),
178+
(timestamp '2018-11-17 13:33:33.123Z'),
179+
(timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
180+
(timestamp '2100-01-01 01:33:33.123Asia/Srednekolymsk') tt(t);
181+
select date_format(t, 'Y YY YYYY YYYYY YYYYY y yy yyy yyyy yyyyy') from ttt;
182+
select date_format(t, 'q qq Q QQ QQQ QQQQ') from ttt;
183+
select date_format(t, 'M MM MMM MMMM L LL') from ttt;
184+
select date_format(t, 'W ww d dd DDD u uu uuu uuuu F E EE EEE EEEE') from ttt;
185+
select date_format(t, 'h hh H HH k kk K KK m mm s ss SSS') from ttt;
186+
select date_format(t, 'VV z zz zzz zzzz O OOOO X XX XXX XXXX XXXXX x xx xxx xxxx xxxx xxxxx Z ZZ ZZZ ZZZZ ZZZZZ') from ttt;
187+
select date_format(date '1970-01-01', 'D DD');
188+

0 commit comments

Comments (0)