Commit 47d7322

Merge branch 'master' into SPARK-31892-F

2 parents: 34aef79 + afcc14c

File tree: 11 files changed, +162 -110 lines


core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 5 additions & 3 deletions
@@ -173,9 +173,11 @@ function renderDagViz(forJob) {
   });
 
   metadataContainer().selectAll(".barrier-rdd").each(function() {
-    var rddId = d3.select(this).text().trim();
-    var clusterId = VizConstants.clusterPrefix + rddId;
-    svg.selectAll("g." + clusterId).classed("barrier", true)
+    var opId = d3.select(this).text().trim();
+    var opClusterId = VizConstants.clusterPrefix + opId;
+    var stageId = $(this).parents(".stage-metadata").attr("stage-id");
+    var stageClusterId = VizConstants.graphPrefix + stageId;
+    svg.selectAll("g[id=" + stageClusterId + "] g." + opClusterId).classed("barrier", true)
   });
 
   resizeSvg(svg);
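
Why scope the selector by stage: the same operator cluster id can appear in more than one stage's DAG, so classing every "g.cluster_<id>" on the page could mark an RDD as barrier in stages where it is not. Selecting inside "g[id=graph_<stageId>]" first confines the barrier styling to the stage that owns the barrier RDD. A minimal spark-shell sketch (assuming a running SparkContext `sc`) of a job whose DAG exercises this, mirroring the new Selenium test below:

  // Barrier stage (stage 0) followed by a shuffle, which starts stage 1.
  sc.parallelize(1 to 10)
    .barrier                   // switch the stage to barrier execution mode
    .mapPartitions(identity)   // barrier RDD rendered as a cluster in the DAG viz
    .repartition(1)            // shuffle boundary: second stage in the same job
    .collect()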

core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala

Lines changed: 28 additions & 0 deletions
@@ -100,6 +100,34 @@ abstract class RealBrowserUISeleniumSuite(val driverProp: String)
     }
   }
 
+  test("SPARK-31886: Color barrier execution mode RDD correctly") {
+    withSpark(newSparkContext()) { sc =>
+      sc.parallelize(1 to 10).barrier.mapPartitions(identity).repartition(1).collect()
+
+      eventually(timeout(10.seconds), interval(50.milliseconds)) {
+        goToUi(sc, "/jobs/job/?id=0")
+        webDriver.findElement(By.id("job-dag-viz")).click()
+
+        val stage0 = webDriver.findElement(By.cssSelector("g[id='graph_0']"))
+        val stage1 = webDriver.findElement(By.cssSelector("g[id='graph_1']"))
+        val barrieredOps = webDriver.findElements(By.className("barrier-rdd")).iterator()
+
+        while (barrieredOps.hasNext) {
+          val barrieredOpId = barrieredOps.next().getAttribute("innerHTML")
+          val foundInStage0 =
+            stage0.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage0.size === 1)
+
+          val foundInStage1 =
+            stage1.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage1.size === 0)
+        }
+      }
+    }
+  }
+
   /**
    * Create a test SparkContext with the SparkUI enabled.
    * It is safe to `get` the SparkUI directly from the SparkContext returned here.

resource-managers/kubernetes/integration-tests/pom.xml

Lines changed: 3 additions & 0 deletions
@@ -186,6 +186,9 @@
   <profiles>
     <profile>
       <id>hadoop-2.7</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
       <dependencies>
        <dependency>
          <groupId>com.amazonaws</groupId>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 8 additions & 0 deletions
@@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
       accessor.get(ChronoField.HOUR_OF_DAY)
     } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
       // When we reach here, it means am/pm is not specified. Here we assume it's am.
+      // All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
+      // be resolved to HOUR_OF_AMPM here, so we do not need to handle them separately.
       accessor.get(ChronoField.HOUR_OF_AMPM)
+    } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
+        accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
+      // When we reach here, the `hour` part is missing and PM is specified.
+      // None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
+      // specified.
+      12
     } else {
       0
     }
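
The new branch gives "PM" without any hour field a sensible default of noon. A standalone sketch of the same resolution rule against plain java.time (not Spark's API; `resolveHour` is an illustrative name):

  import java.time.format.DateTimeFormatter
  import java.time.temporal.{ChronoField, TemporalAccessor}
  import java.util.Locale

  // Mirrors the branch order above: an explicit hour wins; a bare am/pm hour is
  // taken as-is; "PM" alone maps to 12; everything else defaults to 0 (midnight).
  def resolveHour(accessor: TemporalAccessor): Int = {
    if (accessor.isSupported(ChronoField.HOUR_OF_DAY)) {
      accessor.get(ChronoField.HOUR_OF_DAY)
    } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
      accessor.get(ChronoField.HOUR_OF_AMPM)
    } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
        accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
      12
    } else {
      0
    }
  }

  assert(resolveHour(DateTimeFormatter.ofPattern("a", Locale.US).parse("PM")) == 12)
  assert(resolveHour(DateTimeFormatter.ofPattern("a", Locale.US).parse("AM")) == 0)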

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 10 additions & 0 deletions
@@ -1197,4 +1197,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkNullify(l)
     }
   }
+
+
+  test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
+    checkEvaluation(
+      new ParseToTimestamp(Literal("PM"), Literal("a")).child,
+      Timestamp.valueOf("1970-01-01 12:00:00.0"))
+    checkEvaluation(
+      new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child,
+      Timestamp.valueOf("1970-01-01 12:11:11.0"))
+  }
 }
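
For context, `ParseToTimestamp` is the expression behind SQL's `to_timestamp`, so the same defaulting is visible from SQL. A hedged spark-shell sketch (assumes a `SparkSession` named `spark` and the new parser, i.e. spark.sql.legacy.timeParserPolicy is not LEGACY):

  // Per the test above, a lone "PM" with pattern 'a' should resolve to noon
  // on the epoch day: 1970-01-01 12:00:00.
  spark.sql("SELECT to_timestamp('PM', 'a')").show(false)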

sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala

Lines changed: 21 additions & 3 deletions
@@ -390,9 +390,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
   }
 
   test("missing am/pm field") {
-    val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC, isParsing = true)
-    val micros = formatter.parse("2009 11:30:01")
-    assert(micros === date(2009, 1, 1, 11, 30, 1))
+    Seq("HH", "hh", "KK", "kk").foreach { hour =>
+      val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC, isParsing = true)
+      val micros = formatter.parse("2009 11:30:01")
+      assert(micros === date(2009, 1, 1, 11, 30, 1))
+    }
   }
 
   test("missing time fields") {
@@ -401,6 +403,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
     assert(micros === date(2009, 1, 1, 11))
   }
 
+  test("missing hour field") {
+    val f1 = TimestampFormatter("mm:ss a", UTC, isParsing = true)
+    val t1 = f1.parse("30:01 PM")
+    assert(t1 === date(1970, 1, 1, 12, 30, 1))
+    val t2 = f1.parse("30:01 AM")
+    assert(t2 === date(1970, 1, 1, 0, 30, 1))
+    val f2 = TimestampFormatter("mm:ss", UTC, isParsing = true)
+    val t3 = f2.parse("30:01")
+    assert(t3 === date(1970, 1, 1, 0, 30, 1))
+    val f3 = TimestampFormatter("a", UTC, isParsing = true)
+    val t4 = f3.parse("PM")
+    assert(t4 === date(1970, 1, 1, 12))
+    val t5 = f3.parse("AM")
+    assert(t5 === date(1970))
+  }
+
   test("explicitly forbidden datetime patterns") {
     // not supported by the legacy formatter either
     Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>

sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala

Lines changed: 20 additions & 5 deletions
@@ -19,10 +19,10 @@ package org.apache.spark.sql.execution
 
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, ZoneOffset}
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter}
 import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
 import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
 import org.apache.spark.sql.internal.SQLConf
@@ -72,9 +72,24 @@ object HiveResult {
     }
   }
 
-  private def zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
-  private def dateFormatter = DateFormatter(zoneId)
-  private def timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
+  // We can create the date formatter only once because it does not depend on Spark's
+  // session time zone controlled by the SQL config `spark.sql.session.timeZone`.
+  // The `zoneId` parameter is used only in parsing of special date values like `now`
+  // and `yesterday`, but not in date formatting. As for formatting of:
+  // - `java.time.LocalDate`, zone id is not used by `DateTimeFormatter` at all.
+  // - `java.sql.Date`, the date formatter delegates formatting to the legacy formatter
+  //   which uses the default system time zone `TimeZone.getDefault`. This works correctly
+  //   due to `DateTimeUtils.toJavaDate` which is based on the system time zone too.
+  private val dateFormatter = DateFormatter(
+    format = DateFormatter.defaultPattern,
+    // We can set any time zone id. UTC was taken for simplicity.
+    zoneId = ZoneOffset.UTC,
+    locale = DateFormatter.defaultLocale,
+    // Use `FastDateFormat` as the legacy formatter because it is thread-safe.
+    legacyFormat = LegacyDateFormats.FAST_DATE_FORMAT,
+    isParsing = false)
+  private def timestampFormatter = TimestampFormatter.getFractionFormatter(
+    DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
 
   /** Formats a datum (based on the given data type) and returns the string representation. */
   def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match {
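
The `val` vs. `def` distinction above is the point of the change: a formatter whose output cannot depend on mutable session state is safe to build once, while one that reads the session time zone must be re-resolved per access. A generic sketch of the same pattern (illustrative names, plain java.time rather than Spark's formatters):

  import java.time.ZoneId
  import java.time.format.DateTimeFormatter

  object Formatters {
    // Date formatting never consults a time zone, so one shared instance suffices
    // (java.time.format.DateTimeFormatter is immutable and thread-safe).
    val dateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")

    // Timestamp formatting depends on a per-session zone, so resolve it on each call.
    def timestampFormatter(sessionZone: String): DateTimeFormatter =
      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(ZoneId.of(sessionZone))
  }
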
Lines changed: 7 additions & 7 deletions
@@ -1,19 +1,19 @@
 --- TESTS FOR DATETIME FORMATTING FUNCTIONS ---
 
 -- valid formatter pattern check
-create temporary view ttt as select t from VALUES
+create temporary view v as select t from values
   (timestamp '1582-06-01 11:33:33.123UTC+080000'),
   (timestamp '1970-01-01 00:00:00.000Europe/Paris'),
   (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
   (timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
   (timestamp '2018-11-17 13:33:33.123Z'),
   (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
-  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') tt(t);
+  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col);
 
-select t, date_format(t, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from ttt;
-select t, date_format(t, 'q qq Q QQ QQQ QQQQ') from ttt;
-select t, date_format(t, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from ttt;
-select t, date_format(t, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from ttt;
+select col, date_format(col, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from v;
+select col, date_format(col, 'q qq Q QQ QQQ QQQQ') from v;
+select col, date_format(col, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from v;
+select col, date_format(col, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from v;
 -- These time zone patterns are unsupported by the legacy formatter
-select t, date_format(t, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from ttt;
+select col, date_format(col, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from v;
 select date_format(date '1970-01-01', 'D DD DDD');

sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out

Lines changed: 22 additions & 36 deletions
@@ -3,78 +3,64 @@
 
 
 -- !query
-create temporary view ttt as select t from VALUES
+create temporary view v as select t from values
   (timestamp '1582-06-01 11:33:33.123UTC+080000'),
   (timestamp '1970-01-01 00:00:00.000Europe/Paris'),
   (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
   (timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
   (timestamp '2018-11-17 13:33:33.123Z'),
   (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
-  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') tt(t)
+  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col)
 -- !query schema
 struct<>
 -- !query output
-
+org.apache.spark.sql.AnalysisException
+cannot resolve '`t`' given input columns: [t.col]; line 1 pos 34
 
 
 -- !query
-select t, date_format(t, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from ttt
+select col, date_format(col, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from v
 -- !query schema
-struct<t:timestamp,date_format(t, Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 1582-22-4 1582-22-04 1582-5-004 82 01582 0004 Thu Thu Thu Thursday
-1969-12-31 15:00:00 1970-1-3 1970-01-03 1970-5-003 70 01970 0003 Wed Wed Wed Wednesday
-1970-12-31 04:59:59.999 1970-53-4 1970-53-04 1970-5-004 70 01970 0004 Thu Thu Thu Thursday
-1996-03-31 07:03:33.123 1996-13-7 1996-13-07 1996-4-007 96 01996 0007 Sun Sun Sun Sunday
-2018-11-17 05:33:33.123 2018-46-6 2018-46-06 2018-3-006 18 02018 0006 Sat Sat Sat Saturday
-2019-12-31 09:33:33.123 2020-1-2 2020-01-02 2020-5-002 20 02020 0002 Tue Tue Tue Tuesday
-2100-01-01 01:33:33.123 2099-53-5 2099-53-05 2099-0-005 99 02099 0005 Fri Fri Fri Friday
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 92
 
 
 -- !query
-select t, date_format(t, 'q qq Q QQ QQQ QQQQ') from ttt
+select col, date_format(col, 'q qq Q QQ QQQ QQQQ') from v
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Illegal pattern character 'q'
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 56
 
 
 -- !query
-select t, date_format(t, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from ttt
+select col, date_format(col, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from v
 -- !query schema
-struct<t:timestamp,date_format(t, y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 1582-5-31 19:40:35 1582-05-31 19:40:35.123 82 1582 01582 May May 5 05 5 7 07 19 19 7 07 PM
-1969-12-31 15:00:00 1969-12-31 15:0:0 1969-12-31 15:00:00.000 69 1969 01969 Dec December 12 12 5 3 03 15 15 3 03 PM
-1970-12-31 04:59:59.999 1970-12-31 4:59:59 1970-12-31 04:59:59.999 70 1970 01970 Dec December 12 12 5 4 04 4 04 4 04 AM
-1996-03-31 07:03:33.123 1996-3-31 7:3:33 1996-03-31 07:03:33.123 96 1996 01996 Mar March 3 03 5 7 07 7 07 7 07 AM
-2018-11-17 05:33:33.123 2018-11-17 5:33:33 2018-11-17 05:33:33.123 18 2018 02018 Nov November 11 11 3 5 05 5 05 5 05 AM
-2019-12-31 09:33:33.123 2019-12-31 9:33:33 2019-12-31 09:33:33.123 19 2019 02019 Dec December 12 12 5 9 09 9 09 9 09 AM
-2100-01-01 01:33:33.123 2100-1-1 1:33:33 2100-01-01 01:33:33.123 00 2100 02100 Jan January 1 01 1 1 01 1 01 1 01 AM
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 119
 
 
 -- !query
-select t, date_format(t, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from ttt
+select col, date_format(col, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from v
 -- !query schema
-struct<t:timestamp,date_format(t, z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1969-12-31 15:00:00 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1970-12-31 04:59:59.999 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1996-03-31 07:03:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2018-11-17 05:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2019-12-31 09:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2100-01-01 01:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 81
 
 
 -- !query
-select t, date_format(t, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from ttt
+select col, date_format(col, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from v
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Illegal pattern character 'V'
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 83
 
 
 -- !query
