Commit 47d7322

Merge branch 'master' into SPARK-31892-F

2 parents: 34aef79 + afcc14c

File tree: 11 files changed, +162 -110 lines


core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 5 additions & 3 deletions
@@ -173,9 +173,11 @@ function renderDagViz(forJob) {
   });
 
   metadataContainer().selectAll(".barrier-rdd").each(function() {
-    var rddId = d3.select(this).text().trim();
-    var clusterId = VizConstants.clusterPrefix + rddId;
-    svg.selectAll("g." + clusterId).classed("barrier", true)
+    var opId = d3.select(this).text().trim();
+    var opClusterId = VizConstants.clusterPrefix + opId;
+    var stageId = $(this).parents(".stage-metadata").attr("stage-id");
+    var stageClusterId = VizConstants.graphPrefix + stageId;
+    svg.selectAll("g[id=" + stageClusterId + "] g." + opClusterId).classed("barrier", true)
   });
 
   resizeSvg(svg);
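
Why scope the selector by stage: the same operator cluster id can appear in more than one stage's DAG, so classing every "g.cluster_<id>" on the page could mark an RDD as barrier in stages where it is not. Selecting inside "g[id=graph_<stageId>]" first confines the barrier styling to the stage that owns the barrier RDD. A minimal spark-shell sketch (assuming a running SparkContext `sc`) of a job whose DAG exercises this, mirroring the new Selenium test below:

  // Barrier stage (stage 0) followed by a shuffle, which starts stage 1.
  sc.parallelize(1 to 10)
    .barrier                   // switch the stage to barrier execution mode
    .mapPartitions(identity)   // barrier RDD rendered as a cluster in the DAG viz
    .repartition(1)            // shuffle boundary: second stage in the same job
    .collect()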

core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala

Lines changed: 28 additions & 0 deletions
@@ -100,6 +100,34 @@ abstract class RealBrowserUISeleniumSuite(val driverProp: String)
     }
   }
 
+  test("SPARK-31886: Color barrier execution mode RDD correctly") {
+    withSpark(newSparkContext()) { sc =>
+      sc.parallelize(1 to 10).barrier.mapPartitions(identity).repartition(1).collect()
+
+      eventually(timeout(10.seconds), interval(50.milliseconds)) {
+        goToUi(sc, "/jobs/job/?id=0")
+        webDriver.findElement(By.id("job-dag-viz")).click()
+
+        val stage0 = webDriver.findElement(By.cssSelector("g[id='graph_0']"))
+        val stage1 = webDriver.findElement(By.cssSelector("g[id='graph_1']"))
+        val barrieredOps = webDriver.findElements(By.className("barrier-rdd")).iterator()
+
+        while (barrieredOps.hasNext) {
+          val barrieredOpId = barrieredOps.next().getAttribute("innerHTML")
+          val foundInStage0 =
+            stage0.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage0.size === 1)
+
+          val foundInStage1 =
+            stage1.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage1.size === 0)
+        }
+      }
+    }
+  }
+
   /**
    * Create a test SparkContext with the SparkUI enabled.
    * It is safe to `get` the SparkUI directly from the SparkContext returned here.

resource-managers/kubernetes/integration-tests/pom.xml

Lines changed: 3 additions & 0 deletions
@@ -186,6 +186,9 @@
   <profiles>
     <profile>
       <id>hadoop-2.7</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
       <dependencies>
        <dependency>
          <groupId>com.amazonaws</groupId>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 8 additions & 0 deletions
@@ -62,7 +62,15 @@ trait DateTimeFormatterHelper {
       accessor.get(ChronoField.HOUR_OF_DAY)
     } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
       // When we reach here, it means am/pm is not specified. Here we assume it's am.
+      // All of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) will
+      // be resolved to HOUR_OF_AMPM here, so we do not need to handle them separately.
       accessor.get(ChronoField.HOUR_OF_AMPM)
+    } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
+        accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
+      // When we reach here, the `hour` part is missing and PM is specified.
+      // None of CLOCK_HOUR_OF_AMPM(h)/HOUR_OF_DAY(H)/CLOCK_HOUR_OF_DAY(k)/HOUR_OF_AMPM(K) is
+      // specified.
+      12
     } else {
       0
     }
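
The new branch gives "PM" without any hour field a sensible default of noon. A standalone sketch of the same resolution rule against plain java.time (not Spark's API; `resolveHour` is an illustrative name):

  import java.time.format.DateTimeFormatter
  import java.time.temporal.{ChronoField, TemporalAccessor}
  import java.util.Locale

  // Mirrors the branch order above: an explicit hour wins; a bare am/pm hour is
  // taken as-is; "PM" alone maps to 12; everything else defaults to 0 (midnight).
  def resolveHour(accessor: TemporalAccessor): Int = {
    if (accessor.isSupported(ChronoField.HOUR_OF_DAY)) {
      accessor.get(ChronoField.HOUR_OF_DAY)
    } else if (accessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
      accessor.get(ChronoField.HOUR_OF_AMPM)
    } else if (accessor.isSupported(ChronoField.AMPM_OF_DAY) &&
        accessor.get(ChronoField.AMPM_OF_DAY) == 1) {
      12
    } else {
      0
    }
  }

  assert(resolveHour(DateTimeFormatter.ofPattern("a", Locale.US).parse("PM")) == 12)
  assert(resolveHour(DateTimeFormatter.ofPattern("a", Locale.US).parse("AM")) == 0)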

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 10 additions & 0 deletions
@@ -1197,4 +1197,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkNullify(l)
     }
   }
+
+
+  test("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") {
+    checkEvaluation(
+      new ParseToTimestamp(Literal("PM"), Literal("a")).child,
+      Timestamp.valueOf("1970-01-01 12:00:00.0"))
+    checkEvaluation(
+      new ParseToTimestamp(Literal("11:11 PM"), Literal("mm:ss a")).child,
+      Timestamp.valueOf("1970-01-01 12:11:11.0"))
+  }
 }
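
For context, `ParseToTimestamp` is the expression behind SQL's `to_timestamp`, so the same defaulting is visible from SQL. A hedged spark-shell sketch (assumes a `SparkSession` named `spark` and the new parser, i.e. spark.sql.legacy.timeParserPolicy is not LEGACY):

  // Per the test above, a lone "PM" with pattern 'a' should resolve to noon
  // on the epoch day: 1970-01-01 12:00:00.
  spark.sql("SELECT to_timestamp('PM', 'a')").show(false)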

sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala

Lines changed: 21 additions & 3 deletions
@@ -390,9 +390,11 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
   }
 
   test("missing am/pm field") {
-    val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC, isParsing = true)
-    val micros = formatter.parse("2009 11:30:01")
-    assert(micros === date(2009, 1, 1, 11, 30, 1))
+    Seq("HH", "hh", "KK", "kk").foreach { hour =>
+      val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC, isParsing = true)
+      val micros = formatter.parse("2009 11:30:01")
+      assert(micros === date(2009, 1, 1, 11, 30, 1))
+    }
   }
 
   test("missing time fields") {
@@ -401,6 +403,22 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
     assert(micros === date(2009, 1, 1, 11))
   }
 
+  test("missing hour field") {
+    val f1 = TimestampFormatter("mm:ss a", UTC, isParsing = true)
+    val t1 = f1.parse("30:01 PM")
+    assert(t1 === date(1970, 1, 1, 12, 30, 1))
+    val t2 = f1.parse("30:01 AM")
+    assert(t2 === date(1970, 1, 1, 0, 30, 1))
+    val f2 = TimestampFormatter("mm:ss", UTC, isParsing = true)
+    val t3 = f2.parse("30:01")
+    assert(t3 === date(1970, 1, 1, 0, 30, 1))
+    val f3 = TimestampFormatter("a", UTC, isParsing = true)
+    val t4 = f3.parse("PM")
+    assert(t4 === date(1970, 1, 1, 12))
+    val t5 = f3.parse("AM")
+    assert(t5 === date(1970))
+  }
+
   test("explicitly forbidden datetime patterns") {
     // not supported by the legacy formatter either
     Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>

sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala

Lines changed: 20 additions & 5 deletions
@@ -19,10 +19,10 @@ package org.apache.spark.sql.execution
 
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, ZoneOffset}
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter}
 import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
 import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
 import org.apache.spark.sql.internal.SQLConf
@@ -72,9 +72,24 @@ object HiveResult {
     }
   }
 
-  private def zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
-  private def dateFormatter = DateFormatter(zoneId)
-  private def timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
+  // We can create the date formatter only once because it does not depend on Spark's
+  // session time zone controlled by the SQL config `spark.sql.session.timeZone`.
+  // The `zoneId` parameter is used only in parsing of special date values like `now`
+  // and `yesterday`, but not in date formatting. As for formatting of:
+  // - `java.time.LocalDate`, zone id is not used by `DateTimeFormatter` at all.
+  // - `java.sql.Date`, the date formatter delegates formatting to the legacy formatter
+  //   which uses the default system time zone `TimeZone.getDefault`. This works correctly
+  //   due to `DateTimeUtils.toJavaDate` which is based on the system time zone too.
+  private val dateFormatter = DateFormatter(
+    format = DateFormatter.defaultPattern,
+    // We can set any time zone id. UTC was taken for simplicity.
+    zoneId = ZoneOffset.UTC,
+    locale = DateFormatter.defaultLocale,
+    // Use `FastDateFormat` as the legacy formatter because it is thread-safe.
+    legacyFormat = LegacyDateFormats.FAST_DATE_FORMAT,
+    isParsing = false)
+  private def timestampFormatter = TimestampFormatter.getFractionFormatter(
+    DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
 
   /** Formats a datum (based on the given data type) and returns the string representation. */
   def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match {
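
The `val` vs. `def` distinction above is the point of the change: a formatter whose output cannot depend on mutable session state is safe to build once, while one that reads the session time zone must be re-resolved per access. A generic sketch of the same pattern (illustrative names, plain java.time rather than Spark's formatters):

  import java.time.ZoneId
  import java.time.format.DateTimeFormatter

  object Formatters {
    // Date formatting never consults a time zone, so one shared instance suffices
    // (java.time.format.DateTimeFormatter is immutable and thread-safe).
    val dateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")

    // Timestamp formatting depends on a per-session zone, so resolve it on each call.
    def timestampFormatter(sessionZone: String): DateTimeFormatter =
      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(ZoneId.of(sessionZone))
  }
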
Lines changed: 7 additions & 7 deletions
@@ -1,19 +1,19 @@
 --- TESTS FOR DATETIME FORMATTING FUNCTIONS ---
 
 -- valid formatter pattern check
-create temporary view ttt as select t from VALUES
+create temporary view v as select t from values
   (timestamp '1582-06-01 11:33:33.123UTC+080000'),
   (timestamp '1970-01-01 00:00:00.000Europe/Paris'),
   (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
   (timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
   (timestamp '2018-11-17 13:33:33.123Z'),
   (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
-  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') tt(t);
+  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col);
 
-select t, date_format(t, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from ttt;
-select t, date_format(t, 'q qq Q QQ QQQ QQQQ') from ttt;
-select t, date_format(t, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from ttt;
-select t, date_format(t, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from ttt;
+select col, date_format(col, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from v;
+select col, date_format(col, 'q qq Q QQ QQQ QQQQ') from v;
+select col, date_format(col, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from v;
+select col, date_format(col, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from v;
 -- These time zone patterns are unsupported by the legacy formatter
-select t, date_format(t, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from ttt;
+select col, date_format(col, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from v;
 select date_format(date '1970-01-01', 'D DD DDD');

sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out

Lines changed: 22 additions & 36 deletions
@@ -3,78 +3,64 @@
 
 
 -- !query
-create temporary view ttt as select t from VALUES
+create temporary view v as select t from values
   (timestamp '1582-06-01 11:33:33.123UTC+080000'),
   (timestamp '1970-01-01 00:00:00.000Europe/Paris'),
   (timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
   (timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
   (timestamp '2018-11-17 13:33:33.123Z'),
   (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
-  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') tt(t)
+  (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col)
 -- !query schema
 struct<>
 -- !query output
-
+org.apache.spark.sql.AnalysisException
+cannot resolve '`t`' given input columns: [t.col]; line 1 pos 34
 
 
 -- !query
-select t, date_format(t, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from ttt
+select col, date_format(col, 'Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE') from v
 -- !query schema
-struct<t:timestamp,date_format(t, Y-w-u YYYY-ww-uu YYY-W-uuu YY YYYYY uuuu E EE EEE EEEE):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 1582-22-4 1582-22-04 1582-5-004 82 01582 0004 Thu Thu Thu Thursday
-1969-12-31 15:00:00 1970-1-3 1970-01-03 1970-5-003 70 01970 0003 Wed Wed Wed Wednesday
-1970-12-31 04:59:59.999 1970-53-4 1970-53-04 1970-5-004 70 01970 0004 Thu Thu Thu Thursday
-1996-03-31 07:03:33.123 1996-13-7 1996-13-07 1996-4-007 96 01996 0007 Sun Sun Sun Sunday
-2018-11-17 05:33:33.123 2018-46-6 2018-46-06 2018-3-006 18 02018 0006 Sat Sat Sat Saturday
-2019-12-31 09:33:33.123 2020-1-2 2020-01-02 2020-5-002 20 02020 0002 Tue Tue Tue Tuesday
-2100-01-01 01:33:33.123 2099-53-5 2099-53-05 2099-0-005 99 02099 0005 Fri Fri Fri Friday
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 92
 
 
 -- !query
-select t, date_format(t, 'q qq Q QQ QQQ QQQQ') from ttt
+select col, date_format(col, 'q qq Q QQ QQQ QQQQ') from v
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Illegal pattern character 'q'
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 56
 
 
 -- !query
-select t, date_format(t, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from ttt
+select col, date_format(col, 'y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a') from v
 -- !query schema
-struct<t:timestamp,date_format(t, y-M-d H:m:s yyyy-MM-dd HH:mm:ss.SSS yy yyy yyyyy MMM MMMM L LL F h hh k kk K KK a):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 1582-5-31 19:40:35 1582-05-31 19:40:35.123 82 1582 01582 May May 5 05 5 7 07 19 19 7 07 PM
-1969-12-31 15:00:00 1969-12-31 15:0:0 1969-12-31 15:00:00.000 69 1969 01969 Dec December 12 12 5 3 03 15 15 3 03 PM
-1970-12-31 04:59:59.999 1970-12-31 4:59:59 1970-12-31 04:59:59.999 70 1970 01970 Dec December 12 12 5 4 04 4 04 4 04 AM
-1996-03-31 07:03:33.123 1996-3-31 7:3:33 1996-03-31 07:03:33.123 96 1996 01996 Mar March 3 03 5 7 07 7 07 7 07 AM
-2018-11-17 05:33:33.123 2018-11-17 5:33:33 2018-11-17 05:33:33.123 18 2018 02018 Nov November 11 11 3 5 05 5 05 5 05 AM
-2019-12-31 09:33:33.123 2019-12-31 9:33:33 2019-12-31 09:33:33.123 19 2019 02019 Dec December 12 12 5 9 09 9 09 9 09 AM
-2100-01-01 01:33:33.123 2100-1-1 1:33:33 2100-01-01 01:33:33.123 00 2100 02100 Jan January 1 01 1 1 01 1 01 1 01 AM
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 119
 
 
 -- !query
-select t, date_format(t, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from ttt
+select col, date_format(col, 'z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ') from v
 -- !query schema
-struct<t:timestamp,date_format(t, z zz zzz zzzz X XX XXX Z ZZ ZZZ ZZZZ ZZZZZ):string>
+struct<>
 -- !query output
-1582-05-31 19:40:35.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1969-12-31 15:00:00 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1970-12-31 04:59:59.999 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-1996-03-31 07:03:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2018-11-17 05:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2019-12-31 09:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
-2100-01-01 01:33:33.123 PST PST PST Pacific Standard Time -08 -0800 -08:00 -0800 -0800 -0800 -0800 -0800
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 81
 
 
 -- !query
-select t, date_format(t, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from ttt
+select col, date_format(col, 'VV O OOOO XXXX XXXXX x xx xxx xxxx xxxx xxxxx') from v
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Illegal pattern character 'V'
+org.apache.spark.sql.AnalysisException
+Table or view not found: v; line 1 pos 83
 
 
 -- !query
