@@ -621,7 +621,7 @@ object SQLConf {
     .stringConf
     .transform(_.toUpperCase(Locale.ROOT))
     .checkValues(ParquetOutputTimestampType.values.map(_.toString))
-    .createWithDefault(ParquetOutputTimestampType.INT96.toString)
+    .createWithDefault(ParquetOutputTimestampType.TIMESTAMP_MICROS.toString)

   val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
     .doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
@@ -845,10 +845,8 @@ object SQLConf {
     .doc("When true, enable the metadata-only query optimization that uses the table's metadata " +
       "to produce the partition columns instead of table scans. It applies when all the columns " +
       "scanned are partition columns and the query has an aggregate operator that satisfies " +
-      "distinct semantics. By default the optimization is disabled, and deprecated as of Spark " +
-      "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709." +
-      "It will be removed in the future releases. If you must use, use 'SparkSessionExtensions' " +
-      "instead to inject it as a custom rule.")
+      "distinct semantics. By default the optimization is disabled, since it may return " +
+      "incorrect results when the files are empty.")
     .version("2.1.1")
     .booleanConf
     .createWithDefault(false)
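For context, the key of this flag is spark.sql.optimizer.metadataOnly. A sketch of the kind of query it targets, assuming an existing session named spark and a partitioned table named events, neither of which is part of the patch:

    // Illustrative only: enable the optimization and run an aggregate that touches
    // nothing but partition columns, so it can be answered from catalog metadata.
    spark.conf.set("spark.sql.optimizer.metadataOnly", "true")
    spark.sql("SELECT year, MAX(month) FROM events GROUP BY year").show()
    // Caveat kept in the doc string above: empty files can make the result incorrect.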
@@ -2065,18 +2063,16 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)

-  val NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST =
-    buildConf("spark.sql.optimizer.nestedPredicatePushdown.supportedFileSources")
+  val NESTED_PREDICATE_PUSHDOWN_ENABLED =
+    buildConf("spark.sql.optimizer.nestedPredicatePushdown.enabled")
       .internal()
-      .doc("A comma-separated list of data source short names or fully qualified data source " +
-        "implementation class names for which Spark tries to push down predicates for nested " +
-        "columns and/or names containing `dots` to data sources. This configuration is only " +
-        "effective with file-based data source in DSv1. Currently, Parquet implements " +
-        "both optimizations while ORC only supports predicates for names containing `dots`. The " +
-        "other data sources don't support this feature yet. So the default value is 'parquet,orc'.")
+      .doc("When true, Spark tries to push down predicates for nested columns and/or names " +
+        "containing `dots` to data sources. Currently, Parquet implements both optimizations " +
+        "while ORC only supports predicates for names containing `dots`. The other data sources " +
+        "don't support this feature yet.")
       .version("3.0.0")
-      .stringConf
-      .createWithDefault("parquet,orc")
+      .booleanConf
+      .createWithDefault(true)

   val SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED =
     buildConf("spark.sql.optimizer.serializer.nestedSchemaPruning.enabled")
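The hunk above replaces the per-source list with a single boolean, spark.sql.optimizer.nestedPredicatePushdown.enabled. A spark-shell style sketch of the filter shape the flag affects (schema, data, and path are made up for illustration):

    import spark.implicits._

    spark.conf.set("spark.sql.optimizer.nestedPredicatePushdown.enabled", "true")

    case class Name(first: String, last: String)
    case class Person(id: Long, name: Name)

    Seq(Person(1L, Name("Ada", "Lovelace"))).toDF()
      .write.mode("overwrite").parquet("/tmp/people")      // illustrative path

    // With the flag on, the predicate on the nested field name.first can be
    // pushed down to the Parquet reader instead of being evaluated after the scan.
    spark.read.parquet("/tmp/people").filter($"name.first" === "Ada").explain()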
@@ -2228,6 +2224,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

+  val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED =
+    buildConf("spark.sql.legacy.createHiveTableByDefault.enabled")
+      .internal()
+      .doc("When set to true, CREATE TABLE syntax without a provider will use hive " +
+        s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key}.")
+      .version("3.0.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING =
     buildConf("spark.sql.legacy.bucketedTableScan.outputOrdering")
       .internal()
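A sketch of what the new legacy flag changes for a provider-less CREATE TABLE, assuming a session built with Hive support; the table names are illustrative:

    // Default (false): the table uses spark.sql.sources.default (typically parquet).
    spark.sql("CREATE TABLE t_native (id INT)")

    // Legacy behavior: the same syntax creates a Hive serde table instead.
    spark.conf.set("spark.sql.legacy.createHiveTableByDefault.enabled", "true")
    spark.sql("CREATE TABLE t_hive (id INT)")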
@@ -2519,85 +2524,70 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

-  val LEGACY_PARQUET_REBASE_MODE_IN_WRITE =
-    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
+  val LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE =
+    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled")
       .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
-        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars.")
+      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+        "to the hybrid calendar (Julian + Gregorian) in write. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
-  val LEGACY_PARQUET_REBASE_MODE_IN_READ =
-    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
-      .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
-        "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " +
-        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
-        "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.")
-      .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+      .booleanConf
+      .createWithDefault(false)

-  val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
-    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
+  val LEGACY_PARQUET_REBASE_DATETIME_IN_READ =
+    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInRead.enabled")
       .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the legacy hybrid (Julian + Gregorian) calendar when writing Avro files. " +
-        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars.")
+      .doc("When true, rebase dates/timestamps " +
+        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
-  val LEGACY_AVRO_REBASE_MODE_IN_READ =
-    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInRead")
-      .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
-        "Gregorian) calendar to Proleptic Gregorian calendar when reading Avro files. " +
-        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
-        "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.")
+      .booleanConf
+      .createWithDefault(false)
+
+  val LEGACY_AVRO_REBASE_DATETIME_IN_WRITE =
+    buildConf("spark.sql.legacy.avro.rebaseDateTimeInWrite.enabled")
+      .internal()
+      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+        "to the hybrid calendar (Julian + Gregorian) in write. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+      .booleanConf
+      .createWithDefault(false)

-  val SCRIPT_TRANSFORMATION_EXIT_TIMEOUT =
-    buildConf("spark.sql.scriptTransformation.exitTimeoutInSeconds")
+  val LEGACY_AVRO_REBASE_DATETIME_IN_READ =
+    buildConf("spark.sql.legacy.avro.rebaseDateTimeInRead.enabled")
       .internal()
-      .doc("Timeout for executor to wait for the termination of transformation script when EOF.")
+      .doc("When true, rebase dates/timestamps " +
+        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .timeConf(TimeUnit.SECONDS)
-      .checkValue(_ > 0, "The timeout value must be positive")
-      .createWithDefault(10L)
+      .booleanConf
+      .createWithDefault(false)

-  val NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
+  val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
     buildConf("spark.sql.legacy.numericConvertToTimestampEnable")
       .internal()
       .doc("When true, use legacy behavior where numeric values can be converted to timestamp.")
       .version("3.0.0")
       .booleanConf
       .createWithDefault(false)

-  val NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS =
+  val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS =
     buildConf("spark.sql.legacy.numericConvertToTimestampInSeconds")
       .internal()
-      .doc("The legacy only works when NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE is true." +
+      .doc("This legacy behavior only works when LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE is true. " +
        "When true, the value will be interpreted as seconds, which follows Spark style; " +
        "when false, the value is interpreted as milliseconds, which follows Hive style.")
       .version("3.0.0")
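To illustrate the rebase flags restored above, a sketch using the Parquet write key from the hunk; the session, date value, and output path are illustrative:

    import java.sql.Date
    import spark.implicits._

    // Rebase pre-Gregorian dates from the Proleptic Gregorian calendar to the
    // hybrid Julian + Gregorian calendar on write, so legacy readers see the same
    // local date; with the default (false) the value is written as-is.
    spark.conf.set("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled", "true")

    Seq(Date.valueOf("1000-01-01")).toDF("d")
      .write.mode("overwrite").parquet("/tmp/ancient_dates")   // illustrative path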
@@ -2633,10 +2623,7 @@ object SQLConf {
       DeprecatedConfig(ARROW_FALLBACK_ENABLED.key, "3.0",
         s"Use '${ARROW_PYSPARK_FALLBACK_ENABLED.key}' instead of it."),
       DeprecatedConfig(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key, "3.0",
-        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."),
-      DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0",
-        "Avoid to depend on this optimization to prevent a potential correctness issue. " +
-          "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule.")
+        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it.")
     )

     Map(configs.map { cfg => cfg.key -> cfg } : _*)
@@ -3129,6 +3116,8 @@ class SQLConf extends Serializable with Logging {

   def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)

+  def nestedPredicatePushdownEnabled: Boolean = getConf(NESTED_PREDICATE_PUSHDOWN_ENABLED)
+
   def serializerNestedSchemaPruningEnabled: Boolean =
     getConf(SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED)
@@ -3162,6 +3151,9 @@ class SQLConf extends Serializable with Logging {
   def allowNegativeScaleOfDecimalEnabled: Boolean =
     getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED)

+  def createHiveTableByDefaultEnabled: Boolean =
+    getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED)
+
   def truncateTableIgnorePermissionAcl: Boolean =
     getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL)
@@ -3183,16 +3175,14 @@ class SQLConf extends Serializable with Logging {

   def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID)

-
   def parquetRebaseDateTimeInReadEnabled: Boolean = {
     getConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ)
   }

-  def numericConvertToTimestampEnable: Boolean = getConf(NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE)
-
-  def numericConvertToTimestampInSeconds: Boolean = getConf(NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS)
-
+  def numericConvertToTimestampEnable: Boolean = getConf(LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE)

+  def numericConvertToTimestampInSeconds: Boolean =
+    getConf(LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS)

   /** ********************** SQLConf functionality methods ************ */