@@ -621,7 +621,7 @@ object SQLConf {
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(ParquetOutputTimestampType.values.map(_.toString))
- .createWithDefault(ParquetOutputTimestampType.INT96.toString)
+ .createWithDefault(ParquetOutputTimestampType.TIMESTAMP_MICROS.toString)

val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
.doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
@@ -845,10 +845,8 @@ object SQLConf {
.doc("When true, enable the metadata-only query optimization that use the table's metadata " +
"to produce the partition columns instead of table scans. It applies when all the columns " +
"scanned are partition columns and the query has an aggregate operator that satisfies " +
- "distinct semantics. By default the optimization is disabled, and deprecated as of Spark " +
- "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709." +
- "It will be removed in the future releases. If you must use, use 'SparkSessionExtensions' " +
- "instead to inject it as a custom rule.")
+ "distinct semantics. By default the optimization is disabled, since it may return " +
+ "incorrect results when the files are empty.")
.version("2.1.1")
.booleanConf
.createWithDefault(false)
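
Since the optimization stays disabled by default, a hedged sketch of opting in; the key spark.sql.optimizer.metadataOnly is assumed from the surrounding OPTIMIZER_METADATA_ONLY definition, and `events` is a hypothetical table partitioned by event_date.

spark.conf.set("spark.sql.optimizer.metadataOnly", "true")
// Qualifies because it only touches a partition column under a distinct aggregate.
spark.sql("SELECT DISTINCT event_date FROM events").explain()
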
@@ -2065,18 +2063,16 @@ object SQLConf {
.booleanConf
.createWithDefault(true)

- val NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST =
- buildConf("spark.sql.optimizer.nestedPredicatePushdown.supportedFileSources")
+ val NESTED_PREDICATE_PUSHDOWN_ENABLED =
+ buildConf("spark.sql.optimizer.nestedPredicatePushdown.enabled")
.internal()
- .doc("A comma-separated list of data source short names or fully qualified data source " +
- "implementation class names for which Spark tries to push down predicates for nested " +
- "columns and/or names containing `dots` to data sources. This configuration is only " +
- "effective with file-based data source in DSv1. Currently, Parquet implements " +
- "both optimizations while ORC only supports predicates for names containing `dots`. The " +
- "other data sources don't support this feature yet. So the default value is 'parquet,orc'.")
+ .doc("When true, Spark tries to push down predicates for nested columns and or names " +
+ "containing `dots` to data sources. Currently, Parquet implements both optimizations " +
+ "while ORC only supports predicates for names containing `dots`. The other data sources" +
+ " don't support this feature yet.")
.version("3.0.0")
- .stringConf
- .createWithDefault("parquet,orc")
+ .booleanConf
+ .createWithDefault(true)

val SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED =
buildConf("spark.sql.optimizer.serializer.nestedSchemaPruning.enabled")
@@ -2228,6 +2224,15 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

+ val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED =
+ buildConf("spark.sql.legacy.createHiveTableByDefault.enabled")
+ .internal()
+ .doc("When set to true, CREATE TABLE syntax without a provider will use hive " +
+ s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key}.")
+ .version("3.0.0")
+ .booleanConf
+ .createWithDefault(false)
+
val LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING =
buildConf("spark.sql.legacy.bucketedTableScan.outputOrdering")
.internal()
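
A hedged sketch of the behavior this legacy flag restores, assuming a Hive-enabled SparkSession named `spark`; the table name is illustrative.

// With the flag off (default), CREATE TABLE without USING picks spark.sql.sources.default.
// Turning it on reverts to creating a Hive SerDe table instead.
spark.conf.set("spark.sql.legacy.createHiveTableByDefault.enabled", "true")
spark.sql("CREATE TABLE legacy_demo (id INT)")
spark.sql("DESCRIBE EXTENDED legacy_demo").show(truncate = false)  // should report a Hive SerDe, not the default source
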
@@ -2519,72 +2524,57 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

- val LEGACY_PARQUET_REBASE_MODE_IN_WRITE =
- buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
+ val LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE =
+ buildConf("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled")
.internal()
- .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
- "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
- "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
- "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
- "ancient dates/timestamps that are ambiguous between the two calendars.")
+ .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+ "to the hybrid calendar (Julian + Gregorian) in write. " +
+ "The rebasing is performed by converting micros/millis/days to " +
+ "a local date/timestamp in the source calendar, interpreting the resulted date/" +
+ "timestamp in the target calendar, and getting the number of micros/millis/days " +
+ "since the epoch 1970-01-01 00:00:00Z.")
.version("3.0.0")
- .stringConf
- .transform(_.toUpperCase(Locale.ROOT))
- .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
- .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
- val LEGACY_PARQUET_REBASE_MODE_IN_READ =
- buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
- .internal()
- .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
- "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " +
- "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
- "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
- "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
- "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.")
- .version("3.0.0")
- .stringConf
- .transform(_.toUpperCase(Locale.ROOT))
- .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
- .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+ .booleanConf
+ .createWithDefault(false)

- val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
- buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
+ val LEGACY_PARQUET_REBASE_DATETIME_IN_READ =
+ buildConf("spark.sql.legacy.parquet.rebaseDateTimeInRead.enabled")
.internal()
- .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
- "to the legacy hybrid (Julian + Gregorian) calendar when writing Avro files. " +
- "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
- "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
- "ancient dates/timestamps that are ambiguous between the two calendars.")
+ .doc("When true, rebase dates/timestamps " +
+ "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+ "The rebasing is performed by converting micros/millis/days to " +
+ "a local date/timestamp in the source calendar, interpreting the resulted date/" +
+ "timestamp in the target calendar, and getting the number of micros/millis/days " +
+ "since the epoch 1970-01-01 00:00:00Z.")
.version("3.0.0")
- .stringConf
- .transform(_.toUpperCase(Locale.ROOT))
- .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
- .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
- val LEGACY_AVRO_REBASE_MODE_IN_READ =
- buildConf("spark.sql.legacy.avro.datetimeRebaseModeInRead")
- .internal()
- .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
- "Gregorian) calendar to Proleptic Gregorian calendar when reading Avro files. " +
- "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
- "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
- "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
- "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.")
+ .booleanConf
+ .createWithDefault(false)
+
+ val LEGACY_AVRO_REBASE_DATETIME_IN_WRITE =
+ buildConf("spark.sql.legacy.avro.rebaseDateTimeInWrite.enabled")
+ .internal()
+ .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+ "to the hybrid calendar (Julian + Gregorian) in write. " +
+ "The rebasing is performed by converting micros/millis/days to " +
+ "a local date/timestamp in the source calendar, interpreting the resulted date/" +
+ "timestamp in the target calendar, and getting the number of micros/millis/days " +
+ "since the epoch 1970-01-01 00:00:00Z.")
.version("3.0.0")
- .stringConf
- .transform(_.toUpperCase(Locale.ROOT))
- .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
- .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+ .booleanConf
+ .createWithDefault(false)

- val SCRIPT_TRANSFORMATION_EXIT_TIMEOUT =
- buildConf("spark.sql.scriptTransformation.exitTimeoutInSeconds")
+ val LEGACY_AVRO_REBASE_DATETIME_IN_READ =
+ buildConf("spark.sql.legacy.avro.rebaseDateTimeInRead.enabled")
.internal()
- .doc("Timeout for executor to wait for the termination of transformation script when EOF.")
+ .doc("When true, rebase dates/timestamps " +
+ "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+ "The rebasing is performed by converting micros/millis/days to " +
+ "a local date/timestamp in the source calendar, interpreting the resulted date/" +
+ "timestamp in the target calendar, and getting the number of micros/millis/days " +
+ "since the epoch 1970-01-01 00:00:00Z.")
.version("3.0.0")
- .timeConf(TimeUnit.SECONDS)
- .checkValue(_ > 0, "The timeout value must be positive")
- .createWithDefault(10L)
+ .booleanConf
+ .createWithDefault(false)

val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
buildConf("spark.sql.legacy.numericConvertToTimestampEnable")
@@ -2632,10 +2622,7 @@ object SQLConf {
DeprecatedConfig(ARROW_FALLBACK_ENABLED.key, "3.0",
s"Use '${ARROW_PYSPARK_FALLBACK_ENABLED.key}' instead of it."),
DeprecatedConfig(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key, "3.0",
- s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."),
- DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0",
- "Avoid to depend on this optimization to prevent a potential correctness issue. " +
- "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule.")
+ s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it.")
)

Map(configs.map { cfg => cfg.key -> cfg } : _*)
@@ -3128,6 +3115,8 @@ class SQLConf extends Serializable with Logging {
def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)

+ def nestedPredicatePushdownEnabled: Boolean = getConf(NESTED_PREDICATE_PUSHDOWN_ENABLED)
+
def serializerNestedSchemaPruningEnabled: Boolean =
getConf(SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED)
@@ -3161,6 +3150,9 @@ class SQLConf extends Serializable with Logging {
def allowNegativeScaleOfDecimalEnabled: Boolean =
getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED)

+ def createHiveTableByDefaultEnabled: Boolean =
+ getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED)
+
def truncateTableIgnorePermissionAcl: Boolean =
getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL)