@@ -621,7 +621,7 @@ object SQLConf {
     .stringConf
     .transform(_.toUpperCase(Locale.ROOT))
     .checkValues(ParquetOutputTimestampType.values.map(_.toString))
-    .createWithDefault(ParquetOutputTimestampType.INT96.toString)
+    .createWithDefault(ParquetOutputTimestampType.TIMESTAMP_MICROS.toString)

   val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
     .doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
@@ -845,10 +845,8 @@ object SQLConf {
     .doc("When true, enable the metadata-only query optimization that uses the table's metadata " +
       "to produce the partition columns instead of table scans. It applies when all the columns " +
       "scanned are partition columns and the query has an aggregate operator that satisfies " +
-      "distinct semantics. By default the optimization is disabled, and deprecated as of Spark " +
-      "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709." +
-      "It will be removed in the future releases. If you must use, use 'SparkSessionExtensions' " +
-      "instead to inject it as a custom rule.")
+      "distinct semantics. By default the optimization is disabled, since it may return " +
+      "incorrect results when the files are empty.")
     .version("2.1.1")
     .booleanConf
     .createWithDefault(false)
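For context, the key of this flag is spark.sql.optimizer.metadataOnly. A sketch of the kind of query it targets, assuming an existing session named spark and a partitioned table named events, neither of which is part of the patch:

    // Illustrative only: enable the optimization and run an aggregate that touches
    // nothing but partition columns, so it can be answered from catalog metadata.
    spark.conf.set("spark.sql.optimizer.metadataOnly", "true")
    spark.sql("SELECT year, MAX(month) FROM events GROUP BY year").show()
    // Caveat kept in the doc string above: empty files can make the result incorrect.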
@@ -2065,18 +2063,16 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)

-  val NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST =
-    buildConf("spark.sql.optimizer.nestedPredicatePushdown.supportedFileSources")
+  val NESTED_PREDICATE_PUSHDOWN_ENABLED =
+    buildConf("spark.sql.optimizer.nestedPredicatePushdown.enabled")
       .internal()
-      .doc("A comma-separated list of data source short names or fully qualified data source " +
-        "implementation class names for which Spark tries to push down predicates for nested " +
-        "columns and/or names containing `dots` to data sources. This configuration is only " +
-        "effective with file-based data source in DSv1. Currently, Parquet implements " +
-        "both optimizations while ORC only supports predicates for names containing `dots`. The " +
-        "other data sources don't support this feature yet. So the default value is 'parquet,orc'.")
+      .doc("When true, Spark tries to push down predicates for nested columns and/or names " +
+        "containing `dots` to data sources. Currently, Parquet implements both optimizations " +
+        "while ORC only supports predicates for names containing `dots`. The other data sources " +
+        "don't support this feature yet.")
       .version("3.0.0")
-      .stringConf
-      .createWithDefault("parquet,orc")
+      .booleanConf
+      .createWithDefault(true)

   val SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED =
     buildConf("spark.sql.optimizer.serializer.nestedSchemaPruning.enabled")
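The hunk above replaces the per-source list with a single boolean, spark.sql.optimizer.nestedPredicatePushdown.enabled. A spark-shell style sketch of the filter shape the flag affects (schema, data, and path are made up for illustration):

    import spark.implicits._

    spark.conf.set("spark.sql.optimizer.nestedPredicatePushdown.enabled", "true")

    case class Name(first: String, last: String)
    case class Person(id: Long, name: Name)

    Seq(Person(1L, Name("Ada", "Lovelace"))).toDF()
      .write.mode("overwrite").parquet("/tmp/people")      // illustrative path

    // With the flag on, the predicate on the nested field name.first can be
    // pushed down to the Parquet reader instead of being evaluated after the scan.
    spark.read.parquet("/tmp/people").filter($"name.first" === "Ada").explain()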
@@ -2228,6 +2224,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

+  val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED =
+    buildConf("spark.sql.legacy.createHiveTableByDefault.enabled")
+      .internal()
+      .doc("When set to true, CREATE TABLE syntax without a provider will use hive " +
+        s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key}.")
+      .version("3.0.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING =
     buildConf("spark.sql.legacy.bucketedTableScan.outputOrdering")
       .internal()
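A sketch of what the new legacy flag changes for a provider-less CREATE TABLE, assuming a session built with Hive support; the table names are illustrative:

    // Default (false): the table uses spark.sql.sources.default (typically parquet).
    spark.sql("CREATE TABLE t_native (id INT)")

    // Legacy behavior: the same syntax creates a Hive serde table instead.
    spark.conf.set("spark.sql.legacy.createHiveTableByDefault.enabled", "true")
    spark.sql("CREATE TABLE t_hive (id INT)")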
@@ -2519,85 +2524,70 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

-  val LEGACY_PARQUET_REBASE_MODE_IN_WRITE =
-    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
+  val LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE =
+    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled")
       .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
-        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars.")
+      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+        "to the hybrid calendar (Julian + Gregorian) in write. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
-  val LEGACY_PARQUET_REBASE_MODE_IN_READ =
-    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
-      .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
-        "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " +
-        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
-        "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.")
-      .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+      .booleanConf
+      .createWithDefault(false)

-  val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
-    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
+  val LEGACY_PARQUET_REBASE_DATETIME_IN_READ =
+    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInRead.enabled")
       .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the legacy hybrid (Julian + Gregorian) calendar when writing Avro files. " +
-        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars.")
+      .doc("When true, rebase dates/timestamps " +
+        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
-
-  val LEGACY_AVRO_REBASE_MODE_IN_READ =
-    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInRead")
-      .internal()
-      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
-        "Gregorian) calendar to Proleptic Gregorian calendar when reading Avro files. " +
-        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
-        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
-        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
-        "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.")
+      .booleanConf
+      .createWithDefault(false)
+
+  val LEGACY_AVRO_REBASE_DATETIME_IN_WRITE =
+    buildConf("spark.sql.legacy.avro.rebaseDateTimeInWrite.enabled")
+      .internal()
+      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
+        "to the hybrid calendar (Julian + Gregorian) in write. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .stringConf
-      .transform(_.toUpperCase(Locale.ROOT))
-      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
-      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+      .booleanConf
+      .createWithDefault(false)

-  val SCRIPT_TRANSFORMATION_EXIT_TIMEOUT =
-    buildConf("spark.sql.scriptTransformation.exitTimeoutInSeconds")
+  val LEGACY_AVRO_REBASE_DATETIME_IN_READ =
+    buildConf("spark.sql.legacy.avro.rebaseDateTimeInRead.enabled")
       .internal()
-      .doc("Timeout for executor to wait for the termination of transformation script when EOF.")
+      .doc("When true, rebase dates/timestamps " +
+        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
+        "The rebasing is performed by converting micros/millis/days to " +
+        "a local date/timestamp in the source calendar, interpreting the resulting date/" +
+        "timestamp in the target calendar, and getting the number of micros/millis/days " +
+        "since the epoch 1970-01-01 00:00:00Z.")
       .version("3.0.0")
-      .timeConf(TimeUnit.SECONDS)
-      .checkValue(_ > 0, "The timeout value must be positive")
-      .createWithDefault(10L)
+      .booleanConf
+      .createWithDefault(false)

-  val NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
+  val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
     buildConf("spark.sql.legacy.numericConvertToTimestampEnable")
       .internal()
       .doc("When true, use legacy behavior where numeric values can be converted to timestamp.")
       .version("3.0.0")
       .booleanConf
       .createWithDefault(false)

-  val NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS =
+  val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS =
     buildConf("spark.sql.legacy.numericConvertToTimestampInSeconds")
       .internal()
-      .doc("The legacy only works when NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE is true." +
+      .doc("This legacy behavior only works when LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE is true. " +
        "When true, the value will be interpreted as seconds, which follows Spark style; " +
        "when false, the value is interpreted as milliseconds, which follows Hive style.")
       .version("3.0.0")
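To illustrate the rebase flags restored above, a sketch using the Parquet write key from the hunk; the session, date value, and output path are illustrative:

    import java.sql.Date
    import spark.implicits._

    // Rebase pre-Gregorian dates from the Proleptic Gregorian calendar to the
    // hybrid Julian + Gregorian calendar on write, so legacy readers see the same
    // local date; with the default (false) the value is written as-is.
    spark.conf.set("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled", "true")

    Seq(Date.valueOf("1000-01-01")).toDF("d")
      .write.mode("overwrite").parquet("/tmp/ancient_dates")   // illustrative path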
@@ -2633,10 +2623,7 @@ object SQLConf {
       DeprecatedConfig(ARROW_FALLBACK_ENABLED.key, "3.0",
         s"Use '${ARROW_PYSPARK_FALLBACK_ENABLED.key}' instead of it."),
       DeprecatedConfig(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key, "3.0",
-        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."),
-      DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0",
-        "Avoid to depend on this optimization to prevent a potential correctness issue. " +
-          "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule.")
+        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it.")
     )

     Map(configs.map { cfg => cfg.key -> cfg } : _*)
@@ -3129,6 +3116,8 @@ class SQLConf extends Serializable with Logging {

   def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)

+  def nestedPredicatePushdownEnabled: Boolean = getConf(NESTED_PREDICATE_PUSHDOWN_ENABLED)
+
   def serializerNestedSchemaPruningEnabled: Boolean =
     getConf(SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED)
@@ -3162,6 +3151,9 @@ class SQLConf extends Serializable with Logging {
   def allowNegativeScaleOfDecimalEnabled: Boolean =
     getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED)

+  def createHiveTableByDefaultEnabled: Boolean =
+    getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED)
+
   def truncateTableIgnorePermissionAcl: Boolean =
     getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL)
@@ -3183,16 +3175,14 @@ class SQLConf extends Serializable with Logging {

   def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID)

-
   def parquetRebaseDateTimeInReadEnabled: Boolean = {
     getConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ)
   }

-  def numericConvertToTimestampEnable: Boolean = getConf(NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE)
-
-  def numericConvertToTimestampInSeconds: Boolean = getConf(NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS)
-
+  def numericConvertToTimestampEnable: Boolean = getConf(LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE)

+  def numericConvertToTimestampInSeconds: Boolean =
+    getConf(LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_IN_SECONDS)

   /** ********************** SQLConf functionality methods ************ */