@@ -621,7 +621,7 @@ object SQLConf {
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
       .checkValues(ParquetOutputTimestampType.values.map(_.toString))
-      .createWithDefault(ParquetOutputTimestampType.TIMESTAMP_MICROS.toString)
+      .createWithDefault(ParquetOutputTimestampType.INT96.toString)
 
   val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
     .doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
@@ -845,8 +845,10 @@ object SQLConf {
       .doc("When true, enable the metadata-only query optimization that use the table's metadata " +
         "to produce the partition columns instead of table scans. It applies when all the columns " +
         "scanned are partition columns and the query has an aggregate operator that satisfies " +
-        "distinct semantics. By default the optimization is disabled, since it may return " +
-        "incorrect results when the files are empty.")
+        "distinct semantics. By default the optimization is disabled, and it is deprecated as of " +
+        "Spark 3.0 since it may return incorrect results when the files are empty; see also " +
+        "SPARK-26709. It will be removed in future releases. If you must use it, use " +
+        "'SparkSessionExtensions' instead to inject it as a custom rule.")
       .version("2.1.1")
       .booleanConf
       .createWithDefault(false)
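Since the new doc text points users at 'SparkSessionExtensions', here is a hedged sketch of that workaround. The rule class `org.apache.spark.sql.execution.OptimizeMetadataOnlyQuery` and its single `SessionCatalog` constructor argument are assumptions based on the Spark 3.0 codebase, not something this diff shows; verify them against your build:

    import org.apache.spark.sql.SparkSessionExtensions
    import org.apache.spark.sql.execution.OptimizeMetadataOnlyQuery

    // Re-injects the deprecated metadata-only rule as a custom optimizer rule.
    class MetadataOnlyExtension extends (SparkSessionExtensions => Unit) {
      override def apply(extensions: SparkSessionExtensions): Unit = {
        extensions.injectOptimizerRule { session =>
          OptimizeMetadataOnlyQuery(session.sessionState.catalog)
        }
      }
    }
    // Registered at startup, e.g.:
    //   --conf spark.sql.extensions=MetadataOnlyExtension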
@@ -2063,16 +2065,18 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
-  val NESTED_PREDICATE_PUSHDOWN_ENABLED =
-    buildConf("spark.sql.optimizer.nestedPredicatePushdown.enabled")
+  val NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST =
+    buildConf("spark.sql.optimizer.nestedPredicatePushdown.supportedFileSources")
       .internal()
-      .doc("When true, Spark tries to push down predicates for nested columns and/or names " +
-        "containing `dots` to data sources. Currently, Parquet implements both optimizations " +
-        "while ORC only supports predicates for names containing `dots`. The other data sources " +
-        "don't support this feature yet.")
+      .doc("A comma-separated list of data source short names or fully qualified data source " +
+        "implementation class names for which Spark tries to push down predicates for nested " +
+        "columns and/or names containing `dots` to data sources. This configuration is only " +
+        "effective with file-based data sources in DSv1. Currently, Parquet implements " +
+        "both optimizations while ORC only supports predicates for names containing `dots`. The " +
+        "other data sources don't support this feature yet, so the default value is 'parquet,orc'.")
       .version("3.0.0")
-      .booleanConf
-      .createWithDefault(true)
+      .stringConf
+      .createWithDefault("parquet,orc")
 
   val SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED =
     buildConf("spark.sql.optimizer.serializer.nestedSchemaPruning.enabled")
@@ -2224,15 +2228,6 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-  val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED =
-    buildConf("spark.sql.legacy.createHiveTableByDefault.enabled")
-      .internal()
-      .doc("When set to true, CREATE TABLE syntax without a provider will use hive " +
-        s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key}.")
-      .version("3.0.0")
-      .booleanConf
-      .createWithDefault(false)
-
   val LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING =
     buildConf("spark.sql.legacy.bucketedTableScan.outputOrdering")
       .internal()
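With the legacy flag removed, the provider choice for an unqualified CREATE TABLE falls back to the surrounding defaults. A hedged sketch of making the intent explicit instead of relying on a flag (assuming a SparkSession `spark`; `spark.sql.sources.default` is the key behind DEFAULT_DATA_SOURCE_NAME):

    // Name the provider in DDL rather than relying on a legacy flag...
    spark.sql("CREATE TABLE t(id INT) USING parquet")
    // ...or pin the default source that an unqualified CREATE TABLE resolves to.
    spark.conf.set("spark.sql.sources.default", "parquet")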
@@ -2524,61 +2519,75 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-  val LEGACY_PARQUET_REBASE_DATETIME_IN_WRITE =
-    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInWrite.enabled")
+  val LEGACY_PARQUET_REBASE_MODE_IN_WRITE =
+    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
       .internal()
-      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the hybrid calendar (Julian + Gregorian) in write. " +
-        "The rebasing is performed by converting micros/millis/days to " +
-        "a local date/timestamp in the source calendar, interpreting the resulted date/" +
-        "timestamp in the target calendar, and getting the number of micros/millis/days " +
-        "since the epoch 1970-01-01 00:00:00Z.")
+      .doc("When LEGACY, Spark will rebase dates/timestamps from the Proleptic Gregorian " +
+        "calendar to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
+        "When CORRECTED, Spark will not rebase and will write the dates/timestamps as they are. " +
+        "When EXCEPTION, which is the default, Spark will fail the write if it sees " +
+        "ancient dates/timestamps that are ambiguous between the two calendars.")
       .version("3.0.0")
-      .booleanConf
-      .createWithDefault(false)
-
-  val LEGACY_PARQUET_REBASE_DATETIME_IN_READ =
-    buildConf("spark.sql.legacy.parquet.rebaseDateTimeInRead.enabled")
-      .internal()
-      .doc("When true, rebase dates/timestamps " +
-        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
-        "The rebasing is performed by converting micros/millis/days to " +
-        "a local date/timestamp in the source calendar, interpreting the resulted date/" +
-        "timestamp in the target calendar, and getting the number of micros/millis/days " +
-        "since the epoch 1970-01-01 00:00:00Z.")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+
+  val LEGACY_PARQUET_REBASE_MODE_IN_READ =
+    buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
+      .internal()
+      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
+        "Gregorian) calendar to the Proleptic Gregorian calendar when reading Parquet files. " +
+        "When CORRECTED, Spark will not rebase and will read the dates/timestamps as they are. " +
+        "When EXCEPTION, which is the default, Spark will fail the read if it sees " +
+        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
+        "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.")
       .version("3.0.0")
-      .booleanConf
-      .createWithDefault(false)
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
-  val LEGACY_AVRO_REBASE_DATETIME_IN_WRITE =
-    buildConf("spark.sql.legacy.avro.rebaseDateTimeInWrite.enabled")
+  val LEGACY_AVRO_REBASE_MODE_IN_WRITE =
+    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
       .internal()
-      .doc("When true, rebase dates/timestamps from Proleptic Gregorian calendar " +
-        "to the hybrid calendar (Julian + Gregorian) in write. " +
-        "The rebasing is performed by converting micros/millis/days to " +
-        "a local date/timestamp in the source calendar, interpreting the resulted date/" +
-        "timestamp in the target calendar, and getting the number of micros/millis/days " +
-        "since the epoch 1970-01-01 00:00:00Z.")
+      .doc("When LEGACY, Spark will rebase dates/timestamps from the Proleptic Gregorian " +
+        "calendar to the legacy hybrid (Julian + Gregorian) calendar when writing Avro files. " +
+        "When CORRECTED, Spark will not rebase and will write the dates/timestamps as they are. " +
+        "When EXCEPTION, which is the default, Spark will fail the write if it sees " +
+        "ancient dates/timestamps that are ambiguous between the two calendars.")
       .version("3.0.0")
-      .booleanConf
-      .createWithDefault(false)
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
+
+  val LEGACY_AVRO_REBASE_MODE_IN_READ =
+    buildConf("spark.sql.legacy.avro.datetimeRebaseModeInRead")
+      .internal()
+      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
+        "Gregorian) calendar to the Proleptic Gregorian calendar when reading Avro files. " +
+        "When CORRECTED, Spark will not rebase and will read the dates/timestamps as they are. " +
+        "When EXCEPTION, which is the default, Spark will fail the read if it sees " +
+        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
+        "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.")
+      .version("3.0.0")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
+      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
-  val LEGACY_AVRO_REBASE_DATETIME_IN_READ =
-    buildConf("spark.sql.legacy.avro.rebaseDateTimeInRead.enabled")
+  val SCRIPT_TRANSFORMATION_EXIT_TIMEOUT =
+    buildConf("spark.sql.scriptTransformation.exitTimeoutInSeconds")
       .internal()
-      .doc("When true, rebase dates/timestamps " +
-        "from the hybrid calendar to Proleptic Gregorian calendar in read. " +
-        "The rebasing is performed by converting micros/millis/days to " +
-        "a local date/timestamp in the source calendar, interpreting the resulted date/" +
-        "timestamp in the target calendar, and getting the number of micros/millis/days " +
-        "since the epoch 1970-01-01 00:00:00Z.")
+      .doc("Timeout for the executor to wait for the termination of the transformation script on EOF.")
       .version("3.0.0")
-      .booleanConf
-      .createWithDefault(false)
+      .timeConf(TimeUnit.SECONDS)
+      .checkValue(_ > 0, "The timeout value must be positive")
+      .createWithDefault(10L)
 
   val LEGACY_NUMERIC_CONVERT_TO_TIMESTAMP_ENABLE =
     buildConf("spark.sql.legacy.numericConvertToTimestampEnable")
-      .internal()
       .doc("When true, legacy numeric types can be converted to timestamp.")
       .version("3.0.0")
       .booleanConf
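The hunk above replaces four rebase booleans with LegacyBehaviorPolicy strings defaulting to EXCEPTION. A hedged sketch of opting a job into the old behavior when writing ancient dates, plus the new script-transformation timeout (assuming a SparkSession `spark`; the output path is illustrative):

    // Under the default EXCEPTION policy this write would fail on ambiguous
    // ancient dates; LEGACY reproduces the pre-3.0 hybrid-calendar rebasing.
    spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "LEGACY")
    spark.sql("SELECT DATE '1500-01-01' AS d")
      .write.mode("overwrite").parquet("/tmp/ancient_dates")

    // Values are upper-cased by .transform, so "corrected" is accepted too.
    spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInRead", "corrected")

    // The new script-transformation timeout must be a positive number of seconds.
    spark.conf.set("spark.sql.scriptTransformation.exitTimeoutInSeconds", "60")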
@@ -2622,7 +2631,10 @@ object SQLConf {
       DeprecatedConfig(ARROW_FALLBACK_ENABLED.key, "3.0",
         s"Use '${ARROW_PYSPARK_FALLBACK_ENABLED.key}' instead of it."),
       DeprecatedConfig(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key, "3.0",
-        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it.")
+        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."),
+      DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0",
+        "Avoid depending on this optimization; it has a potential correctness issue. " +
+          "If you must use it, use 'SparkSessionExtensions' instead to inject it as a custom rule.")
     )
 
     Map(configs.map { cfg => cfg.key -> cfg }: _*)
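For context, a small illustrative sketch of how a registry built this way is typically consumed. The `deprecatedSQLConfigs` name and the `DeprecatedConfig(key, version, comment)` fields mirror this diff; `logWarning` from Spark's internal Logging trait is an assumption here, not something the diff shows:

    // Warn when a deprecated key is looked up (illustrative only).
    deprecatedSQLConfigs.get("spark.sql.optimizer.metadataOnly").foreach { cfg =>
      logWarning(s"Config '${cfg.key}' is deprecated in Spark ${cfg.version}: ${cfg.comment}")
    }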
@@ -3115,8 +3127,6 @@ class SQLConf extends Serializable with Logging {
 
   def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)
 
-  def nestedPredicatePushdownEnabled: Boolean = getConf(NESTED_PREDICATE_PUSHDOWN_ENABLED)
-
   def serializerNestedSchemaPruningEnabled: Boolean =
     getConf(SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED)
 
@@ -3150,9 +3160,6 @@ class SQLConf extends Serializable with Logging {
   def allowNegativeScaleOfDecimalEnabled: Boolean =
     getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED)
 
-  def createHiveTableByDefaultEnabled: Boolean =
-    getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED)
-
   def truncateTableIgnorePermissionAcl: Boolean =
     getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL)
 