@@ -2660,13 +2660,18 @@ object SQLConf {
2660
2660
}
2661
2661
2662
2662
val BUCKET_READ_STRATEGY_IN_JOIN =
2663
- buildConf(" spark.sql.bucketing.bucketReadStrategyInJoin" )
2664
- .doc(" When set to COALESCE, if two bucketed tables with the different number of buckets " +
2665
- " are joined, the side with a bigger number of buckets will be coalesced to have the same " +
2666
- " number of buckets as the other side. When set to REPARTITION, the side with a bigger " +
2667
- " number of buckets will be repartitioned to have the same number of buckets as the other " +
2668
- " side. The bigger number of buckets must be divisible by the smaller number of buckets, " +
2669
- " and the strategy is applied to sort-merge joins and shuffled hash joins. " +
2663
+ buildConf(" spark.sql.sources.bucketing.readStrategyInJoin" )
2664
+ .doc(" The bucket read strategy can be set to one of " +
2665
+ BucketReadStrategyInJoin .values.mkString(" , " ) +
2666
+ s " . When set to ${BucketReadStrategyInJoin .COALESCE }, if two bucketed tables with " +
2667
+ " different number of buckets are joined, the side with a bigger number of buckets will " +
2668
+ " be coalesced to have the same number of buckets as the smaller side. When set to " +
2669
+ s " ${BucketReadStrategyInJoin .REPARTITION }, the side with a smaller number of buckets " +
2670
+ " will be repartitioned to have the same number of buckets as the bigger side. For either " +
2671
+ " coalescing or repartitioning to be applied, the bigger number of buckets must be " +
2672
+ " divisible by the smaller number of buckets, and the strategy is applied to sort-merge " +
2673
+ s " joins and shuffled hash joins. By default, the read strategy is set to " +
2674
+ s " ${BucketReadStrategyInJoin .OFF }, and neither coalescing nor repartitioning is applied. " +
2670
2675
" Note: Coalescing bucketed table can avoid unnecessary shuffle in join, but it also " +
2671
2676
" reduces parallelism and could possibly cause OOM for shuffled hash join. Repartitioning " +
2672
2677
" bucketed table avoids unnecessary shuffle in join while maintaining the parallelism " +
@@ -2678,7 +2683,7 @@ object SQLConf {
2678
2683
.createWithDefault(BucketReadStrategyInJoin .OFF .toString)
2679
2684
2680
2685
val BUCKET_READ_STRATEGY_IN_JOIN_MAX_BUCKET_RATIO =
2681
- buildConf(" spark.sql.bucketing.bucketReadStrategyInJoin .maxBucketRatio" )
2686
+ buildConf(" spark.sql.sources. bucketing.readStrategyInJoin .maxBucketRatio" )
2682
2687
.doc(" The ratio of the number of two buckets being coalesced/repartitioned should be " +
2683
2688
" less than or equal to this value for bucket coalescing/repartitioning to be applied. " +
2684
2689
s " This configuration only has an effect when ' ${BUCKET_READ_STRATEGY_IN_JOIN .key}' " +
0 commit comments