
Commit f041e55

tejasapatil authored and cloud-fan committed
[SPARK-19618][SQL] Inconsistency wrt max. buckets allowed from Dataframe API vs SQL
## What changes were proposed in this pull request?

Jira: https://issues.apache.org/jira/browse/SPARK-19618

Moved the check for validating the number of buckets from `DataFrameWriter` to `BucketSpec` creation.

## How was this patch tested?

- Added more unit tests

Author: Tejas Patil <tejasp@fb.com>

Closes #16948 from tejasapatil/SPARK-19618_max_buckets.
1 parent 8487902 · commit f041e55
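To make the inconsistency concrete: before this patch, the SQL path (via `BucketSpec`) only rejected non-positive bucket counts, while the DataFrame API additionally capped the count below 100000. Below is a minimal sketch of the two paths as they behave after the patch; the local SparkSession, table names, and data are illustrative, not taken from the patch:

import org.apache.spark.sql.{AnalysisException, SparkSession}

// Illustrative setup, not part of the patch.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._
val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")

// DataFrame API path: bucketBy() builds a BucketSpec, which now owns the range check.
try df.write.bucketBy(100000, "i").saveAsTable("t1")
catch { case e: AnalysisException => println(e.getMessage) }

// SQL path: CLUSTERED BY ... INTO n BUCKETS builds the same BucketSpec,
// so it now fails with the same message.
try spark.sql(
  """CREATE TABLE t2 USING PARQUET
    |CLUSTERED BY (i) SORTED BY (j) INTO 0 BUCKETS
    |AS SELECT 1 AS i, 2 AS j""".stripMargin)
catch { case e: AnalysisException => println(e.getMessage) }

Both branches print the new message: "Number of buckets should be greater than 0 but less than 100000".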

4 files changed (+25, −19 lines)


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
Lines changed: 3 additions & 2 deletions

@@ -135,8 +135,9 @@ case class BucketSpec(
     numBuckets: Int,
     bucketColumnNames: Seq[String],
     sortColumnNames: Seq[String]) {
-  if (numBuckets <= 0) {
-    throw new AnalysisException(s"Expected positive number of buckets, but got `$numBuckets`.")
+  if (numBuckets <= 0 || numBuckets >= 100000) {
+    throw new AnalysisException(
+      s"Number of buckets should be greater than 0 but less than 100000. Got `$numBuckets`")
   }
 
   override def toString: String = {
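Because the check now lives in the `BucketSpec` constructor body, every code path that creates a spec enforces it at construction time. A minimal sketch (illustrative column names, not from the patch):

import org.apache.spark.sql.catalyst.catalog.BucketSpec

val ok  = BucketSpec(8, Seq("a"), Seq("b"))   // valid: 0 < 8 < 100000
val bad = BucketSpec(100000, Seq("a"), Nil)   // throws AnalysisException at construction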

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
Lines changed: 0 additions & 1 deletion

@@ -275,7 +275,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     }
 
     numBuckets.map { n =>
-      require(n > 0 && n < 100000, "Bucket number must be greater than 0 and less than 100000.")
       BucketSpec(n, bucketColumnNames.get, sortColumnNames.getOrElse(Nil))
     }
   }
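Dropping the require() also changes the exception type seen by DataFrame-API callers, as the test updates below confirm: an AnalysisException from BucketSpec instead of an IllegalArgumentException from DataFrameWriter. A sketch, reusing df from the example above:

// Before: java.lang.IllegalArgumentException ("requirement failed: Bucket number must be ...")
// After:  org.apache.spark.sql.AnalysisException ("Number of buckets should be ...")
try df.write.bucketBy(0, "i").saveAsTable("tt")
catch { case e: org.apache.spark.sql.AnalysisException => println(e.getMessage) }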

sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
Lines changed: 15 additions & 13 deletions

@@ -206,7 +206,7 @@ class CreateTableAsSelectSuite
     }
   }
 
-  test("create table using as select - with non-zero buckets") {
+  test("create table using as select - with valid number of buckets") {
     val catalog = spark.sessionState.catalog
     withTable("t") {
       sql(
@@ -222,19 +222,21 @@ class CreateTableAsSelectSuite
     }
   }
 
-  test("create table using as select - with zero buckets") {
+  test("create table using as select - with invalid number of buckets") {
     withTable("t") {
-      val e = intercept[AnalysisException] {
-        sql(
-          s"""
-             |CREATE TABLE t USING PARQUET
-             |OPTIONS (PATH '${path.toURI}')
-             |CLUSTERED BY (a) SORTED BY (b) INTO 0 BUCKETS
-             |AS SELECT 1 AS a, 2 AS b
-           """.stripMargin
-        )
-      }.getMessage
-      assert(e.contains("Expected positive number of buckets, but got `0`"))
+      Seq(0, 100000).foreach(numBuckets => {
+        val e = intercept[AnalysisException] {
+          sql(
+            s"""
+               |CREATE TABLE t USING PARQUET
+               |OPTIONS (PATH '${path.toURI}')
+               |CLUSTERED BY (a) SORTED BY (b) INTO $numBuckets BUCKETS
+               |AS SELECT 1 AS a, 2 AS b
+             """.stripMargin
+          )
+        }.getMessage
+        assert(e.contains("Number of buckets should be greater than 0 but less than 100000"))
+      })
     }
   }
 

sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala
Lines changed: 7 additions & 3 deletions

@@ -38,10 +38,14 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     intercept[AnalysisException](df.write.bucketBy(2, "k").saveAsTable("tt"))
   }
 
-  test("numBuckets not greater than 0 or less than 100000") {
+  test("numBuckets be greater than 0 but less than 100000") {
     val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
-    intercept[IllegalArgumentException](df.write.bucketBy(0, "i").saveAsTable("tt"))
-    intercept[IllegalArgumentException](df.write.bucketBy(100000, "i").saveAsTable("tt"))
+
+    Seq(-1, 0, 100000).foreach(numBuckets => {
+      val e = intercept[AnalysisException](df.write.bucketBy(numBuckets, "i").saveAsTable("tt"))
+      assert(
+        e.getMessage.contains("Number of buckets should be greater than 0 but less than 100000"))
+    })
   }
 
   test("specify sorting columns without bucketing columns") {
