Skip to content

Commit fdbacdf

Browse files
panbingkun authored and cloud-fan committed
[SPARK-48760][SQL][DOCS][FOLLOWUP] Add CLUSTER BY to doc sql-ref-syntax-ddl-alter-table.md
### What changes were proposed in this pull request? This PR is a follow-up to #47156 and aims to: - add `CLUSTER BY` to the doc `sql-ref-syntax-ddl-alter-table.md` - move the parser tests from `o.a.s.s.c.p.DDLParserSuite` to `AlterTableClusterByParserSuite` - use `checkError` to check the exception in `o.a.s.s.e.c.AlterTableClusterBySuiteBase` ### Why are the changes needed? - Enable the doc `sql-ref-syntax-ddl-alter-table.md` to cover the new syntax `ALTER TABLE ... CLUSTER BY ...`. - Align with other similar tests, e.g. AlterTableRename* ### Does this PR introduce _any_ user-facing change? Yes, end users can now look up the explanation of `CLUSTER BY` in the doc `sql-ref-syntax-ddl-alter-table.md`. ### How was this patch tested? Updated UT. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47254 from panbingkun/SPARK-48760_FOLLOWUP. Authored-by: panbingkun <panbingkun@baidu.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 43b6718 commit fdbacdf

File tree

7 files changed

+142
-41
lines changed

7 files changed

+142
-41
lines changed

docs/sql-ref-syntax-ddl-alter-table.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,32 @@ ALTER TABLE table_identifier DROP [ IF EXISTS ] partition_spec [PURGE]
233233
Partition to be dropped. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
234234

235235
**Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... ] )`
236+
237+
#### CLUSTER BY
238+
239+
The `ALTER TABLE ... CLUSTER BY` command can be used to change or remove the clustering columns of an existing table.
240+
241+
##### Syntax
242+
243+
```sql
244+
-- Changing Clustering Columns
245+
ALTER TABLE table_identifier CLUSTER BY ( col_name [ , ... ] )
246+
247+
-- Removing Clustering Columns
248+
ALTER TABLE table_identifier CLUSTER BY NONE
249+
```
250+
251+
##### Parameters
252+
253+
* **table_identifier**
254+
255+
Specifies a table name, which may be optionally qualified with a database name.
256+
257+
**Syntax:** `[ database_name. ] table_name`
258+
259+
* **col_name**
260+
261+
Specifies the name of the column.
236262

237263
### SET AND UNSET
238264

@@ -596,6 +622,51 @@ SHOW PARTITIONS StudentInfo;
596622
| age=20|
597623
+---------+
598624

625+
-- CLUSTER BY
626+
DESC Teacher;
627+
+------------------------+---------+-------+
628+
| col_name|data_type|comment|
629+
+------------------------+---------+-------+
630+
| name| string| NULL|
631+
| gender| string| NULL|
632+
| country| string| NULL|
633+
| age| int| NULL|
634+
|# Clustering Information| | |
635+
| # col_name|data_type|comment|
636+
| gender| string| NULL|
637+
+------------------------+---------+-------+
638+
639+
ALTER TABLE Teacher CLUSTER BY (gender, country);
640+
641+
-- After changing clustering columns
642+
DESC Teacher;
643+
+------------------------+---------+-------+
644+
| col_name|data_type|comment|
645+
+------------------------+---------+-------+
646+
| name| string| NULL|
647+
| gender| string| NULL|
648+
| country| string| NULL|
649+
| age| int| NULL|
650+
|# Clustering Information| | |
651+
| # col_name|data_type|comment|
652+
| gender| string| NULL|
653+
| country| string| NULL|
654+
+------------------------+---------+-------+
655+
656+
ALTER TABLE Teacher CLUSTER BY NONE;
657+
658+
-- After removing clustering columns
659+
DESC Teacher;
660+
+------------------------+---------+-------+
661+
| col_name|data_type|comment|
662+
+------------------------+---------+-------+
663+
| name| string| NULL|
664+
| gender| string| NULL|
665+
| country| string| NULL|
666+
| age| int| NULL|
667+
|# Clustering Information| | |
668+
+------------------------+---------+-------+
669+
599670
-- Change the fileformat
600671
ALTER TABLE loc_orc SET fileformat orc;
601672

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import java.util.Locale
2121

2222
import org.apache.spark.SparkThrowable
2323
import org.apache.spark.sql.catalyst.analysis._
24-
import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
2524
import org.apache.spark.sql.catalyst.expressions.{EqualTo, Hex, Literal}
2625
import org.apache.spark.sql.catalyst.plans.logical._
2726
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first}
@@ -236,23 +235,6 @@ class DDLParserSuite extends AnalysisTest {
236235
}
237236
}
238237

239-
test("alter table cluster by") {
240-
comparePlans(
241-
parsePlan("ALTER TABLE table_name CLUSTER BY (`a.b`, c.d, none)"),
242-
AlterTableClusterBy(
243-
UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
244-
Some(ClusterBySpec(Seq(
245-
FieldReference(Seq("a.b")),
246-
FieldReference(Seq("c", "d")),
247-
FieldReference(Seq("none")))))))
248-
249-
comparePlans(
250-
parsePlan("ALTER TABLE table_name CLUSTER BY NONE"),
251-
AlterTableClusterBy(
252-
UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
253-
None))
254-
}
255-
256238
test("create/replace table - with comment") {
257239
val createSql = "CREATE TABLE my_tab(a INT, b STRING) USING parquet COMMENT 'abc'"
258240
val replaceSql = "REPLACE TABLE my_tab(a INT, b STRING) USING parquet COMMENT 'abc'"

sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,10 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
103103
builder.build())
104104
AlterTableChangeColumnCommand(table.catalogTable.identifier, colName, newColumn)
105105

106-
case AlterTableClusterBy(ResolvedTable(catalog, ident, table: V1Table, _), clusterBySpecOpt)
106+
case AlterTableClusterBy(ResolvedTable(catalog, _, table: V1Table, _), clusterBySpecOpt)
107107
if isSessionCatalog(catalog) =>
108-
val prop = clusterBySpecOpt.map { clusterBySpec =>
109-
Map(ClusterBySpec.toProperty(table.schema, clusterBySpec, conf.resolver))
110-
}.getOrElse {
111-
Map(ClusterBySpec.toProperty(table.schema, ClusterBySpec(Nil), conf.resolver))
112-
}
108+
val prop = Map(ClusterBySpec.toProperty(table.schema,
109+
clusterBySpecOpt.getOrElse(ClusterBySpec(Nil)), conf.resolver))
113110
AlterTableSetPropertiesCommand(table.catalogTable.identifier, prop, isView = false)
114111

115112
case RenameColumn(ResolvedV1TableIdentifier(ident), _, _) =>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.spark.sql.execution.command
18+
19+
import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTable}
20+
import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
21+
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
22+
import org.apache.spark.sql.catalyst.plans.logical.AlterTableClusterBy
23+
import org.apache.spark.sql.connector.expressions.FieldReference
24+
import org.apache.spark.sql.test.SharedSparkSession
25+
26+
class AlterTableClusterByParserSuite extends AnalysisTest with SharedSparkSession {
27+
28+
test("alter table cluster by") {
29+
comparePlans(
30+
parsePlan("ALTER TABLE table_name CLUSTER BY (`a.b`, c.d, none)"),
31+
AlterTableClusterBy(
32+
UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
33+
Some(ClusterBySpec(Seq(
34+
FieldReference(Seq("a.b")),
35+
FieldReference(Seq("c", "d")),
36+
FieldReference(Seq("none")))))))
37+
38+
comparePlans(
39+
parsePlan("ALTER TABLE table_name CLUSTER BY NONE"),
40+
AlterTableClusterBy(
41+
UnresolvedTable(Seq("table_name"), "ALTER TABLE ... CLUSTER BY"),
42+
None))
43+
}
44+
}

sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableClusterBySuiteBase.scala

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,45 +45,52 @@ trait AlterTableClusterBySuiteBase extends QueryTest with DDLCommandTestUtils {
4545

4646
test("test basic ALTER TABLE with clustering columns") {
4747
withNamespaceAndTable("ns", "table") { tbl =>
48-
spark.sql(s"CREATE TABLE $tbl (id INT, data STRING) $defaultUsing CLUSTER BY (id, data)")
48+
sql(s"CREATE TABLE $tbl (id INT, data STRING) $defaultUsing CLUSTER BY (id, data)")
4949
validateClusterBy(tbl, Seq("id", "data"))
50-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY (data, id)")
50+
sql(s"ALTER TABLE $tbl CLUSTER BY (data, id)")
5151
validateClusterBy(tbl, Seq("data", "id"))
52-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY NONE")
52+
sql(s"ALTER TABLE $tbl CLUSTER BY NONE")
5353
validateClusterBy(tbl, Seq.empty)
54-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
54+
sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
5555
validateClusterBy(tbl, Seq("id"))
5656
}
5757
}
5858

5959
test("test clustering columns with comma") {
6060
withNamespaceAndTable("ns", "table") { tbl =>
61-
spark.sql(s"CREATE TABLE $tbl (`i,d` INT, data STRING) $defaultUsing " +
62-
"CLUSTER BY (`i,d`, data)")
61+
sql(s"CREATE TABLE $tbl (`i,d` INT, data STRING) $defaultUsing CLUSTER BY (`i,d`, data)")
6362
validateClusterBy(tbl, Seq("`i,d`", "data"))
64-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY (data, `i,d`)")
63+
sql(s"ALTER TABLE $tbl CLUSTER BY (data, `i,d`)")
6564
validateClusterBy(tbl, Seq("data", "`i,d`"))
6665
}
6766
}
6867

6968
test("test nested clustering columns") {
7069
withNamespaceAndTable("ns", "table") { tbl =>
71-
spark.sql(s"CREATE TABLE $tbl " +
70+
sql(s"CREATE TABLE $tbl " +
7271
s"($nestedColumnSchema) " +
7372
s"$defaultUsing CLUSTER BY (${nestedClusteringColumns.mkString(",")})")
7473
validateClusterBy(tbl, nestedClusteringColumns)
75-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY (${nestedClusteringColumnsNew.mkString(",")})")
74+
sql(s"ALTER TABLE $tbl CLUSTER BY (${nestedClusteringColumnsNew.mkString(",")})")
7675
validateClusterBy(tbl, nestedClusteringColumnsNew)
7776
}
7877
}
7978

8079
test("clustering columns not defined in schema") {
8180
withNamespaceAndTable("ns", "table") { tbl =>
8281
sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing CLUSTER BY (id)")
83-
val err = intercept[AnalysisException] {
84-
sql(s"ALTER TABLE $tbl CLUSTER BY (unknown)")
85-
}
86-
assert(err.message.contains("Couldn't find column unknown in:"))
82+
checkError(
83+
exception = intercept[AnalysisException] {
84+
sql(s"ALTER TABLE $tbl CLUSTER BY (unknown)")
85+
},
86+
errorClass = "_LEGACY_ERROR_TEMP_3060",
87+
parameters = Map("i" -> "unknown",
88+
"schema" ->
89+
"""root
90+
| |-- id: long (nullable = true)
91+
| |-- data: string (nullable = true)
92+
|""".stripMargin)
93+
)
8794
}
8895
}
8996

sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableClusterBySuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ trait AlterTableClusterBySuiteBase extends command.AlterTableClusterBySuiteBase
3636
override def validateClusterBy(tableName: String, clusteringColumns: Seq[String]): Unit = {
3737
val catalog = spark.sessionState.catalog
3838
val (_, db, t) = parseTableName(tableName)
39-
val table = catalog.getTableMetadata(TableIdentifier.apply(t, Some(db)))
39+
val table = catalog.getTableMetadata(TableIdentifier(t, Some(db)))
4040
assert(table.clusterBySpec === Some(ClusterBySpec(clusteringColumns.map(FieldReference(_)))))
4141
}
4242
}

sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableClusterBySuite.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ class AlterTableClusterBySuite extends command.AlterTableClusterBySuiteBase
4040

4141
test("test REPLACE TABLE with clustering columns") {
4242
withNamespaceAndTable("ns", "table") { tbl =>
43-
spark.sql(s"CREATE TABLE $tbl (id INT) $defaultUsing CLUSTER BY (id)")
43+
sql(s"CREATE TABLE $tbl (id INT) $defaultUsing CLUSTER BY (id)")
4444
validateClusterBy(tbl, Seq("id"))
4545

46-
spark.sql(s"REPLACE TABLE $tbl (id INT, id2 INT) $defaultUsing CLUSTER BY (id2)")
46+
sql(s"REPLACE TABLE $tbl (id INT, id2 INT) $defaultUsing CLUSTER BY (id2)")
4747
validateClusterBy(tbl, Seq("id2"))
4848

49-
spark.sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
49+
sql(s"ALTER TABLE $tbl CLUSTER BY (id)")
5050
validateClusterBy(tbl, Seq("id"))
5151
}
5252
}

0 commit comments

Comments
 (0)