Skip to content

Commit c59d94b

Browse files
committed
Support "ANALYZE TABLE tableName COMPUTE STATISTICS noscan".
1 parent 4c51098 commit c59d94b

File tree

4 files changed

+90
-5
lines changed

4 files changed

+90
-5
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ private[hive] case class AddFile(filePath: String) extends Command
4646

4747
private[hive] case class DropTable(tableName: String, ifExists: Boolean) extends Command
4848

49+
private[hive] case class AnalyzeTable(tableName: String) extends Command
50+
4951
/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */
5052
private[hive] object HiveQl {
5153
protected val nativeCommands = Seq(
@@ -74,7 +76,6 @@ private[hive] object HiveQl {
7476
"TOK_CREATEFUNCTION",
7577
"TOK_DROPFUNCTION",
7678

77-
"TOK_ANALYZE",
7879
"TOK_ALTERDATABASE_PROPERTIES",
7980
"TOK_ALTERINDEX_PROPERTIES",
8081
"TOK_ALTERINDEX_REBUILD",
@@ -92,7 +93,6 @@ private[hive] object HiveQl {
9293
"TOK_ALTERTABLE_SKEWED",
9394
"TOK_ALTERTABLE_TOUCH",
9495
"TOK_ALTERTABLE_UNARCHIVE",
95-
"TOK_ANALYZE",
9696
"TOK_CREATEDATABASE",
9797
"TOK_CREATEFUNCTION",
9898
"TOK_CREATEINDEX",
@@ -239,7 +239,6 @@ private[hive] object HiveQl {
239239
ShellCommand(sql.drop(1))
240240
} else {
241241
val tree = getAst(sql)
242-
243242
if (nativeCommands contains tree.getText) {
244243
NativeCommand(sql)
245244
} else {
@@ -387,6 +386,22 @@ private[hive] object HiveQl {
387386
ifExists) =>
388387
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
389388
DropTable(tableName, ifExists.nonEmpty)
389+
// Support "ANALYZE TABLE tableName COMPUTE STATISTICS noscan"
390+
case Token("TOK_ANALYZE",
391+
Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) ::
392+
isNoscan) =>
393+
// Reference:
394+
// https://cwiki.apache.org/confluence/display/Hive/StatsDev#StatsDev-ExistingTables
395+
if (partitionSpec.nonEmpty) {
396+
// Analyze partitions will be treated as a Hive native command.
397+
NativePlaceholder
398+
} else if (isNoscan.isEmpty) {
399+
// If users do not specify "noscan", it will be treated as a Hive native command.
400+
NativePlaceholder
401+
} else {
402+
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
403+
AnalyzeTable(tableName)
404+
}
390405
// Just fake explain for any of the native commands.
391406
case Token("TOK_EXPLAIN", explainArgs)
392407
if noExplainCommands.contains(explainArgs.head.getText) =>

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ private[hive] trait HiveStrategies {
8383

8484
case DropTable(tableName, ifExists) => execution.DropTable(tableName, ifExists) :: Nil
8585

86+
case AnalyzeTable(tableName) => execution.AnalyzeTable(tableName) :: Nil
87+
8688
case describe: logical.DescribeCommand =>
8789
val resolvedTable = context.executePlan(describe.table).analyzed
8890
resolvedTable match {

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DropTable.scala renamed to sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,33 @@ import org.apache.spark.sql.catalyst.expressions.Row
2323
import org.apache.spark.sql.execution.{Command, LeafNode}
2424
import org.apache.spark.sql.hive.HiveContext
2525

26+
/**
27+
* :: DeveloperApi ::
28+
*
29+
* Analyzes the given table in the current database to generate statistics, which will be
30+
* used in query optimizations.
31+
*
32+
* Right now, it only supports Hive tables and it only updates the size of a Hive table
33+
* in the Hive metastore.
34+
*/
35+
@DeveloperApi
36+
case class AnalyzeTable(tableName: String) extends LeafNode with Command {
37+
38+
def hiveContext = sqlContext.asInstanceOf[HiveContext]
39+
40+
def output = Seq.empty
41+
42+
override protected[sql] lazy val sideEffectResult = {
43+
hiveContext.analyze(tableName)
44+
Seq.empty[Any]
45+
}
46+
47+
override def execute(): RDD[Row] = {
48+
sideEffectResult
49+
sparkContext.emptyRDD[Row]
50+
}
51+
}
52+
2653
/**
2754
* :: DeveloperApi ::
2855
* Drops a table from the metastore and removes it if it is cached.

sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,54 @@ package org.apache.spark.sql.hive
1919

2020
import scala.reflect.ClassTag
2121

22+
2223
import org.apache.spark.sql.{SQLConf, QueryTest}
24+
import org.apache.spark.sql.catalyst.plans.logical.NativeCommand
2325
import org.apache.spark.sql.execution.{BroadcastHashJoin, ShuffledHashJoin}
2426
import org.apache.spark.sql.hive.test.TestHive
2527
import org.apache.spark.sql.hive.test.TestHive._
2628

2729
class StatisticsSuite extends QueryTest {
2830

31+
test("parse analyze commands") {
32+
def assertAnalyzeCommand(analyzeCommand: String, c: Class[_]) {
33+
val parsed = HiveQl.parseSql(analyzeCommand)
34+
val operators = parsed.collect {
35+
case a: AnalyzeTable => a
36+
case o => o
37+
}
38+
39+
assert(operators.size === 1)
40+
if (operators(0).getClass() != c) {
41+
fail(
42+
s"""$analyzeCommand expected command: $c, but got ${operators(0)}
43+
|parsed command:
44+
|$parsed
45+
""".stripMargin)
46+
}
47+
}
48+
49+
assertAnalyzeCommand(
50+
"ANALYZE TABLE Table1 COMPUTE STATISTICS",
51+
classOf[NativeCommand])
52+
assertAnalyzeCommand(
53+
"ANALYZE TABLE Table1 PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS",
54+
classOf[NativeCommand])
55+
assertAnalyzeCommand(
56+
"ANALYZE TABLE Table1 PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan",
57+
classOf[NativeCommand])
58+
assertAnalyzeCommand(
59+
"ANALYZE TABLE Table1 PARTITION(ds, hr) COMPUTE STATISTICS",
60+
classOf[NativeCommand])
61+
assertAnalyzeCommand(
62+
"ANALYZE TABLE Table1 PARTITION(ds, hr) COMPUTE STATISTICS noscan",
63+
classOf[NativeCommand])
64+
65+
assertAnalyzeCommand(
66+
"ANALYZE TABLE Table1 COMPUTE STATISTICS nOscAn",
67+
classOf[AnalyzeTable])
68+
}
69+
2970
test("analyze MetastoreRelations") {
3071
def queryTotalSize(tableName: String): BigInt =
3172
catalog.lookupRelation(None, tableName).statistics.sizeInBytes
@@ -37,7 +78,7 @@ class StatisticsSuite extends QueryTest {
3778

3879
assert(queryTotalSize("analyzeTable") === defaultSizeInBytes)
3980

40-
analyze("analyzeTable")
81+
sql("ANALYZE TABLE analyzeTable COMPUTE STATISTICS noscan")
4182

4283
assert(queryTotalSize("analyzeTable") === BigInt(11624))
4384

@@ -66,7 +107,7 @@ class StatisticsSuite extends QueryTest {
66107

67108
assert(queryTotalSize("analyzeTable_part") === defaultSizeInBytes)
68109

69-
analyze("analyzeTable_part")
110+
sql("ANALYZE TABLE analyzeTable_part COMPUTE STATISTICS noscan")
70111

71112
assert(queryTotalSize("analyzeTable_part") === BigInt(17436))
72113

0 commit comments

Comments
 (0)