
Commit beeafcf

Revert "[SPARK-5213] [SQL] Pluggable SQL Parser Support"
This reverts commit 3ba5aaa.
1 parent 473552f · commit beeafcf

File tree: 9 files changed (+42 additions, -199 deletions)
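In short, this revert removes the pluggable Dialect hook introduced by SPARK-5213 (the Dialect abstract class, DialectException, the getSQLDialect()/dialectClassName plumbing, and the dialect-switching tests) and restores the previous hard-wired parser chain: DDLParser gets the first attempt at every statement, falling back to the Catalyst SqlParser, or to HiveQl when spark.sql.dialect is set to "hiveql".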

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala

Lines changed: 6 additions & 5 deletions
@@ -25,6 +25,10 @@ import scala.util.parsing.input.CharArrayReader.EofCh
 
 import org.apache.spark.sql.catalyst.plans.logical._
 
+private[sql] object KeywordNormalizer {
+  def apply(str: String): String = str.toLowerCase()
+}
+
 private[sql] abstract class AbstractSparkSQLParser
   extends StandardTokenParsers with PackratParsers {
 
@@ -38,7 +42,7 @@ private[sql] abstract class AbstractSparkSQLParser
   }
 
   protected case class Keyword(str: String) {
-    def normalize: String = lexical.normalizeKeyword(str)
+    def normalize: String = KeywordNormalizer(str)
     def parser: Parser[String] = normalize
   }
 
@@ -86,16 +90,13 @@ class SqlLexical extends StdLexical
     reserved ++= keywords
   }
 
-  /* Normal the keyword string */
-  def normalizeKeyword(str: String): String = str.toLowerCase
-
   delimiters += (
     "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
     ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
   )
 
   protected override def processIdent(name: String) = {
-    val token = normalizeKeyword(name)
+    val token = KeywordNormalizer(name)
    if (reserved contains token) Keyword(token) else Identifier(name)
  }
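
This hunk moves keyword lowercasing back into a standalone KeywordNormalizer object shared by Keyword.normalize and SqlLexical.processIdent, keeping reserved-word lookup case-insensitive. A minimal, self-contained sketch of that behavior (the reserved set and classify are illustrative stand-ins for the real lexer):

    object KeywordNormalizer {
      def apply(str: String): String = str.toLowerCase()
    }

    object NormalizerSketch extends App {
      // Reserved words are stored already normalized.
      val reserved = Set("select", "from", "where")

      // Mirrors SqlLexical.processIdent: normalize first, then decide
      // whether the token is a keyword or a plain identifier.
      def classify(name: String): String = {
        val token = KeywordNormalizer(name)
        if (reserved contains token) s"Keyword($token)" else s"Identifier($name)"
      }

      println(classify("SELECT"))  // Keyword(select)
      println(classify("Select"))  // Keyword(select)
      println(classify("my_col"))  // Identifier(my_col)
    }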

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala

Lines changed: 0 additions & 33 deletions
This file was deleted.
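
The deleted file carried the core of the pluggable-parser API. Its shape can be inferred from the subclasses visible elsewhere in this diff (DefaultDialect in SQLContext.scala, HiveQLDialect in HiveContext.scala), each of which overrides a single parse method; treat the following as an approximate sketch, not the verbatim deleted source:

    package org.apache.spark.sql.catalyst

    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // The one hook a custom dialect had to implement: turn a SQL string
    // into a Catalyst logical plan.
    private[sql] abstract class Dialect {
      def parse(sqlText: String): LogicalPlan
    }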

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala

Lines changed: 0 additions & 2 deletions
@@ -38,8 +38,6 @@ package object errors {
   }
 }
 
-class DialectException(msg: String, cause: Throwable) extends Exception(msg, cause)
-
 /**
  * Wraps any exceptions that are thrown while executing `f` in a
  * [[catalyst.errors.TreeNodeException TreeNodeException]], attaching the provided `tree`.

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 14 additions & 68 deletions
@@ -24,7 +24,6 @@ import scala.collection.JavaConversions._
 import scala.collection.immutable
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
-import scala.util.control.NonFatal
 
 import com.google.common.reflect.TypeToken
 
@@ -33,11 +32,9 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer}
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
-import org.apache.spark.sql.catalyst.Dialect
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, expressions}
 import org.apache.spark.sql.execution.{Filter, _}
 import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation}
@@ -47,45 +44,6 @@ import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 import org.apache.spark.{Partition, SparkContext}
 
-/**
- * Currently we support the default dialect named "sql", associated with the class
- * [[DefaultDialect]]
- *
- * And we can also provide custom SQL Dialect, for example in Spark SQL CLI:
- * {{{
- *-- switch to "hiveql" dialect
- *   spark-sql>SET spark.sql.dialect=hiveql;
- *   spark-sql>SELECT * FROM src LIMIT 1;
- *
- *-- switch to "sql" dialect
- *   spark-sql>SET spark.sql.dialect=sql;
- *   spark-sql>SELECT * FROM src LIMIT 1;
- *
- *-- register the new SQL dialect
- *   spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect;
- *   spark-sql> SELECT * FROM src LIMIT 1;
- *
- *-- register the non-exist SQL dialect
- *   spark-sql> SET spark.sql.dialect=NotExistedClass;
- *   spark-sql> SELECT * FROM src LIMIT 1;
- *
- *-- Exception will be thrown and switch to dialect
- *-- "sql" (for SQLContext) or
- *-- "hiveql" (for HiveContext)
- * }}}
- */
-private[spark] class DefaultDialect extends Dialect {
-  @transient
-  protected val sqlParser = {
-    val catalystSqlParser = new catalyst.SqlParser
-    new SparkSQLParser(catalystSqlParser.parse)
-  }
-
-  override def parse(sqlText: String): LogicalPlan = {
-    sqlParser.parse(sqlText)
-  }
-}
-
 /**
  * The entry point for working with structured data (rows and columns) in Spark. Allows the
  * creation of [[DataFrame]] objects as well as the execution of SQL queries.
@@ -174,27 +132,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected[sql] lazy val optimizer: Optimizer = DefaultOptimizer
 
   @transient
-  protected[sql] val ddlParser = new DDLParser((sql: String) => { getSQLDialect().parse(sql) })
-
-  protected[sql] def getSQLDialect(): Dialect = {
-    try {
-      val clazz = Utils.classForName(dialectClassName)
-      clazz.newInstance().asInstanceOf[Dialect]
-    } catch {
-      case NonFatal(e) =>
-        // Since we didn't find the available SQL Dialect, it will fail even for SET command:
-        // SET spark.sql.dialect=sql; Let's reset as default dialect automatically.
-        val dialect = conf.dialect
-        // reset the sql dialect
-        conf.unsetConf(SQLConf.DIALECT)
-        // throw out the exception, and the default sql dialect will take effect for next query.
-        throw new DialectException(
-          s"""Instantiating dialect '$dialect' failed.
-             |Reverting to default dialect '${conf.dialect}'""".stripMargin, e)
-    }
+  protected[sql] val ddlParser = new DDLParser(sqlParser.parse(_))
+
+  @transient
+  protected[sql] val sqlParser = {
+    val fallback = new catalyst.SqlParser
+    new SparkSQLParser(fallback.parse(_))
   }
 
-  protected[sql] def parseSql(sql: String): LogicalPlan = ddlParser.parse(sql, false)
+  protected[sql] def parseSql(sql: String): LogicalPlan = {
+    ddlParser.parse(sql, false).getOrElse(sqlParser.parse(sql))
+  }
 
   protected[sql] def executeSql(sql: String): this.QueryExecution = executePlan(parseSql(sql))
 
@@ -208,12 +156,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
   @transient
   protected[sql] val defaultSession = createSession()
 
-  protected[sql] def dialectClassName = if (conf.dialect == "sql") {
-    classOf[DefaultDialect].getCanonicalName
-  } else {
-    conf.dialect
-  }
-
   sparkContext.getConf.getAll.foreach {
     case (key, value) if key.startsWith("spark.sql") => setConf(key, value)
     case _ =>
@@ -1003,7 +945,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * @group basic
    */
   def sql(sqlText: String): DataFrame = {
-    DataFrame(this, parseSql(sqlText))
+    if (conf.dialect == "sql") {
+      DataFrame(this, parseSql(sqlText))
+    } else {
+      sys.error(s"Unsupported SQL dialect: ${conf.dialect}")
+    }
   }
 
   /**
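
Taken together, these hunks replace reflective dialect loading with a fixed two-stage pipeline: DDLParser tries each statement first and, since it now returns an Option (see the ddl.scala hunk below), parseSql falls back to the plain SQL parser via getOrElse. A self-contained sketch of that control flow, with toy plan types standing in for LogicalPlan and the real parsers:

    object ParsePipelineSketch extends App {
      sealed trait Plan
      case class DdlPlan(stmt: String) extends Plan
      case class QueryPlan(stmt: String) extends Plan

      // Stand-in for DDLParser.parse(input, exceptionOnError = false):
      // None signals "not a DDL statement, let the caller fall back".
      def parseDdl(input: String): Option[Plan] =
        if (input.trim.toUpperCase.startsWith("CREATE")) Some(DdlPlan(input)) else None

      // Stand-in for SparkSQLParser / catalyst.SqlParser.
      def parseQuery(input: String): Plan = QueryPlan(input)

      // Mirrors the restored SQLContext.parseSql.
      def parseSql(input: String): Plan =
        parseDdl(input).getOrElse(parseQuery(input))

      println(parseSql("CREATE TEMPORARY TABLE t USING json"))  // DdlPlan(...)
      println(parseSql("SELECT 1"))                             // QueryPlan(SELECT 1)
    }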

sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala

Lines changed: 3 additions & 3 deletions
@@ -38,12 +38,12 @@ private[sql] class DDLParser(
     parseQuery: String => LogicalPlan)
   extends AbstractSparkSQLParser with DataTypeParser with Logging {
 
-  def parse(input: String, exceptionOnError: Boolean): LogicalPlan = {
+  def parse(input: String, exceptionOnError: Boolean): Option[LogicalPlan] = {
     try {
-      parse(input)
+      Some(parse(input))
     } catch {
       case ddlException: DDLException => throw ddlException
-      case _ if !exceptionOnError => parseQuery(input)
+      case _ if !exceptionOnError => None
       case x: Throwable => throw x
     }
   }
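
The signature change is the pivot of the revert: instead of delegating to the injected parseQuery on failure, DDLParser.parse now reports "not DDL" as None, while genuine DDL syntax errors (DDLException) still propagate in both modes. A small sketch of that error-handling contract, with a toy grammar in place of the real parser:

    object DdlContractSketch extends App {
      class DDLException(msg: String) extends RuntimeException(msg)

      // Toy stand-in for the DDL grammar: accepts CREATE statements and
      // rejects everything else with a generic parse failure.
      def parseDdlOnly(input: String): String =
        if (input.startsWith("CREATE")) s"DdlPlan($input)"
        else throw new RuntimeException("failure: DDL statement expected")

      def parse(input: String, exceptionOnError: Boolean): Option[String] =
        try Some(parseDdlOnly(input))
        catch {
          case e: DDLException => throw e                 // malformed DDL is always fatal
          case _: Throwable if !exceptionOnError => None  // lenient: caller falls back
          case x: Throwable => throw x                    // strict: propagate
        }

      println(parse("CREATE TABLE t", exceptionOnError = false))  // Some(DdlPlan(CREATE TABLE t))
      println(parse("SELECT 1", exceptionOnError = false))        // None
    }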

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 0 additions & 22 deletions
@@ -19,18 +19,13 @@ package org.apache.spark.sql
 
 import org.scalatest.BeforeAndAfterAll
 
-import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.execution.GeneratedAggregate
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.TestData._
 import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.test.TestSQLContext.{udf => _, _}
-
 import org.apache.spark.sql.types._
 
-/** A SQL Dialect for testing purpose, and it can not be nested type */
-class MyDialect extends DefaultDialect
-
 class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
   // Make sure the tables are loaded.
   TestData
@@ -79,23 +74,6 @@
       Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
   }
 
-  test("SQL Dialect Switching to a new SQL parser") {
-    val newContext = new SQLContext(TestSQLContext.sparkContext)
-    newContext.setConf("spark.sql.dialect", classOf[MyDialect].getCanonicalName())
-    assert(newContext.getSQLDialect().getClass === classOf[MyDialect])
-    assert(newContext.sql("SELECT 1").collect() === Array(Row(1)))
-  }
-
-  test("SQL Dialect Switch to an invalid parser with alias") {
-    val newContext = new SQLContext(TestSQLContext.sparkContext)
-    newContext.sql("SET spark.sql.dialect=MyTestClass")
-    intercept[DialectException] {
-      newContext.sql("SELECT 1")
-    }
-    // test if the dialect set back to DefaultSQLDialect
-    assert(newContext.getSQLDialect().getClass === classOf[DefaultDialect])
-  }
-
   test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") {
     checkAnswer(
       sql("SELECT a FROM testData2 SORT BY a"),

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 16 additions & 25 deletions
@@ -20,9 +20,6 @@ package org.apache.spark.sql.hive
 import java.io.{BufferedReader, InputStreamReader, PrintStream}
 import java.sql.Timestamp
 
-import org.apache.hadoop.hive.ql.parse.VariableSubstitution
-import org.apache.spark.sql.catalyst.Dialect
-
 import scala.collection.JavaConversions._
 import scala.language.implicitConversions
 
@@ -45,15 +42,6 @@ import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNative
 import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy}
 import org.apache.spark.sql.types._
 
-/**
- * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext
- */
-private[hive] class HiveQLDialect extends Dialect {
-  override def parse(sqlText: String): LogicalPlan = {
-    HiveQl.parseSql(sqlText)
-  }
-}
-
 /**
  * An instance of the Spark SQL execution engine that integrates with data stored in Hive.
  * Configuration for Hive is read from hive-site.xml on the classpath.
@@ -93,16 +81,25 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   protected[sql] def convertCTAS: Boolean =
     getConf("spark.sql.hive.convertCTAS", "false").toBoolean
 
-  @transient
-  protected[sql] lazy val substitutor = new VariableSubstitution()
-
-  protected[sql] override def parseSql(sql: String): LogicalPlan = {
-    super.parseSql(substitutor.substitute(hiveconf, sql))
-  }
-
   override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
     new this.QueryExecution(plan)
 
+  @transient
+  protected[sql] val ddlParserWithHiveQL = new DDLParser(HiveQl.parseSql(_))
+
+  override def sql(sqlText: String): DataFrame = {
+    val substituted = new VariableSubstitution().substitute(hiveconf, sqlText)
+    // TODO: Create a framework for registering parsers instead of just hardcoding if statements.
+    if (conf.dialect == "sql") {
+      super.sql(substituted)
+    } else if (conf.dialect == "hiveql") {
+      val ddlPlan = ddlParserWithHiveQL.parse(sqlText, exceptionOnError = false)
+      DataFrame(this, ddlPlan.getOrElse(HiveQl.parseSql(substituted)))
+    } else {
+      sys.error(s"Unsupported SQL dialect: ${conf.dialect}. Try 'sql' or 'hiveql'")
+    }
+  }
+
   /**
    * Invalidate and refresh all the cached the metadata of the given table. For performance reasons,
    * Spark SQL or the external data source library it uses might cache certain metadata about a
@@ -359,12 +356,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     }
   }
 
-  override protected[sql] def dialectClassName = if (conf.dialect == "hiveql") {
-    classOf[HiveQLDialect].getCanonicalName
-  } else {
-    super.dialectClassName
-  }
-
   @transient
   private val hivePlanner = new SparkPlanner with HiveStrategies {
     val hiveContext = self
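
With the Dialect lookup gone, HiveContext.sql is back to a hard-coded two-way dispatch on conf.dialect, as the restored TODO comment itself notes. A compact sketch of the decision logic (string plans stand in for DataFrame, and variable substitution is elided):

    object HiveDispatchSketch extends App {
      def sql(sqlText: String, dialect: String): String = dialect match {
        case "sql"    => s"SparkSqlPlan($sqlText)"  // SQLContext's DDL-then-SQL chain
        case "hiveql" => s"HiveQlPlan($sqlText)"    // DDLParser(HiveQl) first, then HiveQl
        case other    => sys.error(s"Unsupported SQL dialect: $other. Try 'sql' or 'hiveql'")
      }

      println(sql("SELECT 1", "sql"))
      println(sql("SELECT * FROM src LIMIT 1", "hiveql"))
    }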

sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala

Lines changed: 1 addition & 4 deletions
@@ -107,10 +107,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   /** Fewer partitions to speed up testing. */
   protected[sql] override lazy val conf: SQLConf = new SQLConf {
     override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
-
-    // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared.
-    // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql"
-    override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql")
+    override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
   }
 }
116113
