
Commit 8045814

rxin authored and yhuai committed
[SPARK-14782][SPARK-14778][SQL] Remove HiveConf dependency from HiveSqlAstBuilder
## What changes were proposed in this pull request?

The patch removes the HiveConf dependency from HiveSqlAstBuilder. This is required in order to merge HiveSqlParser and SparkSqlAstBuilder, which would require getting rid of the Hive-specific dependencies in HiveSqlParser. This patch also accomplishes [SPARK-14778], "Remove HiveSessionState.substitutor".

## How was this patch tested?

This should be covered by existing tests.

Author: Reynold Xin <rxin@databricks.com>

Closes #12550 from rxin/SPARK-14782.
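The mechanical core of the patch is replacing `hiveConf.getVar(ConfVars.X)` lookups with string-keyed `SQLConf.getConfString` calls that carry an explicit default. A minimal sketch of the pattern; `MiniConf` is a hypothetical stand-in that models only the two `getConfString` overloads the patch relies on:

```scala
// Hypothetical stand-in for SQLConf, modeling just the two
// getConfString overloads used by this patch.
class MiniConf(settings: Map[String, String]) {
  // Mirrors SQLConf.getConfString(key): throws when the key is unset.
  def getConfString(key: String): String =
    settings.getOrElse(key, throw new NoSuchElementException(key))

  // Mirrors the two-argument overload: falls back to an explicit default.
  def getConfString(key: String, default: String): String =
    settings.getOrElse(key, default)
}

val conf = new MiniConf(Map.empty)

// Before: hiveConf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE)
// After: a plain string key plus the default Hive would otherwise supply.
val serde = conf.getConfString(
  "hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")
```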
1 parent 90933e2 commit 8045814


4 files changed: +27 -40 lines changed


sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 1 addition & 1 deletion
@@ -289,7 +289,7 @@ object SQLConf {
   val DEFAULT_DATA_SOURCE_NAME = SQLConfigBuilder("spark.sql.sources.default")
     .doc("The default data source to use in input/output.")
     .stringConf
-    .createWithDefault("org.apache.spark.sql.parquet")
+    .createWithDefault("parquet")
 
   // This is used to control the when we will split a schema's JSON string to multiple pieces
   // in order to fit the JSON string in metastore's table property (by default, the value has
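The shortened default works because data source resolution accepts the short alias as well as the legacy fully qualified name, which is kept for backward compatibility. A hedged usage sketch against the 2.0-era API; the in-scope `sqlContext` value and the `/tmp/events` path are assumptions:

```scala
// Both spellings should resolve to the same Parquet data source; the
// old fully qualified name survives via the backward-compatibility map.
val byAlias = sqlContext.read.format("parquet").load("/tmp/events")
val byLegacyName =
  sqlContext.read.format("org.apache.spark.sql.parquet").load("/tmp/events")

// Or lean on the (new) default entirely.
sqlContext.setConf("spark.sql.sources.default", "parquet")
val byDefault = sqlContext.read.load("/tmp/events")
```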

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 6 additions & 5 deletions
@@ -24,7 +24,6 @@ import com.google.common.base.Objects
 import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.hive.common.StatsSetupConst
-import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.{TableType => HiveTableType}
 import org.apache.hadoop.hive.metastore.api.FieldSchema
 import org.apache.hadoop.hive.ql.metadata.{Table => HiveTable, _}
@@ -46,6 +45,7 @@ import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => Parq
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.hive.execution.HiveNativeCommand
 import org.apache.spark.sql.hive.orc.{DefaultSource => OrcDefaultSource}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 private[hive] case class HiveSerDe(
@@ -59,10 +59,10 @@ private[hive] object HiveSerDe {
    *
    * @param source Currently the source abbreviation can be one of the following:
    *               SequenceFile, RCFile, ORC, PARQUET, and case insensitive.
-   * @param hiveConf Hive Conf
+   * @param conf SQLConf
    * @return HiveSerDe associated with the specified source
    */
-  def sourceToSerDe(source: String, hiveConf: HiveConf): Option[HiveSerDe] = {
+  def sourceToSerDe(source: String, conf: SQLConf): Option[HiveSerDe] = {
     val serdeMap = Map(
       "sequencefile" ->
         HiveSerDe(
@@ -73,7 +73,8 @@ private[hive] object HiveSerDe {
         HiveSerDe(
           inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"),
           outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"),
-          serde = Option(hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE))),
+          serde = Option(conf.getConfString("hive.default.rcfile.serde",
+            "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"))),
 
       "orc" ->
         HiveSerDe(
@@ -297,7 +298,7 @@ private[hive] class HiveMetastoreCatalog(hive: SQLContext) extends Logging {
         CatalogTableType.MANAGED_TABLE
       }
 
-      val maybeSerDe = HiveSerDe.sourceToSerDe(provider, hiveconf)
+      val maybeSerDe = HiveSerDe.sourceToSerDe(provider, conf)
       val dataSource =
         DataSource(
           hive,
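With the new signature, a serde is resolved from a plain `SQLConf`, and an unset Hive key falls back to the hard-coded default visible in the hunk above. A sketch of the new call shape; since `HiveSerDe` and `SQLConf` are private to Spark's own packages, this would only compile inside the sql/hive module:

```scala
import org.apache.spark.sql.hive.HiveSerDe
import org.apache.spark.sql.internal.SQLConf

val conf = new SQLConf
// "rcfile" now reads hive.default.rcfile.serde via getConfString and
// falls back to LazyBinaryColumnarSerDe when the key is unset.
val rcfile = HiveSerDe.sourceToSerDe("rcfile", conf)
rcfile.flatMap(_.serde).foreach(println)

// Unrecognized abbreviations still yield None rather than throwing.
assert(HiveSerDe.sourceToSerDe("csv", conf).isEmpty)
```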

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala

Lines changed: 1 addition & 6 deletions
@@ -53,11 +53,6 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
    */
   lazy val metadataHive: HiveClient = sharedState.metadataHive.newSession()
 
-  /**
-   * A Hive helper class for substituting variables in a SQL statement.
-   */
-  lazy val substitutor = new VariableSubstitution
-
   override lazy val conf: SQLConf = new SQLConf {
     override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
   }
@@ -114,7 +109,7 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
   /**
    * Parser for HiveQl query texts.
    */
-  override lazy val sqlParser: ParserInterface = new HiveSqlParser(substitutor, hiveconf)
+  override lazy val sqlParser: ParserInterface = new HiveSqlParser(conf, hiveconf)
 
   /**
    * Planner that takes into account Hive-specific strategies.
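With `substitutor` gone from the session state, constructing the parser needs only the two confs; the parser now owns its `VariableSubstitution` (see the next file). A sketch assuming Hive's `${hiveconf:...}` substitution works outside a full Hive session; the config key and table name are hypothetical:

```scala
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.spark.sql.hive.execution.HiveSqlParser
import org.apache.spark.sql.internal.SQLConf

val hiveconf = new HiveConf()
hiveconf.set("mydb.table", "logs") // hypothetical key and value

val parser = new HiveSqlParser(new SQLConf, hiveconf)

// The parser's own substitutor rewrites ${hiveconf:mydb.table} to "logs"
// before the text ever reaches ANTLR; the result is an unresolved plan.
val plan = parser.parsePlan("SELECT * FROM ${hiveconf:mydb.table}")
```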

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala

Lines changed: 19 additions & 28 deletions
@@ -17,13 +17,12 @@
 package org.apache.spark.sql.hive.execution
 
 import scala.collection.JavaConverters._
+import scala.util.Try
 
 import org.antlr.v4.runtime.{ParserRuleContext, Token}
 import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars
-import org.apache.hadoop.hive.ql.parse.{EximUtil, VariableSubstitution}
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution
 import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.parser._
@@ -32,18 +31,16 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.SparkSqlAstBuilder
 import org.apache.spark.sql.execution.command.{CreateTable, CreateTableLike}
 import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView, HiveSerDe}
-import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveMetastoreTypes, HiveSerDe}
-import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
+import org.apache.spark.sql.internal.SQLConf
 
 /**
  * Concrete parser for HiveQl statements.
  */
-class HiveSqlParser(
-    substitutor: VariableSubstitution,
-    hiveconf: HiveConf)
-  extends AbstractSqlParser {
+class HiveSqlParser(conf: SQLConf, hiveconf: HiveConf) extends AbstractSqlParser {
 
-  val astBuilder = new HiveSqlAstBuilder(hiveconf)
+  val astBuilder = new HiveSqlAstBuilder(conf)
+
+  lazy val substitutor = new VariableSubstitution
 
   protected override def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
     super.parse(substitutor.substitute(hiveconf, command))(toResult)
@@ -57,7 +54,7 @@ class HiveSqlParser(
 /**
  * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier.
  */
-class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
+class HiveSqlAstBuilder(conf: SQLConf) extends SparkSqlAstBuilder {
   import ParserUtils._
 
   /**
@@ -184,8 +181,8 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
 
     // Storage format
     val defaultStorage: CatalogStorageFormat = {
-      val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT)
-      val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType, hiveConf)
+      val defaultStorageType = conf.getConfString("hive.default.fileformat", "textfile")
+      val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType, conf)
       CatalogStorageFormat(
         locationUri = None,
         inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
@@ -323,7 +320,7 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
 
     // Decode and input/output format.
     type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String])
-    def format(fmt: RowFormatContext, confVar: ConfVars): Format = fmt match {
+    def format(fmt: RowFormatContext, configKey: String): Format = fmt match {
       case c: RowFormatDelimitedContext =>
         // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema
         // expects a seq of pairs in which the old parsers' token names are used as keys.
@@ -345,26 +342,27 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
         val CatalogStorageFormat(None, None, None, Some(name), props) = visitRowFormatSerde(c)
 
         // SPARK-10310: Special cases LazySimpleSerDe
-        val recordHandler = if (name == classOf[LazySimpleSerDe].getCanonicalName) {
-          Option(hiveConf.getVar(confVar))
+        val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") {
+          Try(conf.getConfString(configKey)).toOption
         } else {
           None
         }
         (Seq.empty, Option(name), props.toSeq, recordHandler)
 
       case null =>
         // Use default (serde) format.
-        val name = hiveConf.getVar(ConfVars.HIVESCRIPTSERDE)
+        val name = conf.getConfString("hive.script.serde",
+          "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")
         val props = Seq(serdeConstants.FIELD_DELIM -> "\t")
-        val recordHandler = Option(hiveConf.getVar(confVar))
+        val recordHandler = Try(conf.getConfString(configKey)).toOption
         (Nil, Option(name), props, recordHandler)
     }
 
     val (inFormat, inSerdeClass, inSerdeProps, reader) =
-      format(inRowFormat, ConfVars.HIVESCRIPTRECORDREADER)
+      format(inRowFormat, "hive.script.recordreader")
 
     val (outFormat, outSerdeClass, outSerdeProps, writer) =
-      format(inRowFormat, ConfVars.HIVESCRIPTRECORDWRITER)
+      format(outRowFormat, "hive.script.recordwriter")
 
     HiveScriptIOSchema(
       inFormat, outFormat,
@@ -374,13 +372,6 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
       schemaLess)
   }
 
-  /**
-   * Create location string.
-   */
-  override def visitLocationSpec(ctx: LocationSpecContext): String = {
-    EximUtil.relativeToAbsolutePath(hiveConf, super.visitLocationSpec(ctx))
-  }
-
   /** Empty storage format for default values and copies. */
   private val EmptyStorageFormat = CatalogStorageFormat(None, None, None, None, Map.empty)
 
@@ -402,7 +393,7 @@ class HiveSqlAstBuilder(hiveConf: HiveConf) extends SparkSqlAstBuilder {
   override def visitGenericFileFormat(
       ctx: GenericFileFormatContext): CatalogStorageFormat = withOrigin(ctx) {
     val source = ctx.identifier.getText
-    HiveSerDe.sourceToSerDe(source, hiveConf) match {
+    HiveSerDe.sourceToSerDe(source, conf) match {
       case Some(s) =>
         EmptyStorageFormat.copy(
           inputFormat = s.inputFormat,
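Two details in this file deserve a note. First, the script-transform hunk also fixes a latent bug: the writer's format is now derived from `outRowFormat` where it previously reused `inRowFormat`. Second, `Option(hiveConf.getVar(...))` becomes `Try(conf.getConfString(key)).toOption`, so a key absent from the session conf surfaces as `None` rather than a compiled-in Hive default. A minimal standalone sketch of that idiom; the lookup function below is a stand-in for `SQLConf.getConfString`:

```scala
import scala.util.Try

// Stand-in for SQLConf.getConfString(key): the single-argument form
// throws when the key is unset.
def getConfString(key: String): String =
  sys.error(s"key not found: $key") // pretend every key is unset

// The patch's idiom: convert the throwing lookup into an Option.
val recordReader: Option[String] =
  Try(getConfString("hive.script.recordreader")).toOption

assert(recordReader.isEmpty) // unset key => None, not a baked-in default
```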
