
Commit 9fe30cf

Author: Davies Liu (committed)
Commit message: refactor
1 parent bcaddb3 commit 9fe30cf

File tree
7 files changed: +46 −65 lines


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 22 additions & 16 deletions
@@ -44,37 +44,47 @@ trait FunctionRegistry {
 
   /* Get the class of the registered function by specified name. */
   def lookupFunction(name: String): Option[ExpressionInfo]
-
-  def copy(): FunctionRegistry
 }
 
 class SimpleFunctionRegistry extends FunctionRegistry {
 
   private val functionBuilders =
     StringKeyHashMap[(ExpressionInfo, FunctionBuilder)](caseSensitive = false)
 
-  override def registerFunction(name: String, info: ExpressionInfo, builder: FunctionBuilder)
-  : Unit = {
-    functionBuilders.put(name, (info, builder))
+  override def registerFunction(
+      name: String,
+      info: ExpressionInfo,
+      builder: FunctionBuilder): Unit = {
+    synchronized {
+      functionBuilders.put(name, (info, builder))
+    }
   }
 
   override def lookupFunction(name: String, children: Seq[Expression]): Expression = {
-    val func = functionBuilders.get(name).map(_._2).getOrElse {
-      throw new AnalysisException(s"undefined function $name")
+    val func = synchronized {
+      functionBuilders.get(name).map(_._2).getOrElse {
+        throw new AnalysisException(s"undefined function $name")
+      }
     }
     func(children)
   }
 
-  override def listFunction(): Seq[String] = functionBuilders.iterator.map(_._1).toList.sorted
+  override def listFunction(): Seq[String] = synchronized {
+    functionBuilders.iterator.map(_._1).toList.sorted
+  }
 
   override def lookupFunction(name: String): Option[ExpressionInfo] = {
-    functionBuilders.get(name).map(_._1)
+    synchronized {
+      functionBuilders.get(name).map(_._1)
+    }
   }
 
-  override def copy(): SimpleFunctionRegistry = {
+  def copy(): SimpleFunctionRegistry = {
     val registry = new SimpleFunctionRegistry
-    functionBuilders.iterator.foreach { case (name, (info, builder)) =>
+    synchronized {
+      functionBuilders.iterator.foreach { case (name, (info, builder)) =>
       registry.registerFunction(name, info, builder)
+      }
     }
     registry
   }

@@ -101,10 +111,6 @@ object EmptyFunctionRegistry extends FunctionRegistry {
   override def lookupFunction(name: String): Option[ExpressionInfo] = {
     throw new UnsupportedOperationException
   }
-
-  override def copy(): FunctionRegistry = {
-    this
-  }
 }
 
 

@@ -270,7 +276,7 @@ object FunctionRegistry {
     expression[InputFileName]("input_file_name")
   )
 
-  val builtin: FunctionRegistry = {
+  val builtin: SimpleFunctionRegistry = {
     val fr = new SimpleFunctionRegistry
     expressions.foreach { case (name, (info, builder)) => fr.registerFunction(name, info, builder) }
     fr
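As a side note, the pattern this hunk applies is easy to see in isolation: every read and write of the shared builder map goes through synchronized, and copy() takes its snapshot under the same lock so a new session can be seeded from the builtin registry without racing concurrent registrations. A minimal standalone sketch of that pattern (illustrative names only, not Spark's API):

    import scala.collection.mutable

    class SyncRegistry[T] {
      private val entries = mutable.HashMap.empty[String, T]

      def register(name: String, value: T): Unit = synchronized {
        entries.put(name.toLowerCase, value)
      }

      def lookup(name: String): Option[T] = synchronized {
        entries.get(name.toLowerCase)
      }

      def list(): Seq[String] = synchronized {
        entries.keys.toList.sorted
      }

      // Snapshot under the lock, then populate a fresh, independent instance.
      def copy(): SyncRegistry[T] = {
        val snapshot = synchronized { entries.toList }
        val registry = new SyncRegistry[T]
        snapshot.foreach { case (name, value) => registry.register(name, value) }
        registry
      }
    }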

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 9 additions & 3 deletions
@@ -64,14 +64,17 @@ import org.apache.spark.util.Utils
 class SQLContext private[sql](
     @transient val sparkContext: SparkContext,
     @transient protected[sql] val cacheManager: CacheManager)
-  extends org.apache.spark.Logging
-  with Serializable {
+  extends org.apache.spark.Logging with Serializable {
 
   self =>
 
   def this(sparkContext: SparkContext) = this(sparkContext, new CacheManager)
   def this(sparkContext: JavaSparkContext) = this(sparkContext.sc)
 
+  /**
+   * Returns a SQLContext as new session, with separated SQL configurations, temporary tables,
+   * registered functions, but share the same SparkContext and CacheManager.
+   */
   def newSession(): SQLContext = {
     new SQLContext(sparkContext, cacheManager)
   }

@@ -207,6 +210,9 @@ class SQLContext private[sql](
     conf.dialect
   }
 
+  /**
+   * Add a jar to SQLContext
+   */
   protected[sql] def addJar(path: String): Unit = {
     sparkContext.addJar(path)
   }

@@ -1230,7 +1236,7 @@ object SQLContext {
   }
 
   /**
-   * Clear the SQLContext for current thread
+   * Clear the active SQLContext for current thread
    */
   def clearActive(): Unit = {
     activeContexts.remove()
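The new doc comment above is the heart of the change: a session created with newSession() gets its own SQL configuration, temporary tables and registered functions while reusing the parent's SparkContext and CacheManager. A hedged usage sketch (assumes a Spark 1.x-era SQLContext is already in scope as sqlContext; not taken from the commit):

    val session1 = sqlContext.newSession()
    val session2 = sqlContext.newSession()

    // Each session keeps its own SQLConf ...
    session1.setConf("spark.sql.shuffle.partitions", "10")
    session2.setConf("spark.sql.shuffle.partitions", "200")

    // ... but both sessions share the same underlying SparkContext (and cache).
    assert(session1.sparkContext eq session2.sparkContext)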

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala

Lines changed: 6 additions & 36 deletions
@@ -26,7 +26,6 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.{ArrayBuffer, Map => SMap}
 import scala.util.control.NonFatal
 
-import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.FieldSchema
 import org.apache.hadoop.hive.shims.Utils
 import org.apache.hive.service.cli._

@@ -140,24 +139,22 @@ private[hive] class SparkExecuteStatementOperation(
     if (!runInBackground) {
       runInternal()
     } else {
-      val hiveConf = getConfigForOperation()
       val sparkServiceUGI = Utils.getUGI()
 
       // Runnable impl to call runInternal asynchronously,
       // from a different thread
       val backgroundOperation = new Runnable() {
 
         override def run(): Unit = {
-          val doAsAction = new PrivilegedExceptionAction[Object]() {
-            override def run(): Object = {
+          val doAsAction = new PrivilegedExceptionAction[Unit]() {
+            override def run(): Unit = {
               try {
                 runInternal()
               } catch {
                 case e: HiveSQLException =>
                   setOperationException(e)
                   log.error("Error running hive query: ", e)
               }
-              return null
             }
           }
 

@@ -174,7 +171,7 @@ private[hive] class SparkExecuteStatementOperation(
       try {
         // This submit blocks if no background threads are available to run this operation
         val backgroundHandle =
-          getParentSession().getSessionManager().submitBackgroundOperation(backgroundOperation)
+          parentSession.getSessionManager().submitBackgroundOperation(backgroundOperation)
         setBackgroundHandle(backgroundHandle)
       } catch {
         case rejected: RejectedExecutionException =>

@@ -193,6 +190,9 @@ private[hive] class SparkExecuteStatementOperation(
     statementId = UUID.randomUUID().toString
     logInfo(s"Running query '$statement' with $statementId")
     setState(OperationState.RUNNING)
+    val executionHiveClassLoader =
+      hiveContext.executionHive.state.getConf.getClassLoader
+    Thread.currentThread().setContextClassLoader(executionHiveClassLoader)
     HiveThriftServer2.listener.onStatementStart(
       statementId,
       parentSession.getSessionHandle.getSessionId.toString,

@@ -262,34 +262,4 @@ private[hive] class SparkExecuteStatementOperation(
       }
     }
   }
-
-  /**
-   * If there are query specific settings to overlay, then create a copy of config
-   * There are two cases we need to clone the session config that's being passed to hive driver
-   * 1. Async query -
-   *    If the client changes a config setting, that shouldn't reflect in the execution
-   *    already underway
-   * 2. confOverlay -
-   *    The query specific settings should only be applied to the query config and not session
-   * @return new configuration
-   * @throws HiveSQLException
-   */
-  private def getConfigForOperation(): HiveConf = {
-    var sqlOperationConf = getParentSession().getHiveConf()
-    if (!getConfOverlay().isEmpty() || runInBackground) {
-      // clone the partent session config for this query
-      sqlOperationConf = new HiveConf(sqlOperationConf)
-
-      // apply overlay query specific settings, if any
-      getConfOverlay().asScala.foreach { case (k, v) =>
-        try {
-          sqlOperationConf.verifyAndSet(k, v)
-        } catch {
-          case e: IllegalArgumentException =>
-            throw new HiveSQLException("Error applying statement specific settings", e)
-        }
-      }
-    }
-    return sqlOperationConf
-  }
 }
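Two ideas in this file are worth calling out. First, switching the doAs action to PrivilegedExceptionAction[Unit] removes the need for the dummy `return null` that PrivilegedExceptionAction[Object] forced. Second, the operation now sets the thread's context class loader to the execution Hive client's class loader before running, and the per-operation HiveConf cloning helper (getConfigForOperation) is removed. A minimal sketch of the Unit-typed doAs pattern (the runAs helper is hypothetical, not part of the commit):

    import java.security.PrivilegedExceptionAction
    import org.apache.hadoop.security.UserGroupInformation

    // Run `body` as the given UGI; returning Unit means no placeholder value is needed.
    def runAs(ugi: UserGroupInformation)(body: => Unit): Unit = {
      ugi.doAs(new PrivilegedExceptionAction[Unit] {
        override def run(): Unit = body
      })
    }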

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala

Lines changed: 1 addition & 3 deletions
@@ -130,9 +130,7 @@ case class HiveTableScan(
   }
 
   protected override def doExecute(): RDD[InternalRow] = if (!relation.hiveQlTable.isPartitioned) {
-    sqlContext.asInstanceOf[HiveContext].executionHive.withHiveState {
-      hadoopReader.makeRDDForTable(relation.hiveQlTable)
-    }
+    hadoopReader.makeRDDForTable(relation.hiveQlTable)
   } else {
     hadoopReader.makeRDDForPartitionedTable(
       prunePartitions(relation.getHiveQlPartitions(partitionPruningPred)))

sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala

Lines changed: 0 additions & 4 deletions
@@ -103,10 +103,6 @@ private[hive] class HiveFunctionRegistry(underlying: analysis.FunctionRegistry)
       }
     }.getOrElse(None))
   }
-
-  override def copy(): HiveFunctionRegistry = {
-    this
-  }
 }
 
 private[hive] case class HiveSimpleUDF(funcWrapper: HiveFunctionWrapper, children: Seq[Expression])

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 1 addition & 1 deletion
@@ -1133,7 +1133,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
     conf.clear()
   }
 
-  // Enable this test once fix the current_database()
+  // TODO: Enable this test once fix SPARK-10902
   ignore("current_database with mutiple sessions") {
     sql("create database a")
     sql("use a")

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 7 additions & 2 deletions
@@ -160,10 +160,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   }
 
   test("show functions") {
-    val allFunctions =
+    val allBuiltinFunctions =
       (FunctionRegistry.builtin.listFunction().toSet[String] ++
         org.apache.hadoop.hive.ql.exec.FunctionRegistry.getFunctionNames.asScala).toList.sorted
-    checkAnswer(sql("SHOW functions"), allFunctions.map(Row(_)))
+    // The TestContext is shared by all the test cases, some functions may be registered before
+    // this, so we check that all the builtin functions are returned.
+    val allFunctions = sql("SHOW functions").collect().map(r => r(0))
+    allBuiltinFunctions.foreach { f =>
+      assert(allFunctions.contains(f))
+    }
     checkAnswer(sql("SHOW functions abs"), Row("abs"))
     checkAnswer(sql("SHOW functions 'abs'"), Row("abs"))
     checkAnswer(sql("SHOW functions abc.abs"), Row("abs"))
