Skip to content

Commit 695cd2d

Browse files
committed
Correctly set the class loader in the conf of the state in client wrapper.
1 parent b3378fe commit 695cd2d

File tree

4 files changed

+36
-15
lines changed

4 files changed

+36
-15
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
164164
logInfo(s"Initializing execution hive, version $hiveExecutionVersion")
165165
new ClientWrapper(
166166
version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion),
167-
config = newTemporaryConfiguration())
167+
config = newTemporaryConfiguration(),
168+
initClassLoader = Utils.getContextOrSparkClassLoader)
168169
}
169170
SessionState.setCurrentSessionState(executionHive.state)
170171

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,12 @@ import org.apache.spark.sql.execution.QueryExecutionException
5454
* @param version the version of hive used when picking function calls that are not compatible.
5555
* @param config a collection of configuration options that will be added to the hive conf before
5656
* opening the hive client.
57+
* @param initClassLoader the classloader used when creating the `state` field of this ClientWrapper.
5758
*/
5859
private[hive] class ClientWrapper(
5960
version: HiveVersion,
60-
config: Map[String, String])
61+
config: Map[String, String],
62+
initClassLoader: ClassLoader)
6163
extends ClientInterface
6264
with Logging {
6365

@@ -98,11 +100,18 @@ private[hive] class ClientWrapper(
98100
// Create an internal session state for this ClientWrapper.
99101
val state = {
100102
val original = Thread.currentThread().getContextClassLoader
101-
Thread.currentThread().setContextClassLoader(getClass.getClassLoader)
103+
// Switch to the initClassLoader.
104+
Thread.currentThread().setContextClassLoader(initClassLoader)
102105
val ret = try {
103106
val oldState = SessionState.get()
104107
if (oldState == null) {
105108
val initialConf = new HiveConf(classOf[SessionState])
109+
// HiveConf is a Hadoop Configuration, which has a field of classLoader and
110+
// the initial value will be the current thread's context class loader
111+
// (i.e. initClassLoader here).
112+
// We call initialConf.setClassLoader(initClassLoader) here to make
113+
// this action explicit.
114+
initialConf.setClassLoader(initClassLoader)
106115
config.foreach { case (k, v) =>
107116
logDebug(s"Hive Config: $k=$v")
108117
initialConf.set(k, v)
@@ -125,20 +134,17 @@ private[hive] class ClientWrapper(
125134
def conf: HiveConf = SessionState.get().getConf
126135

127136
// TODO: should be a def?
137+
// When this val client is created, its HiveConf (conf) is the one associated with state.
128138
private val client = Hive.get(conf)
129139

130140
/**
131141
* Runs `f` with ThreadLocal session state and classloaders configured for this version of hive.
132142
*/
133143
private def withHiveState[A](f: => A): A = synchronized {
134144
val original = Thread.currentThread().getContextClassLoader
135-
// This setContextClassLoader is used for Hive 0.12's metastore since Hive 0.12 will not
136-
// internally override the context class loader of the current thread with the class loader
137-
// associated with the HiveConf in `state`.
138-
Thread.currentThread().setContextClassLoader(getClass.getClassLoader)
139145
// Set the thread local metastore client to the client associated with this ClientWrapper.
140146
Hive.set(client)
141-
// Starting from Hive 0.13.0, setCurrentSessionState will use the classLoader associated
147+
// setCurrentSessionState will use the classLoader associated
142148
// with the HiveConf in `state` to override the context class loader of the current
143149
// thread.
144150
shim.setCurrentSessionState(state)

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ import org.apache.hadoop.hive.ql.session.SessionState
4343
*/
4444
private[client] sealed abstract class Shim {
4545

46+
/**
47+
* Set the current SessionState to the given SessionState. Also, set the context classloader of
48+
* the current thread to the one set in the HiveConf of this given `state`.
49+
* @param state the SessionState to set as the current SessionState.
50+
*/
4651
def setCurrentSessionState(state: SessionState): Unit
4752

4853
/**
@@ -159,7 +164,15 @@ private[client] class Shim_v0_12 extends Shim {
159164
JBoolean.TYPE,
160165
JBoolean.TYPE)
161166

162-
override def setCurrentSessionState(state: SessionState): Unit = startMethod.invoke(null, state)
167+
override def setCurrentSessionState(state: SessionState): Unit = {
168+
// Starting from Hive 0.13, setCurrentSessionState will internally override
169+
// the context class loader of the current thread with the class loader set in
170+
// the conf of the SessionState. So, for this Hive 0.12 shim, we add the same
171+
// behavior. This way, shim.setCurrentSessionState behaves consistently
172+
// across all Hive versions.
173+
Thread.currentThread().setContextClassLoader(state.getConf.getClassLoader)
174+
startMethod.invoke(null, state)
175+
}
163176

164177
override def getDataLocation(table: Table): Option[String] =
165178
Option(getDataLocationMethod.invoke(table)).map(_.toString())

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,8 @@ private[hive] object IsolatedClientLoader {
9595
* @param config A set of options that will be added to the HiveConf of the constructed client.
9696
* @param isolationOn When true, custom versions of barrier classes will be constructed. Must be
9797
* true unless loading the version of hive that is on Spark's classloader.
98-
* @param rootClassLoader The system root classloader.
99-
* @param baseClassLoader The spark classloader that is used to load shared classes. Must not know
100-
* about Hive classes.
98+
* @param rootClassLoader The system root classloader. Must not know about Hive classes.
99+
* @param baseClassLoader The spark classloader that is used to load shared classes.
101100
*/
102101
private[hive] class IsolatedClientLoader(
103102
val version: HiveVersion,
@@ -110,8 +109,8 @@ private[hive] class IsolatedClientLoader(
110109
val barrierPrefixes: Seq[String] = Seq.empty)
111110
extends Logging {
112111

113-
// Check to make sure that the base classloader does not know about Hive.
114-
assert(Try(baseClassLoader.loadClass("org.apache.hive.HiveConf")).isFailure)
112+
// Check to make sure that the root classloader does not know about Hive.
113+
assert(Try(rootClassLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure)
115114

116115
/** All jars used by the hive specific classloader. */
117116
protected def allJars = execJars.toArray
@@ -145,13 +144,15 @@ private[hive] class IsolatedClientLoader(
145144
def doLoadClass(name: String, resolve: Boolean): Class[_] = {
146145
val classFileName = name.replaceAll("\\.", "/") + ".class"
147146
if (isBarrierClass(name) && isolationOn) {
147+
// For barrier classes, we construct a new copy of the class.
148148
val bytes = IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName))
149149
logDebug(s"custom defining: $name - ${util.Arrays.hashCode(bytes)}")
150150
defineClass(name, bytes, 0, bytes.length)
151151
} else if (!isSharedClass(name)) {
152152
logDebug(s"hive class: $name - ${getResource(classToPath(name))}")
153153
super.loadClass(name, resolve)
154154
} else {
155+
// For shared classes, we delegate to baseClassLoader.
155156
logDebug(s"shared class: $name")
156157
baseClassLoader.loadClass(name)
157158
}
@@ -167,7 +168,7 @@ private[hive] class IsolatedClientLoader(
167168
classLoader
168169
.loadClass(classOf[ClientWrapper].getName)
169170
.getConstructors.head
170-
.newInstance(version, config)
171+
.newInstance(version, config, classLoader)
171172
.asInstanceOf[ClientInterface]
172173
} catch {
173174
case e: InvocationTargetException =>

0 commit comments

Comments
 (0)