From 7bd149b60da1557c4c6c35256908d97a4a03481a Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 24 Oct 2023 22:56:10 +0800 Subject: [PATCH] [SPARK-45646][SQL] Remove hardcoding time variables prior to Hive 2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Remove the following hardcoded time variables prior to Hive 2.0 ``` hive.stats.jdbc.timeout hive.stats.retries.wait ``` ### Why are the changes needed? It's kind of a cleanup since Spark 4.0 only supports Hive 2.0 and above. The removal also reduces the warning messages on `spark-sql` bootstrap. ### Does this PR introduce _any_ user-facing change? Yes, it reduces the warning messages on `spark-sql` bootstrap. ```patch ➜ $ build/sbt clean package -Phive-thriftserver ➜ $ SPARK_PREPEND_CLASSES=true bin/spark-sql NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of assembly. 23/10/24 15:42:22 WARN Utils: Your hostname, pop-os resolves to a loopback address: 127.0.1.1; using 10.221.99.150 instead (on interface wlp61s0) 23/10/24 15:42:22 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 23/10/24 15:42:23 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable - 23/10/24 15:42:25 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist - 23/10/24 15:42:25 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist 23/10/24 15:42:28 WARN ObjectStore: Version information not found in metastore. 
hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0 23/10/24 15:42:28 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore chengpan127.0.1.1 23/10/24 15:42:28 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException Spark Web UI available at http://10.221.99.150:4040 Spark master: local[*], Application Id: local-1698133344448 spark-sql (default)> ``` ### How was this patch tested? Pass GA and manually test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43506 from pan3793/SPARK-45646. Authored-by: Cheng Pan Signed-off-by: yangjie01 --- .../org/apache/spark/sql/hive/HiveUtils.scala | 15 ++------------- .../apache/spark/sql/hive/HiveUtilsSuite.scala | 13 ------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index ab54b15af6300..5ce7977ab168c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -269,7 +269,7 @@ private[spark] object HiveUtils extends Logging { // // Here we enumerate all time `ConfVar`s and convert their values to numeric strings according // to their output time units. 
- val commonTimeVars = Seq( + Seq( ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY -> TimeUnit.SECONDS, ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT -> TimeUnit.SECONDS, ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME -> TimeUnit.SECONDS, @@ -309,18 +309,7 @@ private[spark] object HiveUtils extends Logging { ConfVars.SPARK_RPC_CLIENT_HANDSHAKE_TIMEOUT -> TimeUnit.MILLISECONDS ).map { case (confVar, unit) => confVar.varname -> HiveConf.getTimeVar(hadoopConf, confVar, unit).toString - } - - // The following configurations were removed by HIVE-12164(Hive 2.0) - val hardcodingTimeVars = Seq( - ("hive.stats.jdbc.timeout", "30s") -> TimeUnit.SECONDS, - ("hive.stats.retries.wait", "3000ms") -> TimeUnit.MILLISECONDS - ).map { case ((key, defaultValue), unit) => - val value = hadoopConf.get(key, defaultValue) - key -> HiveConf.toTime(value, unit, unit).toString - } - - (commonTimeVars ++ hardcodingTimeVars).toMap + }.toMap } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 823ac8ed957e6..10dbbc80c9ec1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -106,17 +106,4 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } } } - - test("SPARK-27349: Dealing with TimeVars removed in Hive 2.x") { - // Test default value - val defaultConf = new Configuration - assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.jdbc.timeout") === "30") - assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.retries.wait") === "3000") - - testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "40s", 40) - testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "1d", 1 * 24 * 60 * 60) - - testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "4000ms", 4000) - 
testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "1d", 1 * 24 * 60 * 60 * 1000) - } }