From b42d235c29302b9faa4254d07db1282207345f69 Mon Sep 17 00:00:00 2001 From: Vladimir Golubev Date: Fri, 3 May 2024 15:15:19 -0700 Subject: [PATCH] [SPARK-48114][CORE] Precompile template regex to avoid unnecessary work ### What changes were proposed in this pull request? The error message template regex is now precompiled to avoid unnecessary work. ### Why are the changes needed? `SparkRuntimeException` uses `SparkThrowableHelper`, which uses `ErrorClassesJsonReader` to create the error message string from templates in `error-conditions.json`, but the template regex is compiled on every `SparkRuntimeException` constructor invocation. This slows down error construction, in particular for `UnivocityParser` + `FailureSafeParser`, where it is a hot path. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - `testOnly org.apache.spark.sql.errors.QueryExecutionErrorsSuite` - Manually checked a CSV parsing error ### Was this patch authored or co-authored using generative AI tooling? No Closes #46365 from vladimirg-db/vladimirg-db/precompile-regexes-in-error-classes-json-reader. 
Authored-by: Vladimir Golubev Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/ErrorClassesJSONReader.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala b/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala index 6d8ccecf9b780..ff94e87dc7461 100644 --- a/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala +++ b/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala @@ -49,7 +49,8 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { sub.setEnableUndefinedVariableException(true) sub.setDisableSubstitutionInValues(true) try { - sub.replace(messageTemplate.replaceAll("<([a-zA-Z0-9_-]+)>", "\\$\\{$1\\}")) + sub.replace(ErrorClassesJsonReader.TEMPLATE_REGEX.replaceAllIn( + messageTemplate, "\\$\\{$1\\}")) } catch { case _: IllegalArgumentException => throw SparkException.internalError( s"Undefined error message parameter for error class: '$errorClass'. " + @@ -59,8 +60,7 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { def getMessageParameters(errorClass: String): Seq[String] = { val messageTemplate = getMessageTemplate(errorClass) - val pattern = "<([a-zA-Z0-9_-]+)>".r - val matches = pattern.findAllIn(messageTemplate).toSeq + val matches = ErrorClassesJsonReader.TEMPLATE_REGEX.findAllIn(messageTemplate).toSeq matches.map(m => m.stripSuffix(">").stripPrefix("<")) } @@ -106,6 +106,8 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { } private object ErrorClassesJsonReader { + private val TEMPLATE_REGEX = "<([a-zA-Z0-9_-]+)>".r + private val mapper: JsonMapper = JsonMapper.builder() .addModule(DefaultScalaModule) .build()