Skip to content

Commit ed7ba7d

Browse files
author
Marcelo Vanzin
committed
[SPARK-23850][SQL] Add separate config for SQL options redaction.
The old code was relying on a core configuration and extended its default value to include patterns that also redacted things users want to see in the app's environment. Instead, add a SQL-specific option for which options to redact, and apply both the core and SQL-specific rules when redacting the options in the save command. This is a little sub-optimal since it adds another config, but it retains the current default behavior. While there I also fixed a typo and a couple of minor config API usage issues in the related redaction option that SQL already had. Tested with existing unit tests, plus checking the env page on a shell UI. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #21158 from vanzin/SPARK-23850.
1 parent 807ba44 commit ed7ba7d

File tree

6 files changed

+27
-11
lines changed

6 files changed

+27
-11
lines changed

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ package object config {
342342
"a property key or value, the value is redacted from the environment UI and various logs " +
343343
"like YARN and event logs.")
344344
.regexConf
345-
.createWithDefault("(?i)secret|password|url|user|username".r)
345+
.createWithDefault("(?i)secret|password".r)
346346

347347
private[spark] val STRING_REDACTION_PATTERN =
348348
ConfigBuilder("spark.redaction.string.regex")

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,8 +1155,17 @@ object SQLConf {
11551155
.booleanConf
11561156
.createWithDefault(true)
11571157

1158+
val SQL_OPTIONS_REDACTION_PATTERN =
1159+
buildConf("spark.sql.redaction.options.regex")
1160+
.doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
1161+
"information. The values of options whose names that match this regex will be redacted " +
1162+
"in the explain output. This redaction is applied on top of the global redaction " +
1163+
s"configuration defined by ${SECRET_REDACTION_PATTERN.key}.")
1164+
.regexConf
1165+
.createWithDefault("(?i)url".r)
1166+
11581167
val SQL_STRING_REDACTION_PATTERN =
1159-
ConfigBuilder("spark.sql.redaction.string.regex")
1168+
buildConf("spark.sql.redaction.string.regex")
11601169
.doc("Regex to decide which parts of strings produced by Spark contain sensitive " +
11611170
"information. When this regex matches a string part, that string part is replaced by a " +
11621171
"dummy value. This is currently used to redact the output of SQL explain commands. " +
@@ -1429,7 +1438,7 @@ class SQLConf extends Serializable with Logging {
14291438

14301439
def fileCompressionFactor: Double = getConf(FILE_COMRESSION_FACTOR)
14311440

1432-
def stringRedationPattern: Option[Regex] = SQL_STRING_REDACTION_PATTERN.readFrom(reader)
1441+
def stringRedactionPattern: Option[Regex] = getConf(SQL_STRING_REDACTION_PATTERN)
14331442

14341443
def sortBeforeRepartition: Boolean = getConf(SORT_BEFORE_REPARTITION)
14351444

@@ -1738,6 +1747,17 @@ class SQLConf extends Serializable with Logging {
17381747
}.toSeq
17391748
}
17401749

1750+
/**
1751+
* Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
1752+
*/
1753+
def redactOptions(options: Map[String, String]): Map[String, String] = {
1754+
val regexes = Seq(
1755+
getConf(SQL_OPTIONS_REDACTION_PATTERN),
1756+
SECRET_REDACTION_PATTERN.readFrom(reader))
1757+
1758+
regexes.foldLeft(options.toSeq) { case (opts, r) => Utils.redact(Some(r), opts) }.toMap
1759+
}
1760+
17411761
/**
17421762
* Return whether a given key is set in this [[SQLConf]].
17431763
*/

sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
6969
* Shorthand for calling redactString() without specifying redacting rules
7070
*/
7171
private def redact(text: String): String = {
72-
Utils.redact(sqlContext.sessionState.conf.stringRedationPattern, text)
72+
Utils.redact(sqlContext.sessionState.conf.stringRedactionPattern, text)
7373
}
7474
}
7575

sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
225225
* Redact the sensitive information in the given string.
226226
*/
227227
private def withRedaction(message: String): String = {
228-
Utils.redact(sparkSession.sessionState.conf.stringRedationPattern, message)
228+
Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, message)
229229
}
230230

231231
/** A special namespace for commands that can be used to debug query execution. */

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@
1717

1818
package org.apache.spark.sql.execution.datasources
1919

20-
import org.apache.spark.SparkEnv
2120
import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
2221
import org.apache.spark.sql.catalyst.plans.QueryPlan
2322
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
2423
import org.apache.spark.sql.execution.command.RunnableCommand
24+
import org.apache.spark.sql.internal.SQLConf
2525
import org.apache.spark.sql.sources.CreatableRelationProvider
26-
import org.apache.spark.util.Utils
2726

2827
/**
2928
* Saves the results of `query` into a data source.
@@ -50,7 +49,7 @@ case class SaveIntoDataSourceCommand(
5049
}
5150

5251
override def simpleString: String = {
53-
val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap
52+
val redacted = SQLConf.get.redactOptions(options)
5453
s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}"
5554
}
5655
}

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@ import org.apache.spark.sql.test.SharedSQLContext
2323

2424
class SaveIntoDataSourceCommandSuite extends SharedSQLContext {
2525

26-
override protected def sparkConf: SparkConf = super.sparkConf
27-
.set("spark.redaction.regex", "(?i)password|url")
28-
2926
test("simpleString is redacted") {
3027
val URL = "connection.url"
3128
val PASS = "123"

0 commit comments

Comments
 (0)