
Commit dc1d9f7

Authored and committed by Marcelo Vanzin
[SPARK-23850][sql][branch-2.2] Add separate config for SQL options redaction.
The old code relied on a core configuration and extended its default value to also cover settings that should be redacted in the app's environment. Instead, add a SQL-specific option for which options to redact, and apply both the core and SQL-specific rules when redacting the options in the save command.

This is a little sub-optimal since it adds another config, but it retains the current default behavior. While there I also fixed a typo and a couple of minor config API usage issues in the related redaction option that SQL already had.

Tested with existing unit tests, plus checking the env page on a shell UI.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #21158 from vanzin/SPARK-23850.

(cherry picked from commit ed7ba7d)
Signed-off-by: Marcelo Vanzin <vanzin@cloudera.com>
1 parent 8c223b6 commit dc1d9f7
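
For orientation, here is a minimal sketch of the configuration surface after this change, assuming a local Spark session; any regex value other than the documented defaults ("(?i)secret|password" in core, "(?i)url" for SQL options) is made up for illustration.

import org.apache.spark.sql.SparkSession

// Hedged sketch: the two redaction rules that exist after this change.
// Anything beyond the documented defaults is illustrative, not from the commit.
object RedactionConfigSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("redaction-config-sketch")
      // Core rule (default "(?i)secret|password"): redacts matching keys/values in the
      // environment UI, event logs, etc.
      .config("spark.redaction.regex", "(?i)secret|password|token")
      // New SQL-only rule (default "(?i)url"): redacts matching keys in a SQL command's
      // options map, applied on top of the core rule.
      .config("spark.sql.redaction.options.regex", "(?i)url|user")
      .getOrCreate()

    println(spark.conf.get("spark.sql.redaction.options.regex"))
    spark.stop()
  }
}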

File tree

5 files changed: +38 / -8 lines


core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 1 addition & 1 deletion
@@ -247,7 +247,7 @@ package object config {
         "a property key or value, the value is redacted from the environment UI and various logs " +
         "like YARN and event logs.")
       .regexConf
-      .createWithDefault("(?i)secret|password|url|user|username".r)
+      .createWithDefault("(?i)secret|password".r)
 
   private[spark] val STRING_REDACTION_PATTERN =
     ConfigBuilder("spark.redaction.string.regex")
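
In other words, the core default no longer treats URL- or user-related keys as sensitive; those move to the SQL-side rule introduced below. A small self-contained sketch of the effect (the isSensitive helper is an illustration, not Spark's Utils.redact):

import scala.util.matching.Regex

// Illustration only: which keys the narrowed core default "(?i)secret|password"
// still considers sensitive. Not Spark code.
object CoreRedactionDefaultDemo {
  val coreDefault: Regex = "(?i)secret|password".r

  def isSensitive(key: String): Boolean = coreDefault.findFirstIn(key).isDefined

  def main(args: Array[String]): Unit = {
    println(isSensitive("spark.hadoop.fs.s3a.secret.key")) // true
    println(isSensitive("spark.ssl.keyPassword"))          // true
    println(isSensitive("connection.url"))                 // false: URLs now fall to the SQL-side rule
  }
}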

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 11 additions & 0 deletions
@@ -2625,6 +2625,17 @@ private[spark] object Utils extends Logging {
     }
   }
 
+  /**
+   * Redact the sensitive values in the given map. If a map key matches the redaction pattern then
+   * its value is replaced with a dummy text.
+   */
+  def redact(regex: Option[Regex], kvs: Seq[(String, String)]): Seq[(String, String)] = {
+    regex match {
+      case None => kvs
+      case Some(r) => redact(r, kvs)
+    }
+  }
+
   private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
     // If the sensitive information regex matches with either the key or the value, redact the value
     // While the original intent was to only redact the value if the key matched with the regex,
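
The new overload simply makes "no pattern configured" explicit: None returns the pairs untouched, while Some(r) delegates to the existing Regex-based redaction. A hedged, standalone sketch of that behavior (redactValues is a stand-in for Spark's private redact(Regex, Seq), and the replacement text is assumed):

import scala.util.matching.Regex

// Standalone sketch of the Option[Regex] overload's behavior; not Spark's code.
object RedactOverloadSketch {
  private val placeholder = "*********(redacted)" // assumed replacement text

  // Stand-in for Spark's private redact(Regex, Seq[(String, String)]).
  private def redactValues(r: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] =
    kvs.map { case (k, v) =>
      if (r.findFirstIn(k).isDefined || r.findFirstIn(v).isDefined) (k, placeholder) else (k, v)
    }

  def redact(regex: Option[Regex], kvs: Seq[(String, String)]): Seq[(String, String)] =
    regex match {
      case None => kvs                      // nothing configured: leave the pairs alone
      case Some(r) => redactValues(r, kvs)  // otherwise apply the pattern
    }

  def main(args: Array[String]): Unit = {
    val opts = Seq("user" -> "alice", "password" -> "hunter2")
    println(redact(None, opts))                   // unchanged
    println(redact(Some("(?i)password".r), opts)) // password value replaced
  }
}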

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 21 additions & 0 deletions
@@ -29,6 +29,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.sql.catalyst.analysis.Resolver
+import org.apache.spark.util.Utils
 import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////

@@ -819,6 +820,15 @@ object SQLConf {
       .intConf
       .createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt)
 
+  val SQL_OPTIONS_REDACTION_PATTERN =
+    buildConf("spark.sql.redaction.options.regex")
+      .doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
+        "information. The values of options whose names that match this regex will be redacted " +
+        "in the explain output. This redaction is applied on top of the global redaction " +
+        s"configuration defined by ${SECRET_REDACTION_PATTERN.key}.")
+      .regexConf
+      .createWithDefault("(?i)url".r)
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }

@@ -1181,6 +1191,17 @@ class SQLConf extends Serializable with Logging {
     }.toSeq
   }
 
+  /**
+   * Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
+   */
+  def redactOptions(options: Map[String, String]): Map[String, String] = {
+    val regexes = Seq(
+      getConf(SQL_OPTIONS_REDACTION_PATTERN),
+      SECRET_REDACTION_PATTERN.readFrom(reader))
+
+    regexes.foldLeft(options.toSeq) { case (opts, r) => Utils.redact(Some(r), opts) }.toMap
+  }
+
   /**
    * Return whether a given key is set in this [[SQLConf]].
    */
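
redactOptions layers the two rules: the SQL-specific pattern is applied first, then the core pattern, so a key matching either one ends up redacted. A self-contained sketch of that folding, assuming the documented defaults and a stand-in redactOnce helper in place of Utils.redact:

import scala.util.matching.Regex

// Illustrative stand-in for the layering in SQLConf.redactOptions; not Spark's code.
object RedactOptionsSketch {
  private def redactOnce(r: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] =
    kvs.map { case (k, v) =>
      if (r.findFirstIn(k).isDefined || r.findFirstIn(v).isDefined) (k, "*********(redacted)") else (k, v)
    }

  def redactOptions(options: Map[String, String]): Map[String, String] = {
    // SQL-side default first, then the core default; either match redacts the value.
    val regexes = Seq("(?i)url".r, "(?i)secret|password".r)
    regexes.foldLeft(options.toSeq) { case (opts, r) => redactOnce(r, opts) }.toMap
  }

  def main(args: Array[String]): Unit = {
    val opts = Map("url" -> "jdbc:postgresql://db/prod", "dbtable" -> "t1")
    println(redactOptions(opts)) // url is redacted by the SQL rule; dbtable is left alone
  }
}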

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala

Lines changed: 5 additions & 4 deletions
@@ -17,12 +17,11 @@
 
 package org.apache.spark.sql.execution.datasources
 
-import org.apache.spark.SparkEnv
 import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.command.RunnableCommand
-import org.apache.spark.util.Utils
+import org.apache.spark.sql.internal.SQLConf
 
 /**
  * Saves the results of `query` in to a data source.

@@ -53,7 +52,9 @@ case class SaveIntoDataSourceCommand(
   }
 
   override def simpleString: String = {
-    val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap
-    s"SaveIntoDataSourceCommand ${provider}, ${partitionColumns}, ${redacted}, ${mode}"
+    val redacted = SparkSession.getActiveSession
+      .map(_.sessionState.conf.redactOptions(options))
+      .getOrElse(Map())
+    s"SaveIntoDataSourceCommand ${provider}, ${redacted}, ${mode}"
   }
 }
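
The design choice here is to resolve redaction through whatever session is currently active and, when there is none, to drop the options from the string entirely rather than print them raw. A hedged, self-contained sketch of that fallback (FakeConf and describe are local stand-ins, not Spark's API):

import scala.util.matching.Regex

// Self-contained sketch of the fallback in the new simpleString; FakeConf is a
// stand-in for the session's SQLConf, not Spark's code.
object SimpleStringFallbackSketch {
  final case class FakeConf(
      sqlRegex: Regex = "(?i)url".r,
      coreRegex: Regex = "(?i)secret|password".r) {
    def redactOptions(options: Map[String, String]): Map[String, String] =
      Seq(sqlRegex, coreRegex).foldLeft(options.toSeq) { case (kvs, r) =>
        kvs.map { case (k, v) =>
          if (r.findFirstIn(k).isDefined) (k, "*********(redacted)") else (k, v)
        }
      }.toMap
  }

  def describe(active: Option[FakeConf], provider: String,
      options: Map[String, String], mode: String): String = {
    val redacted = active.map(_.redactOptions(options)).getOrElse(Map())
    s"SaveIntoDataSourceCommand ${provider}, ${redacted}, ${mode}"
  }

  def main(args: Array[String]): Unit = {
    val opts = Map("url" -> "jdbc:postgresql://db/prod", "dbtable" -> "t")
    println(describe(Some(FakeConf()), "jdbc", opts, "Append")) // url value redacted
    println(describe(None, "jdbc", opts, "Append"))             // no active session: options dropped
  }
}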

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala

Lines changed: 0 additions & 3 deletions
@@ -23,9 +23,6 @@ import org.apache.spark.sql.test.SharedSQLContext
 
 class SaveIntoDataSourceCommandSuite extends SharedSQLContext {
 
-  override protected def sparkConf: SparkConf = super.sparkConf
-    .set("spark.redaction.regex", "(?i)password|url")
-
   test("simpleString is redacted") {
     val URL = "connection.url"
     val PASS = "123"
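
With the defaults after this change, the suite no longer needs to widen spark.redaction.regex: keys like "url" fall under the SQL-side default and password-like keys under the core default. A quick standalone check of that claim (plain Scala, not the suite's actual assertions; the key names are illustrative):

import scala.util.matching.Regex

// Standalone check, not the suite's code: the default rules cover URL- and
// password-style option keys without any extra configuration.
object DefaultRulesCoverTestKeys {
  private def matches(r: Regex, key: String): Boolean = r.findFirstIn(key).isDefined

  def main(args: Array[String]): Unit = {
    val sqlDefault = "(?i)url".r              // spark.sql.redaction.options.regex
    val coreDefault = "(?i)secret|password".r // spark.redaction.regex
    println(matches(sqlDefault, "url"))       // true
    println(matches(coreDefault, "password")) // true
  }
}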
