[SPARK-33141][SQL] Capture SQL configs when creating permanent views #30289

Closed · wants to merge 3 commits
Changes from 1 commit
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
@@ -51,6 +51,8 @@ license: |
- In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case.

- In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached.

- In Spark 3.1, creating or altering a permanent view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.view.applySQLConfigs` to `false`.

## Upgrading from Spark SQL 3.0 to 3.0.1

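A minimal sketch of the behavior change described in the migration note above; `spark` is an active SparkSession and `t` an existing table (both illustrative), and the legacy config name is the one introduced by this commit:

// Configs in effect at CREATE VIEW time are captured and stored with the view.
spark.sql("SET spark.sql.ansi.enabled = true")
spark.sql("CREATE VIEW v AS SELECT * FROM t")

// Later, even if the session config changes, Spark 3.1 parses and analyzes the
// view query under the captured value (ANSI mode on), not the current session value.
spark.sql("SET spark.sql.ansi.enabled = false")
spark.sql("SELECT * FROM v").show()

// The pre-3.1 behavior (always use the current session configs) can be restored with:
spark.sql("SET spark.sql.legacy.view.applySQLConfigs = false")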
@@ -1016,7 +1016,9 @@ class Analyzer(override val catalogManager: CatalogManager)
s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " +
"around this.")
}
executeSameContext(child)
SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewQuerySQLConfigs)) {
executeSameContext(child)
}
}
view.copy(child = newChild)
case p @ SubqueryAlias(_, view: View) =>
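The wrapper above relies on SQLConf.withExistingConf, which installs a given SQLConf as the thread-local conf returned by SQLConf.get for the duration of a block and restores the previous one afterwards. A minimal standalone sketch of that pattern (config key and value are illustrative):

import org.apache.spark.sql.internal.SQLConf

val pinned = new SQLConf()
pinned.setConfString("spark.sql.caseSensitive", "true")

SQLConf.withExistingConf(pinned) {
  // Anything that reads SQLConf.get inside this block, such as the analyzer rules above,
  // sees the pinned conf rather than the active session's conf.
  assert(SQLConf.get.caseSensitiveAnalysis)
}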
@@ -795,14 +795,19 @@ class SessionCatalog(

if (metadata.tableType == CatalogTableType.VIEW) {
val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text."))
logDebug(s"'$viewText' will be used for the view($table).")
val viewConfigs = metadata.viewQuerySQLConfigs
val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) {
parser.parsePlan(viewText)
}

logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.")
// The relation is a view, so we wrap the relation by:
// 1. Add a [[View]] operator over the relation to keep track of the view desc;
// 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view.
val child = View(
desc = metadata,
output = metadata.schema.toAttributes,
child = parser.parsePlan(viewText))
child = viewPlan)
SubqueryAlias(multiParts, child)
} else {
SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options))
@@ -25,6 +25,7 @@ import scala.collection.mutable
import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.json4s.jackson.JsonMethods

import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
@@ -38,6 +39,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.JsonProtocol


/**
@@ -321,6 +323,23 @@ case class CatalogTable(
)
}

/**
* Return the SQL configs of the query that created this view. These configs are applied
* during parsing and analysis of the view. The result is empty if the CatalogTable is not
* a view or was created by an older version of Spark (before 3.1.0).
*/
def viewQuerySQLConfigs: Map[String, String] = {
try {
properties.get(CatalogTable.VIEW_QUERY_SQL_CONFIGS)
.map(confJson => JsonProtocol.mapFromJson(JsonMethods.parse(confJson)).toMap)
.getOrElse(Map.empty)
} catch {
case e: Exception =>
throw new AnalysisException(
"Corrupted view query SQL configs in catalog", cause = Some(e))
}
}

/** Syntactic sugar to update a field in `storage`. */
def withNewStorage(
locationUri: Option[URI] = storage.locationUri,
@@ -414,6 +433,8 @@ object CatalogTable {
val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out."
val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols"
val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col."

val VIEW_QUERY_SQL_CONFIGS = VIEW_PREFIX + "query.sqlConfigs"
Reviewer comment: This is similar to the current catalog/namespace, which is about the context, not the query. Can we define it close to VIEW_CATALOG_AND_NAMESPACE and follow its property key naming?

Author reply: done

}

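A rough sketch of the round trip between the captured config map and the single JSON-encoded table property used in this commit (map contents are illustrative; see the review discussion further down about switching to one property per config):

import org.json4s.jackson.JsonMethods._

import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.util.JsonProtocol

val captured = Map("spark.sql.ansi.enabled" -> "true", "spark.sql.caseSensitive" -> "false")

// Write side (mirrors generateQuerySQLConfigs further down): serialize the map into one property value.
val confJson = compact(render(JsonProtocol.mapToJson(captured)))
val properties = Map(CatalogTable.VIEW_QUERY_SQL_CONFIGS -> confJson)

// Read side (mirrors viewQuerySQLConfigs above): parse the property back into a Map.
val restored = properties.get(CatalogTable.VIEW_QUERY_SQL_CONFIGS)
  .map(json => JsonProtocol.mapFromJson(parse(json)).toMap)
  .getOrElse(Map.empty[String, String])

assert(restored == captured)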
@@ -17,17 +17,14 @@

package org.apache.spark.sql.catalyst.plans.logical

import scala.collection.mutable

import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation}
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction}
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning}
import org.apache.spark.sql.catalyst.util.truncatedString
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.random.RandomSampler
@@ -456,6 +453,20 @@ case class View(
}
}

object View {
def effectiveSQLConf(configs: Map[String, String]): SQLConf = {
val activeConf = SQLConf.get
if (!activeConf.applyViewSQLConfigs) return activeConf

val sqlConf = new SQLConf()
for ((k, v) <- configs) {
sqlConf.settings.put(k, v)
}
sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth)
sqlConf
}
}

/**
* A container for holding named common table expressions (CTEs) and a query plan.
* This operator will be removed during analysis and the relations will be substituted into child.
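A small sketch of what View.effectiveSQLConf yields with the new flag left at its default of true: the captured entries populate a fresh SQLConf, while the session's nested-view depth limit is carried over so the depth check in the Analyzer still reflects the current session. The config map is illustrative:

import org.apache.spark.sql.catalyst.plans.logical.View
import org.apache.spark.sql.internal.SQLConf

val captured = Map("spark.sql.ansi.enabled" -> "true")
val viewConf = View.effectiveSQLConf(captured)

// The captured entry is visible on the returned conf...
assert(viewConf.getConfString("spark.sql.ansi.enabled") == "true")
// ...while the current session's nested-view depth limit is preserved rather than reset.
assert(viewConf.maxNestedViewDepth == SQLConf.get.maxNestedViewDepth)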
@@ -1448,6 +1448,15 @@ object SQLConf {
"must be positive.")
.createWithDefault(100)

val APPLY_VIEW_SQL_CONFIGS =
buildConf("spark.sql.legacy.view.applySQLConfigs")
Reviewer comment: A legacy config name should describe the legacy behavior. How about spark.sql.legacy.useCurrentConfigsForView?

Author reply: done

.internal()
.doc("When true, captured SQL Configs will be applied during the parsing and analysis " +
"phases of the view resolution.")
.version("3.1.0")
.booleanConf
.createWithDefault(true)

val STREAMING_FILE_COMMIT_PROTOCOL_CLASS =
buildConf("spark.sql.streaming.commitProtocolClass")
.version("2.1.0")
@@ -3385,6 +3394,8 @@ class SQLConf extends Serializable with Logging {

def maxNestedViewDepth: Int = getConf(SQLConf.MAX_NESTED_VIEW_DEPTH)

def applyViewSQLConfigs: Boolean = getConf(SQLConf.APPLY_VIEW_SQL_CONFIGS)

def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION)

def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO)
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.command

import scala.collection.mutable

import org.json4s.jackson.JsonMethods._

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, UnresolvedFunction, UnresolvedRelation, ViewType}
@@ -27,9 +29,10 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
import org.apache.spark.sql.internal.StaticSQLConf
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType}
import org.apache.spark.sql.util.SchemaUtils
import org.apache.spark.util.JsonProtocol

/**
* Create or replace a view with given query plan. This command will generate some view-specific
@@ -334,6 +337,21 @@ case class ShowViewsCommand(

object ViewHelper {

private val configPrefixBlacklist = Seq(
Reviewer comment: nit: configPrefixDenyList

Author reply: done

SQLConf.MAX_NESTED_VIEW_DEPTH.key,
"spark.sql.optimizer.",
"spark.sql.codegen.",
"spark.sql.execution.",
"spark.sql.shuffle.",
"spark.sql.adaptive.")

private def isConfigBlacklisted(key: String): Boolean = {
Reviewer comment: nit:

def shouldCaptureConfig(key: String): Boolean = {
  !configPrefixDenyList.exists(prefix => key.startsWith(prefix))
}

Author reply: done

for (prefix <- configPrefixBlacklist if key.startsWith(prefix)) {
return true
}
false
}

import CatalogTable._

@@ -361,11 +379,38 @@ object ViewHelper {
}
}

/**
* Convert the view query SQL configs to `properties`.
*/
private def generateQuerySQLConfigs(conf: SQLConf): Map[String, String] = {
Reviewer comment: sqlConfigsToProps, following catalogAndNamespaceToProps

Author reply: done

val modifiedConfs = conf.getAllConfs.filter { case (k, _) =>
conf.isModifiable(k) && !isConfigBlacklisted(k)
}
val props = new mutable.HashMap[String, String]
if (modifiedConfs.nonEmpty) {
val confJson = compact(render(JsonProtocol.mapToJson(modifiedConfs)))
props.put(VIEW_QUERY_SQL_CONFIGS, confJson)
Reviewer comment: Have you stress-tested this? The Hive metastore has a limit on property value length; take a look at HiveExternalCatalog.tableMetaToTableProps. Another idea is to put one config per table property entry.

Author reply (@luluorta, Nov 25, 2020): Thanks for pointing this out. Splitting a large value string into small chunks seems like a Hive-specific solution, so I changed it to store one config per table property entry, each with a "view.sqlConfig." prefix (see the sketch after this method).

}
props.toMap
}
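A hedged sketch of the per-entry layout the author describes in the thread above (one table property per captured config, keyed with a "view.sqlConfig." prefix). The helper names follow the reviewer's suggested shouldCaptureConfig/sqlConfigsToProps naming and are not the code in this commit:

import org.apache.spark.sql.internal.SQLConf

val configPrefixDenyList = Seq(
  SQLConf.MAX_NESTED_VIEW_DEPTH.key,
  "spark.sql.optimizer.",
  "spark.sql.codegen.",
  "spark.sql.execution.",
  "spark.sql.shuffle.",
  "spark.sql.adaptive.")

def shouldCaptureConfig(key: String): Boolean =
  !configPrefixDenyList.exists(prefix => key.startsWith(prefix))

val sqlConfigPrefix = "view.sqlConfig."  // per-config property key prefix mentioned in the reply above

// Capture: one table property per modifiable, capturable config.
def sqlConfigsToProps(conf: SQLConf): Map[String, String] =
  conf.getAllConfs
    .filter { case (k, _) => conf.isModifiable(k) && shouldCaptureConfig(k) }
    .map { case (k, v) => (sqlConfigPrefix + k) -> v }

// Restore: strip the prefix to recover the original config keys.
def sqlConfigsFromProps(props: Map[String, String]): Map[String, String] =
  props.collect { case (k, v) if k.startsWith(sqlConfigPrefix) => k.stripPrefix(sqlConfigPrefix) -> v }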

/**
* Remove the view query SQL configs from `properties`.
*/
private def removeQuerySQLConfigs(properties: Map[String, String]): Map[String, String] = {
// We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable,
// while `CatalogTable` should be serializable.
properties.filterNot { case (key, _) =>
key == VIEW_QUERY_SQL_CONFIGS
}
}

/**
* Generate the view properties in CatalogTable, including:
* 1. view default database that is used to provide the default database name on view resolution.
* 2. the output column names of the query that creates a view, this is used to map the output of
* the view child to the view output during view resolution.
* 3. the SQL configs captured when the view is created.
*
* @param properties the `properties` in CatalogTable.
* @param session the spark session.
@@ -380,16 +425,19 @@
// for createViewCommand queryOutput may be different from fieldNames
val queryOutput = analyzedPlan.schema.fieldNames

val conf = SQLConf.get
Reviewer comment: Since we have session passed in, it seems better to use session.sessionState.conf.

Author reply: done


// Generate the query column names, throw an AnalysisException if there exists duplicate column
// names.
SchemaUtils.checkColumnNameDuplication(
fieldNames, "in the view definition", session.sessionState.conf.resolver)
fieldNames, "in the view definition", conf.resolver)

// Generate the view default catalog and namespace.
val manager = session.sessionState.catalogManager
removeQueryColumnNames(properties) ++
removeQuerySQLConfigs(removeQueryColumnNames(properties)) ++
catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++
generateQueryColumnNames(queryOutput)
generateQueryColumnNames(queryOutput) ++
generateQuerySQLConfigs(conf)
}
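For orientation, a rough illustration (assumed keys and values, not actual output) of the view properties produced after this change for a single-column view created in the default database of the session catalog, assuming spark.sql.ansi.enabled is the only config set on the session:

Map(
  "view.catalogAndNamespace.numParts" -> "2",
  "view.catalogAndNamespace.part.0"   -> "spark_catalog",
  "view.catalogAndNamespace.part.1"   -> "default",
  "view.query.out.numCols"            -> "1",
  "view.query.out.col.0"              -> "c",
  "view.query.sqlConfigs"             -> """{"spark.sql.ansi.enabled":"true"}"""
)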
