
Commit ab8f5ec

fix code style and variable name
1 parent 23465ba commit ab8f5ec

File tree

3 files changed: 41 additions & 35 deletions

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala

Lines changed: 21 additions & 14 deletions

@@ -99,15 +99,19 @@ object FileSourceStrategy extends Strategy with Logging {
       dataColumns
         .filter(requiredAttributes.contains)
         .filterNot(partitionColumns.contains)
-    val outputSchema = if (fsRelation.sqlContext.conf.isParquetNestColumnPruning
-      && fsRelation.fileFormat.isInstanceOf[ParquetFileFormat]) {
-      val totalSchema = readDataColumns.toStructType
+    val outputSchema = if (
+      fsRelation.sqlContext.conf.parquetNestedColumnPruningEnabled &&
+      fsRelation.fileFormat.isInstanceOf[ParquetFileFormat]
+    ) {
+      val fullSchema = readDataColumns.toStructType
       val prunedSchema = StructType(
-        generateStructFieldsContainsNesting(projects, totalSchema))
+        generateStructFieldsContainsNesting(projects, fullSchema))
       // Merge schema in same StructType and merge with filterAttributes
       prunedSchema.fields.map(f => StructType(Array(f))).reduceLeft(_ merge _)
         .merge(filterAttributes.toSeq.toStructType)
-    } else readDataColumns.toStructType
+    } else {
+      readDataColumns.toStructType
+    }
     logInfo(s"Output Data Schema: ${outputSchema.simpleString(5)}")

     val pushedDownFilters = dataFilters.flatMap(DataSourceStrategy.translateFilter)

@@ -137,10 +141,12 @@ object FileSourceStrategy extends Strategy with Logging {
     case _ => Nil
   }

-  private def generateStructFieldsContainsNesting(projects: Seq[Expression],
-      totalSchema: StructType) : Seq[StructField] = {
-    def generateStructField(curField: List[String],
-        node: Expression) : Seq[StructField] = {
+  private def generateStructFieldsContainsNesting(
+      projects: Seq[Expression],
+      fullSchema: StructType) : Seq[StructField] = {
+    def generateStructField(
+        curField: List[String],
+        node: Expression) : Seq[StructField] = {
      node match {
        case ai: GetArrayItem =>
          // Here we drop the previous for simplify array and map support.

@@ -151,7 +157,7 @@ object FileSourceStrategy extends Strategy with Logging {
        case mv: GetMapValue =>
          generateStructField(List.empty[String], mv.child)
        case attr: AttributeReference =>
-          Seq(getFieldRecursively(totalSchema, attr.name :: curField))
+          Seq(getFieldRecursively(fullSchema, attr.name :: curField))
        case sf: GetStructField =>
          generateStructField(sf.name.get :: curField, sf.child)
        case _ =>

@@ -163,11 +169,12 @@ object FileSourceStrategy extends Strategy with Logging {
      }
    }

-    def getFieldRecursively(totalSchema: StructType,
-        name: List[String]): StructField = {
+    def getFieldRecursively(
+        schema: StructType,
+        name: List[String]): StructField = {
      if (name.length > 1) {
        val curField = name.head
-        val curFieldType = totalSchema(curField)
+        val curFieldType = schema(curField)
        curFieldType.dataType match {
          case st: StructType =>
            val newField = getFieldRecursively(StructType(st.fields), name.drop(1))

@@ -177,7 +184,7 @@ object FileSourceStrategy extends Strategy with Logging {
            throw new IllegalArgumentException(s"""Field "$curField" is not struct field.""")
        }
      } else {
-        totalSchema(name.head)
+        schema(name.head)
      }
    }
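To make the renamed helper concrete, here is a minimal, self-contained sketch of the path-walking idea behind getFieldRecursively. Everything in it is illustrative: fieldForPath and the sample schema are invented names, and the step that re-wraps the recursive result into its parent StructField falls outside the visible hunk, so that part is a plausible reconstruction rather than the patch itself.

import org.apache.spark.sql.types._

object GetFieldRecursivelySketch {
  // Illustrative stand-in for getFieldRecursively: walk `path` down
  // `schema` and return a StructField keeping only that one branch.
  // The re-wrapping into the parent StructField is not visible in the
  // hunk above, so it is reconstructed here as a best guess.
  def fieldForPath(schema: StructType, path: List[String]): StructField = {
    if (path.length > 1) {
      val curField = path.head
      schema(curField).dataType match {
        case st: StructType =>
          val inner = fieldForPath(StructType(st.fields), path.tail)
          StructField(curField, StructType(Array(inner)), schema(curField).nullable)
        case _ =>
          throw new IllegalArgumentException(s"""Field "$curField" is not struct field.""")
      }
    } else {
      schema(path.head)
    }
  }

  def main(args: Array[String]): Unit = {
    // Shape borrowed from the test file's schema comment; the leaf
    // types of s1_1 and s1_2 are assumptions.
    val schema = StructType(Seq(
      StructField("col", StructType(Seq(
        StructField("s1", StructType(Seq(
          StructField("s1_1", LongType),
          StructField("s1_2", StringType)))),
        StructField("str", StringType)))),
      StructField("num", LongType)))

    // Keeps only col.s1.s1_1; col.s1.s1_2 and col.str are pruned away.
    println(fieldForPath(schema, List("col", "s1", "s1_1")))
  }
}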

sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 3 additions & 3 deletions

@@ -212,8 +212,8 @@ object SQLConf {
    .booleanConf
    .createWithDefault(true)

-  val PARQUET_NEST_COLUMN_PRUNING = SQLConfigBuilder("spark.sql.parquet.nestColumnPruning")
-    .doc("When set this to true, we will tell parquet only read the nest column`s leaf fields ")
+  val PARQUET_NESTED_COLUMN_PRUNING = SQLConfigBuilder("spark.sql.parquet.nestedColumnPruning")
+    .doc("When true, Parquet column pruning also works for nested fields.")
    .booleanConf
    .createWithDefault(false)

@@ -666,7 +666,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {

  def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)

-  def isParquetNestColumnPruning: Boolean = getConf(PARQUET_NEST_COLUMN_PRUNING)
+  def parquetNestedColumnPruningEnabled: Boolean = getConf(PARQUET_NESTED_COLUMN_PRUNING)

  def writeLegacyParquetFormat: Boolean = getConf(PARQUET_WRITE_LEGACY_FORMAT)
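For context, this is roughly how the renamed flag would be switched on from user code. A hedged sketch only: the session setup, app name, and file path are invented for the example, while the config key and its false default come straight from the hunk above.

import org.apache.spark.sql.SparkSession

object NestedPruningDemo {
  def main(args: Array[String]): Unit = {
    // Key and "false" default taken from PARQUET_NESTED_COLUMN_PRUNING
    // above; master, appName, and the path are placeholders.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("nested-pruning-demo")
      .config("spark.sql.parquet.nestedColumnPruning", "true")
      .getOrCreate()

    // With the flag on and a Parquet source, FileSourceStrategy builds a
    // pruned output schema, so only the col.s1.s1_1 leaf should be read.
    spark.read.parquet("/path/to/nested-struct.parquet")
      .selectExpr("col.s1.s1_1")
      .explain()

    spark.stop()
  }
}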

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala

Lines changed: 17 additions & 18 deletions

@@ -581,25 +581,24 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
    // |    |-- str: string (nullable = true)
    // |-- num: long (nullable = true)
    // |-- str: string (nullable = true)
-    val df = readResourceParquetFile("test-data/nested-struct.snappy.parquet")
-    df.createOrReplaceTempView("tmp_table")
-    // normal test
-    val query1 = "select num,col.s1.s1_1 from tmp_table"
-    val result1 = sql(query1)
-    withSQLConf(SQLConf.PARQUET_NEST_COLUMN_PRUNING.key -> "true") {
-      checkAnswer(sql(query1), result1)
-    }
-    // test for same struct meta merge
-    // col.s1.s1_1 and col.str should merge
-    // like col.[s1.s1_1, str] before pass to parquet
-    val query2 = "select col.s1.s1_1,col.str from tmp_table"
-    val result2 = sql(query2)
-    withSQLConf(SQLConf.PARQUET_NEST_COLUMN_PRUNING.key -> "true") {
-      checkAnswer(sql(query2), result2)
+    withTempView("tmp_table") {
+      val df = readResourceParquetFile("test-data/nested-struct.snappy.parquet")
+      df.createOrReplaceTempView("tmp_table")
+      // normal test
+      val query1 = "select num,col.s1.s1_1 from tmp_table"
+      val result1 = sql(query1)
+      withSQLConf(SQLConf.PARQUET_NESTED_COLUMN_PRUNING.key -> "true") {
+        checkAnswer(sql(query1), result1)
+      }
+      // test for same struct meta merge
+      // col.s1.s1_1 and col.str should merge
+      // like col.[s1.s1_1, str] before pass to parquet
+      val query2 = "select col.s1.s1_1,col.str from tmp_table"
+      val result2 = sql(query2)
+      withSQLConf(SQLConf.PARQUET_NESTED_COLUMN_PRUNING.key -> "true") {
+        checkAnswer(sql(query2), result2)
+      }
    }
-
-    spark.sessionState.catalog.dropTable(
-      TableIdentifier("tmp_table"), ignoreIfNotExists = true, purge = false)
  }

  test("expand UDT in StructType") {
