@@ -99,15 +99,19 @@ object FileSourceStrategy extends Strategy with Logging {
99
99
dataColumns
100
100
.filter(requiredAttributes.contains)
101
101
.filterNot(partitionColumns.contains)
102
- val outputSchema = if (fsRelation.sqlContext.conf.isParquetNestColumnPruning
103
- && fsRelation.fileFormat.isInstanceOf [ParquetFileFormat ]) {
104
- val totalSchema = readDataColumns.toStructType
102
+ val outputSchema = if (
103
+ fsRelation.sqlContext.conf.parquetNestedColumnPruningEnabled &&
104
+ fsRelation.fileFormat.isInstanceOf [ParquetFileFormat ]
105
+ ) {
106
+ val fullSchema = readDataColumns.toStructType
105
107
val prunedSchema = StructType (
106
- generateStructFieldsContainsNesting(projects, totalSchema ))
108
+ generateStructFieldsContainsNesting(projects, fullSchema ))
107
109
// Merge schema in same StructType and merge with filterAttributes
108
110
prunedSchema.fields.map(f => StructType (Array (f))).reduceLeft(_ merge _)
109
111
.merge(filterAttributes.toSeq.toStructType)
110
- } else readDataColumns.toStructType
112
+ } else {
113
+ readDataColumns.toStructType
114
+ }
111
115
logInfo(s " Output Data Schema: ${outputSchema.simpleString(5 )}" )
112
116
113
117
val pushedDownFilters = dataFilters.flatMap(DataSourceStrategy .translateFilter)
@@ -137,10 +141,12 @@ object FileSourceStrategy extends Strategy with Logging {
137
141
case _ => Nil
138
142
}
139
143
140
- private def generateStructFieldsContainsNesting (projects : Seq [Expression ],
141
- totalSchema : StructType ) : Seq [StructField ] = {
142
- def generateStructField (curField : List [String ],
143
- node : Expression ) : Seq [StructField ] = {
144
+ private def generateStructFieldsContainsNesting (
145
+ projects : Seq [Expression ],
146
+ fullSchema : StructType ) : Seq [StructField ] = {
147
+ def generateStructField (
148
+ curField : List [String ],
149
+ node : Expression ) : Seq [StructField ] = {
144
150
node match {
145
151
case ai : GetArrayItem =>
146
152
// Here we drop the previously accumulated field names to simplify array and map support.
@@ -151,7 +157,7 @@ object FileSourceStrategy extends Strategy with Logging {
151
157
case mv : GetMapValue =>
152
158
generateStructField(List .empty[String ], mv.child)
153
159
case attr : AttributeReference =>
154
- Seq (getFieldRecursively(totalSchema , attr.name :: curField))
160
+ Seq (getFieldRecursively(fullSchema , attr.name :: curField))
155
161
case sf : GetStructField =>
156
162
generateStructField(sf.name.get :: curField, sf.child)
157
163
case _ =>
@@ -163,11 +169,12 @@ object FileSourceStrategy extends Strategy with Logging {
163
169
}
164
170
}
165
171
166
- def getFieldRecursively (totalSchema : StructType ,
167
- name : List [String ]): StructField = {
172
+ def getFieldRecursively (
173
+ schema : StructType ,
174
+ name : List [String ]): StructField = {
168
175
if (name.length > 1 ) {
169
176
val curField = name.head
170
- val curFieldType = totalSchema (curField)
177
+ val curFieldType = schema (curField)
171
178
curFieldType.dataType match {
172
179
case st : StructType =>
173
180
val newField = getFieldRecursively(StructType (st.fields), name.drop(1 ))
@@ -177,7 +184,7 @@ object FileSourceStrategy extends Strategy with Logging {
177
184
throw new IllegalArgumentException (s """ Field " $curField" is not struct field. """ )
178
185
}
179
186
} else {
180
- totalSchema (name.head)
187
+ schema (name.head)
181
188
}
182
189
}
183
190
0 commit comments