@@ -138,8 +138,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       if (maybePhysicalSchema.isEmpty) {
         Iterator.empty
       } else {
-        val physicalSchema = maybePhysicalSchema.get
-        OrcRelation.setRequiredColumns(conf, physicalSchema, requiredSchema)
+        OrcRelation.setRequiredColumns(conf, dataSchema, requiredSchema)

         val orcRecordReader = {
           val job = Job.getInstance(conf)
@@ -163,6 +162,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
         // Unwraps `OrcStruct`s to `UnsafeRow`s
         OrcRelation.unwrapOrcStructs(
           conf,
+          dataSchema,
           requiredSchema,
           Some(orcRecordReader.getObjectInspector.asInstanceOf[StructObjectInspector]),
           recordsIterator)
@@ -272,25 +272,35 @@ private[orc] object OrcRelation extends HiveInspectors {
   def unwrapOrcStructs(
       conf: Configuration,
       dataSchema: StructType,
+      requiredSchema: StructType,
       maybeStructOI: Option[StructObjectInspector],
       iterator: Iterator[Writable]): Iterator[InternalRow] = {
     val deserializer = new OrcSerde
-    val mutableRow = new SpecificInternalRow(dataSchema.map(_.dataType))
-    val unsafeProjection = UnsafeProjection.create(dataSchema)
+    val mutableRow = new SpecificInternalRow(requiredSchema.map(_.dataType))
+    val unsafeProjection = UnsafeProjection.create(requiredSchema)

     def unwrap(oi: StructObjectInspector): Iterator[InternalRow] = {
-      val (fieldRefs, fieldOrdinals) = dataSchema.zipWithIndex.map {
-        case (field, ordinal) => oi.getStructFieldRef(field.name) -> ordinal
+      val (fieldRefs, fieldOrdinals) = requiredSchema.zipWithIndex.map {
+        case (field, ordinal) =>
+          var ref = oi.getStructFieldRef(field.name)
+          if (ref == null) {
+            val maybeIndex = dataSchema.getFieldIndex(field.name)
+            if (maybeIndex.isDefined) {
+              ref = oi.getStructFieldRef("_col" + maybeIndex.get)
+            }
+          }
+          ref -> ordinal
       }.unzip

-      val unwrappers = fieldRefs.map(unwrapperFor)
+      val unwrappers = fieldRefs.map(r => if (r == null) null else unwrapperFor(r))

       iterator.map { value =>
         val raw = deserializer.deserialize(value)
         var i = 0
         val length = fieldRefs.length
         while (i < length) {
-          val fieldValue = oi.getStructFieldData(raw, fieldRefs(i))
+          val fieldRef = fieldRefs(i)
+          val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRefs(i))
           if (fieldValue == null) {
             mutableRow.setNullAt(fieldOrdinals(i))
           } else {
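
For context, here is a minimal standalone sketch (plain Scala, no Spark/Hive imports) of the name-resolution fallback introduced in this hunk: when the ORC file's physical schema only carries positional names such as _col0, _col1 (as the "_col" + index fallback above implies), lookup by logical field name fails and the column's index in dataSchema is used instead. PhysicalField, the Map-based stand-in for the object inspector, and the sample schemas are all hypothetical.

object ColFallbackSketch {
  final case class PhysicalField(name: String, pos: Int)

  // Physical schema of an ORC file that only kept positional names (_col0, _col1).
  val physical: Map[String, PhysicalField] = Map(
    "_col0" -> PhysicalField("_col0", 0),
    "_col1" -> PhysicalField("_col1", 1))

  // Logical schema order, standing in for `dataSchema`.
  val dataSchema: Seq[String] = Seq("id", "name")

  // Mirrors the diff: try the logical name first, then fall back to "_col<index>".
  def resolve(requiredName: String): Option[PhysicalField] =
    physical.get(requiredName).orElse {
      val idx = dataSchema.indexOf(requiredName)
      if (idx >= 0) physical.get("_col" + idx) else None
    }

  def main(args: Array[String]): Unit = {
    println(resolve("name")) // Some(PhysicalField(_col1,1))
    println(resolve("age"))  // None -> the row field would be set to null, as above
  }
}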
@@ -306,8 +316,8 @@ private[orc] object OrcRelation extends HiveInspectors {
   }

   def setRequiredColumns(
-      conf: Configuration, physicalSchema: StructType, requestedSchema: StructType): Unit = {
-    val ids = requestedSchema.map(a => physicalSchema.fieldIndex(a.name): Integer)
+      conf: Configuration, dataSchema: StructType, requestedSchema: StructType): Unit = {
+    val ids = requestedSchema.map(a => dataSchema.fieldIndex(a.name): Integer)
     val (sortedIDs, sortedNames) = ids.zip(requestedSchema.fieldNames).sorted.unzip
     HiveShim.appendReadColumns(conf, sortedIDs, sortedNames)
   }
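
As a quick illustration of the id/name pairing that setRequiredColumns hands to HiveShim.appendReadColumns, here is a self-contained sketch using plain Seq[String] schemas instead of StructType; the column names are hypothetical and HiveShim itself is not invoked.

object RequiredColumnsSketch {
  def requiredColumns(
      dataSchema: Seq[String],
      requestedSchema: Seq[String]): (Seq[Int], Seq[String]) = {
    // Resolve every requested column by name against the full data schema...
    val ids = requestedSchema.map(dataSchema.indexOf)
    // ...then sort the (id, name) pairs by column id, matching the diff above.
    ids.zip(requestedSchema).sorted.unzip
  }

  def main(args: Array[String]): Unit = {
    val (ids, names) = requiredColumns(Seq("id", "name", "age"), Seq("age", "id"))
    println(ids)   // List(0, 2)
    println(names) // List(id, age)
  }
}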