@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.{PlanUtils, TableDesc}
 import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter
 import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorConverters, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.objectinspector.primitive._
 import org.apache.hadoop.io.Writable
@@ -115,7 +116,7 @@ class HadoopTableReader(
       val hconf = broadcastedHiveConf.value.value
       val deserializer = deserializerClass.newInstance()
       deserializer.initialize(hconf, tableDesc.getProperties)
-      HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow)
+      HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow, deserializer)
     }
 
     deserializedHadoopRDD
@@ -194,7 +195,7 @@ class HadoopTableReader(
 
         // fill the non partition key attributes
         HadoopTableReader.fillObject(iter, deserializer, nonPartitionKeyAttrs,
-          mutableRow, Some(tableSerDe))
+          mutableRow, tableSerDe)
       }
     }.toSeq
 
@@ -264,37 +265,27 @@ private[hive] object HadoopTableReader extends HiveInspectors {
    * Transform all given raw `Writable`s into `Row`s.
    *
    * @param iterator Iterator of all `Writable`s to be transformed
-   * @param deserializer The `Deserializer` associated with the input `Writable`
+   * @param rawDeser The `Deserializer` associated with the input `Writable`
    * @param nonPartitionKeyAttrs Attributes that should be filled together with their corresponding
    *                             positions in the output schema
    * @param mutableRow A reusable `MutableRow` that should be filled
-   * @param convertdeserializer The `Deserializer` covert the `deserializer`
+   * @param tableDeser Table-level `Deserializer`, used as the conversion target
    * @return An `Iterator[Row]` transformed from `iterator`
    */
   def fillObject(
       iterator: Iterator[Writable],
-      deserializer: Deserializer,
+      rawDeser: Deserializer,
       nonPartitionKeyAttrs: Seq[(Attribute, Int)],
       mutableRow: MutableRow,
-      convertdeserializer: Option[Deserializer] = None): Iterator[Row] = {
+      tableDeser: Deserializer): Iterator[Row] = {
 
-    val soi = convertdeserializer match {
-      case Some(convert) =>
-        // check need to convert
-        if (deserializer.getObjectInspector.equals(convert.getObjectInspector)) {
-          deserializer.getObjectInspector().asInstanceOf[StructObjectInspector]
-        }
-        else {
-          HiveShim.getConvertedOI(
-            deserializer.getObjectInspector(),
-            convert.getObjectInspector()).asInstanceOf[StructObjectInspector]
-        }
-      case None =>
-        deserializer.getObjectInspector().asInstanceOf[StructObjectInspector]
-    }
+    val soi = HiveShim.getConvertedOI(
+      rawDeser.getObjectInspector, tableDeser.getObjectInspector).asInstanceOf[StructObjectInspector]
+
+    val inputFields = soi.getAllStructFieldRefs
 
     val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) =>
-      soi.getStructFieldRef(attr.name) -> ordinal
+      (inputFields.get(ordinal), ordinal)
     }.unzip
 
     // Builds specific unwrappers ahead of time according to object inspector types to avoid pattern
@@ -335,17 +326,15 @@ private[hive] object HadoopTableReader extends HiveInspectors {
       }
     }
 
-    /**
-     * when the soi and deserializer.getObjectInspector is equal,
-     * we will get `IdentityConverter`,which mean it won't convert the
-     * value when schema match
-     */
-    val partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
-      deserializer.getObjectInspector, soi)
+    val converter = if (rawDeser == tableDeser) {
+      new IdentityConverter
+    } else {
+      ObjectInspectorConverters.getConverter(rawDeser.getObjectInspector, soi)
+    }
 
     // Map each tuple to a row object
     iterator.map { value =>
-      val raw = partTblObjectInspectorConverter.convert(deserializer.deserialize(value))
+      val raw = converter.convert(rawDeser.deserialize(value))
       var i = 0
       while (i < fieldRefs.length) {
         val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
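Not part of the patch, just a review note: below is a minimal, self-contained sketch of the Hive `ObjectInspectorConverters` behavior the new `converter` relies on. The single-column schemas and the object name are invented for illustration; they model an old partition written with an `int` column that the table schema has since widened to `bigint`.

```scala
import java.util.Arrays

import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorConverters, ObjectInspectorFactory}
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory

object ConverterSketch extends App {
  // Schema an old partition was written with: a single int column "key".
  val rawOI = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("key"),
    Arrays.asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector: ObjectInspector))

  // Current table schema: the same column after being altered to bigint.
  val tableOI = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("key"),
    Arrays.asList(PrimitiveObjectInspectorFactory.javaLongObjectInspector: ObjectInspector))

  // getConverter builds a field-by-field struct converter that widens int -> long.
  val converter = ObjectInspectorConverters.getConverter(rawOI, tableOI)
  println(converter.convert(Arrays.asList(Int.box(42))))  // [42], now a java.lang.Long

  // With equal input and output inspectors, getConverter returns a no-op
  // IdentityConverter, which is what the removed comment in the old code described.
  val same = ObjectInspectorConverters.getConverter(rawOI, rawOI)
  println(same.getClass.getSimpleName)  // IdentityConverter
}
```

Since `getConverter` already short-circuits to an `IdentityConverter` when the two inspectors are equal, the explicit `rawDeser == tableDeser` branch in `fillObject` is just a cheap reference check that skips the inspector comparison on the common unpartitioned-table path, where the same deserializer is passed for both parameters.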