
Commit c879aa1

clean the code
1 parent 2a91a87 commit c879aa1

File tree

4 files changed (+21, -36 lines)


sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala

Lines changed: 18 additions & 29 deletions
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.{PlanUtils, TableDesc}
 import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter
 import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorConverters, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.objectinspector.primitive._
 import org.apache.hadoop.io.Writable
@@ -115,7 +116,7 @@ class HadoopTableReader(
       val hconf = broadcastedHiveConf.value.value
       val deserializer = deserializerClass.newInstance()
       deserializer.initialize(hconf, tableDesc.getProperties)
-      HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow)
+      HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow, deserializer)
     }

     deserializedHadoopRDD
@@ -194,7 +195,7 @@ class HadoopTableReader(

         // fill the non partition key attributes
         HadoopTableReader.fillObject(iter, deserializer, nonPartitionKeyAttrs,
-          mutableRow, Some(tableSerDe))
+          mutableRow, tableSerDe)
       }
     }.toSeq

@@ -264,37 +265,27 @@ private[hive] object HadoopTableReader extends HiveInspectors {
    * Transform all given raw `Writable`s into `Row`s.
    *
    * @param iterator Iterator of all `Writable`s to be transformed
-   * @param deserializer The `Deserializer` associated with the input `Writable`
+   * @param rawDeser The `Deserializer` associated with the input `Writable`
    * @param nonPartitionKeyAttrs Attributes that should be filled together with their corresponding
    *                             positions in the output schema
    * @param mutableRow A reusable `MutableRow` that should be filled
-   * @param convertdeserializer The `Deserializer` covert the `deserializer`
+   * @param tableDeser Table Deserializer
    * @return An `Iterator[Row]` transformed from `iterator`
    */
   def fillObject(
       iterator: Iterator[Writable],
-      deserializer: Deserializer,
+      rawDeser: Deserializer,
       nonPartitionKeyAttrs: Seq[(Attribute, Int)],
       mutableRow: MutableRow,
-      convertdeserializer: Option[Deserializer] = None): Iterator[Row] = {
+      tableDeser: Deserializer): Iterator[Row] = {

-    val soi = convertdeserializer match {
-      case Some(convert) =>
-        // check need to convert
-        if (deserializer.getObjectInspector.equals(convert.getObjectInspector)) {
-          deserializer.getObjectInspector().asInstanceOf[StructObjectInspector]
-        }
-        else {
-          HiveShim.getConvertedOI(
-            deserializer.getObjectInspector(),
-            convert.getObjectInspector()).asInstanceOf[StructObjectInspector]
-        }
-      case None =>
-        deserializer.getObjectInspector().asInstanceOf[StructObjectInspector]
-    }
+    val soi = HiveShim.getConvertedOI(
+      rawDeser.getObjectInspector, tableDeser.getObjectInspector).asInstanceOf[StructObjectInspector]
+
+    val inputFields = soi.getAllStructFieldRefs

     val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) =>
-      soi.getStructFieldRef(attr.name) -> ordinal
+      (inputFields.get(ordinal), ordinal)
     }.unzip

     // Builds specific unwrappers ahead of time according to object inspector types to avoid pattern
@@ -335,17 +326,15 @@ private[hive] object HadoopTableReader extends HiveInspectors {
       }
     }

-    /**
-     * when the soi and deserializer.getObjectInspector is equal,
-     * we will get `IdentityConverter`,which mean it won't convert the
-     * value when schema match
-     */
-    val partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
-      deserializer.getObjectInspector, soi)
+    val converter = if (rawDeser == tableDeser) {
+      new IdentityConverter
+    } else {
+      ObjectInspectorConverters.getConverter(rawDeser.getObjectInspector, soi)
+    }

     // Map each tuple to a row object
     iterator.map { value =>
-      val raw = partTblObjectInspectorConverter.convert(deserializer.deserialize(value))
+      val raw = converter.convert(rawDeser.deserialize(value))
       var i = 0
       while (i < fieldRefs.length) {
         val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
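
The hunks above replace the Option-based branching in fillObject with an ObjectInspector that is always resolved against the table's SerDe, falling back to an IdentityConverter when the partition and table SerDes are the same object. The snippet below is a minimal standalone sketch of that pattern, not part of this commit; it assumes Hive 0.13's two-argument ObjectInspectorConverters.getConvertedOI (on 0.12.0 the extra Boolean argument is required, which is what the HiveShim changes further down handle), and the names convertRows, partDeser, tableDeser and rows are illustrative.

// Sketch only: converting a partition's raw SerDe output to the table's schema.
import org.apache.hadoop.hive.serde2.Deserializer
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.hadoop.io.Writable

def convertRows(
    partDeser: Deserializer,   // SerDe the partition files were written with
    tableDeser: Deserializer,  // SerDe reflecting the table's current schema
    rows: Iterator[Writable]): Iterator[AnyRef] = {
  // Struct inspector expressed in the table's (possibly ALTERed) column types.
  val soi = ObjectInspectorConverters
    .getConvertedOI(partDeser.getObjectInspector, tableDeser.getObjectInspector)
    .asInstanceOf[StructObjectInspector]

  // Same SerDe instance means the schemas already agree, so skip the conversion.
  val converter =
    if (partDeser eq tableDeser) new IdentityConverter
    else ObjectInspectorConverters.getConverter(partDeser.getObjectInspector, soi)

  rows.map(w => converter.convert(partDeser.deserialize(w)))
}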

sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ class InsertIntoHiveTableSuite extends QueryTest {
     sql(s"CREATE TABLE table_with_partition(key int,value string) PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
     sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') SELECT key,value FROM testData")

-    //test schema is the same
+    // test that the schema is the same between partition and table
     sql("ALTER TABLE table_with_partition CHANGE COLUMN key key BIGINT")
     checkAnswer(sql("select key,value from table_with_partition where ds='1' "),
       testData.toSchemaRDD.collect.toSeq

sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala

Lines changed: 2 additions & 5 deletions
@@ -242,12 +242,9 @@ private[hive] object HiveShim {
     }
   }

-  // make getConvertedOI compatible between 0.12.0 and 0.13.1
   def getConvertedOI(inputOI: ObjectInspector,
-                     outputOI: ObjectInspector,
-                     equalsCheck: java.lang.Boolean =
-                       new java.lang.Boolean(true)): ObjectInspector = {
-    ObjectInspectorConverters.getConvertedOI(inputOI, outputOI, equalsCheck)
+                     outputOI: ObjectInspector): ObjectInspector = {
+    ObjectInspectorConverters.getConvertedOI(inputOI, outputOI, true)
   }

   def prepareWritable(w: Writable): Writable = {
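
After this change the 0.12.0 shim supplies the equalsCheck flag internally, so it exposes the same two-argument getConvertedOI signature as the 0.13.1 shim in the next file, and callers such as HadoopTableReader.fillObject stay Hive-version-agnostic. A hedged sketch of such a call site follows; the method name convertedStructOI is made up for illustration, and it would need to live in the org.apache.spark.sql.hive package since HiveShim is private[hive].

// Illustrative only, not part of this commit: a version-agnostic call through the shim.
import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StructObjectInspector}

def convertedStructOI(rawOI: ObjectInspector, tableOI: ObjectInspector): StructObjectInspector =
  HiveShim.getConvertedOI(rawOI, tableOI).asInstanceOf[StructObjectInspector]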

sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala

Lines changed: 0 additions & 1 deletion
@@ -399,7 +399,6 @@ private[hive] object HiveShim {
     }
   }

-  // make getConvertedOI compatible between 0.12.0 and 0.13.1
   def getConvertedOI(inputOI: ObjectInspector, outputOI: ObjectInspector): ObjectInspector = {
     ObjectInspectorConverters.getConvertedOI(inputOI, outputOI)
   }
