diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 09e826c506613..82c88280d7754 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -51,8 +51,10 @@ private[hive] sealed trait TableReader {
  * data warehouse directory.
  */
 private[hive]
-class HadoopTableReader(@transient attributes: Seq[Attribute],
-                        @transient relation: MetastoreRelation, @transient sc: HiveContext)
+class HadoopTableReader(
+    @transient attributes: Seq[Attribute],
+    @transient relation: MetastoreRelation,
+    @transient sc: HiveContext)
   extends TableReader {
 
   // Choose the minimum number of splits. If mapred.map.tasks is set, then use that unless
@@ -135,7 +137,8 @@ class HadoopTableReader(@transient attributes: Seq[Attribute],
    *     subdirectory of each partition being read. If None, then all files are accepted.
    */
   def makeRDDForPartitionedTable(
-      partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
+      partitionToDeserializer: Map[HivePartition,
+      Class[_ <: Deserializer]],
       filterOpt: Option[PathFilter]): RDD[Row] = {
     val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
       val partDesc = Utilities.getPartitionDesc(partition)
@@ -261,8 +264,11 @@ private[hive] object HadoopTableReader extends HiveInspectors {
    *
    * @return Iterable Row object that transformed from the given iterable input.
    */
-  def fillObject(iter: Iterator[Writable], deserializer: Deserializer,
-      attrs: Seq[(Attribute, Int)], row: GenericMutableRow): Iterator[Row] = {
+  def fillObject(
+      iter: Iterator[Writable],
+      deserializer: Deserializer,
+      attrs: Seq[(Attribute, Int)],
+      row: GenericMutableRow): Iterator[Row] = {
     val soi = deserializer.getObjectInspector().asInstanceOf[StructObjectInspector]
     // get the field references according to the attributes(output of the reader) required
     val fieldRefs = attrs.map { case (attr, idx) => (soi.getStructFieldRef(attr.name), idx) }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index e8a6f7b070257..8920e2a76a27f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -77,7 +77,7 @@ case class HiveTableScan(
     val columnInternalNames = neededColumnIDs.map(HiveConf.getColumnInternalName(_)).mkString(",")
 
     if (attributes.size == relation.output.size) {
-      // TODO what if duplicated attributes queried?
+      // SQLContext#pruneFilterProject guarantees no duplicated value in `attributes`
       ColumnProjectionUtils.setFullyReadColumns(hiveConf)
     } else {
       ColumnProjectionUtils.appendReadColumnIDs(hiveConf, neededColumnIDs)
diff --git a/sql/hive/src/test/resources/golden/partition_based_table_scan_with_different_serde-0-8d612014b2c7d3775f637020e568768d b/sql/hive/src/test/resources/golden/partition_based_table_scan_with_different_serde-0-8caed2a6e80250a6d38a59388679c298
similarity index 100%
rename from sql/hive/src/test/resources/golden/partition_based_table_scan_with_different_serde-0-8d612014b2c7d3775f637020e568768d
rename to sql/hive/src/test/resources/golden/partition_based_table_scan_with_different_serde-0-8caed2a6e80250a6d38a59388679c298
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 04f56fedbeabd..bcb00f871d185 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.sql.hive.execution
 
+import org.scalatest.{BeforeAndAfterAll, FunSuite}
+
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.hive.test.TestHive
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
 
 class HiveTableScanSuite extends HiveComparisonTest {
   // MINOR HACK: You must run a query before calling reset the first time.
@@ -31,17 +32,17 @@ class HiveTableScanSuite extends HiveComparisonTest {
                  | 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
                  | STORED AS RCFILE
                """.stripMargin)
-  TestHive.hql("""from src
-                 | insert into table part_scan_test PARTITION (ds='2010-01-01')
-                 | select 100,100 limit 1
+  TestHive.hql("""FROM src
+                 | INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-01')
+                 | SELECT 100,100 LIMIT 1
                """.stripMargin)
-  TestHive.hql("""ALTER TABLE part_scan_test set SERDE
+  TestHive.hql("""ALTER TABLE part_scan_test SET SERDE
                  | 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
                """.stripMargin)
-  TestHive.hql("""from src insert into table part_scan_test PARTITION (ds='2010-01-02')
-                 | select 200,200 limit 1
+  TestHive.hql("""FROM src INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-02')
+                 | SELECT 200,200 LIMIT 1
                """.stripMargin)
 
   createQueryTest("partition_based_table_scan_with_different_serde",
-    "select * from part_scan_test", false)
+    "SELECT * from part_scan_test", false)
 }
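Note on the TableReader.scala hunks above: they are purely stylistic, reflowing long constructor and method signatures so that each parameter sits on its own line. A minimal sketch of that convention follows; the class, method, and parameter names here are made up for illustration and are not part of the patch.

```scala
// Illustrative only: when a signature would exceed the line limit,
// put each parameter on its own line, indented four spaces, and keep
// the result type on the closing line of the parameter list.
class ExampleReader(
    tableName: String,
    columns: Seq[String],
    maxSplits: Int)
  extends Serializable {

  def readRows(
      paths: Seq[String],
      limit: Int): Seq[String] =
    paths.take(limit).map(p => s"$tableName:$p")
}

object ExampleReader {
  def main(args: Array[String]): Unit = {
    val reader = new ExampleReader("part_scan_test", Seq("key", "value"), maxSplits = 2)
    println(reader.readRows(Seq("ds=2010-01-01", "ds=2010-01-02"), limit = 1))
  }
}
```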