|
17 | 17 |
|
18 | 18 | package org.apache.spark.sql.parquet
|
19 | 19 |
|
20 |
| -import java.io.File |
21 |
| - |
22 | 20 | import org.scalatest.{BeforeAndAfterAll, FunSuiteLike}
|
23 | 21 |
|
24 |
| -import org.apache.avro.{SchemaBuilder, Schema} |
25 |
| -import org.apache.avro.generic.{GenericData, GenericRecord} |
26 |
| - |
27 | 22 | import org.apache.hadoop.fs.{Path, FileSystem}
|
28 | 23 | import org.apache.hadoop.mapreduce.Job
|
29 | 24 |
|
30 |
| -import parquet.avro.AvroParquetWriter |
31 | 25 | import parquet.hadoop.ParquetFileWriter
|
32 | 26 | import parquet.hadoop.util.ContextUtil
|
33 | 27 | import parquet.schema.MessageTypeParser
|
@@ -679,147 +673,6 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
|
679 | 673 | assert(result3(0)(1) === "the answer")
|
680 | 674 | Utils.deleteRecursively(tmpdir)
|
681 | 675 | }
|
682 |
| - |
683 |
| - test("Importing data generated with Avro") { |
684 |
| - val tmpdir = Utils.createTempDir() |
685 |
| - val file: File = new File(tmpdir, "test.avro") |
686 |
| - |
687 |
| - val primitiveArrayType: Schema = SchemaBuilder.array.items.intType |
688 |
| - val complexArrayType: Schema = SchemaBuilder.array.items.map.values.stringType |
689 |
| - val primitiveMapType: Schema = SchemaBuilder.map.values.booleanType |
690 |
| - val complexMapType: Schema = SchemaBuilder.map.values.array.items.floatType |
691 |
| - val schema: Schema = SchemaBuilder |
692 |
| - .record("TestRecord") |
693 |
| - .namespace("") |
694 |
| - .fields |
695 |
| - .name("testInt") |
696 |
| - .`type`. |
697 |
| - intType |
698 |
| - .noDefault |
699 |
| - .name("testDouble") |
700 |
| - .`type` |
701 |
| - .doubleType |
702 |
| - .noDefault |
703 |
| - .name("testString") |
704 |
| - .`type` |
705 |
| - .nullable |
706 |
| - .stringType |
707 |
| - .stringDefault("") |
708 |
| - .name("testPrimitiveArray") |
709 |
| - .`type`(primitiveArrayType) |
710 |
| - .noDefault |
711 |
| - .name("testComplexArray") |
712 |
| - .`type`(complexArrayType) |
713 |
| - .noDefault |
714 |
| - .name("testPrimitiveMap") |
715 |
| - .`type`(primitiveMapType) |
716 |
| - .noDefault |
717 |
| - .name("testComplexMap") |
718 |
| - .`type`(complexMapType) |
719 |
| - .noDefault |
720 |
| - .endRecord |
721 |
| - |
722 |
| - val record1: GenericRecord = new GenericData.Record(schema) |
723 |
| - |
724 |
| - // primitive fields |
725 |
| - record1.put("testInt", 256) |
726 |
| - record1.put("testDouble", 0.5) |
727 |
| - record1.put("testString", "foo") |
728 |
| - |
729 |
| - val primitiveArrayData = new GenericData.Array[Integer](10, primitiveArrayType) |
730 |
| - val complexArrayData: GenericData.Array[java.util.Map[String, String]] = |
731 |
| - new GenericData.Array[java.util.Map[String, String]](10, SchemaBuilder.array.items.map.values.stringType) |
732 |
| - |
733 |
| - // two arrays: one primitive (array of ints), one complex (array of string->string maps) |
734 |
| - primitiveArrayData.add(1) |
735 |
| - primitiveArrayData.add(2) |
736 |
| - primitiveArrayData.add(3) |
737 |
| - val map1 = new java.util.HashMap[String, String] |
738 |
| - map1.put("key11", "data11") |
739 |
| - map1.put("key12", "data12") |
740 |
| - val map2 = new java.util.HashMap[String, String] |
741 |
| - map2.put("key21", "data21") |
742 |
| - map2.put("key22", "data22") |
743 |
| - complexArrayData.add(0, map1) |
744 |
| - complexArrayData.add(1, map2) |
745 |
| - |
746 |
| - record1.put("testPrimitiveArray", primitiveArrayData) |
747 |
| - record1.put("testComplexArray", complexArrayData) |
748 |
| - |
749 |
| - // two maps: one primitive (string->boolean), one complex (string->array of floats) |
750 |
| - val primitiveMap = new java.util.HashMap[String, Boolean](10) |
751 |
| - primitiveMap.put("key1", true) |
752 |
| - primitiveMap.put("key2", false) |
753 |
| - val complexMap = new java.util.HashMap[String, GenericData.Array[Float]](10) |
754 |
| - val value1: GenericData.Array[Float] = new GenericData.Array[Float](10, SchemaBuilder.array.items.floatType) |
755 |
| - value1.add(0.1f) |
756 |
| - value1.add(0.2f) |
757 |
| - value1.add(0.3f) |
758 |
| - complexMap.put("compKey1", value1) |
759 |
| - val value2: GenericData.Array[Float] = new GenericData.Array[Float](10, SchemaBuilder.array.items.floatType) |
760 |
| - value2.add(1.1f) |
761 |
| - value2.add(1.2f) |
762 |
| - value2.add(1.3f) |
763 |
| - complexMap.put("compKey2", value2) |
764 |
| - |
765 |
| - record1.put("testPrimitiveMap", primitiveMap) |
766 |
| - record1.put("testComplexMap", complexMap) |
767 |
| - |
768 |
| - // TODO: test array or map with value type Avro record |
769 |
| - |
770 |
| - val writer = new AvroParquetWriter[GenericRecord](new Path(file.toString), schema) |
771 |
| - writer.write(record1) |
772 |
| - writer.close() |
773 |
| - |
774 |
| - val data = TestSQLContext |
775 |
| - .parquetFile(tmpdir.toString) |
776 |
| - .toSchemaRDD |
777 |
| - data.registerAsTable("avroTable") |
778 |
| - val resultPrimitives = sql("SELECT testInt, testDouble, testString FROM avroTable").collect() |
779 |
| - assert(resultPrimitives(0)(0) === 256) |
780 |
| - assert(resultPrimitives(0)(1) === 0.5) |
781 |
| - assert(resultPrimitives(0)(2) === "foo") |
782 |
| - val resultPrimitiveArray = sql("SELECT testPrimitiveArray FROM avroTable").collect() |
783 |
| - assert(resultPrimitiveArray(0)(0) |
784 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]](0) === 1) |
785 |
| - assert(resultPrimitiveArray(0)(0) |
786 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]](1) === 2) |
787 |
| - assert(resultPrimitiveArray(0)(0) |
788 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]](2) === 3) |
789 |
| - val resultComplexArray = sql("SELECT testComplexArray FROM avroTable").collect() |
790 |
| - assert(resultComplexArray(0)(0) |
791 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]].size === 2) |
792 |
| - assert( |
793 |
| - resultComplexArray(0)(0) |
794 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]](0) |
795 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, String]] |
796 |
| - .get("key11").get.equals("data11")) |
797 |
| - assert( |
798 |
| - resultComplexArray(0)(0) |
799 |
| - .asInstanceOf[CatalystConverter.ArrayScalaType[_]](1) |
800 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, String]] |
801 |
| - .get("key22").get.equals("data22")) |
802 |
| - val resultPrimitiveMap = sql("SELECT testPrimitiveMap FROM avroTable").collect() |
803 |
| - assert( |
804 |
| - resultPrimitiveMap(0)(0) |
805 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, Boolean]].get("key1").get === true) |
806 |
| - assert( |
807 |
| - resultPrimitiveMap(0)(0) |
808 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, Boolean]].get("key2").get === false) |
809 |
| - val resultComplexMap = sql("SELECT testComplexMap FROM avroTable").collect() |
810 |
| - val mapResult1 = |
811 |
| - resultComplexMap(0)(0) |
812 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, CatalystConverter.ArrayScalaType[_]]] |
813 |
| - .get("compKey1").get |
814 |
| - val mapResult2 = |
815 |
| - resultComplexMap(0)(0) |
816 |
| - .asInstanceOf[CatalystConverter.MapScalaType[String, CatalystConverter.ArrayScalaType[_]]] |
817 |
| - .get("compKey2").get |
818 |
| - assert(mapResult1(0) === 0.1f) |
819 |
| - assert(mapResult1(2) === 0.3f) |
820 |
| - assert(mapResult2(0) === 1.1f) |
821 |
| - assert(mapResult2(2) === 1.3f) |
822 |
| - } |
823 | 676 | }
|
824 | 677 |
|
825 | 678 | // TODO: the code below is needed temporarily until the standard parser is able to parse
|
|
0 commit comments