
Commit 26badc4

Parquet to 1.10.1
1 parent c617757 commit 26badc4


6 files changed: 23 additions & 17 deletions


dev/deps/spark-deps-hadoop-2.7-hive-2.3

Lines changed: 6 additions & 6 deletions

@@ -203,12 +203,12 @@ orc-shims/1.5.12//orc-shims-1.5.12.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar
-parquet-column/1.11.1//parquet-column-1.11.1.jar
-parquet-common/1.11.1//parquet-common-1.11.1.jar
-parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar
-parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar
-parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar
-parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar
+parquet-column/1.10.1//parquet-column-1.10.1.jar
+parquet-common/1.10.1//parquet-common-1.10.1.jar
+parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
+parquet-format/2.4.0//parquet-format-2.4.0.jar
+parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
+parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
 py4j/0.10.9//py4j-0.10.9.jar
 pyrolite/4.30//pyrolite-4.30.jar

dev/deps/spark-deps-hadoop-3.2-hive-2.3

Lines changed: 6 additions & 6 deletions

@@ -173,12 +173,12 @@ orc-shims/1.5.12//orc-shims-1.5.12.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar
-parquet-column/1.11.1//parquet-column-1.11.1.jar
-parquet-common/1.11.1//parquet-common-1.11.1.jar
-parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar
-parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar
-parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar
-parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar
+parquet-column/1.10.1//parquet-column-1.10.1.jar
+parquet-common/1.10.1//parquet-common-1.10.1.jar
+parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
+parquet-format/2.4.0//parquet-format-2.4.0.jar
+parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
+parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
 py4j/0.10.9//py4j-0.10.9.jar
 pyrolite/4.30//pyrolite-4.30.jar

pom.xml

Lines changed: 1 addition & 1 deletion

@@ -135,7 +135,7 @@
     <!-- note that this should be compatible with Kafka brokers version 0.10 and up -->
     <kafka.version>2.6.0</kafka.version>
     <derby.version>10.12.1.1</derby.version>
-    <parquet.version>1.11.1</parquet.version>
+    <parquet.version>1.10.1</parquet.version>
     <orc.version>1.5.12</orc.version>
     <jetty.version>9.4.28.v20200408</jetty.version>
     <javaxservlet.version>3.1.0</javaxservlet.version>

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java

Lines changed: 8 additions & 2 deletions

@@ -146,7 +146,10 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont
     this.sparkSchema = StructType$.MODULE$.fromString(sparkRequestedSchemaString);
     this.reader = new ParquetFileReader(
         configuration, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns());
-    this.totalRowCount += reader.getFilteredRecordCount();
+    // use the blocks from the reader in case some do not match filters and will not be read
+    for (BlockMetaData block : reader.getRowGroups()) {
+      this.totalRowCount += block.getRowCount();
+    }

     // For test purpose.
     // If the last external accumulator is `NumRowGroupsAccumulator`, the row group number to read
@@ -222,7 +225,10 @@ protected void initialize(String path, List<String> columns) throws IOException
     this.sparkSchema = new ParquetToSparkSchemaConverter(config).convert(requestedSchema);
     this.reader = new ParquetFileReader(
         config, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns());
-    this.totalRowCount += reader.getFilteredRecordCount();
+    // use the blocks from the reader in case some do not match filters and will not be read
+    for (BlockMetaData block : reader.getRowGroups()) {
+      this.totalRowCount += block.getRowCount();
+    }
   }

   @Override
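
The added loops accumulate the total row count from the reader's own row-group metadata instead of calling getFilteredRecordCount(), which this commit stops using when moving back to Parquet 1.10.1. A minimal, self-contained sketch of the same computation against the ParquetFileReader API (the RowCountSketch class and countTotalRows helper are illustrative names, not part of the commit):

import java.util.List;

import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;

final class RowCountSketch {
  // Sum the row counts of the row groups the reader will actually read.
  // getRowGroups() returns the reader's block metadata, so row groups that
  // were dropped up front are not counted, matching the comment in the diff.
  static long countTotalRows(ParquetFileReader reader) {
    long total = 0L;
    List<BlockMetaData> rowGroups = reader.getRowGroups();
    for (BlockMetaData block : rowGroups) {
      total += block.getRowCount();
    }
    return total;
  }
}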

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java

Lines changed: 1 addition & 1 deletion

@@ -320,7 +320,7 @@ private void initializeInternal() throws IOException, UnsupportedOperationExcept

   private void checkEndOfRowGroup() throws IOException {
     if (rowsReturned != totalCountLoadedSoFar) return;
-    PageReadStore pages = reader.readNextFilteredRowGroup();
+    PageReadStore pages = reader.readNextRowGroup();
     if (pages == null) {
       throw new IOException("expecting more rows but reached last block. Read "
           + rowsReturned + " out of " + totalRowCount);
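
The vectorized reader now advances with readNextRowGroup(), which returns the pages of the next row group and null after the last one. A small sketch of how that call is typically driven in a loop, assuming a ParquetFileReader has already been constructed (the RowGroupDrainSketch class and drain helper are illustrative only):

import java.io.IOException;

import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.hadoop.ParquetFileReader;

final class RowGroupDrainSketch {
  // Walk the row groups with the plain (unfiltered) API used after this change:
  // readNextRowGroup() yields one PageReadStore per row group and null at the end.
  static long drain(ParquetFileReader reader) throws IOException {
    long rows = 0L;
    PageReadStore pages;
    while ((pages = reader.readNextRowGroup()) != null) {
      rows += pages.getRowCount();
    }
    return rows;
  }
}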

sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala

Lines changed: 1 addition & 1 deletion

@@ -214,7 +214,7 @@ class StreamSuite extends StreamTest {
         .start(outputDir.getAbsolutePath)
       try {
         query.processAllAvailable()
-        val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long]
+        val outputDf = spark.read.parquet(outputDir.getAbsolutePath).as[Long]
         checkDataset[Long](outputDf, (0L to 10L).toArray: _*)
       } finally {
         query.stop()
