File tree Expand file tree Collapse file tree 3 files changed +4
-5
lines changed
main/scala/org/apache/spark/sql/execution/datasources/parquet
test/scala/org/apache/spark/sql/streaming
hive/src/test/scala/org/apache/spark/sql/hive Expand file tree Collapse file tree 3 files changed +4
-5
lines changed Original file line number Diff line number Diff line change @@ -127,9 +127,6 @@ class ParquetFileFormat
127
127
conf.setEnum(ParquetOutputFormat .JOB_SUMMARY_LEVEL , JobSummaryLevel .NONE )
128
128
}
129
129
130
- // PARQUET-1746: Disables page-level CRC checksums by default.
131
- conf.setBooleanIfUnset(ParquetOutputFormat .PAGE_WRITE_CHECKSUM_ENABLED , false )
132
-
133
130
if (ParquetOutputFormat .getJobSummaryLevel(conf) != JobSummaryLevel .NONE
134
131
&& ! classOf [ParquetOutputCommitter ].isAssignableFrom(committerClass)) {
135
132
// output summary is requested, but the class is not a Parquet Committer
Original file line number Diff line number Diff line change @@ -214,7 +214,9 @@ class StreamSuite extends StreamTest {
214
214
.start(outputDir.getAbsolutePath)
215
215
try {
216
216
query.processAllAvailable()
217
- val outputDf = spark.read.parquet(outputDir.getAbsolutePath).as[Long ]
217
+ // Writing page-level CRC checksums to Parquet files changes the file size and can
218
+ // affect the data order when reading these files. Please see PARQUET-1746 for details.
219
+ val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort(' a ).as[Long ]
218
220
checkDataset[Long ](outputDf, (0L to 10L ).toArray: _* )
219
221
} finally {
220
222
query.stop()
Original file line number Diff line number Diff line change @@ -1528,7 +1528,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
1528
1528
Seq (tbl, ext_tbl).foreach { tblName =>
1529
1529
sql(s " INSERT INTO $tblName VALUES (1, 'a', '2019-12-13') " )
1530
1530
1531
- val expectedSize = 639
1531
+ val expectedSize = 651
1532
1532
// analyze table
1533
1533
sql(s " ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN " )
1534
1534
var tableStats = getTableStats(tblName)
You can’t perform that action at this time.
0 commit comments