Skip to content

Commit 72c52b6

Browse files
committed
Make "DataFrame reuse" test pass
1 parent cb7133f commit 72c52b6

File tree

3 files changed

+4
-5
lines changed

3 files changed

+4
-5
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@ class ParquetFileFormat
127127
conf.setEnum(ParquetOutputFormat.JOB_SUMMARY_LEVEL, JobSummaryLevel.NONE)
128128
}
129129

130-
// PARQUET-1746: Disables page-level CRC checksums by default.
131-
conf.setBooleanIfUnset(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, false)
132-
133130
if (ParquetOutputFormat.getJobSummaryLevel(conf) != JobSummaryLevel.NONE
134131
&& !classOf[ParquetOutputCommitter].isAssignableFrom(committerClass)) {
135132
// output summary is requested, but the class is not a Parquet Committer

sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,9 @@ class StreamSuite extends StreamTest {
214214
.start(outputDir.getAbsolutePath)
215215
try {
216216
query.processAllAvailable()
217-
val outputDf = spark.read.parquet(outputDir.getAbsolutePath).as[Long]
217+
// Writing Parquet page-level CRC checksums changes the file size and
218+
// can affect the data order when reading these files. See PARQUET-1746 for details.
219+
val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long]
218220
checkDataset[Long](outputDf, (0L to 10L).toArray: _*)
219221
} finally {
220222
query.stop()

sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1528,7 +1528,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
15281528
Seq(tbl, ext_tbl).foreach { tblName =>
15291529
sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")
15301530

1531-
val expectedSize = 639
1531+
val expectedSize = 651
15321532
// analyze table
15331533
sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
15341534
var tableStats = getTableStats(tblName)

0 commit comments

Comments
 (0)