
Commit b364990 (parent 6bd491e)

Add e2e test cases for codec.

6 files changed: +146 -1 lines


pom.xml

Lines changed: 7 additions & 0 deletions
@@ -300,6 +300,13 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+
+    <repository>
+      <id>jitpack.io</id>
+      <url>https://jitpack.io</url>
+      <name>Jitpack.io repository</name>
+      <!-- needed for brotli-codec -->
+    </repository>
   </repositories>
   <pluginRepositories>
     <pluginRepository>
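
Note: JitPack builds Maven artifacts on demand from GitHub, mapping com.github.<user> coordinates to the matching repository, so the com.github.rdblue:brotli-codec dependency added below in sql/core/pom.xml resolves from https://jitpack.io rather than Maven Central; that is why the extra repository is needed.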

project/SparkBuild.scala

Lines changed: 3 additions & 1 deletion
@@ -274,7 +274,9 @@ object SparkBuild extends PomBuild {
       "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
       DefaultMavenRepository,
       Resolver.mavenLocal,
-      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
+      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns),
+      // needed for brotli-codec
+      "jitpack.io" at "https://jitpack.io"
     ),
     externalResolvers := resolvers.value,
     otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value,
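
For reference outside Spark's build, the same wiring in a standalone build.sbt would look roughly like this (an illustrative sketch, not part of this commit):

// Sketch: JitPack resolver plus test-scoped dependency, mirroring the Maven change below.
resolvers += "jitpack.io" at "https://jitpack.io"
libraryDependencies += "com.github.rdblue" % "brotli-codec" % "0.1.1" % Test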

sql/core/pom.xml

Lines changed: 6 additions & 0 deletions
@@ -178,6 +178,12 @@
       <artifactId>htmlunit-driver</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.github.rdblue</groupId>
+      <artifactId>brotli-codec</artifactId>
+      <version>0.1.1</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
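
With brotli-codec on the test classpath, Brotli becomes a usable Parquet compression codec. A minimal sketch of what the new tests exercise (the session setup and output path are illustrative, not from this commit):

import org.apache.spark.sql.SparkSession

// Hypothetical standalone run; the suites below set the session-wide
// spark.sql.parquet.compression.codec conf instead of a per-write option.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
val df = spark.range(100).toDF("id")
df.write
  .option("compression", "brotli") // requires brotli-codec on the classpath
  .parquet("/tmp/brotli-demo")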
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala (new file)

Lines changed: 66 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SQLTestUtils

abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils {

  protected def dataSourceName: String
  protected val codecConfigName: String
  protected def availableCodecs: Seq[String]

  def testWithAllCodecs(name: String)(f: => Unit): Unit = {
    for (codec <- availableCodecs) {
      test(s"$name - data source $dataSourceName - codec: $codec") {
        withSQLConf(codecConfigName -> codec) {
          f
        }
      }
    }
  }

  testWithAllCodecs("write and read - single partition") {
    withTempPath { dir =>
      testData
        .repartition(1)
        .write
        .format(dataSourceName)
        .save(dir.getCanonicalPath)

      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
      checkAnswer(df, testData)
    }
  }

  testWithAllCodecs("write and read") {
    withTempPath { dir =>
      testData
        .repartition(5)
        .write
        .format(dataSourceName)
        .save(dir.getCanonicalPath)

      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
      checkAnswer(df, testData)
    }
  }
}
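
testWithAllCodecs expands into one test per codec, named like "write and read - data source parquet - codec: brotli", so a failing codec is pinpointed by test name. Extending the harness to another format needs only the three overrides; a hypothetical sketch for Avro (the external spark-avro module and its conf key are assumptions, not part of this commit):

// Hypothetical suite; "avro" requires the external spark-avro module,
// and spark.sql.avro.compression.codec is assumed as the conf key.
class AvroCodecTestSuite extends DataSourceCodecTest with SharedSparkSession {
  override def dataSourceName: String = "avro"
  override val codecConfigName = "spark.sql.avro.compression.codec"
  override protected def availableCodecs = Seq("uncompressed", "snappy", "deflate")
}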
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala (new file)

Lines changed: 31 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.datasources.orc

import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession

class OrcCodecTestSuite extends DataSourceCodecTest with SharedSparkSession {

  override def dataSourceName: String = "orc"
  override val codecConfigName = SQLConf.ORC_COMPRESSION.key
  override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
    "zlib", "zstd", "lz4", "lzo")
}
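
The conf this suite iterates can also be toggled by hand; one iteration of the loop looks roughly like this (a sketch, with spark and outputPath assumed to exist):

import org.apache.spark.sql.internal.SQLConf

// Equivalent to a single codec case of testWithAllCodecs for ORC.
spark.conf.set(SQLConf.ORC_COMPRESSION.key, "zstd") // spark.sql.orc.compression.codec
spark.range(10).write.orc(outputPath)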
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala (new file)

Lines changed: 33 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.datasources.parquet

import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession

class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession {

  override def dataSourceName: String = "parquet"
  override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
  // Exclude "lzo" because it is GPL-licensed and therefore not bundled with Hadoop.
  // TODO (SPARK-36669): Add "lz4" back after fixing it.
  override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
    "gzip", "brotli", "zstd")
}
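
Note that brotli-codec is declared with test scope, so Brotli round-trips are verified by these suites, but applications writing Brotli-compressed Parquet still need to provide the codec jar on their own runtime classpath.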
