Skip to content

Commit 8c54f1e

Browse files
dongjoon-hyun authored and gatorsmile committed
[SPARK-21422][BUILD] Depend on Apache ORC 1.4.0
## What changes were proposed in this pull request?

Like Parquet, this PR aims to depend on the latest Apache ORC 1.4 for Apache Spark 2.3. There are key benefits for Apache ORC 1.4.

- Stability: Apache ORC 1.4.0 has many fixes, and we can depend on the ORC community more.
- Maintainability: Reduce the Hive dependency so that old legacy code can be removed later.

Later, we can get the following two key benefits by adding a new ORCFileFormat in SPARK-20728 (#17980), too.

- Usability: Users can use ORC data sources without the hive module, i.e., -Phive.
- Speed: Use both Spark ColumnarBatch and ORC RowBatch together. This will be faster than the current implementation in Spark.

## How was this patch tested?

Pass the Jenkins.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #18640 from dongjoon-hyun/SPARK-21422.
1 parent 07549b2 commit 8c54f1e

File tree

5 files changed

+66
-0
lines changed

5 files changed

+66
-0
lines changed

assembly/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,12 @@
220220
<hive.deps.scope>provided</hive.deps.scope>
221221
</properties>
222222
</profile>
223+
<profile>
224+
<id>orc-provided</id>
225+
<properties>
226+
<orc.deps.scope>provided</orc.deps.scope>
227+
</properties>
228+
</profile>
223229
<profile>
224230
<id>parquet-provided</id>
225231
<properties>

dev/deps/spark-deps-hadoop-2.6

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ JavaEWAH-0.3.2.jar
22
RoaringBitmap-0.5.11.jar
33
ST4-4.0.4.jar
44
activation-1.1.1.jar
5+
aircompressor-0.3.jar
56
antlr-2.7.7.jar
67
antlr-runtime-3.4.jar
78
antlr4-runtime-4.5.3.jar
@@ -148,6 +149,8 @@ netty-3.9.9.Final.jar
148149
netty-all-4.0.43.Final.jar
149150
objenesis-2.1.jar
150151
opencsv-2.3.jar
152+
orc-core-1.4.0-nohive.jar
153+
orc-mapreduce-1.4.0-nohive.jar
151154
oro-2.0.8.jar
152155
osgi-resource-locator-1.0.1.jar
153156
paranamer-2.6.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ JavaEWAH-0.3.2.jar
22
RoaringBitmap-0.5.11.jar
33
ST4-4.0.4.jar
44
activation-1.1.1.jar
5+
aircompressor-0.3.jar
56
antlr-2.7.7.jar
67
antlr-runtime-3.4.jar
78
antlr4-runtime-4.5.3.jar
@@ -149,6 +150,8 @@ netty-3.9.9.Final.jar
149150
netty-all-4.0.43.Final.jar
150151
objenesis-2.1.jar
151152
opencsv-2.3.jar
153+
orc-core-1.4.0-nohive.jar
154+
orc-mapreduce-1.4.0-nohive.jar
152155
oro-2.0.8.jar
153156
osgi-resource-locator-1.0.1.jar
154157
paranamer-2.6.jar

pom.xml

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,8 @@
132132
<hive.version.short>1.2.1</hive.version.short>
133133
<derby.version>10.12.1.1</derby.version>
134134
<parquet.version>1.8.2</parquet.version>
135+
<orc.version>1.4.0</orc.version>
136+
<orc.classifier>nohive</orc.classifier>
135137
<hive.parquet.version>1.6.0</hive.parquet.version>
136138
<jetty.version>9.3.20.v20170531</jetty.version>
137139
<javaxservlet.version>3.1.0</javaxservlet.version>
@@ -208,6 +210,7 @@
208210
<flume.deps.scope>compile</flume.deps.scope>
209211
<hadoop.deps.scope>compile</hadoop.deps.scope>
210212
<hive.deps.scope>compile</hive.deps.scope>
213+
<orc.deps.scope>compile</orc.deps.scope>
211214
<parquet.deps.scope>compile</parquet.deps.scope>
212215
<parquet.test.deps.scope>test</parquet.test.deps.scope>
213216

@@ -1695,6 +1698,44 @@
16951698
</exclusion>
16961699
</exclusions>
16971700
</dependency>
1701+
<dependency>
1702+
<groupId>org.apache.orc</groupId>
1703+
<artifactId>orc-core</artifactId>
1704+
<version>${orc.version}</version>
1705+
<classifier>${orc.classifier}</classifier>
1706+
<scope>${orc.deps.scope}</scope>
1707+
<exclusions>
1708+
<exclusion>
1709+
<groupId>org.apache.hadoop</groupId>
1710+
<artifactId>hadoop-common</artifactId>
1711+
</exclusion>
1712+
<exclusion>
1713+
<groupId>org.apache.hive</groupId>
1714+
<artifactId>hive-storage-api</artifactId>
1715+
</exclusion>
1716+
</exclusions>
1717+
</dependency>
1718+
<dependency>
1719+
<groupId>org.apache.orc</groupId>
1720+
<artifactId>orc-mapreduce</artifactId>
1721+
<version>${orc.version}</version>
1722+
<classifier>${orc.classifier}</classifier>
1723+
<scope>${orc.deps.scope}</scope>
1724+
<exclusions>
1725+
<exclusion>
1726+
<groupId>org.apache.hadoop</groupId>
1727+
<artifactId>hadoop-common</artifactId>
1728+
</exclusion>
1729+
<exclusion>
1730+
<groupId>org.apache.orc</groupId>
1731+
<artifactId>orc-core</artifactId>
1732+
</exclusion>
1733+
<exclusion>
1734+
<groupId>org.apache.hive</groupId>
1735+
<artifactId>hive-storage-api</artifactId>
1736+
</exclusion>
1737+
</exclusions>
1738+
</dependency>
16981739
<dependency>
16991740
<groupId>org.apache.parquet</groupId>
17001741
<artifactId>parquet-column</artifactId>
@@ -2727,6 +2768,9 @@
27272768
<profile>
27282769
<id>hive-provided</id>
27292770
</profile>
2771+
<profile>
2772+
<id>orc-provided</id>
2773+
</profile>
27302774
<profile>
27312775
<id>parquet-provided</id>
27322776
</profile>

sql/core/pom.xml

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,16 @@
8686
<scope>test</scope>
8787
</dependency>
8888

89+
<dependency>
90+
<groupId>org.apache.orc</groupId>
91+
<artifactId>orc-core</artifactId>
92+
<classifier>${orc.classifier}</classifier>
93+
</dependency>
94+
<dependency>
95+
<groupId>org.apache.orc</groupId>
96+
<artifactId>orc-mapreduce</artifactId>
97+
<classifier>${orc.classifier}</classifier>
98+
</dependency>
8999
<dependency>
90100
<groupId>org.apache.parquet</groupId>
91101
<artifactId>parquet-column</artifactId>

0 commit comments

Comments
 (0)