[update](hudi) update hudi-spark bundle to 3.4.3 (apache#35013)
1. For security reasons, upgrade the Spark version to 3.4.3.
2. Resolve the Spark jar conflict, where the version differed between fe and be-java-extensions.
3. The HBase version is 2.4.9, which is not secure and conflicts with hadoop3. A later PR will update the HBase version to 2.5.x; for now, `hbase-hadoop-compat:2.5.2-hadoop3` is used to resolve the conflict with hadoop3.
AshinGau authored May 21, 2024
1 parent ff6492f commit dde0493
Showing 7 changed files with 94 additions and 85 deletions.
41 changes: 7 additions & 34 deletions fe/be-java-extensions/hudi-scanner/pom.xml
@@ -32,10 +32,7 @@ under the License.
<fe_ut_parallel>1</fe_ut_parallel>
<scala.version>2.12.15</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.2.0</spark.version>
<sparkbundle.version>3.2</sparkbundle.version>
<janino.version>3.0.16</janino.version>
<avro.version>1.11.2</avro.version>
<avro.version>1.11.3</avro.version>
</properties>

<dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
<exclusions>
@@ -119,6 +116,11 @@ under the License.
<version>1.10.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr4.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- version of spark's janino is error -->
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>${janino.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
<version>${janino.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
import org.apache.hudi.metadata.HoodieTableMetadataUtil
import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState}
import org.apache.log4j.Logger
import org.apache.spark.sql.adapter.Spark3_2Adapter
import org.apache.spark.sql.adapter.Spark3_4Adapter
import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}

class DorisSparkAdapter extends Spark3_2Adapter {
class DorisSparkAdapter extends Spark3_4Adapter {
override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
}

@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
partitionedFile => {
val reader = new HoodieAvroHFileReader(
hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf))
hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf))

val requiredRowSchema = requiredDataSchema.structTypeSchema
// NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {

BaseFileReader(
read = partitionedFile => {
val extension = FSUtils.getFileExtension(partitionedFile.filePath)
val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString())
if (tableBaseFileFormat.getFileExtension.equals(extension)) {
read(partitionedFile)
} else {
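The Scala hunks above follow from a Spark API change: since Spark 3.4, `PartitionedFile.filePath` is an `org.apache.spark.paths.SparkPath` rather than a `String`, so call sites convert explicitly — `.toPath` where a Hadoop `Path` is expected (the `HoodieAvroHFileReader` constructor) and `.toString()` where Hudi wants a plain string (`FSUtils.getFileExtension`). The `Spark3_2Adapter` → `Spark3_4Adapter` swap is the matching Hudi-side change, since the adapter class comes from the version-specific `hudi-spark3.4.x` bundle. A minimal sketch of the two conversions, assuming Spark 3.4 on the classpath; the helper object and method names are illustrative:

```scala
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.execution.datasources.PartitionedFile

object FilePathConversions {
  // Spark 3.4: filePath is a SparkPath, so APIs that take a Hadoop Path
  // use its built-in converter instead of wrapping with `new Path(...)`.
  def toHadoopPath(pf: PartitionedFile): Path = pf.filePath.toPath

  // APIs that still take a plain String (e.g. Hudi's FSUtils.getFileExtension)
  // get the underlying path string instead.
  def toPathString(pf: PartitionedFile): String = pf.filePath.toString()
}
```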
@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.common.model.HoodieLogFile
import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema}
import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitReader
val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
None
} else {
Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength))
Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
}
HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
}
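Constructing a `PartitionedFile` changes correspondingly: the raw path string is wrapped via `SparkPath.fromPathString` rather than passed directly, as in the hunk above. A small sketch, assuming Spark 3.4's `PartitionedFile(partitionValues, filePath, start, length, ...)` signature; the factory name is illustrative:

```scala
import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile

object PartitionedFileFactory {
  // Wrap the raw path string in a SparkPath before building the file split;
  // the remaining PartitionedFile parameters keep their defaults.
  def fromPathString(partitionValues: InternalRow,
                     dataFilePath: String,
                     length: Long): PartitionedFile =
    PartitionedFile(partitionValues, SparkPath.fromPathString(dataFilePath), 0, length)
}
```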
46 changes: 12 additions & 34 deletions fe/be-java-extensions/preload-extensions/pom.xml
@@ -33,8 +33,6 @@ under the License.
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.2.0</spark.version>
<janino.version>3.0.16</janino.version>
</properties>

<dependencies>
@@ -63,6 +61,12 @@
<!-- Must be provided, we use hadoop_libs in BE's 3rd party instead -->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-client</artifactId>
@@ -83,14 +87,19 @@ under the License.
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr4.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3-common</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<exclusions>
<exclusion>
@@ -158,37 +167,6 @@ under the License.
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
<exclusion>
<artifactId>antlr4-runtime</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- version of spark's janino is error -->
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>${janino.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
<version>${janino.version}</version>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
35 changes: 26 additions & 9 deletions fe/fe-core/pom.xml
@@ -32,7 +32,6 @@ under the License.
<doris.home>${basedir}/../../</doris.home>
<doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
<fe_ut_parallel>1</fe_ut_parallel>
<antlr4.version>4.13.1</antlr4.version>
<awssdk.version>2.20.131</awssdk.version>
<huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
<tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
</exclusion>
</exclusions>
</dependency>
<!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
when upgrade doris antlr-runtime version, should take care of trino-parser.-->
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr4.version}</version>
</dependency>
<dependency>
<groupId>com.aliyun.odps</groupId>
<artifactId>odps-sdk-core</artifactId>
<exclusions>
<exclusion>
<artifactId>antlr-runtime</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
<exclusion>
<artifactId>antlr4</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
<dependency>
@@ -655,14 +671,6 @@
<artifactId>mariadb-java-client</artifactId>
</dependency>

<!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
when upgrade doris antlr-runtime version, should take care of trino-parser.-->
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr4.version}</version>
</dependency>

<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
@@ -808,6 +816,15 @@
<artifactId>ap-loader-all</artifactId>
<version>3.0-8</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop-compat</artifactId>
<version>2.5.2-hadoop3</version>
</dependency>
</dependencies>
<repositories>
<!-- for huawei obs sdk -->
@@ -162,6 +162,7 @@ public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTab
partitionValues.writeLock().unlock();
}
} catch (Exception e) {
LOG.warn("Failed to get hudi partitions", e);
throw new CacheException("Failed to get hudi partitions", e);
}
}
45 changes: 42 additions & 3 deletions fe/pom.xml
@@ -274,7 +274,7 @@ under the License.
<!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will break the build on CentOS 6 due to the obsolete GLIBC -->
<grpc-java.version>1.34.0</grpc-java.version>
<grpc.version>1.60.1</grpc.version>
<check.freamework.version>3.42.0</check.freamework.version>
<check.freamework.version>3.43.0</check.freamework.version>
<protobuf.version>3.24.3</protobuf.version>
<!-- we use protoc-jar-maven-plugin to generate protobuf generated code -->
<!-- see https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct version -->
@@ -294,12 +294,13 @@
<zjsonpatch.version>0.2.3</zjsonpatch.version>
<kafka-clients.version>3.4.0</kafka-clients.version>
<oshi-core.version>6.4.5</oshi-core.version>
<xnio-nio.version>3.8.9.Final</xnio-nio.version>
<xnio-nio.version>3.8.14.Final</xnio-nio.version>
<javax.annotation-api.version>1.3.2</javax.annotation-api.version>
<javax.activation.version>1.2.0</javax.activation.version>
<jaxws-api.version>2.3.0</jaxws-api.version>
<RoaringBitmap.version>0.8.13</RoaringBitmap.version>
<spark.version>3.4.1</spark.version>
<spark.version>3.4.3</spark.version>
<hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
<hive.version>3.1.3</hive.version>
<hive.common.version>2.3.9</hive.common.version>
<nimbusds.version>9.35</nimbusds.version>
@@ -331,6 +332,8 @@ under the License.
<aws-java-sdk.version>1.12.669</aws-java-sdk.version>
<mariadb-java-client.version>3.0.9</mariadb-java-client.version>
<hadoop.version>3.3.6</hadoop.version>
<hbase.version>2.4.9</hbase.version>
<antlr4.version>4.13.1</antlr4.version>
<joda.version>2.8.1</joda.version>
<project.scm.id>github</project.scm.id>
<spring.version>2.7.13</spring.version>
@@ -528,6 +531,14 @@ under the License.
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
</exclusion>
</exclusions>
</dependency>
@@ -558,6 +569,29 @@ under the License.
<artifactId>kerb-simplekdc</artifactId>
<version>${kerby.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.kerby</groupId>
<artifactId>kerb-core</artifactId>
@@ -1145,6 +1179,11 @@
<artifactId>xnio-nio</artifactId>
<version>${xnio-nio.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.xnio</groupId>
<artifactId>xnio-api</artifactId>
<version>${xnio-nio.version}</version>
</dependency>
<!-- support jdk9 -->
<dependency>
<groupId>javax.annotation</groupId>
