From 651e07b5e8e2732a5f446d9f4c10b29d222c6290 Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Wed, 28 Dec 2022 19:07:36 +0800 Subject: [PATCH] [Enhancement] upgrade aliyun jindo SDK and remove useless codes (#15606) --- be/src/exec/hdfs_scanner.cpp | 2 -- be/src/util/hdfs_util.cpp | 21 --------------------- be/src/util/hdfs_util.h | 10 ---------- build.sh | 4 ++-- thirdparty/build-thirdparty.sh | 11 ++++++----- thirdparty/vars.sh | 13 +++++++------ 6 files changed, 15 insertions(+), 46 deletions(-) diff --git a/be/src/exec/hdfs_scanner.cpp b/be/src/exec/hdfs_scanner.cpp index 75327988b8d61..dd2c01d7c0058 100644 --- a/be/src/exec/hdfs_scanner.cpp +++ b/be/src/exec/hdfs_scanner.cpp @@ -14,8 +14,6 @@ #include "exec/hdfs_scanner.h" -#include - #include "column/column_helper.h" #include "exec/exec_node.h" #include "io/compressed_input_stream.h" diff --git a/be/src/util/hdfs_util.cpp b/be/src/util/hdfs_util.cpp index 004fb22b5a6f5..cbf54630bcf5f 100644 --- a/be/src/util/hdfs_util.cpp +++ b/be/src/util/hdfs_util.cpp @@ -63,25 +63,4 @@ Status get_namenode_from_path(const std::string& path, std::string* namenode) { return Status::OK(); } -std::string get_bucket_from_namenode(const std::string& namenode) { - auto n = namenode.find("://"); - if (n == std::string::npos) return ""; - n += 3; - auto n2 = namenode.find('/', n); - if (n2 == std::string::npos) return ""; - return namenode.substr(n, n2 - n); -} - -std::string get_endpoint_from_oss_bucket(const std::string& default_bucket, std::string* bucket) { - auto endpoint_start_index = bucket->find('.'); - if (endpoint_start_index == std::string::npos) { - return default_bucket; - } - endpoint_start_index = endpoint_start_index + 1; - auto endpoint_end_index = bucket->size(); - std::string endpoint = bucket->substr(endpoint_start_index, endpoint_end_index - endpoint_start_index + 1); - *bucket = bucket->substr(0, endpoint_start_index - 1); - return endpoint; -} - } // namespace starrocks diff --git a/be/src/util/hdfs_util.h b/be/src/util/hdfs_util.h index c9bbfe8fdf58e..dbe08708f0c86 100644 --- a/be/src/util/hdfs_util.h +++ b/be/src/util/hdfs_util.h @@ -23,15 +23,5 @@ namespace starrocks { std::string get_hdfs_err_msg(); Status get_namenode_from_path(const std::string& path, std::string* namenode); -std::string get_bucket_from_namenode(const std::string& namenode); -std::string get_endpoint_from_oss_bucket(const std::string& default_bucket, std::string* bucket); - -// Returns true if the path refers to a location on an HDFS filesystem. -bool is_hdfs_path(const char* path); - -// Returns true if the path refers to a location on object storage filesystem. -bool is_object_storage_path(const char* path); -bool is_s3a_path(const char* path); -bool is_oss_path(const char* path); } // namespace starrocks diff --git a/build.sh b/build.sh index 4fe57e6342f13..930e46dac3204 100755 --- a/build.sh +++ b/build.sh @@ -368,7 +368,7 @@ if [ ${BUILD_FE} -eq 1 -o ${BUILD_SPARK_DPP} -eq 1 ]; then cp -r -p ${STARROCKS_HOME}/fe/fe-core/target/starrocks-fe.jar ${STARROCKS_OUTPUT}/fe/lib/ cp -r -p ${STARROCKS_HOME}/webroot/* ${STARROCKS_OUTPUT}/fe/webroot/ cp -r -p ${STARROCKS_HOME}/fe/spark-dpp/target/spark-dpp-*-jar-with-dependencies.jar ${STARROCKS_OUTPUT}/fe/spark-dpp/ - cp -r -p ${STARROCKS_THIRDPARTY}/installed/aliyun_oss_jars/* ${STARROCKS_OUTPUT}/fe/lib/ + cp -r -p ${STARROCKS_THIRDPARTY}/installed/jindosdk/* ${STARROCKS_OUTPUT}/fe/lib/ cp -r -p ${STARROCKS_THIRDPARTY}/installed/broker_thirdparty_jars/* ${STARROCKS_OUTPUT}/fe/lib/ elif [ ${BUILD_SPARK_DPP} -eq 1 ]; then @@ -424,7 +424,7 @@ if [ ${BUILD_BE} -eq 1 ]; then else cp -r -p ${STARROCKS_THIRDPARTY}/installed/open_jdk/jre/lib/amd64 ${STARROCKS_OUTPUT}/be/lib/jvm/ fi - cp -r -p ${STARROCKS_THIRDPARTY}/installed/aliyun_oss_jars/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/ + cp -r -p ${STARROCKS_THIRDPARTY}/installed/jindosdk/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/ cp -r -p ${STARROCKS_THIRDPARTY}/installed/broker_thirdparty_jars/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/ fi diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index f938987831387..c53f0ea243bb4 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -842,10 +842,11 @@ build_mariadb() { export BUILD_SYSTEM=$OLD_BUILD_SYSTEM } -# aliyun_oss_jars -build_aliyun_oss_jars() { - check_if_source_exist $ALIYUN_OSS_JARS_SOURCE - cp -r $TP_SOURCE_DIR/$ALIYUN_OSS_JARS_SOURCE $TP_INSTALL_DIR/aliyun_oss_jars +# jindosdk for Aliyun OSS +build_aliyun_jindosdk() { + check_if_source_exist $JINDOSDK_SOURCE + mkdir -p $TP_INSTALL_DIR/jindosdk + cp -r $TP_SOURCE_DIR/$JINDOSDK_SOURCE/lib/*.jar $TP_INSTALL_DIR/jindosdk } build_tencent_cos_jars() { @@ -1014,7 +1015,7 @@ build_jdk build_ragel build_hyperscan build_mariadb -build_aliyun_oss_jars +build_aliyun_jindosdk build_tencent_cos_jars build_aws_cpp_sdk build_vpack diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 8c634a4814892..cbd92d5a9d5a5 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -291,12 +291,13 @@ MARIADB_NAME="mariadb-connector-c-3.1.14.tar.gz" MARIADB_SOURCE="mariadb-connector-c-3.1.14" MARIADB_MD5SUM="86c4052adeb8447900bf33b4e2ddd1f9" -# aliyun_oss_jars -ALIYUN_OSS_JARS_DOWNLOAD="http://cdn-thirdparty.starrocks.com/aliyun-oss-sdk-3.7.2.tar.gz" -ALIYUN_OSS_JARS_NAME="aliyun-oss-sdk-3.7.2.tar.gz" -ALIYUN_OSS_JARS_SOURCE="aliyun-oss-sdk-3.7.2" -ALIYUN_OSS_JARS_MD5SUM="1e37382831598f4ed049eb276b8e8b29" +# jindosdk for Aliyun OSS +JINDOSDK_DOWNLOAD="https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/4.6.2/jindosdk-4.6.2.tar.gz" +JINDOSDK_NAME="jindosdk-4.6.2.tar.gz" +JINDOSDK_SOURCE="jindosdk-4.6.2" +JINDOSDK_MD5SUM="7288ffb8f2fbdde6b907d15041a0f79c" +# aws-sdk-cpp AWS_SDK_CPP_DOWNLOAD="https://github.com/aws/aws-sdk-cpp/archive/refs/tags/1.9.179.tar.gz" AWS_SDK_CPP_NAME="aws-sdk-cpp-1.9.179.tar.gz" AWS_SDK_CPP_SOURCE="aws-sdk-cpp-1.9.179" @@ -347,5 +348,5 @@ BROKER_THIRDPARTY_JARS_MD5SUM="87433dd5e54091d8eb63fbdb35622ea9" # all thirdparties which need to be downloaded is set in array TP_ARCHIVES TP_ARCHIVES="LIBEVENT OPENSSL THRIFT PROTOBUF GFLAGS GLOG GTEST RAPIDJSON SIMDJSON SNAPPY GPERFTOOLS ZLIB LZ4 BZIP CURL \ RE2 BOOST LEVELDB BRPC ROCKSDB LIBRDKAFKA PULSAR FLATBUFFERS ARROW BROTLI ZSTD S2 BITSHUFFLE CROARINGBITMAP \ - JEMALLOC CCTZ FMT RYU BREAK_PAD HADOOP JDK RAGEL HYPERSCAN MARIADB ALIYUN_OSS_JARS AWS_SDK_CPP VPACK OPENTELEMETRY \ + JEMALLOC CCTZ FMT RYU BREAK_PAD HADOOP JDK RAGEL HYPERSCAN MARIADB JINDOSDK AWS_SDK_CPP VPACK OPENTELEMETRY \ BENCHMARK FAST_FLOAT CACHELIB STREAMVBYTE BROKER_THIRDPARTY_JARS"