Skip to content

Commit

Permalink
[Enhancement] upgrade aliyun jindo SDK and remove useless codes (Star…
Browse files Browse the repository at this point in the history
  • Loading branch information
miomiocat authored Dec 28, 2022
1 parent 337cebf commit 651e07b
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 46 deletions.
2 changes: 0 additions & 2 deletions be/src/exec/hdfs_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

#include "exec/hdfs_scanner.h"

#include <boost/algorithm/string.hpp>

#include "column/column_helper.h"
#include "exec/exec_node.h"
#include "io/compressed_input_stream.h"
Expand Down
21 changes: 0 additions & 21 deletions be/src/util/hdfs_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,25 +63,4 @@ Status get_namenode_from_path(const std::string& path, std::string* namenode) {
return Status::OK();
}

// Extracts the portion of `namenode` that sits between the "://" scheme
// separator and the first '/' that follows it.
//
// Returns an empty string when `namenode` has no "://" or when no '/' appears
// after the separator (e.g. "oss://bucket" without a trailing slash).
std::string get_bucket_from_namenode(const std::string& namenode) {
    constexpr std::string::size_type kSchemeSeparatorLen = 3; // length of "://"

    const std::string::size_type separator_pos = namenode.find("://");
    if (separator_pos != std::string::npos) {
        const std::string::size_type bucket_begin = separator_pos + kSchemeSeparatorLen;
        const std::string::size_type bucket_end = namenode.find('/', bucket_begin);
        if (bucket_end != std::string::npos) {
            return namenode.substr(bucket_begin, bucket_end - bucket_begin);
        }
    }
    return "";
}

// Splits `*bucket` at its first '.' into a bucket name and an endpoint.
//
// When `*bucket` contains a '.', the text after the first '.' is returned as
// the endpoint and `*bucket` is truncated to the text before that '.'.
// When there is no '.', `*bucket` is left untouched and `default_bucket` is
// returned as-is.
//
// `bucket` is dereferenced unconditionally, so it must not be null.
std::string get_endpoint_from_oss_bucket(const std::string& default_bucket, std::string* bucket) {
    const std::string::size_type dot_pos = bucket->find('.');
    if (dot_pos == std::string::npos) {
        return default_bucket;
    }

    // Everything after the dot is the endpoint; everything before it stays
    // in *bucket. The dot itself is dropped from both.
    std::string endpoint = bucket->substr(dot_pos + 1);
    bucket->erase(dot_pos);
    return endpoint;
}

} // namespace starrocks
10 changes: 0 additions & 10 deletions be/src/util/hdfs_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,5 @@ namespace starrocks {
std::string get_hdfs_err_msg();

Status get_namenode_from_path(const std::string& path, std::string* namenode);
// Returns the text between "://" and the next '/' in `namenode`, or an empty
// string if `namenode` lacks "://" or has no '/' after it.
std::string get_bucket_from_namenode(const std::string& namenode);
// Splits `*bucket` at its first '.': returns the text after the '.' and
// truncates `*bucket` to the text before it. If `*bucket` has no '.', it is
// left unchanged and `default_bucket` is returned. `bucket` must not be null.
std::string get_endpoint_from_oss_bucket(const std::string& default_bucket, std::string* bucket);

// Returns true if the path refers to a location on an HDFS filesystem.
bool is_hdfs_path(const char* path);

// Returns true if the path refers to a location on object storage filesystem.
bool is_object_storage_path(const char* path);
bool is_s3a_path(const char* path);
bool is_oss_path(const char* path);

} // namespace starrocks
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ if [ ${BUILD_FE} -eq 1 -o ${BUILD_SPARK_DPP} -eq 1 ]; then
cp -r -p ${STARROCKS_HOME}/fe/fe-core/target/starrocks-fe.jar ${STARROCKS_OUTPUT}/fe/lib/
cp -r -p ${STARROCKS_HOME}/webroot/* ${STARROCKS_OUTPUT}/fe/webroot/
cp -r -p ${STARROCKS_HOME}/fe/spark-dpp/target/spark-dpp-*-jar-with-dependencies.jar ${STARROCKS_OUTPUT}/fe/spark-dpp/
cp -r -p ${STARROCKS_THIRDPARTY}/installed/aliyun_oss_jars/* ${STARROCKS_OUTPUT}/fe/lib/
cp -r -p ${STARROCKS_THIRDPARTY}/installed/jindosdk/* ${STARROCKS_OUTPUT}/fe/lib/
cp -r -p ${STARROCKS_THIRDPARTY}/installed/broker_thirdparty_jars/* ${STARROCKS_OUTPUT}/fe/lib/

elif [ ${BUILD_SPARK_DPP} -eq 1 ]; then
Expand Down Expand Up @@ -424,7 +424,7 @@ if [ ${BUILD_BE} -eq 1 ]; then
else
cp -r -p ${STARROCKS_THIRDPARTY}/installed/open_jdk/jre/lib/amd64 ${STARROCKS_OUTPUT}/be/lib/jvm/
fi
cp -r -p ${STARROCKS_THIRDPARTY}/installed/aliyun_oss_jars/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/
cp -r -p ${STARROCKS_THIRDPARTY}/installed/jindosdk/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/
cp -r -p ${STARROCKS_THIRDPARTY}/installed/broker_thirdparty_jars/* ${STARROCKS_OUTPUT}/be/lib/hadoop/hdfs/
fi

Expand Down
11 changes: 6 additions & 5 deletions thirdparty/build-thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -842,10 +842,11 @@ build_mariadb() {
export BUILD_SYSTEM=$OLD_BUILD_SYSTEM
}

# aliyun_oss_jars
build_aliyun_oss_jars() {
check_if_source_exist $ALIYUN_OSS_JARS_SOURCE
cp -r $TP_SOURCE_DIR/$ALIYUN_OSS_JARS_SOURCE $TP_INSTALL_DIR/aliyun_oss_jars
# jindosdk for Aliyun OSS
build_aliyun_jindosdk() {
# Installs the JindoSDK jars: copies every lib/*.jar from the unpacked
# $JINDOSDK_SOURCE tree under $TP_SOURCE_DIR into $TP_INSTALL_DIR/jindosdk.
# check_if_source_exist is defined elsewhere; presumably it aborts when the
# source directory is missing -- confirm against its definition.
check_if_source_exist $JINDOSDK_SOURCE
# Create the destination up front so cp places the jars inside it.
mkdir -p $TP_INSTALL_DIR/jindosdk
# Only the jar files are taken; the rest of the SDK tree is not installed.
cp -r $TP_SOURCE_DIR/$JINDOSDK_SOURCE/lib/*.jar $TP_INSTALL_DIR/jindosdk
}

build_tencent_cos_jars() {
Expand Down Expand Up @@ -1014,7 +1015,7 @@ build_jdk
build_ragel
build_hyperscan
build_mariadb
build_aliyun_oss_jars
build_aliyun_jindosdk
build_tencent_cos_jars
build_aws_cpp_sdk
build_vpack
Expand Down
13 changes: 7 additions & 6 deletions thirdparty/vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,13 @@ MARIADB_NAME="mariadb-connector-c-3.1.14.tar.gz"
MARIADB_SOURCE="mariadb-connector-c-3.1.14"
MARIADB_MD5SUM="86c4052adeb8447900bf33b4e2ddd1f9"

# aliyun_oss_jars
ALIYUN_OSS_JARS_DOWNLOAD="http://cdn-thirdparty.starrocks.com/aliyun-oss-sdk-3.7.2.tar.gz"
ALIYUN_OSS_JARS_NAME="aliyun-oss-sdk-3.7.2.tar.gz"
ALIYUN_OSS_JARS_SOURCE="aliyun-oss-sdk-3.7.2"
ALIYUN_OSS_JARS_MD5SUM="1e37382831598f4ed049eb276b8e8b29"
# jindosdk for Aliyun OSS
JINDOSDK_DOWNLOAD="https://jindodata-binary.oss-cn-shanghai.aliyuncs.com/release/4.6.2/jindosdk-4.6.2.tar.gz"
JINDOSDK_NAME="jindosdk-4.6.2.tar.gz"
JINDOSDK_SOURCE="jindosdk-4.6.2"
JINDOSDK_MD5SUM="7288ffb8f2fbdde6b907d15041a0f79c"

# aws-sdk-cpp
AWS_SDK_CPP_DOWNLOAD="https://github.com/aws/aws-sdk-cpp/archive/refs/tags/1.9.179.tar.gz"
AWS_SDK_CPP_NAME="aws-sdk-cpp-1.9.179.tar.gz"
AWS_SDK_CPP_SOURCE="aws-sdk-cpp-1.9.179"
Expand Down Expand Up @@ -347,5 +348,5 @@ BROKER_THIRDPARTY_JARS_MD5SUM="87433dd5e54091d8eb63fbdb35622ea9"
# All third-party archives that need to be downloaded are listed (space-separated) in TP_ARCHIVES
TP_ARCHIVES="LIBEVENT OPENSSL THRIFT PROTOBUF GFLAGS GLOG GTEST RAPIDJSON SIMDJSON SNAPPY GPERFTOOLS ZLIB LZ4 BZIP CURL \
RE2 BOOST LEVELDB BRPC ROCKSDB LIBRDKAFKA PULSAR FLATBUFFERS ARROW BROTLI ZSTD S2 BITSHUFFLE CROARINGBITMAP \
JEMALLOC CCTZ FMT RYU BREAK_PAD HADOOP JDK RAGEL HYPERSCAN MARIADB ALIYUN_OSS_JARS AWS_SDK_CPP VPACK OPENTELEMETRY \
JEMALLOC CCTZ FMT RYU BREAK_PAD HADOOP JDK RAGEL HYPERSCAN MARIADB JINDOSDK AWS_SDK_CPP VPACK OPENTELEMETRY \
BENCHMARK FAST_FLOAT CACHELIB STREAMVBYTE BROKER_THIRDPARTY_JARS"

0 comments on commit 651e07b

Please sign in to comment.