Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

install dependencies needed to support Azure Storage ABFS Connector #6418

Closed
1 change: 1 addition & 0 deletions .circleci/dist_compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ jobs:
"-DVELOX_ENABLE_HDFS=ON"
"-DVELOX_ENABLE_S3=ON"
"-DVELOX_ENABLE_GCS=ON"
"-DVELOX_ENABLE_ABFS=ON"
"-DVELOX_ENABLE_SUBSTRAIT=ON"
"-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
)
Expand Down
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ option(VELOX_ENABLE_BENCHMARKS "Enable Velox top level benchmarks." OFF)
option(VELOX_ENABLE_BENCHMARKS_BASIC "Enable Velox basic benchmarks." OFF)
option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_GCS "Build GCS Connector" OFF)
option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
Expand Down Expand Up @@ -104,6 +105,7 @@ if(${VELOX_BUILD_MINIMAL})
set(VELOX_ENABLE_EXAMPLES OFF)
set(VELOX_ENABLE_S3 OFF)
set(VELOX_ENABLE_GCS OFF)
set(VELOX_ENABLE_ABFS OFF)
set(VELOX_ENABLE_SUBSTRAIT OFF)
set(VELOX_CODEGEN_SUPPORT OFF)
endif()
Expand Down Expand Up @@ -138,6 +140,7 @@ if(${VELOX_BUILD_BENCHMARKS})
set(VELOX_BUILD_TESTING OFF)
set(VELOX_ENABLE_EXAMPLES OFF)
set(VELOX_ENABLE_GCS OFF)
set(VELOX_ENABLE_ABFS OFF)
set(VELOX_ENABLE_SUBSTRAIT OFF)
set(VELOX_CODEGEN_SUPPORT OFF)
endif()
Expand All @@ -156,6 +159,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE})
set(VELOX_ENABLE_EXAMPLES OFF)
set(VELOX_ENABLE_S3 OFF)
set(VELOX_ENABLE_GCS OFF)
set(VELOX_ENABLE_ABFS OFF)
set(VELOX_ENABLE_SUBSTRAIT OFF)
set(VELOX_CODEGEN_SUPPORT OFF)
set(VELOX_ENABLE_BENCHMARKS_BASIC OFF)
Expand Down Expand Up @@ -198,6 +202,16 @@ if(VELOX_ENABLE_GCS)
add_definitions(-DVELOX_ENABLE_GCS)
endif()

if(VELOX_ENABLE_ABFS)
  # The ABFS connector needs the azure-storage-blobs-cpp package. If the Azure
  # Storage SDK for C++ is installed in a non-default location, pass
  # -DAZURESDK_ROOT_DIR=<install prefix> so that prefix is searched as well.
  if(AZURESDK_ROOT_DIR)
    list(APPEND CMAKE_PREFIX_PATH "${AZURESDK_ROOT_DIR}")
  endif()

  # Config-mode lookup; REQUIRED aborts configuration when the package is not
  # found.
  find_package(azure-storage-blobs-cpp CONFIG REQUIRED)

  # Define the VELOX_ENABLE_ABFS preprocessor macro for compiled sources.
  add_definitions(-DVELOX_ENABLE_ABFS)
endif()

if(VELOX_ENABLE_HDFS)
find_library(
LIBHDFS3
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ ifdef GCSSDK_ROOT_DIR
CMAKE_FLAGS += -DGCSSDK_ROOT_DIR=$(GCSSDK_ROOT_DIR)
endif

# When AZURESDK_ROOT_DIR is set to a non-empty value, forward it to CMake as a
# -D definition by appending it to CMAKE_FLAGS.
ifdef AZURESDK_ROOT_DIR
CMAKE_FLAGS += -DAZURESDK_ROOT_DIR=$(AZURESDK_ROOT_DIR)
endif

# Use Ninja if available. If Ninja is used, pass through parallelism control flags.
USE_NINJA ?= 1
ifeq ($(USE_NINJA), 1)
Expand Down
43 changes: 43 additions & 0 deletions scripts/setup-adapters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,35 @@ function install_gcs-sdk-cpp {
-DGOOGLE_CLOUD_CPP_ENABLE=storage
}

# Builds and installs the Azure SDK for C++ components used for ABFS support,
# in dependency order: azure-core, azure-storage-common, azure-storage-blobs.
#
# The sources come from azure/azure-sdk-for-cpp at ref
# azure-storage-blobs_12.8.0. The vcpkg.json manifests of azure-core and
# azure-storage-blobs are patched in place to pin a vcpkg "builtin-baseline";
# azure-core additionally gets an "overrides" entry selecting an OpenSSL
# version derived from the `openssl` binary found on PATH. A manifest that
# already contains the word "baseline" is left untouched, so re-running the
# function does not patch a manifest twice.
#
# NOTE(review): the relative `cd` calls assume github_checkout leaves the
# working directory at the root of the checkout and that cmake_install does
# not change directory; both helpers are defined outside this function.
# The function returns with the working directory still inside
# sdk/storage/azure-storage-blobs.
function install_azure-storage-sdk-cpp {
  # vcpkg registry commit pinned in the patched manifests. Kept in one place so
  # the azure-core and azure-storage-blobs patches cannot drift apart.
  local vcpkg_baseline="dafef74af53669ef1cc9015f55e0ce809ead62aa"
  # Declared local so the detected version does not leak into the script's
  # global scope.
  local openssl_version

  github_checkout azure/azure-sdk-for-cpp azure-storage-blobs_12.8.0

  # Build and install azure-core.
  cd sdk/core/azure-core
  if ! grep -q "baseline" vcpkg.json; then
    # Build azure-core against an OpenSSL version compatible with the one
    # pre-installed on the system. The second field of `openssl version -v`
    # is the version string.
    openssl_version=$(openssl version -v | awk '{print $2}')
    # Any 1.1.1 patch release is normalized to 1.1.1n (presumably the 1.1.1
    # version available at the pinned baseline; confirm).
    if [[ "$openssl_version" == 1.1.1* ]]; then
      openssl_version="1.1.1n"
    fi
    # Insert the new keys immediately before the manifest's "version-string"
    # key. The baseline must be inserted first: the second substitution then
    # matches the same (first) "version-string" occurrence on that line.
    sed -i "s/\"version-string\"/\"builtin-baseline\": \"${vcpkg_baseline}\",\"version-string\"/" vcpkg.json
    sed -i "s/\"version-string\"/\"overrides\": [{ \"name\": \"openssl\", \"version-string\": \"$openssl_version\" }],\"version-string\"/" vcpkg.json
  fi
  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF

  cd -
  # Build and install azure-storage-common (its manifest is not patched).
  cd sdk/storage/azure-storage-common
  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF

  cd -
  # Build and install azure-storage-blobs. This manifest is patched before its
  # "version-semver" key rather than "version-string".
  cd sdk/storage/azure-storage-blobs
  if ! grep -q "baseline" vcpkg.json; then
    sed -i "s/\"version-semver\"/\"builtin-baseline\": \"${vcpkg_baseline}\",\"version-semver\"/" vcpkg.json
  fi
  cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
}

function install_libhdfs3 {
github_checkout apache/hawq master
cd $DEPENDENCY_DIR/hawq/depends/libhdfs3
Expand All @@ -87,10 +116,15 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
apt install -y --no-install-recommends libxml2-dev libgsasl7-dev uuid-dev
# Dependencies of GCS, probably a workaround until the docker image is rebuilt
apt install -y --no-install-recommends libc-ares-dev libcurl4-openssl-dev
# Dependencies of Azure Storage Blob cpp
apt install -y openssl
else # Assume Fedora/CentOS
yum -y install libxml2-devel libgsasl-devel libuuid-devel
# Dependencies of GCS, probably a workaround until the docker image is rebuilt
yum -y install curl-devel c-ares-devel
# Dependencies of Azure Storage Blob Cpp
yum -y install perl-IPC-Cmd
yum -y install openssl
fi
fi

Expand All @@ -101,12 +135,14 @@ fi
install_aws=0
install_gcs=0
install_hdfs=0
install_abfs=0

if [ "$#" -eq 0 ]; then
# Install all adapters by default
install_aws=1
install_gcs=1
install_hdfs=1
install_abfs=1
fi

while [[ $# -gt 0 ]]; do
Expand All @@ -123,6 +159,10 @@ while [[ $# -gt 0 ]]; do
install_hdfs=1
shift # past argument
;;
abfs)
install_abfs=1
shift # past argument
;;
*)
echo "ERROR: Unknown option $1! will be ignored!"
shift
Expand All @@ -139,6 +179,9 @@ fi
if [ $install_hdfs -eq 1 ]; then
install_libhdfs3
fi
if [ $install_abfs -eq 1 ]; then
install_azure-storage-sdk-cpp
fi

_ret=$?
if [ $_ret -eq 0 ] ; then
Expand Down