Skip to content

Commit

Permalink
Add libhdfs3 support
Browse files Browse the repository at this point in the history
  • Loading branch information
JkSelf committed Aug 14, 2024
1 parent 8f351ea commit 8c446dc
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_GCS "Build GCS Connector" OFF)
option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
option(VELOX_ENABLE_HDFS3 "Build Hdfs Connector" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF)
Expand Down Expand Up @@ -241,6 +242,10 @@ if(VELOX_ENABLE_ABFS)
endif()

# When the VELOX_ENABLE_HDFS option is ON, pass -DVELOX_ENABLE_HDFS to the
# compiler so sources can guard HDFS-specific code with
# `#if defined(VELOX_ENABLE_HDFS)`.
if(VELOX_ENABLE_HDFS)
add_definitions(-DVELOX_ENABLE_HDFS)
endif()

# When the VELOX_ENABLE_HDFS3 option is ON, pass -DVELOX_ENABLE_HDFS3 to the
# compiler. Sources test this macro to enable the HDFS file system code and to
# choose the libhdfs3 shared-library file name (libhdfs3.so / libhdfs3.dylib).
if(VELOX_ENABLE_HDFS3)
add_definitions(-DVELOX_ENABLE_HDFS3)
endif()

Expand Down
5 changes: 5 additions & 0 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@

#include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" // @manual
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" // @manual
<<<<<<< HEAD
=======
#endif
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
>>>>>>> Add libhdfs3 support
#include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" // @manual
#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h" // @manual
#include "velox/dwio/dwrf/reader/DwrfReader.h"
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/storage_adapters/hdfs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

velox_add_library(velox_hdfs RegisterHdfsFileSystem.cpp)

if(VELOX_ENABLE_HDFS)
if(DEFINED VELOX_ENABLE_HDFS OR DEFINED VELOX_ENABLE_HDFS3)
velox_sources(velox_hdfs PRIVATE HdfsFileSystem.cpp HdfsReadFile.cpp
HdfsWriteFile.cpp hdfs_internal.cc)
HdfsWriteFile.cpp hdfs_internal.cc)
velox_link_libraries(velox_hdfs Folly::folly xsimd arrow)

if(${VELOX_BUILD_TESTING})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#ifdef VELOX_ENABLE_HDFS3
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
#include "folly/concurrency/ConcurrentHashMap.h"

#include "velox/common/config/Config.h"
Expand All @@ -25,7 +25,7 @@

namespace facebook::velox::filesystems {

#ifdef VELOX_ENABLE_HDFS3
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
std::mutex mtx;

std::function<std::shared_ptr<
Expand Down Expand Up @@ -96,7 +96,7 @@ hdfsWriteFileSinkGenerator() {
#endif

void registerHdfsFileSystem() {
#ifdef VELOX_ENABLE_HDFS3
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
registerFileSystem(HdfsFileSystem::isHdfsFile, hdfsFileSystemGenerator());
dwio::common::FileSink::registerFactory(hdfsWriteFileSinkGenerator());
#endif
Expand Down
21 changes: 17 additions & 4 deletions velox/connectors/hive/storage_adapters/hdfs/hdfs_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ void InsertEnvVarFilename(
::arrow::Result<std::vector<PlatformFilename>> get_potential_libhdfs_paths() {
std::vector<PlatformFilename> potential_paths;
std::string file_name;

// Common paths
ARROW_ASSIGN_OR_RAISE(auto search_paths, MakeFilenameVector({"", "."}));
#ifdef VELOX_ENABLE_HDFS
// OS-specific file name
#ifdef _WIN32
file_name = "hdfs.dll";
Expand All @@ -157,12 +159,23 @@ ::arrow::Result<std::vector<PlatformFilename>> get_potential_libhdfs_paths() {
file_name = "libhdfs.so";
#endif

// Common paths
ARROW_ASSIGN_OR_RAISE(auto search_paths, MakeFilenameVector({"", "."}));

// Path from environment variable
AppendEnvVarFilename("HADOOP_HOME", "lib/native", &search_paths);
AppendEnvVarFilename("ARROW_LIBHDFS_DIR", &search_paths);
#endif

#ifdef VELOX_ENABLE_HDFS3
// OS-specific file name
#ifdef __APPLE__
file_name = "libhdfs3.dylib";
#else
file_name = "libhdfs3.so";
#endif

// Path from environment variable
AppendEnvVarFilename("HDFS3_HOME", &search_paths);
#endif


// All paths with file name
for (const auto& path : search_paths) {
Expand Down

0 comments on commit 8c446dc

Please sign in to comment.