Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Throw kFileNotFound in all storage adapters #8662

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions velox/common/base/tests/GTestUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,35 @@
<< "Expected error message to contain '" << (_errorMessage) \
<< "', but received '" << status.message() << "'."

#define VELOX_ASSERT_ERROR_CODE_IMPL(_type, _expression, _errorCode) \
#define VELOX_ASSERT_ERROR_CODE_IMPL( \
_type, _expression, _errorCode, _errorMessage) \
try { \
(_expression); \
FAIL() << "Expected an exception"; \
} catch (const _type& e) { \
ASSERT_TRUE(e.errorCode() == _errorCode) \
<< "Expected error code to be '" << _errorCode << "', but received '" \
<< e.errorCode() << "'."; \
ASSERT_TRUE(e.message().find(_errorMessage) != std::string::npos) \
<< "Expected error message to contain '" << (_errorMessage) \
<< "', but received '" << e.message() << "'."; \
}

#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxException, _expression, _errorCode)
#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxException, _expression, _errorCode, _errorMessage)

#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxUserError, _expression, _errorCode)
#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxUserError, _expression, _errorCode, _errorMessage)

#define VELOX_ASSERT_RUNTIME_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxRuntimeError, _expression, _errorCode)
#define VELOX_ASSERT_RUNTIME_THROW_CODE( \
_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxRuntimeError, \
_expression, \
_errorCode, \
_errorMessage)

#ifndef NDEBUG
#define DEBUG_ONLY_TEST(test_fixture, test_name) TEST(test_fixture, test_name)
Expand Down
4 changes: 3 additions & 1 deletion velox/common/file/tests/FileTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,5 +324,7 @@ TEST(LocalFile, fileNotFound) {
auto path = fmt::format("{}/file", tempFolder->path);
auto localFs = filesystems::getFileSystem(path, nullptr);
VELOX_ASSERT_RUNTIME_THROW_CODE(
localFs->openFileForRead(path), error_code::kFileNotFound);
localFs->openFileForRead(path),
error_code::kFileNotFound,
"No such file or directory");
}
6 changes: 5 additions & 1 deletion velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,15 @@ inline const std::string throwStorageExceptionWithOperationDetails(
std::string operation,
std::string path,
Azure::Storage::StorageException& error) {
VELOX_FAIL(
const auto errMsg = fmt::format(
"Operation '{}' to path '{}' encountered azure storage exception, Details: '{}'.",
operation,
path,
error.what());
if (error.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}

} // namespace facebook::velox::filesystems::abfs
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,14 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) {
}

TEST_F(AbfsFileSystemTest, missingFile) {
try {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
const std::string abfsFile =
facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt";
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
auto readFile = abfs->openFileForRead(abfsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_TRUE(err.message().find("404") != std::string::npos);
}
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
const std::string abfsFile =
facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt";
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_RUNTIME_THROW_CODE(
abfs->openFileForRead(abfsFile), error_code::kFileNotFound, "404");
}

TEST_F(AbfsFileSystemTest, openFileForWriteNotImplemented) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,17 @@ inline void checkGCSStatus(
const std::string& bucket,
const std::string& key) {
if (!outcome.ok()) {
auto error = outcome.error_info();
VELOX_FAIL(
const auto errMsg = fmt::format(
"{} due to: Path:'{}', SDK Error Type:{}, GCS Status Code:{}, Message:'{}'",
errorMsgPrefix,
gcsURI(bucket, key),
error.domain(),
outcome.error_info().domain(),
getErrorStringFromGCSError(outcome.code()),
outcome.message());
if (outcome.code() == gc::StatusCode::kNotFound) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,30 +285,20 @@ TEST_F(GCSFileSystemTest, missingFile) {
const std::string gcsFile = gcsURI(preexistingBucketName(), file);
filesystems::GCSFileSystem gcfs(testGcsOptions());
gcfs.initializeClient();
try {
gcfs.openFileForRead(gcsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_THAT(
err.message(),
::testing::HasSubstr(
"\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\""));
}
VELOX_ASSERT_RUNTIME_THROW_CODE(
gcfs.openFileForRead(gcsFile),
error_code::kFileNotFound,
"\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\"");
}

TEST_F(GCSFileSystemTest, missingBucket) {
filesystems::GCSFileSystem gcfs(testGcsOptions());
gcfs.initializeClient();
try {
const char* gcsFile = "gs://dummy/foo.txt";
gcfs.openFileForRead(gcsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_THAT(
err.message(),
::testing::HasSubstr(
"\\\"message\\\": \\\"Bucket dummy does not exist.\\\""));
}
const char* gcsFile = "gs://dummy/foo.txt";
VELOX_ASSERT_RUNTIME_THROW_CODE(
gcfs.openFileForRead(gcsFile),
error_code::kFileNotFound,
"\\\"message\\\": \\\"Bucket dummy does not exist.\\\"");
}

TEST_F(GCSFileSystemTest, credentialsConfig) {
Expand Down
16 changes: 11 additions & 5 deletions velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ namespace facebook::velox {
HdfsReadFile::HdfsReadFile(hdfsFS hdfs, const std::string_view path)
: hdfsClient_(hdfs), filePath_(path) {
fileInfo_ = hdfsGetPathInfo(hdfsClient_, filePath_.data());
VELOX_CHECK_NOT_NULL(
fileInfo_,
"Unable to get file path info for file: {}. got error: {}",
filePath_,
hdfsGetLastError());
if (fileInfo_ == nullptr) {
auto error = hdfsGetLastError();
auto errMsg = fmt::format(
"Unable to get file path info for file: {}. got error: {}",
filePath_,
error);
if (std::strstr(error, "FileNotFoundException") != nullptr) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}
}

HdfsReadFile::~HdfsReadFile() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,20 +218,14 @@ TEST_F(HdfsFileSystemTest, oneFsInstanceForOneEndpoint) {
}

TEST_F(HdfsFileSystemTest, missingFileViaFileSystem) {
try {
auto memConfig =
std::make_shared<const core::MemConfig>(configurationValues);
auto hdfsFileSystem =
filesystems::getFileSystem(fullDestinationPath, memConfig);
auto readFile = hdfsFileSystem->openFileForRead(
"hdfs://localhost:7777/path/that/does/not/exist");
FAIL() << "expected VeloxException";
} catch (VeloxException const& error) {
EXPECT_THAT(
error.message(),
testing::HasSubstr(
"Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist."));
}
auto memConfig = std::make_shared<const core::MemConfig>(configurationValues);
auto hdfsFileSystem =
filesystems::getFileSystem(fullDestinationPath, memConfig);
VELOX_ASSERT_RUNTIME_THROW_CODE(
hdfsFileSystem->openFileForRead(
"hdfs://localhost:7777/path/that/does/not/exist"),
error_code::kFileNotFound,
"Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist.");
}

TEST_F(HdfsFileSystemTest, missingHost) {
Expand Down
10 changes: 8 additions & 2 deletions velox/connectors/hive/storage_adapters/s3fs/S3Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "velox/common/base/Exceptions.h"

#include <fmt/format.h>

namespace facebook::velox {

namespace {
Expand Down Expand Up @@ -158,7 +160,7 @@ inline std::string getRequestID(
{ \
if (!outcome.IsSuccess()) { \
auto error = outcome.GetError(); \
VELOX_FAIL( \
auto errMsg = fmt::format( \
"{} due to: '{}'. Path:'{}', SDK Error Type:{}, HTTP Status Code:{}, S3 Service:'{}', Message:'{}', RequestID:'{}'", \
errorMsgPrefix, \
getErrorStringFromS3Error(error), \
Expand All @@ -167,7 +169,11 @@ inline std::string getRequestID(
error.GetResponseCode(), \
getS3BackendService(error.GetResponseHeaders()), \
error.GetMessage(), \
getRequestID(error.GetResponseHeaders())) \
getRequestID(error.GetResponseHeaders())); \
if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { \
VELOX_FILE_NOT_FOUND_ERROR(errMsg); \
} \
VELOX_FAIL(errMsg) \
} \
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,18 @@ TEST_F(S3FileSystemTest, missingFile) {
addBucket(bucketName);
auto hiveConfig = minioServer_->hiveConfig();
filesystems::S3FileSystem s3fs(hiveConfig);
VELOX_ASSERT_THROW(
VELOX_ASSERT_RUNTIME_THROW_CODE(
s3fs.openFileForRead(s3File),
error_code::kFileNotFound,
"Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://data1/i-do-not-exist.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'");
}

TEST_F(S3FileSystemTest, missingBucket) {
auto hiveConfig = minioServer_->hiveConfig();
filesystems::S3FileSystem s3fs(hiveConfig);
VELOX_ASSERT_THROW(
VELOX_ASSERT_RUNTIME_THROW_CODE(
s3fs.openFileForRead(kDummyPath),
error_code::kFileNotFound,
"Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://dummy/foo.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'");
}

Expand Down
3 changes: 3 additions & 0 deletions velox/docs/develop/connectors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ Storage Adapters
Hive Connector allows reading and writing files from a variety of distributed storage systems.
The supported storage API are S3, HDFS, GCS, Linux FS.

If file is not found when reading, `openFileForRead` API throws `VeloxRuntimeError` with `error_code::kFileNotFound`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's clarify that this behavior is necessary to support xxx configuration property.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks.

This behavior is necessary to support the `ignore_missing_files` configuration property.

S3 is supported using the `AWS SDK for C++ <https://github.com/aws/aws-sdk-cpp>`_ library.
S3 supported schemes are `s3://` (Amazon S3, Minio), `s3a://` (Hadoop 3.x), `s3n://` (Deprecated in Hadoop 3.x),
`oss://` (Alibaba cloud storage), and `cos://`, `cosn://` (Tencent cloud storage).
Expand Down
4 changes: 3 additions & 1 deletion velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,9 @@ TEST_F(TableScanTest, fileNotFound) {
};
assertMissingFile(true);
VELOX_ASSERT_RUNTIME_THROW_CODE(
assertMissingFile(false), error_code::kFileNotFound);
assertMissingFile(false),
error_code::kFileNotFound,
"No such file or directory");
}

// A valid ORC file (containing headers) but no data.
Expand Down
Loading