-
Notifications
You must be signed in to change notification settings - Fork 6
Add {_snowflake_id} wildcard support to object storage #789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#pragma once | ||
|
||
#include <cstdint> | ||
|
||
namespace DB | ||
{ | ||
|
||
uint64_t generateSnowflakeID(); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ namespace Setting | |
extern const SettingsMaxThreads max_threads; | ||
extern const SettingsBool optimize_count_from_files; | ||
extern const SettingsBool use_hive_partitioning; | ||
extern const SettingsBool object_storage_treat_key_related_wildcards_as_star; | ||
} | ||
|
||
namespace ErrorCodes | ||
|
@@ -373,21 +374,35 @@ void StorageObjectStorage::read( | |
if (update_configuration_on_read) | ||
configuration->update(object_storage, local_context); | ||
|
||
if (partition_by && configuration->withPartitionWildcard()) | ||
auto config_clone = configuration->clone(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We make a clone every time, but actually change it only in specific cases. |
||
|
||
if (config_clone->withPartitionWildcard() || config_clone->withSnowflakeIdWildcard()) | ||
{ | ||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, | ||
"Reading from a partitioned {} storage is not implemented yet", | ||
getName()); | ||
/* | ||
* Replace `_partition_id` and `_snowflake_id` wildcards with `*` so that any files that match this pattern can be retrieved. | ||
*/ | ||
if (local_context->getSettingsRef()[Setting::object_storage_treat_key_related_wildcards_as_star]) | ||
{ | ||
config_clone->setPath(getPathWithKeyRelatedWildcardsReplacedWithStar(config_clone->getPath())); | ||
} | ||
|
||
if (config_clone->withPartitionWildcard() || config_clone->withSnowflakeIdWildcard()) | ||
{ | ||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, | ||
"Reading from a globbed path {} on storage {} is not implemented yet," | ||
"except when the only globs are `_snowflake_id` and/or `_partition_id` with `object_storage_treat_key_related_wildcards_as_star=1`", | ||
config_clone->getPath(), getName()); | ||
} | ||
} | ||
|
||
const auto read_from_format_info = configuration->prepareReadingFromFormat( | ||
const auto read_from_format_info = config_clone->prepareReadingFromFormat( | ||
object_storage, column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); | ||
const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) | ||
&& local_context->getSettingsRef()[Setting::optimize_count_from_files]; | ||
|
||
auto read_step = std::make_unique<ReadFromObjectStorageStep>( | ||
object_storage, | ||
configuration, | ||
config_clone, | ||
fmt::format("{}({})", getName(), getStorageID().getFullTableName()), | ||
column_names, | ||
getVirtualsList(), | ||
|
@@ -421,7 +436,7 @@ SinkToStoragePtr StorageObjectStorage::write( | |
configuration->getPath()); | ||
} | ||
|
||
if (configuration->withGlobsIgnorePartitionWildcard()) | ||
if (configuration->withGlobsIgnorePartitionWildcardAndSnowflakeIdWildcard()) | ||
{ | ||
throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, | ||
"Path '{}' contains globs, so the table is in readonly mode", | ||
|
@@ -650,6 +665,7 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) | |
format = other.format; | ||
compression_method = other.compression_method; | ||
structure = other.structure; | ||
storage_settings = other.storage_settings; | ||
} | ||
|
||
bool StorageObjectStorage::Configuration::withPartitionWildcard() const | ||
|
@@ -659,13 +675,29 @@ bool StorageObjectStorage::Configuration::withPartitionWildcard() const | |
|| getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; | ||
} | ||
|
||
/// Returns true when either the path or the namespace contains the `{_snowflake_id}` wildcard. | ||
bool StorageObjectStorage::Configuration::withSnowflakeIdWildcard() const | ||
{ | ||
/// The literal wildcard text searched for in both the path and the namespace. | ||
static const String SNOWFLAKE_ID_WILDCARD = "{_snowflake_id}"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return getPath().find(SNOWFLAKE_ID_WILDCARD) != String::npos | ||
|| getNamespace().find(SNOWFLAKE_ID_WILDCARD) != String::npos; | ||
} | ||
|
||
/// Reports whether the path contains any of the characters `*`, `?` or `{`; when a partition wildcard is present the path is first rewritten by PartitionedSink::replaceWildcards with an empty replacement. | ||
bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcard() const | ||
{ | ||
if (withPartitionWildcard()) | ||
{ | ||
/// Rewrite the path with an empty replacement, then search what is left for glob characters. | ||
const auto rewritten_path = PartitionedSink::replaceWildcards(getPath(), ""); | ||
return rewritten_path.find_first_of("*?{") != std::string::npos; | ||
} | ||
/// No partition wildcard present: defer to withGlobs(). | ||
return withGlobs(); | ||
} | ||
|
||
/// Reports whether the path still contains any of `*`, `?` or `{` after it has been passed through PartitionedSink::replaceWildcards and then replaceSnowflakeIdWildcard, both with an empty replacement. | ||
bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcardAndSnowflakeIdWildcard() const | ||
{ | ||
/// First rewrite: PartitionedSink::replaceWildcards with an empty replacement. | ||
const auto after_partition_rewrite = PartitionedSink::replaceWildcards(getPath(), ""); | ||
|
||
/// Second rewrite is applied to the output of the first one, again with an empty replacement. | ||
const auto after_snowflake_rewrite = replaceSnowflakeIdWildcard(after_partition_rewrite, ""); | ||
|
||
/// Any of these characters remaining in the rewritten path counts as a glob. | ||
constexpr auto glob_characters = "*?{"; | ||
return std::string::npos != after_snowflake_rewrite.find_first_of(glob_characters); | ||
} | ||
|
||
bool StorageObjectStorage::Configuration::isPathWithGlobs() const | ||
{ | ||
return getPath().find_first_of("*?{") != std::string::npos; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Three options here: