Skip to content

Commit 5185ddf

Browse files
committed
first draft
fix build; fix ObjectInfo creation; another cleaned attempt; fix schema id cache population; correct splitting of URI into parts; tmp: lazy way to debug cluster; fix storage schema normalization; store per-file object_storage_ptr in object_info; tmp; upd; upd; fix for local; make new storages properly; remove cloneObjectStorage; attempt to fix cluster; use proper storage on worker's side + code cleanup; tmp; fix build after rebase; tmp; poke ci again
1 parent 48110e0 commit 5185ddf

33 files changed

+573
-212
lines changed

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
6565
}
6666

6767
private:
68-
bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override
68+
bool getBatchAndCheckNext(PathsWithMetadata & batch) override
6969
{
7070
ProfileEvents::increment(ProfileEvents::AzureListObjects);
7171
if (client->IsClientForDisk())
@@ -78,7 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
7878

7979
for (const auto & blob : blobs_list)
8080
{
81-
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
81+
batch.emplace_back(std::make_shared<PathWithMetadata>(
8282
blob.Name,
8383
ObjectMetadata{
8484
static_cast<uint64_t>(blob.BlobSize),
@@ -160,7 +160,7 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr
160160
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size);
161161
}
162162

163-
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
163+
void AzureObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
164164
{
165165
auto client_ptr = client.get();
166166

@@ -182,7 +182,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
182182

183183
for (const auto & blob : blobs_list)
184184
{
185-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
185+
children.emplace_back(std::make_shared<PathWithMetadata>(
186186
blob.Name,
187187
ObjectMetadata{
188188
static_cast<uint64_t>(blob.BlobSize),

src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class AzureObjectStorage : public IObjectStorage
3737

3838
bool supportsListObjectsCache() override { return true; }
3939

40-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
40+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
4141

4242
/// Sanitizer build may crash with max_keys=1; this looks like a false positive.
4343
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;

src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void CachedObjectStorage::copyObject( // NOLINT
193193
object_storage->copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
194194
}
195195

196-
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
196+
void CachedObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
197197
{
198198
object_storage->listObjects(path, children, max_keys);
199199
}

src/Disks/ObjectStorages/Cached/CachedObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class CachedObjectStorage final : public IObjectStorage
6464
IObjectStorage & object_storage_to,
6565
std::optional<ObjectAttributes> object_to_attributes = {}) override;
6666

67-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
67+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6868

6969
ObjectMetadata getObjectMetadata(const std::string & path) const override;
7070

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co
167167
return metadata;
168168
}
169169

170-
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
170+
void HDFSObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const
171171
{
172172
initializeHDFSFS();
173173
LOG_TEST(log, "Trying to list files for {}", path);
@@ -203,7 +203,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
203203
}
204204
else
205205
{
206-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
206+
children.emplace_back(std::make_shared<PathWithMetadata>(
207207
String(file_path),
208208
ObjectMetadata{
209209
static_cast<uint64_t>(ls.file_info[i].mSize),

src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class HDFSObjectStorage : public IObjectStorage, public HDFSErrorWrapper
9292
const WriteSettings & write_settings,
9393
std::optional<ObjectAttributes> object_to_attributes = {}) override;
9494

95-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
95+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
9696

9797
String getObjectsNamespace() const override { return ""; }
9898

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,20 @@ const MetadataStorageMetrics & IObjectStorage::getMetadataStorageMetrics() const
3232

3333
bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
3434
{
35-
RelativePathsWithMetadata files;
35+
PathsWithMetadata files;
3636
listObjects(path, files, 1);
3737
return !files.empty();
3838
}
3939

40-
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
40+
void IObjectStorage::listObjects(const std::string &, PathsWithMetadata &, size_t) const
4141
{
4242
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
4343
}
4444

4545

4646
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
4747
{
48-
RelativePathsWithMetadata files;
48+
PathsWithMetadata files;
4949
listObjects(path_prefix, files, max_keys);
5050

5151
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
@@ -104,9 +104,15 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
104104
return write_settings;
105105
}
106106

107-
RelativePathWithMetadata::RelativePathWithMetadata(const String & task_string, std::optional<ObjectMetadata> metadata_)
107+
PathWithMetadata::PathWithMetadata(
108+
const String & task_string,
109+
std::optional<ObjectMetadata> metadata_,
110+
std::optional<String> absolute_path_,
111+
std::optional<ObjectStoragePtr> object_storage_to_use_)
108112
: metadata(std::move(metadata_))
109113
, command(task_string)
114+
, absolute_path(absolute_path_)
115+
, object_storage_to_use(object_storage_to_use_)
110116
{
111117
if (!command.isParsed())
112118
relative_path = task_string;
@@ -119,14 +125,20 @@ RelativePathWithMetadata::RelativePathWithMetadata(const String & task_string, s
119125
}
120126
}
121127

122-
RelativePathWithMetadata::RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_)
128+
PathWithMetadata::PathWithMetadata(
129+
const DataFileInfo & info,
130+
std::optional<ObjectMetadata> metadata_,
131+
std::optional<String> absolute_path_,
132+
std::optional<ObjectStoragePtr> object_storage_to_use_)
123133
: metadata(std::move(metadata_))
134+
, absolute_path(absolute_path_)
135+
, object_storage_to_use(object_storage_to_use_)
124136
{
125137
relative_path = info.file_path;
126138
file_meta_info = info.file_meta_info;
127139
}
128140

129-
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
141+
void PathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
130142
{
131143
if (!metadata)
132144
{
@@ -143,7 +155,7 @@ void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, boo
143155
}
144156
}
145157

146-
RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
158+
PathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std::string & task)
147159
{
148160
Poco::JSON::Parser parser;
149161
try
@@ -167,7 +179,7 @@ RelativePathWithMetadata::CommandInTaskResponse::CommandInTaskResponse(const std
167179
}
168180
}
169181

170-
std::string RelativePathWithMetadata::CommandInTaskResponse::toString() const
182+
std::string PathWithMetadata::CommandInTaskResponse::toString() const
171183
{
172184
Poco::JSON::Object json;
173185

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ struct DataFileInfo;
106106
class DataFileMetaInfo;
107107
using DataFileMetaInfoPtr = std::shared_ptr<DataFileMetaInfo>;
108108

109-
struct RelativePathWithMetadata
109+
struct PathWithMetadata
110110
{
111111
class CommandInTaskResponse
112112
{
@@ -138,18 +138,30 @@ struct RelativePathWithMetadata
138138
std::optional<ObjectMetadata> metadata;
139139
CommandInTaskResponse command;
140140
std::optional<DataFileMetaInfoPtr> file_meta_info;
141+
std::optional<String> absolute_path;
142+
std::optional<ObjectStoragePtr> object_storage_to_use = std::nullopt;
141143

142-
RelativePathWithMetadata() = default;
144+
PathWithMetadata() = default;
143145

144-
explicit RelativePathWithMetadata(const String & task_string, std::optional<ObjectMetadata> metadata_ = std::nullopt);
145-
explicit RelativePathWithMetadata(const DataFileInfo & info, std::optional<ObjectMetadata> metadata_ = std::nullopt);
146+
explicit PathWithMetadata(
147+
const String & task_string,
148+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
149+
std::optional<String> absolute_path_ = std::nullopt,
150+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt);
146151

147-
virtual ~RelativePathWithMetadata() = default;
152+
explicit PathWithMetadata(
153+
const DataFileInfo & info,
154+
std::optional<ObjectMetadata> metadata_ = std::nullopt,
155+
std::optional<String> absolute_path_ = std::nullopt,
156+
std::optional<ObjectStoragePtr> object_storage_to_use_ = std::nullopt);
157+
158+
virtual ~PathWithMetadata() = default;
148159

149160
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
150161
virtual std::string getFileNameWithoutExtension() const { return std::filesystem::path(relative_path).stem(); }
151162

152163
virtual std::string getPath() const { return relative_path; }
164+
virtual std::optional<std::string> getAbsolutePath() const { return absolute_path; }
153165
virtual bool isArchive() const { return false; }
154166
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
155167
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
@@ -160,6 +172,8 @@ struct RelativePathWithMetadata
160172

161173
void loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file);
162174
const CommandInTaskResponse & getCommand() const { return command; }
175+
176+
std::optional<ObjectStoragePtr> getObjectStorage() const { return object_storage_to_use; }
163177
};
164178

165179
struct ObjectKeyWithMetadata
@@ -175,8 +189,8 @@ struct ObjectKeyWithMetadata
175189
{}
176190
};
177191

178-
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
179-
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
192+
using PathWithMetadataPtr = std::shared_ptr<PathWithMetadata>;
193+
using PathsWithMetadata = std::vector<PathWithMetadataPtr>;
180194
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
181195

182196
class IObjectStorageIterator;
@@ -217,7 +231,7 @@ class IObjectStorage
217231
virtual bool existsOrHasAnyChild(const std::string & path) const;
218232

219233
/// List objects recursively by certain prefix.
220-
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
234+
virtual void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const;
221235

222236
/// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily.
223237
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;

src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
151151
return object_metadata;
152152
}
153153

154-
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
154+
void LocalObjectStorage::listObjects(const std::string & path, PathsWithMetadata & children, size_t/* max_keys */) const
155155
{
156156
if (!fs::exists(path) || !fs::is_directory(path))
157157
return;
@@ -164,7 +164,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
164164
continue;
165165
}
166166

167-
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
167+
children.emplace_back(std::make_shared<PathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
168168
}
169169
}
170170

src/Disks/ObjectStorages/Local/LocalObjectStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class LocalObjectStorage : public IObjectStorage
6262

6363
ObjectMetadata getObjectMetadata(const std::string & path) const override;
6464

65-
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
65+
void listObjects(const std::string & path, PathsWithMetadata & children, size_t max_keys) const override;
6666

6767
bool existsOrHasAnyChild(const std::string & path) const override;
6868

0 commit comments

Comments
 (0)