Skip to content

Commit 45e5cb3

Browse files
authored
Use primary_keys in CBO Statistics for s3 provider (#4865)
1 parent c503e71 commit 45e5cb3

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,10 @@
1515
#include <ydb/library/yql/providers/s3/range_helpers/file_tree_builder.h>
1616
#include <ydb/library/yql/providers/s3/range_helpers/path_list_reader.h>
1717
#include <ydb/library/yql/utils/log/log.h>
18+
#include <ydb/library/yql/core/yql_opt_utils.h>
1819

1920
#include <library/cpp/json/writer/json_value.h>
21+
#include <library/cpp/yson/node/node_io.h>
2022

2123
namespace NYql {
2224

@@ -179,7 +181,7 @@ class TS3DqIntegration: public TDqIntegrationBase {
179181
TMaybe<TOptimizerStatistics> ReadStatistics(const TExprNode::TPtr& sourceWrap, TExprContext& ctx) override {
180182
Y_UNUSED(ctx);
181183
double size = 0;
182-
double cols = 0;
184+
int cols = 0;
183185
double rows = 0;
184186
if (const auto& maybeParseSettings = TMaybeNode<TS3ParseSettings>(sourceWrap->Child(0))) {
185187
const auto& parseSettings = maybeParseSettings.Cast();
@@ -195,12 +197,28 @@ class TS3DqIntegration: public TDqIntegrationBase {
195197
}
196198
}
197199

200+
TVector<TString>* primaryKey = nullptr;
201+
if (auto constraints = GetSetting(parseSettings.Settings().Ref(), "constraints"sv)) {
202+
auto node = NYT::NodeFromYsonString(constraints->Child(1)->Content());
203+
auto* primaryKeyNode = node.AsMap().FindPtr("primary_key");
204+
if (primaryKeyNode) {
205+
TVector<TString> parsed;
206+
for (auto col : primaryKeyNode->AsList()) {
207+
parsed.push_back(col.AsString());
208+
}
209+
State_->PrimaryKeys.emplace_back(std::move(parsed));
210+
primaryKey = &State_->PrimaryKeys.back();
211+
}
212+
}
213+
198214
if (parseSettings.RowType().Maybe<TCoStructType>()) {
199215
cols = parseSettings.RowType().Ptr()->ChildrenSize();
200216
}
201217

202218
rows = size / 1024; // magic estimate
203-
return TOptimizerStatistics(BaseTable, rows, cols, size);
219+
return primaryKey
220+
? TOptimizerStatistics(BaseTable, rows, cols, size, size, *primaryKey)
221+
: TOptimizerStatistics(BaseTable, rows, cols, size, size);
204222
} else {
205223
return Nothing();
206224
}

ydb/library/yql/providers/s3/provider/yql_s3_provider.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ struct TS3State : public TThrRefBase
2929
ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory;
3030
IHTTPGateway::TPtr Gateway;
3131
ui32 ExecutorPoolId = 0;
32+
std::list<TVector<TString>> PrimaryKeys;
3233
};
3334

3435
TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway, ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory = nullptr, bool allowLocalFiles = false);

0 commit comments

Comments
 (0)