Skip to content

Commit 336f81b

Browse files
authored
Merge 0c04c91 into c0dd9da
2 parents c0dd9da + 0c04c91 commit 336f81b

File tree

5 files changed

+214
-44
lines changed

5 files changed

+214
-44
lines changed

ydb/core/tx/tiering/tier/object.cpp

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "object.h"
2+
#include "s3_uri.h"
23

34
#include <library/cpp/json/writer/json_value.h>
45
#include <library/cpp/protobuf/json/proto2json.h>
@@ -46,50 +47,11 @@ TConclusionStatus TTierConfig::DeserializeFromProto(const NKikimrSchemeOp::TExte
4647
}
4748
}
4849

49-
NUri::TUri url;
50-
if (url.Parse(proto.GetLocation(), NUri::TFeature::FeaturesAll) != NUri::TState::EParsed::ParsedOK) {
51-
return TConclusionStatus::Fail("Cannot parse url: " + proto.GetLocation());
52-
}
53-
54-
switch (url.GetScheme()) {
55-
case NUri::TScheme::SchemeEmpty:
56-
break;
57-
case NUri::TScheme::SchemeHTTP:
58-
ProtoConfig.SetScheme(::NKikimrSchemeOp::TS3Settings_EScheme_HTTP);
59-
break;
60-
case NUri::TScheme::SchemeHTTPS:
61-
ProtoConfig.SetScheme(::NKikimrSchemeOp::TS3Settings_EScheme_HTTPS);
62-
break;
63-
default:
64-
return TConclusionStatus::Fail("Unknown schema in url");
65-
}
66-
67-
{
68-
TStringBuf endpoint;
69-
TStringBuf bucket;
70-
71-
TStringBuf host = url.GetHost();
72-
TStringBuf path = url.GetField(NUri::TField::FieldPath);
73-
if (!path.Empty()) {
74-
endpoint = host;
75-
bucket = path;
76-
bucket.SkipPrefix("/");
77-
if (bucket.Contains("/")) {
78-
return TConclusionStatus::Fail(TStringBuilder() << "Not a bucket (contains directories): " << bucket);
79-
}
80-
} else {
81-
if (!path.TrySplit('.', endpoint, bucket)) {
82-
return TConclusionStatus::Fail(TStringBuilder() << "Bucket is not specified in URL: " << path);
83-
}
84-
}
85-
86-
if (url.GetField(NUri::TField::FieldPort)) {
87-
ProtoConfig.SetEndpoint(TStringBuilder() << endpoint << ":" << url.GetPort());
88-
} else {
89-
ProtoConfig.SetEndpoint(TString(endpoint));
90-
}
91-
ProtoConfig.SetBucket(TString(bucket));
50+
auto parsedUri = TS3Uri::ParseUri(proto.GetLocation());
51+
if (parsedUri.IsFail()) {
52+
return parsedUri;
9253
}
54+
parsedUri->FillSettings(ProtoConfig);
9355

9456
return TConclusionStatus::Success();
9557
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#include "s3_uri.h"
2+
3+
namespace NKikimr::NColumnShard::NTiers {
4+
}

ydb/core/tx/tiering/tier/s3_uri.h

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#include <ydb/core/protos/flat_scheme_op.pb.h>
2+
3+
#include <ydb/library/accessor/accessor.h>
4+
#include <ydb/library/conclusion/result.h>
5+
6+
#include <library/cpp/uri/uri.h>
7+
#include <util/string/builder.h>
8+
9+
#include <regex>
10+
11+
namespace NKikimr::NColumnShard::NTiers {
12+
13+
class TS3Uri {
14+
private:
15+
YDB_READONLY_DEF(TString, Host);
16+
YDB_READONLY_DEF(TString, Bucket);
17+
YDB_READONLY_DEF(std::optional<TString>, Folder)
18+
YDB_READONLY_DEF(std::optional<ui16>, Port);
19+
YDB_READONLY_DEF(std::optional<NKikimrSchemeOp::TS3Settings_EScheme>, Scheme);
20+
21+
enum TUriStyle {
22+
PATH_STYLE = 1,
23+
VIRTUAL_HOSTED_STYLE = 2,
24+
};
25+
26+
private:
27+
static TStringBuf StripPath(const TStringBuf& path) {
28+
TStringBuf stripped = path;
29+
while (stripped.SkipPrefix("/")) {
30+
}
31+
while (stripped.ChopSuffix("/")) {
32+
}
33+
return stripped;
34+
}
35+
36+
static std::optional<TUriStyle> DeduceUriStyle(const NUri::TUri& uri) {
37+
{
38+
static const std::regex StrictEndpointPattern = std::regex{ "^(.+\\.)?s3[.-].*" };
39+
std::match_results<TString::const_iterator> match;
40+
if (std::regex_match(uri.GetHost().begin(), uri.GetHost().end(), match, StrictEndpointPattern)) {
41+
if (match[1].length()) {
42+
return VIRTUAL_HOSTED_STYLE;
43+
} else {
44+
return PATH_STYLE;
45+
}
46+
}
47+
}
48+
49+
const bool hasSubdomain = std::count(uri.GetHost().begin(), uri.GetHost().end(), '.') >= 2;
50+
const bool hasPath = !StripPath(uri.GetField(NUri::TField::FieldPath)).Empty();
51+
if (hasSubdomain && !hasPath) {
52+
return VIRTUAL_HOSTED_STYLE;
53+
}
54+
if (!hasSubdomain && hasPath) {
55+
return PATH_STYLE;
56+
}
57+
58+
return std::nullopt;
59+
}
60+
61+
static TConclusion<TS3Uri> ParsePathStyleUri(const NUri::TUri& input) {
62+
TS3Uri result;
63+
64+
TStringBuf path = StripPath(input.GetField(NUri::TField::FieldPath));
65+
66+
if (path.Empty()) {
67+
return TConclusionStatus::Fail(TStringBuilder() << "Missing bucket in path-style S3 uri: " << input.Serialize());
68+
}
69+
70+
TStringBuf folder;
71+
TStringBuf bucket;
72+
if (path.TryRSplit('/', folder, bucket)) {
73+
result.Folder = folder;
74+
result.Bucket = bucket;
75+
} else {
76+
result.Bucket = path;
77+
}
78+
79+
result.Host = input.GetHost();
80+
81+
if (auto status = result.FillStyleAgnosticFields(input); status.IsFail()) {
82+
return status;
83+
}
84+
return result;
85+
}
86+
87+
static TConclusion<TS3Uri> ParseVirtualHostedStyleUri(const NUri::TUri& input) {
88+
TS3Uri result;
89+
90+
{
91+
TStringBuf host;
92+
TStringBuf bucket;
93+
if (input.GetHost().TrySplit('.', bucket, host)) {
94+
result.Host = host;
95+
result.Bucket = bucket;
96+
} else {
97+
return TConclusionStatus::Fail(TStringBuilder() << "Missing bucket in virtual-hosted style S3 uri: " << input.Serialize());
98+
}
99+
}
100+
101+
if (TStringBuf path = StripPath(input.GetField(NUri::TField::FieldPath))) {
102+
result.Folder = path;
103+
}
104+
105+
if (auto status = result.FillStyleAgnosticFields(input); status.IsFail()) {
106+
return status;
107+
}
108+
return result;
109+
}
110+
111+
TConclusionStatus FillStyleAgnosticFields(const NUri::TUri& from) {
112+
if (from.GetField(NUri::TField::FieldPort)) {
113+
Port = from.GetPort();
114+
}
115+
116+
switch (from.GetScheme()) {
117+
case NUri::TScheme::SchemeEmpty:
118+
break;
119+
case NUri::TScheme::SchemeHTTP:
120+
Scheme = NKikimrSchemeOp::TS3Settings_EScheme_HTTP;
121+
break;
122+
case NUri::TScheme::SchemeHTTPS:
123+
Scheme = NKikimrSchemeOp::TS3Settings_EScheme_HTTPS;
124+
break;
125+
default:
126+
return TConclusionStatus::Fail(TStringBuilder() << "Unexpected scheme in url: " << from.Serialize());
127+
}
128+
129+
return TConclusionStatus::Success();
130+
}
131+
132+
public:
133+
static TConclusion<TS3Uri> ParseUri(const TString& input) {
134+
NUri::TUri uri;
135+
if (uri.Parse(input, NUri::TFeature::NewFeaturesRecommended) != NUri::TState::EParsed::ParsedOK) {
136+
return TConclusionStatus::Fail("Cannot parse URI: " + input);
137+
}
138+
139+
TUriStyle uriStyle;
140+
if (const auto deducedStyle = DeduceUriStyle(uri)) {
141+
uriStyle = *deducedStyle;
142+
} else {
143+
uriStyle = PATH_STYLE;
144+
}
145+
146+
switch (uriStyle) {
147+
case PATH_STYLE:
148+
return ParsePathStyleUri(uri);
149+
case VIRTUAL_HOSTED_STYLE:
150+
return ParseVirtualHostedStyleUri(uri);
151+
}
152+
}
153+
154+
TString GetEndpoint() const {
155+
TString endpoint = Host;
156+
if (Port) {
157+
endpoint += TStringBuilder() << ':' << *Port;
158+
}
159+
if (Folder) {
160+
endpoint += TStringBuilder() << '/' << *Folder;
161+
}
162+
return endpoint;
163+
}
164+
165+
void FillSettings(NKikimrSchemeOp::TS3Settings& settings) const {
166+
settings.SetEndpoint(GetEndpoint());
167+
settings.SetBucket(Bucket);
168+
if (Scheme) {
169+
settings.SetScheme(*Scheme);
170+
}
171+
}
172+
};
173+
174+
} // namespace NKikimr::NColumnShard::NTiers

ydb/core/tx/tiering/tier/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ LIBRARY()
22

33
SRCS(
44
object.cpp
5+
s3_uri.cpp
56
)
67

78
PEERDIR(
89
ydb/library/conclusion
910
ydb/services/metadata/secret/accessor
11+
contrib/restricted/aws/aws-crt-cpp
1012
)
1113

1214
YQL_LAST_ABI_VERSION()

ydb/core/tx/tiering/ut/ut_object.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#include <ydb/core/tx/tiering/tier/object.h>
2+
#include <ydb/core/tx/tiering/tier/s3_uri.h>
23

34
#include <library/cpp/testing/unittest/registar.h>
45

56
namespace NKikimr {
67

78
using namespace NColumnShard;
89

9-
Y_UNIT_TEST_SUITE(S3SettingsConvertion) {
10+
Y_UNIT_TEST_SUITE(S3SettingsConversion) {
1011
void ValidateConversion(
1112
const NKikimrSchemeOp::TExternalDataSourceDescription& input, TConclusion<const NKikimrSchemeOp::TS3Settings> expectedResult) {
1213
NTiers::TTierConfig config;
@@ -69,6 +70,33 @@ Y_UNIT_TEST_SUITE(S3SettingsConvertion) {
6970
)", &output));
7071
ValidateConversion(input, output);
7172
}
73+
74+
Y_UNIT_TEST(Folders) {
    // Every URI is paired with the endpoint it must produce. The host is part of the
    // endpoint, so URIs on "storage.yandexcloud.net" cannot share the expectation of
    // the ones on "s3.yandexcloud.net"; only the port and folder suffix are common.
    struct TCase {
        TString Uri;
        TString ExpectedEndpoint;
    };
    const std::vector<TCase> cases = {
        {"http://s3.yandexcloud.net:8080/my-folder/subfolder/bucket", "s3.yandexcloud.net:8080/my-folder/subfolder"},
        {"http://bucket.s3.yandexcloud.net:8080/my-folder/subfolder", "s3.yandexcloud.net:8080/my-folder/subfolder"},
        {"http://storage.yandexcloud.net:8080/my-folder/subfolder/bucket", "storage.yandexcloud.net:8080/my-folder/subfolder"},
        {"http://storage.yandexcloud.net:8080///my-folder/subfolder/bucket//", "storage.yandexcloud.net:8080/my-folder/subfolder"},
    };
    for (const auto& testCase : cases) {
        auto parsed = NTiers::TS3Uri::ParseUri(testCase.Uri);
        // Report the offending URI before the result is detached.
        // NOTE(review): presumably DetachResult() on a failed conclusion aborts - confirm.
        UNIT_ASSERT_C(!parsed.IsFail(), testCase.Uri);
        const NTiers::TS3Uri uri = parsed.DetachResult();
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetEndpoint(), testCase.ExpectedEndpoint, testCase.Uri);
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetBucket(), "bucket", testCase.Uri);
    }
}
87+
88+
Y_UNIT_TEST(StyleDeduction) {
    // URIs in both layouts; each one is expected to yield the bucket name "bucket".
    const std::vector<TString> candidates = {
        "http://storage.yandexcloud.net/bucket",
        "http://my-s3.net/bucket",
        "http://bucket.my-s3.net",
        "http://bucket.my-s3.net/",
    };
    for (const TString& candidate : candidates) {
        auto parsed = NTiers::TS3Uri::ParseUri(candidate);
        const NTiers::TS3Uri s3Uri = parsed.DetachResult();
        UNIT_ASSERT_STRINGS_EQUAL_C(s3Uri.GetBucket(), "bucket", candidate);
    }
}
72100
}
73101

74102
} // namespace NKikimr

0 commit comments

Comments
 (0)