Skip to content

Commit 178fd7e

Browse files
authored
YQ-3410 improve s3 url escape, added '#', '?' symbols (#7921)
1 parent f179f77 commit 178fd7e

File tree

10 files changed

+107
-7
lines changed

10 files changed

+107
-7
lines changed

ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,22 @@ using namespace NTestUtils;
2121
using namespace fmt::literals;
2222

2323
Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
24+
TString GetSymbolsString(char start, char end, const TString& skip = "") {
25+
TStringBuilder result;
26+
for (char symbol = start; symbol <= end; ++symbol) {
27+
if (skip.Contains(symbol)) {
28+
continue;
29+
}
30+
result << symbol;
31+
}
32+
return result;
33+
}
34+
2435
Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) {
2536
const TString externalDataSourceName = "/Root/external_data_source";
2637
const TString externalTableName = "/Root/test_binding_resolve";
2738
const TString bucket = "test_bucket1";
28-
const TString object = "test_object";
39+
const TString object = TStringBuilder() << "test_" << GetSymbolsString(' ', '~', "{}") << "_object";
2940

3041
CreateBucketWithObject(bucket, object, TEST_CONTENT);
3142

@@ -50,7 +61,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
5061
"external_source"_a = externalDataSourceName,
5162
"external_table"_a = externalTableName,
5263
"location"_a = GetBucketLocation(bucket),
53-
"object"_a = object
64+
"object"_a = EscapeC(object)
5465
);
5566
auto result = session.ExecuteSchemeQuery(query).GetValueSync();
5667
UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString());
@@ -931,7 +942,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
931942
const TString externalDataSourceName = "/Root/external_data_source";
932943
const TString externalTableName = "/Root/test_binding_resolve";
933944
const TString bucket = "test_bucket1";
934-
const TString object = "year=1/month=2/test_object";
945+
const TString object = TStringBuilder() << "year=1/month=2/test_" << GetSymbolsString(' ', '~') << "_object";
935946
const TString content = "data,year,month\ntest,1,2";
936947

937948
CreateBucketWithObject(bucket, object, content);

ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ class TS3ReadActor : public NActors::TActorBootstrapped<TS3ReadActor>, public ID
167167
const auto& authInfo = Credentials.GetAuthInfo();
168168
LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]");
169169
Gateway->Download(
170-
UrlEscapeRet(url, true),
170+
NS3Util::UrlEscapeRet(url),
171171
IHTTPGateway::MakeYcHeaders(requestId, authInfo.GetToken(), {}, authInfo.GetAwsUserPwd(), authInfo.GetAwsSigV4()),
172172
0U,
173173
std::min(object.GetSize(), SizeLimit),

ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ struct TRetryStuff {
184184
const TString& requestId,
185185
const IHTTPGateway::TRetryPolicy::TPtr& retryPolicy
186186
) : Gateway(std::move(gateway))
187-
, Url(UrlEscapeRet(url, true))
187+
, Url(NS3Util::UrlEscapeRet(url))
188188
, Headers(headers)
189189
, Offset(0U)
190190
, SizeLimit(sizeLimit)

ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ class TS3WriteActor : public TActorBootstrapped<TS3WriteActor>, public IDqComput
578578
Gateway,
579579
Credentials,
580580
key,
581-
UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension, true),
581+
NS3Util::UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension),
582582
Compression,
583583
RetryPolicy, DirtyWrite, Token);
584584
keyIt->second.emplace_back(fileWrite.get());
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
UNITTEST_FOR(ydb/library/yql/providers/s3/common)
2+
3+
SRCS(
4+
util_ut.cpp
5+
)
6+
7+
PEERDIR(
8+
ydb/library/yql/public/udf/service/stub
9+
ydb/library/yql/sql/pg_dummy
10+
)
11+
12+
END()

ydb/library/yql/providers/s3/common/util.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,35 @@
11
#include "util.h"
22

3+
#include <library/cpp/string_utils/quote/quote.h>
4+
5+
36
namespace NYql::NS3Util {
47

8+
namespace {
9+
10+
inline char d2x(unsigned x) {
11+
return (char)((x < 10) ? ('0' + x) : ('A' + x - 10));
12+
}
13+
14+
char* UrlEscape(char* to, const char* from) {
15+
while (*from) {
16+
if (*from == '%' || *from == '#' || *from == '?' || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') {
17+
*to++ = '%';
18+
*to++ = d2x((unsigned char)*from >> 4);
19+
*to++ = d2x((unsigned char)*from & 0xF);
20+
} else {
21+
*to++ = *from;
22+
}
23+
++from;
24+
}
25+
26+
*to = 0;
27+
28+
return to;
29+
}
30+
31+
}
32+
533
TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) {
634
if (!issues) {
735
return TIssues{};
@@ -13,4 +41,11 @@ TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) {
1341
return TIssues{result};
1442
}
1543

44+
TString UrlEscapeRet(const TStringBuf from) {
45+
TString to;
46+
to.ReserveAndResize(CgiEscapeBufLen(from.size()));
47+
to.resize(UrlEscape(to.begin(), from.begin()) - to.data());
48+
return to;
49+
}
50+
1651
}

ydb/library/yql/providers/s3/common/util.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,9 @@ namespace NYql::NS3Util {
77

88
TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues);
99

10+
// Like UrlEscape with forceEscape = true
11+
// from ydb/library/cpp/string_utils/quote/quote.h, but also escapes:
12+
// '#', '?'
13+
TString UrlEscapeRet(const TStringBuf from);
14+
1015
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include "util.h"
2+
3+
#include <library/cpp/string_utils/quote/quote.h>
4+
#include <library/cpp/testing/unittest/registar.h>
5+
6+
7+
namespace NYql::NS3Util {
8+
9+
Y_UNIT_TEST_SUITE(TestS3UrlEscape) {
10+
// Tests on force UrlEscape copied from library/cpp/string_utils/quote/quote_ut.cpp
11+
Y_UNIT_TEST(EscapeEscapedForce) {
12+
TString s;
13+
14+
s = "hello%3dworld";
15+
UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%253dworld");
16+
}
17+
18+
Y_UNIT_TEST(EscapeUnescapeForceRet) {
19+
TString s;
20+
21+
s = "hello%3dworld";
22+
UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(NS3Util::UrlEscapeRet(s)), "hello%3dworld");
23+
}
24+
25+
// Test additional symbols escape
26+
Y_UNIT_TEST(EscapeAdditionalSymbols) {
27+
TString s = "hello#?world";
28+
29+
UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%23%3Fworld");
30+
}
31+
}
32+
33+
} // namespace NYql::NS3Util

ydb/library/yql/providers/s3/common/ya.make

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,7 @@ IF (CLANG AND NOT WITH_VALGRIND)
3434
ENDIF()
3535

3636
END()
37+
38+
RECURSE_FOR_TESTS(
39+
ut
40+
)

ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ class TS3Lister : public IS3Lister {
255255
MakeFilter(listingRequest.Pattern, listingRequest.PatternType, sharedCtx);
256256

257257
auto request = listingRequest;
258-
request.Url = UrlEscapeRet(request.Url, true);
258+
request.Url = NS3Util::UrlEscapeRet(request.Url);
259259
auto ctx = TListingContext{
260260
std::move(sharedCtx),
261261
std::move(filter),

0 commit comments

Comments
 (0)