Skip to content

YQ-3410 improve s3 url escape, added '#', '?' symbols #7881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,22 @@ using namespace NTestUtils;
using namespace fmt::literals;

Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
TString GetSymbolsString(char start, char end, const TString& skip = "") {
TStringBuilder result;
for (char symbol = start; symbol <= end; ++symbol) {
if (skip.Contains(symbol)) {
continue;
}
result << symbol;
}
return result;
}

Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) {
const TString externalDataSourceName = "/Root/external_data_source";
const TString externalTableName = "/Root/test_binding_resolve";
const TString bucket = "test_bucket1";
const TString object = "test_object";
const TString object = TStringBuilder() << "test_" << GetSymbolsString(' ', '~', "{}") << "_object";

CreateBucketWithObject(bucket, object, TEST_CONTENT);

Expand All @@ -49,7 +60,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
"external_source"_a = externalDataSourceName,
"external_table"_a = externalTableName,
"location"_a = GetBucketLocation(bucket),
"object"_a = object
"object"_a = EscapeC(object)
);
auto result = session.ExecuteSchemeQuery(query).GetValueSync();
UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString());
Expand Down Expand Up @@ -930,7 +941,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
const TString externalDataSourceName = "/Root/external_data_source";
const TString externalTableName = "/Root/test_binding_resolve";
const TString bucket = "test_bucket1";
const TString object = "year=1/month=2/test_object";
const TString object = TStringBuilder() << "year=1/month=2/test_" << GetSymbolsString(' ', '~') << "_object";
const TString content = "data,year,month\ntest,1,2";

CreateBucketWithObject(bucket, object, content);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class TS3ReadActor : public NActors::TActorBootstrapped<TS3ReadActor>, public ID
const auto& authInfo = Credentials.GetAuthInfo();
LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]");
Gateway->Download(
UrlEscapeRet(url, true),
NS3Util::UrlEscapeRet(url),
IHTTPGateway::MakeYcHeaders(requestId, authInfo.GetToken(), {}, authInfo.GetAwsUserPwd(), authInfo.GetAwsSigV4()),
0U,
std::min(object.GetSize(), SizeLimit),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ struct TRetryStuff {
const TString& requestId,
const IHTTPGateway::TRetryPolicy::TPtr& retryPolicy
) : Gateway(std::move(gateway))
, Url(UrlEscapeRet(url, true))
, Url(NS3Util::UrlEscapeRet(url))
, Headers(headers)
, Offset(0U)
, SizeLimit(sizeLimit)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ class TS3WriteActor : public TActorBootstrapped<TS3WriteActor>, public IDqComput
Gateway,
Credentials,
key,
UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension, true),
NS3Util::UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension),
Compression,
RetryPolicy, DirtyWrite, Token);
keyIt->second.emplace_back(fileWrite.get());
Expand Down
12 changes: 12 additions & 0 deletions ydb/library/yql/providers/s3/common/ut/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
UNITTEST_FOR(ydb/library/yql/providers/s3/common)

SRCS(
util_ut.cpp
)

PEERDIR(
ydb/library/yql/public/udf/service/stub
ydb/library/yql/sql/pg_dummy
)

END()
35 changes: 35 additions & 0 deletions ydb/library/yql/providers/s3/common/util.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
#include "util.h"

#include <library/cpp/string_utils/quote/quote.h>


namespace NYql::NS3Util {

namespace {

inline char d2x(unsigned x) {
return (char)((x < 10) ? ('0' + x) : ('A' + x - 10));
}

char* UrlEscape(char* to, const char* from) {
while (*from) {
if (*from == '%' || *from == '#' || *from == '?' || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') {
*to++ = '%';
*to++ = d2x((unsigned char)*from >> 4);
*to++ = d2x((unsigned char)*from & 0xF);
} else {
*to++ = *from;
}
++from;
}

*to = 0;

return to;
}

}

TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) {
if (!issues) {
return TIssues{};
Expand All @@ -13,4 +41,11 @@ TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) {
return TIssues{result};
}

TString UrlEscapeRet(const TStringBuf from) {
TString to;
to.ReserveAndResize(CgiEscapeBufLen(from.size()));
to.resize(UrlEscape(to.begin(), from.begin()) - to.data());
return to;
}

}
5 changes: 5 additions & 0 deletions ydb/library/yql/providers/s3/common/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ namespace NYql::NS3Util {

TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues);

// Like UrlEscape with forceEscape = true
// from ydb/library/cpp/string_utils/quote/quote.h, but also escapes:
// '#', '?'
TString UrlEscapeRet(const TStringBuf from);

}
33 changes: 33 additions & 0 deletions ydb/library/yql/providers/s3/common/util_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "util.h"

#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/testing/unittest/registar.h>


namespace NYql::NS3Util {

Y_UNIT_TEST_SUITE(TestS3UrlEscape) {
// Tests on force UrlEscape copied from library/cpp/string_utils/quote/quote_ut.cpp
Y_UNIT_TEST(EscapeEscapedForce) {
TString s;

s = "hello%3dworld";
UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%253dworld");
}

Y_UNIT_TEST(EscapeUnescapeForceRet) {
TString s;

s = "hello%3dworld";
UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(NS3Util::UrlEscapeRet(s)), "hello%3dworld");
}

// Test additional symbols escape
Y_UNIT_TEST(EscapeAdditionalSymbols) {
TString s = "hello#?world";

UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%23%3Fworld");
}
}

} // namespace NYql::NS3Util
4 changes: 4 additions & 0 deletions ydb/library/yql/providers/s3/common/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,7 @@ IF (CLANG AND NOT WITH_VALGRIND)
ENDIF()

END()

RECURSE_FOR_TESTS(
ut
)
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ class TS3Lister : public IS3Lister {
MakeFilter(listingRequest.Pattern, listingRequest.PatternType, sharedCtx);

auto request = listingRequest;
request.Url = UrlEscapeRet(request.Url, true);
request.Url = NS3Util::UrlEscapeRet(request.Url);
auto ctx = TListingContext{
std::move(sharedCtx),
std::move(filter),
Expand Down
Loading