Merge of important bugfixes for PQ read proxy & kafka proxy #15515

Merged

12 changes: 6 additions & 6 deletions ydb/core/kafka_proxy/actors/kafka_metadata_actor.cpp
@@ -230,8 +230,8 @@ void TKafkaMetadataActor::AddTopicResponse(
 void TKafkaMetadataActor::HandleLocationResponse(TEvLocationResponse::TPtr ev, const TActorContext& ctx) {
     --PendingResponses;

-    auto* r = ev->Get();
     auto actorIter = TopicIndexes.find(ev->Sender);
+    TSimpleSharedPtr<TEvLocationResponse> locationResponse{ev->Release()};

     Y_DEBUG_ABORT_UNLESS(!actorIter.IsEnd());
     Y_DEBUG_ABORT_UNLESS(!actorIter->second.empty());
@@ -248,12 +248,12 @@ void TKafkaMetadataActor::HandleLocationResponse(TEvLocationResponse::TPtr ev, const TActorContext& ctx) {

     for (auto index : actorIter->second) {
         auto& topic = Response->Topics[index];
-        if (r->Status == Ydb::StatusIds::SUCCESS) {
+        if (locationResponse->Status == Ydb::StatusIds::SUCCESS) {
             KAFKA_LOG_D("Describe topic '" << topic.Name << "' location finishied successful");
-            PendingTopicResponses.insert(std::make_pair(index, ev->Release()));
+            PendingTopicResponses.emplace(index, locationResponse);
         } else {
-            KAFKA_LOG_ERROR("Describe topic '" << topic.Name << "' location finishied with error: Code=" << r->Status << ", Issues=" << r->Issues.ToOneLineString());
-            AddTopicError(topic, ConvertErrorCode(r->Status));
+            KAFKA_LOG_ERROR("Describe topic '" << topic.Name << "' location finishied with error: Code=" << locationResponse->Status << ", Issues=" << locationResponse->Issues.ToOneLineString());
+            AddTopicError(topic, ConvertErrorCode(locationResponse->Status));
         }
     }
     RespondIfRequired(ctx);
@@ -307,7 +307,7 @@ void TKafkaMetadataActor::RespondIfRequired(const TActorContext& ctx) {

     if (NeedAllNodes) {
         for (const auto& [id, nodeInfo] : Nodes)
-        AddBroker(id, nodeInfo.Host, nodeInfo.Port);
+            AddBroker(id, nodeInfo.Host, nodeInfo.Port);
     }

     Respond();
3 changes: 2 additions & 1 deletion ydb/core/kafka_proxy/actors/kafka_metadata_actor.h
@@ -85,10 +85,11 @@ class TKafkaMetadataActor: public NActors::TActorBootstrapped<TKafkaMetadataActor> {
     bool NeedAllNodes = false;
     bool HaveError = false;
     bool FallbackToIcDiscovery = false;
-    TMap<ui64, TAutoPtr<TEvLocationResponse>> PendingTopicResponses;
+    TMap<ui64, TSimpleSharedPtr<TEvLocationResponse>> PendingTopicResponses;

     THashMap<ui64, TNodeInfo> Nodes;
     THashMap<TString, TActorId> PartitionActors;
+    THashSet<ui64> HaveBrokers;

 };
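Taken together, the two kafka_proxy hunks fix an ownership bug. HandleLocationResponse can file one TEvLocationResponse under several topic indices, since a single metadata request may name the same topic more than once; the old code called ev->Release() once per index and stored the result in a map of TAutoPtr, i.e. it could hand the same allocation to several exclusive owners (or a null pointer to all but the first). The new code takes ownership exactly once, as a TSimpleSharedPtr, and emplaces that shared pointer under every index. Below is a minimal stand-alone sketch of the same shape, with std::shared_ptr standing in for TSimpleSharedPtr; all names are illustrative, not the YDB types.

// Sketch: one response shared across several topic indices.
// std::shared_ptr plays the role of TSimpleSharedPtr here.
#include <cassert>
#include <map>
#include <memory>
#include <vector>

struct LocationResponse { int status = 0; };

int main() {
    // A metadata request that names the same topic twice maps two
    // topic indices to one location response.
    std::vector<size_t> indices{0, 1};

    // After the fix: take ownership of the response once...
    auto response = std::make_shared<LocationResponse>();

    // ...then share it under every index. With a map of exclusive
    // owners, each entry would believe it solely owns the pointer,
    // which loses or double-frees the event.
    std::map<size_t, std::shared_ptr<LocationResponse>> pending;
    for (size_t index : indices) {
        pending.emplace(index, response); // ref count grows, no double delete
    }

    assert(response.use_count() == 3); // the local copy plus two map entries
    return 0;
}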
41 changes: 29 additions & 12 deletions ydb/core/kafka_proxy/ut/port_discovery_ut.cpp
@@ -189,12 +189,14 @@ namespace NKafka::NTests {
     }

     void CreateMetarequestActor(
-        const TActorId& edge, const TString& topicPath, auto* runtime, const auto& kafkaConfig, const TActorId& fakeCacheId = {}
+        const TActorId& edge, const TVector<TString>& topics, auto* runtime, const auto& kafkaConfig, const TActorId& fakeCacheId = {}
     ) {
         TMetadataRequestData::TPtr metaRequest = std::make_shared<TMetadataRequestData>();
-        metaRequest->Topics.emplace_back();
-        auto& topic = metaRequest->Topics[0];
-        topic.Name = topicPath;
+        for (const auto& topicPath : topics) {
+            metaRequest->Topics.emplace_back();
+            auto& topic = metaRequest->Topics.back();
+            topic.Name = topicPath;
+        }

         auto context = std::make_shared<TContext>(kafkaConfig);
         context->ConnectionId = edge;
@@ -215,14 +217,16 @@ namespace NKafka::NTests {
         runtime->EnableScheduleForActor(actorId);
     }

-    void CheckKafkaMetaResponse(TTestActorRuntime* runtime, ui64 kafkaPort, bool error = false) {
+    void CheckKafkaMetaResponse(TTestActorRuntime* runtime, ui64 kafkaPort, bool error = false, ui64 expectedCount = 1) {
         TAutoPtr<IEventHandle> handle;
         auto* ev = runtime->GrabEdgeEvent<TEvKafka::TEvResponse>(handle);
         UNIT_ASSERT(ev);
         auto response = dynamic_cast<TMetadataResponseData*>(ev->Response.get());
-        UNIT_ASSERT_VALUES_EQUAL(response->Topics.size(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(response->Topics.size(), expectedCount);
         if (!error) {
-            UNIT_ASSERT(response->Topics[0].ErrorCode == EKafkaErrors::NONE_ERROR);
+            for (const auto& topic : response->Topics) {
+                UNIT_ASSERT(topic.ErrorCode == EKafkaErrors::NONE_ERROR);
+            }
         } else {
             UNIT_ASSERT(response->Topics[0].ErrorCode == EKafkaErrors::LISTENER_NOT_FOUND);
             UNIT_ASSERT(ev->ErrorCode == EKafkaErrors::LISTENER_NOT_FOUND);
@@ -239,7 +243,7 @@
         auto* runtime = server.GetRuntime();
         auto edge = runtime->AllocateEdgeActor();

-        CreateMetarequestActor(edge, NKikimr::JoinPath({"/Root/PQ/", topicName}), runtime,
+        CreateMetarequestActor(edge, {NKikimr::JoinPath({"/Root/PQ/", topicName})}, runtime,
                                config);

         CheckKafkaMetaResponse(runtime, kafkaPort);
@@ -262,7 +266,7 @@
         ep->set_node_id(9998);
         auto fakeCache = runtime->Register(new TFakeDiscoveryCache(leResult, false));
         runtime->EnableScheduleForActor(fakeCache);
-        CreateMetarequestActor(edge, NKikimr::JoinPath({"/Root/PQ/", topicName}), runtime,
+        CreateMetarequestActor(edge, {NKikimr::JoinPath({"/Root/PQ/", topicName})}, runtime,
                                config, fakeCache);

         CheckKafkaMetaResponse(runtime, kafkaPort);
@@ -277,7 +281,7 @@
         Ydb::Discovery::ListEndpointsResult leResult;
         auto fakeCache = runtime->Register(new TFakeDiscoveryCache(leResult, true));
         runtime->EnableScheduleForActor(fakeCache);
-        CreateMetarequestActor(edge, NKikimr::JoinPath({"/Root/PQ/", topicName}), runtime,
+        CreateMetarequestActor(edge, {NKikimr::JoinPath({"/Root/PQ/", topicName})}, runtime,
                                config, fakeCache);

         CheckKafkaMetaResponse(runtime, kafkaPort, true);
@@ -296,10 +300,23 @@
         ep->set_node_id(runtime->GetNodeId(0));
         auto fakeCache = runtime->Register(new TFakeDiscoveryCache(leResult, false));
         runtime->EnableScheduleForActor(fakeCache);
-        CreateMetarequestActor(edge, NKikimr::JoinPath({"/Root/PQ/", topicName}), runtime,
+        CreateMetarequestActor(edge, {NKikimr::JoinPath({"/Root/PQ/", topicName})}, runtime,
                                config, fakeCache);

         CheckKafkaMetaResponse(runtime, 12345);
     }

+
+    Y_UNIT_TEST(MetadataActorDoubleTopic) {
+        auto [server, kafkaPort, config, topicName] = SetupServer("topic1");
+
+        auto* runtime = server.GetRuntime();
+        auto edge = runtime->AllocateEdgeActor();
+
+        auto path = NKikimr::JoinPath({"/Root/PQ/", topicName});
+        CreateMetarequestActor(edge, {path, path}, runtime, config);
+
+        CheckKafkaMetaResponse(runtime, kafkaPort, false, 2);
+    }
     }
 }
}
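The new MetadataActorDoubleTopic test pins the regression directly: one metadata request carries the same topic path twice, and the response must contain two successful per-topic entries — exactly the situation that the old single-owner PendingTopicResponses storage mishandled.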
2 changes: 1 addition & 1 deletion ydb/core/persqueue/partition.cpp
@@ -2963,6 +2963,7 @@ TPartition::EProcessResult TPartition::PreProcessUserAct(
 void TPartition::CommitUserAct(TEvPQ::TEvSetClientInfo& act) {
     const bool strictCommitOffset = (act.Type == TEvPQ::TEvSetClientInfo::ESCI_OFFSET && act.Strict);
     const TString& user = act.ClientId;
+    RemoveUserAct(user);
     const auto& ctx = ActorContext();
     if (!PendingUsersInfo.contains(user) && AffectedUsers.contains(user)) {
         switch (act.Type) {
@@ -3082,7 +3083,6 @@ void TPartition::CommitUserAct(TEvPQ::TEvSetClientInfo& act) {
         return;
     }

-    RemoveUserAct(act.ClientId);
     return EmulatePostProcessUserAct(act, userInfo, ActorContext());
 }
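Judging from this hunk and the new UserActCount test below, the bug was that CommitUserAct has many early returns that answer with an error, and RemoveUserAct used to run only near the end of the success path; acts rejected on those error paths therefore stayed in the pending-act accounting until the MAX_USER_ACTS cap blocked the partition. Moving RemoveUserAct to the top makes every path, error or not, release the act. A minimal stand-alone sketch of the pattern, with illustrative names rather than the YDB ones:

#include <cassert>

constexpr int kMaxUserActs = 1000;   // stand-in for MAX_USER_ACTS
int pendingUserActs = 0;             // grows as user acts are enqueued

bool CanEnqueueUserAct() { return pendingUserActs < kMaxUserActs; }

void CommitUserAct(bool actIsValid) {
    // After the fix: the act leaves the accounting on every path.
    --pendingUserActs;
    if (!actIsValid) {
        // Before the fix the removal sat below this point, so every
        // error reply leaked one slot of the pending-act budget.
        return;
    }
    // ... apply the act ...
}

int main() {
    for (int i = 0; i <= kMaxUserActs; ++i) {
        assert(CanEnqueueUserAct()); // would fail before the fix at i == 1000
        ++pendingUserActs;           // act enqueued
        CommitUserAct(false);        // server rejects it with an error
    }
    return 0;
}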
1 change: 1 addition & 0 deletions ydb/core/persqueue/partition.h
@@ -29,6 +29,7 @@ namespace NKikimr::NPQ {

 static const ui32 MAX_BLOB_PART_SIZE = 500_KB;
 static const ui32 DEFAULT_BUCKET_COUNTER_MULTIPLIER = 20;
+static const ui32 MAX_USER_ACTS = 1000;

 using TPartitionLabeledCounters = TProtobufTabletLabeledCounters<EPartitionLabeledCounters_descriptor>;
2 changes: 0 additions & 2 deletions ydb/core/persqueue/partition_read.cpp
@@ -28,8 +28,6 @@

 namespace NKikimr::NPQ {

-static const ui32 MAX_USER_ACTS = 1000;
-
 TMaybe<TInstant> GetReadFrom(ui32 maxTimeLagMs, ui64 readTimestampMs, TInstant consumerReadFromTimestamp, const TActorContext& ctx) {
     if (!(maxTimeLagMs > 0 || readTimestampMs > 0 || consumerReadFromTimestamp > TInstant::MilliSeconds(1))) {
         return {};
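Hoisting MAX_USER_ACTS out of partition_read.cpp and into partition.h looks like plumbing for the test below: the constant becomes visible to other translation units, so the new UserActCount test can loop against the same limit the partition enforces.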
30 changes: 27 additions & 3 deletions ydb/core/persqueue/ut/partition_ut.cpp
@@ -225,7 +225,8 @@ class TPartitionFixture : public NUnitTest::TBaseFixture {
     void SendSetOffset(ui64 cookie,
                        const TString& clientId,
                        ui64 offset,
-                       const TString& sessionId);
+                       const TString& sessionId,
+                       bool strict = false);
     void SendGetOffset(ui64 cookie,
                        const TString& clientId);
     void WaitCmdWrite(const TCmdWriteMatcher& matcher = {});
@@ -488,7 +489,8 @@ void TPartitionFixture::SendCreateSession(ui64 cookie,
 void TPartitionFixture::SendSetOffset(ui64 cookie,
                                       const TString& clientId,
                                       ui64 offset,
-                                      const TString& sessionId)
+                                      const TString& sessionId,
+                                      bool strict)
 {
     auto event = MakeHolder<TEvPQ::TEvSetClientInfo>(cookie,
                                                      clientId,
@@ -498,6 +500,7 @@ void TPartitionFixture::SendSetOffset(ui64 cookie,
                                                      0,
                                                      0,
                                                      TActorId{});
+    event->Strict = strict;
     Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release()));
 }

@@ -712,7 +715,6 @@ void TPartitionFixture::WaitErrorResponse(const TErrorMatcher& matcher)
     }

     if (matcher.Error) {
-
         UNIT_ASSERT_VALUES_EQUAL(*matcher.Error, event->Error);
     }
 }
@@ -1796,6 +1798,28 @@ void TPartitionTxTestHelper::WaitTxPredicateReplyImpl(ui64 userActId, bool status)
 #endif
 }

+Y_UNIT_TEST_F(UserActCount, TPartitionFixture)
+{
+    // In the test, we check that the reference count for `UserInfo` decreases in case of errors. To do this,
+    // we send a large number of requests to which the server will respond with an error.
+
+    CreatePartition();
+
+    SendCreateSession(1, "client", "session-id", 2, 3);
+    WaitCmdWrite({.Count=2, .UserInfos={{0, {.Session="session-id", .Offset=0, .Generation=2, .Step=3}}}});
+    SendCmdWriteResponse(NMsgBusProxy::MSTATUS_OK);
+    WaitProxyResponse({.Cookie=1});
+
+    for (ui64 k = 0; k <= MAX_USER_ACTS; ++k) {
+        const ui64 cookie = 2 + k;
+        // 1 > EndOffset
+        SendSetOffset(cookie, "client", 1, "session-id", true); // strict = true
+        WaitCmdWrite({.Count=2, .UserInfos={{0, {.Session="session-id", .Offset=0, .Generation=2, .Step=3}}}});
+        SendCmdWriteResponse(NMsgBusProxy::MSTATUS_OK);
+        WaitErrorResponse({.Cookie=cookie, .ErrorCode=NPersQueue::NErrorCode::SET_OFFSET_ERROR_COMMIT_TO_FUTURE});
+    }
+}
+
 Y_UNIT_TEST_F(Batching, TPartitionFixture)
 {
     CreatePartition();
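The loop deliberately runs MAX_USER_ACTS + 1 times: every strict commit to offset 1 lies past EndOffset and is answered with SET_OFFSET_ERROR_COMMIT_TO_FUTURE, so before the RemoveUserAct fix the rejected acts would presumably have exhausted the cap and wedged the partition before the loop finished.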
6 changes: 3 additions & 3 deletions ydb/services/persqueue_v1/actors/partition_actor.cpp
@@ -1305,12 +1305,12 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) {
     const auto req = ev->Get();

     auto request = MakeReadRequest(ReadOffset, 0, req->MaxCount, req->MaxSize, req->MaxTimeLagMs, req->ReadTimestampMs, DirectReadId);
-
-    RequestInfly = true;
-    CurrentRequest = request;

     if (!PipeClient) //Pipe will be recreated soon
         return;

+    RequestInfly = true;
+    CurrentRequest = request;
     TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest);
     event->Record.Swap(&request);
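This hunk is an ordering fix. RequestInfly and CurrentRequest were set before the if (!PipeClient) guard, so when the pipe to the tablet had dropped, the handler returned early with the actor already marked as having a request in flight that was never actually sent; judging by the comment, the pipe is recreated later, but the stale flag would block the retry. Setting the flags only after the guard keeps the bookkeeping in line with what was really sent. A minimal sketch of the pattern, with illustrative names rather than the YDB actor fields:

#include <optional>
#include <string>

struct ReadSession {
    bool pipeConnected = false;           // stands in for PipeClient
    bool requestInfly = false;            // stands in for RequestInfly
    std::optional<std::string> currentRequest;

    void HandleRead(const std::string& request) {
        if (!pipeConnected) {
            return; // pipe will be recreated soon; nothing was sent,
                    // so nothing may be marked as in flight
        }
        // Only now is it safe to record the in-flight request.
        requestInfly = true;
        currentRequest = request;
        // ... send the request to the tablet here ...
    }
};

int main() {
    ReadSession session;
    session.HandleRead("read@offset=42");
    // Before the fix the flags were set above the guard, so a dropped
    // pipe left requestInfly == true although nothing was ever sent.
    return session.requestInfly ? 1 : 0;
}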