@@ -4540,6 +4540,130 @@ Y_UNIT_TEST_SUITE(DataShardSnapshots) {
45404540 }
45414541 }
45424542
4543+ void CompactBorrowed (TTestActorRuntime& runtime, ui64 shardId, const TTableId& tableId) {
4544+ auto msg = MakeHolder<TEvDataShard::TEvCompactBorrowed>(tableId.PathId );
4545+ auto sender = runtime.AllocateEdgeActor ();
4546+ runtime.SendToPipe (shardId, sender, msg.Release (), 0 , GetPipeConfigWithRetries ());
4547+ runtime.GrabEdgeEventRethrow <TEvDataShard::TEvCompactBorrowedResult>(sender);
4548+ }
4549+
// Regression scenario: merge a two-shard table while the test holds back the
// split snapshot transfers and some blob reads, then run a borrowed compaction
// on the merged shard and verify that the table still reports the expected
// number of rows.
4550+ Y_UNIT_TEST (PostMergeNotCompactedTooEarly) {
// Bring up a test server that does not use real threads.
4551+ TPortManager pm;
4552+ TServerSettings serverSettings (pm.GetPort (2134 ));
4553+ serverSettings.SetDomainName (" Root" )
4554+ .SetUseRealThreads (false )
4555+ .SetDomainPlanResolution (100 );
4556+
4557+ Tests::TServer::TPtr server = new TServer (serverSettings);
4558+ auto &runtime = *server->GetRuntime ();
4559+ auto sender = runtime.AllocateEdgeActor ();
4560+
// Trace-level logging for the datashard component.
4561+ runtime.SetLogPriority (NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
4562+
4563+ InitRoot (server, sender);
4564+
4565+ TDisableDataShardLogBatching disableDataShardLogBatching;
4566+
// The table is pre-split at key 5; the assertion below checks that this
// yields exactly two shards.
4567+ KqpSchemeExec (runtime, R"(
4568+ CREATE TABLE `/Root/table` (key int, value bytes, PRIMARY KEY (key))
4569+ WITH (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1,
4570+ PARTITION_AT_KEYS = (5));
4571+ )" );
4572+
4573+ const auto shards = GetTableShards (server, sender, " /Root/table" );
4574+ UNIT_ASSERT_VALUES_EQUAL (shards.size (), 2u );
4575+ const auto tableId = ResolveTableId (server, sender, " /Root/table" );
4576+
// Upsert keys 0..19, each with a 128 KiB value. A compaction of the first
// shard is requested once (after key 4), and a compaction of the second shard
// is requested after every upsert of a key >= 5.
4577+ for (int i = 0 ; i < 20 ; ++i) {
4578+ Cerr << " ... upserting key " << i << Endl;
4579+ auto query = Sprintf (R"(
4580+ UPSERT INTO `/Root/table` (key, value) VALUES (%d, '%s');
4581+ )" , i, TString (128 * 1024 , ' x' ).c_str ());
4582+ ExecSQL (server, sender, query);
4583+ if (i >= 5 ) {
4584+ Cerr << " ... compacting shard " << shards.at (1 ) << Endl;
4585+ CompactTable (runtime, shards.at (1 ), tableId, false );
4586+ } else if (i == 4 ) {
4587+ Cerr << " ... compacting shard " << shards.at (0 ) << Endl;
4588+ CompactTable (runtime, shards.at (0 ), tableId, false );
4589+ }
4590+ }
4591+
4592+ // Read (and snapshot) current data, so it doesn't go away on compaction
4593+ UNIT_ASSERT_VALUES_EQUAL (
4594+ KqpSimpleExec (runtime, " SELECT COUNT(*) FROM `/Root/table`;" ),
4595+ " { items { uint64_value: 20 } }" );
4596+
4597+ // Delete all the data in shard 0, this is small and will stay in memtable
4598+ // But when borrowed dst compaction will have pressure to compact it all
4599+ ExecSQL (server, sender, " DELETE FROM `/Root/table` WHERE key < 5" );
4600+
// Hold back every TEvSplitTransferSnapshot event (the observer takes ownership
// via Release) so the test decides when each one is delivered.
4601+ std::vector<TEvDataShard::TEvSplitTransferSnapshot::TPtr> snapshots;
4602+ auto captureSnapshots = runtime.AddObserver <TEvDataShard::TEvSplitTransferSnapshot>(
4603+ [&](TEvDataShard::TEvSplitTransferSnapshot::TPtr& ev) {
4604+ auto * msg = ev->Get ();
4605+ Cerr << " ... captured snapshot from " << msg->Record .GetSrcTabletId () << Endl;
4606+ snapshots.emplace_back (ev.Release ());
4607+ });
4608+
// Start merging both shards and wait until two snapshot events are captured.
// NOTE(review): presumably -1 lifts the split/merge part count limit - confirm
// against SetSplitMergePartCountLimit.
4609+ Cerr << " ... merging table" << Endl;
4610+ SetSplitMergePartCountLimit (server->GetRuntime (), -1 );
4611+ ui64 txId = AsyncMergeTable (server, sender, " /Root/table" , shards);
4612+ Cerr << " ... started merge " << txId << Endl;
4613+ WaitFor (runtime, [&]{ return snapshots.size () >= 2 ; }, " both src tablet snapshots" );
4614+
// From here on, hold back blob storage TEvGet requests whose first queried
// blob id belongs to the second source shard.
4615+ std::vector<TEvBlobStorage::TEvGet::TPtr> gets;
4616+ auto captureGets = runtime.AddObserver <TEvBlobStorage::TEvGet>(
4617+ [&](TEvBlobStorage::TEvGet::TPtr& ev) {
4618+ auto * msg = ev->Get ();
4619+ if (msg->Queries [0 ].Id .TabletID () == shards.at (1 )) {
4620+ Cerr << " ... blocking blob get of " << msg->Queries [0 ].Id << Endl;
4621+ gets.emplace_back (ev.Release ());
4622+ }
4623+ });
4624+
4625+ // Release snapshot for shard 0 then shard 1
4626+ captureSnapshots.Remove ();
4627+ Cerr << " ... unlocking snapshots from tablet " << shards.at (0 ) << Endl;
4628+ for (auto & ev : snapshots) {
4629+ if (ev && ev->Get ()->Record .GetSrcTabletId () == shards.at (0 )) {
4630+ runtime.Send (ev.Release (), 0 , true );
4631+ }
4632+ }
4633+ Cerr << " ... unblocking snapshots from tablet " << shards.at (1 ) << Endl;
4634+ for (auto & ev : snapshots) {
4635+ if (ev && ev->Get ()->Record .GetSrcTabletId () == shards.at (1 )) {
4636+ runtime.Send (ev.Release (), 0 , true );
4637+ }
4638+ }
4639+
4640+ // Let it commit above snapshots and incorrectly compact after the first one is loaded and merged
4641+ runtime.SimulateSleep (TDuration::Seconds (1 ));
// At least one blob read for the second shard must have been intercepted.
4642+ UNIT_ASSERT (gets.size () > 0 );
4643+
// Stop intercepting and re-send the held blob reads.
4644+ Cerr << " ... unblocking blob gets" << Endl;
4645+ captureGets.Remove ();
4646+ for (auto & ev : gets) {
4647+ runtime.Send (ev.Release (), 0 , true );
4648+ }
4649+
4650+ // Let it finish loading the second snapshot
4651+ runtime.SimulateSleep (TDuration::Seconds (1 ));
4652+
4653+ // Wait for merge to complete and start a borrowed compaction
4654+ // When the bug is present it will cause the newly compacted part to have an epoch larger than the previously compacted one
4655+ WaitTxNotification (server, sender, txId);
4656+ const auto merged = GetTableShards (server, sender, " /Root/table" );
4657+ UNIT_ASSERT_VALUES_EQUAL (merged.size (), 1u );
4658+ Cerr << " ... compacting borrowed parts in shard " << merged.at (0 ) << Endl;
4659+ CompactBorrowed (runtime, merged.at (0 ), tableId);
4660+
4661+ // Validate we have an expected number of rows
// 20 rows were upserted and keys 0..4 were deleted, so 15 rows must remain.
4662+ UNIT_ASSERT_VALUES_EQUAL (
4663+ KqpSimpleExec (runtime, " SELECT COUNT(*) FROM `/Root/table`;" ),
4664+ " { items { uint64_value: 15 } }" );
4665+ }
4666+
45434667}
45444668
45454669} // namespace NKikimr
0 commit comments