Skip to content

Commit dcb70d0

Browse files
authored
Recover lost locks as broken (#13181)
1 parent baa0367 commit dcb70d0

File tree

5 files changed

+71
-15
lines changed

5 files changed

+71
-15
lines changed

.github/config/muted_ya.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ ydb/core/keyvalue/ut_trace TKeyValueTracingTest.WriteSmall
2525
ydb/core/kqp/ut/cost KqpCost.OlapWriteRow
2626
ydb/core/kqp/ut/data_integrity KqpDataIntegrityTrails.Select
2727
ydb/core/kqp/ut/data_integrity KqpDataIntegrityTrails.UpsertEvWrite
28-
ydb/core/kqp/ut/olap KqpOlap.DeleteAbsent+Reboot
2928
ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestAggregation
3029
ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestFilterCompare
3130
ydb/core/kqp/ut/olap KqpOlap.ManyColumnShardsWithRestarts

ydb/core/kqp/ut/olap/kqp_olap_ut.cpp

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2992,9 +2992,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
29922992
UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS);
29932993
}
29942994

2995-
Y_UNIT_TEST_TWIN(DeleteAbsent, Reboot) {
2996-
//This test tries to DELETE from a table with WHERE condition that matches no rows
2997-
//It corresponds to a SCAN, then NO write then COMMIT
2995+
void TestDeleteAbsent(const size_t shardCount, bool reboot) {
2996+
//This test tries to DELETE from a table when there are no rows to delete on some shard
2997+
//It corresponds to a SCAN, then NO write then COMMIT on that shard
29982998
auto csController = NYDBTest::TControllers::RegisterCSControllerGuard<NYDBTest::NColumnShard::TController>();
29992999

30003000
NKikimrConfig::TAppConfig appConfig;
@@ -3006,20 +3006,73 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
30063006
TTestHelper::TColumnSchema().SetName("value").SetType(NScheme::NTypeIds::Int32).SetNullable(true),
30073007
};
30083008
TTestHelper::TColumnTable testTable;
3009-
testTable.SetName("/Root/ttt").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(schema);
3009+
testTable.SetName("/Root/ttt").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(schema).SetMinPartitionsCount(shardCount);
30103010
testHelper.CreateTable(testTable);
3011+
auto client = testHelper.GetKikimr().GetQueryClient();
3012+
//1. Insert exactly one row into the table, so that only one shard will contain a row
3013+
const auto result = client
3014+
.ExecuteQuery(
3015+
R"(
3016+
INSERT INTO `/Root/ttt` (id, value) VALUES
3017+
(1, 11)
3018+
)",
3019+
NYdb::NQuery::TTxControl::BeginTx().CommitTx())
3020+
.GetValueSync();
3021+
UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
3022+
//2. Ensure that there is actually 1 row in the table
3023+
{
3024+
const auto resultSelect = client
3025+
.ExecuteQuery(
3026+
"SELECT * FROM `/Root/ttt`",
3027+
NYdb::NQuery::TTxControl::BeginTx().CommitTx())
3028+
.GetValueSync();
30113029

3012-
if (Reboot) {
3030+
UNIT_ASSERT_C(resultSelect.IsSuccess(), resultSelect.GetIssues().ToString());
3031+
const auto resultSets = resultSelect.GetResultSets();
3032+
UNIT_ASSERT_VALUES_EQUAL(resultSets.size(), 1);
3033+
const auto resultSet = resultSets[0];
3034+
UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 1);
3035+
}
3036+
if (reboot) {
30133037
csController->SetRestartOnLocalTxCommitted("TProposeWriteTransaction");
30143038
}
3015-
auto client = testHelper.GetKikimr().GetQueryClient();
3039+
//DELETE 1 row from one shard and 0 rows from others
30163040
const auto resultDelete =
30173041
client
30183042
.ExecuteQuery(
3019-
"DELETE from `/Root/ttt` WHERE value % 2 == 1;",
3043+
"DELETE from `/Root/ttt` ",
30203044
NYdb::NQuery::TTxControl::BeginTx().CommitTx())
30213045
.GetValueSync();
3022-
UNIT_ASSERT_C(resultDelete.IsSuccess(), resultDelete.GetIssues().ToString());
3046+
UNIT_ASSERT_C(resultDelete.IsSuccess() != reboot, resultDelete.GetIssues().ToString());
3047+
{
3048+
const auto resultSelect = client
3049+
.ExecuteQuery(
3050+
"SELECT * FROM `/Root/ttt`",
3051+
NYdb::NQuery::TTxControl::BeginTx().CommitTx())
3052+
.GetValueSync();
3053+
3054+
UNIT_ASSERT_C(resultSelect.IsSuccess(), resultSelect.GetIssues().ToString());
3055+
const auto resultSets = resultSelect.GetResultSets();
3056+
UNIT_ASSERT_VALUES_EQUAL(resultSets.size(), 1);
3057+
const auto resultSet = resultSets[0];
3058+
UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), reboot ? 1 : 0);
3059+
3060+
}
3061+
//DELETE 0 rows from every shard
3062+
const auto resultDelete2 =
3063+
client
3064+
.ExecuteQuery(
3065+
"DELETE from `/Root/ttt` WHERE id < 100",
3066+
NYdb::NQuery::TTxControl::BeginTx().CommitTx())
3067+
.GetValueSync();
3068+
UNIT_ASSERT_C(resultDelete2.IsSuccess() != reboot, result.GetIssues().ToString());
3069+
}
3070+
Y_UNIT_TEST_TWIN(DeleteAbsentSingleShard, Reboot) {
3071+
TestDeleteAbsent(1, Reboot);
3072+
}
3073+
3074+
Y_UNIT_TEST_TWIN(DeleteAbsentMultipleShards, Reboot) {
3075+
TestDeleteAbsent(2, Reboot);
30233076
}
30243077
}
30253078

ydb/core/tx/columnshard/operations/manager.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,11 @@ bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) {
5555
while (!rowset.EndOfSet()) {
5656
const ui64 lockId = rowset.GetValue<Schema::OperationTxIds::LockId>();
5757
const ui64 txId = rowset.GetValue<Schema::OperationTxIds::TxId>();
58-
AFL_VERIFY(LockFeatures.contains(lockId))("lock_id", lockId);
58+
if (auto it = LockFeatures.find(lockId); it == LockFeatures.end()) {
59+
auto lock = TLockFeatures(lockId, 0);
60+
lock.SetBroken();
61+
LockFeatures.emplace(lockId, std::move(lock));
62+
}
5963
AFL_VERIFY(Tx2Lock.emplace(txId, lockId).second);
6064
if (!rowset.Next()) {
6165
return false;

ydb/core/tx/columnshard/operations/manager.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ namespace NKikimr::NColumnShard {
1616
class TColumnShard;
1717
class TLockFeatures;
1818

19-
class TLockSharingInfo {
19+
class TLockSharingInfo: TMoveOnly {
2020
private:
2121
const ui64 LockId;
2222
const ui64 Generation;
2323
TAtomicCounter InternalGenerationCounter = 0;
24-
TAtomicCounter Broken = 0;
24+
std::atomic<bool> Broken = false;
2525
TAtomicCounter WritesCounter = 0;
2626
friend class TLockFeatures;
2727

@@ -43,7 +43,7 @@ class TLockSharingInfo {
4343
}
4444

4545
bool IsBroken() const {
46-
return Broken.Val();
46+
return Broken;
4747
}
4848

4949
ui64 GetCounter() const {

ydb/core/tx/columnshard/transactions/locks/interaction.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ class TPointInfo {
185185
return StartTxIds.empty() && FinishTxIds.empty() && IntervalTxIds.empty();
186186
}
187187

188-
void ProvideTxIdsFrom(const TPointInfo& previouse) {
189-
for (auto&& i : previouse.IntervalTxIds) {
188+
void ProvideTxIdsFrom(const TPointInfo& previous) {
189+
for (auto&& i : previous.IntervalTxIds) {
190190
auto provided = i.second;
191191
{
192192
auto it = StartTxIds.find(i.first);

0 commit comments

Comments
 (0)