Skip to content

Commit 10e6f72

Browse files
committed
storage: override MVCCMetadata.TxnDidNotUpdateMeta in mixed version cluster
Specifically, if the cluster version indicates that there can be nodes with the broken SingleDelete logic for separated intent resolution, the MVCCMetadata.TxnDidNotUpdateMeta field will never be set to true. See code comments and #69891 (comment) Informs #69891 Release justification: fix for a release blocker that causes incorrect behavior for transactional writes. Release note: None
1 parent 30c199a commit 10e6f72

16 files changed

+551
-35
lines changed

pkg/ccl/storageccl/engineccl/encrypted_fs_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/cockroachdb/cockroach/pkg/ccl/baseccl"
2323
"github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl/enginepbccl"
2424
"github.com/cockroachdb/cockroach/pkg/roachpb"
25+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2526
"github.com/cockroachdb/cockroach/pkg/storage"
2627
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
2728
"github.com/cockroachdb/cockroach/pkg/util/hlc"
@@ -247,6 +248,7 @@ func TestPebbleEncryption(t *testing.T) {
247248
StorageConfig: base.StorageConfig{
248249
Attrs: roachpb.Attributes{},
249250
MaxSize: 512 << 20,
251+
Settings: cluster.MakeTestingClusterSettings(),
250252
UseFileRegistry: true,
251253
EncryptionOptions: encOptionsBytes,
252254
},

pkg/kv/kvserver/rangefeed/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ go_test(
4848
"//pkg/base",
4949
"//pkg/keys",
5050
"//pkg/roachpb:with-mocks",
51+
"//pkg/settings/cluster",
5152
"//pkg/storage",
5253
"//pkg/storage/enginepb",
5354
"//pkg/testutils",

pkg/kv/kvserver/rangefeed/catchup_scan_bench_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/base"
2222
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
2323
"github.com/cockroachdb/cockroach/pkg/roachpb"
24+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2425
"github.com/cockroachdb/cockroach/pkg/storage"
2526
"github.com/cockroachdb/cockroach/pkg/testutils"
2627
"github.com/cockroachdb/cockroach/pkg/util/encoding"
@@ -184,7 +185,7 @@ func setupMVCCPebble(b testing.TB, dir string, lBaseMaxBytes int64, readOnly boo
184185
peb, err := storage.NewPebble(
185186
context.Background(),
186187
storage.PebbleConfig{
187-
StorageConfig: base.StorageConfig{Dir: dir},
188+
StorageConfig: base.StorageConfig{Dir: dir, Settings: cluster.MakeTestingClusterSettings()},
188189
Opts: opts,
189190
})
190191
if err != nil {

pkg/kv/kvserver/replica_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,8 @@ func (tc *testContext) StartWithStoreConfigAndVersion(
248248
storage.InMemory(),
249249
storage.Attributes(roachpb.Attributes{Attrs: []string{"dc1", "mem"}}),
250250
storage.MaxSize(1<<20),
251-
storage.SetSeparatedIntents(disableSeparatedIntents))
251+
storage.SetSeparatedIntents(disableSeparatedIntents),
252+
storage.Settings(cfg.Settings))
252253
if err != nil {
253254
t.Fatal(err)
254255
}
@@ -6430,9 +6431,9 @@ func TestRangeStatsComputation(t *testing.T) {
64306431
}
64316432
expMS = baseStats
64326433
expMS.Add(enginepb.MVCCStats{
6433-
LiveBytes: 103,
6434+
LiveBytes: 101,
64346435
KeyBytes: 28,
6435-
ValBytes: 75,
6436+
ValBytes: 73,
64366437
IntentBytes: 23,
64376438
LiveCount: 2,
64386439
KeyCount: 2,
@@ -6442,6 +6443,10 @@ func TestRangeStatsComputation(t *testing.T) {
64426443
if tc.engine.IsSeparatedIntentsEnabledForTesting(ctx) {
64436444
expMS.SeparatedIntentCount++
64446445
}
6446+
if !tc.engine.OverrideTxnDidNotUpdateMetaToFalse(ctx) {
6447+
expMS.LiveBytes += 2
6448+
expMS.ValBytes += 2
6449+
}
64456450
if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil {
64466451
t.Fatal(err)
64476452
}

pkg/kv/kvserver/spanset/batch.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,10 @@ func (s spanSetWriter) LogLogicalOp(
676676
s.w.LogLogicalOp(op, details)
677677
}
678678

679+
func (s spanSetWriter) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
680+
return s.w.OverrideTxnDidNotUpdateMetaToFalse(ctx)
681+
}
682+
679683
// ReadWriter is used outside of the spanset package internally, in ccl.
680684
type ReadWriter struct {
681685
spanSetReader

pkg/storage/engine.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,31 @@ type Writer interface {
679679
//
680680
// It is safe to modify the contents of the arguments after it returns.
681681
SingleClearEngineKey(key EngineKey) error
682+
683+
// OverrideTxnDidNotUpdateMetaToFalse is a temporary method that will be removed
684+
// for 22.1.
685+
//
686+
// See #69891 for details on the bug related to usage of SingleDelete in
687+
// separated intent resolution. The following is needed for correctly
688+
// migrating from 21.1 to 21.2.
689+
//
690+
// We have fixed the intent resolution code path in 21.2-beta to use
691+
// SingleDelete more conservatively. The 21.2-GA will also likely include
692+
// Pebble changes to make the old buggy usage of SingleDelete correct.
693+
// However, there is a problem if someone upgrades from 21.1 to
694+
// 21.2-beta/21.2-GA:
695+
// 21.1 nodes will not write separated intents while they are the
696+
// leaseholder for a range. However they can become the leaseholder for a
697+
// range after a separated intent was written (in a mixed version cluster).
698+
// Hence they can resolve separated intents. The logic in 21.1 for using
699+
// SingleDelete when resolving intents is similarly buggy, and the Pebble
700+
// code included in 21.1 will not make this buggy usage correct. The
701+
// solution is for 21.2 nodes to never set txnDidNotUpdateMeta=true when
702+
// writing separated intents, until the cluster version is at the version
703+
// when the buggy code was fixed in 21.2. So 21.1 code will never use
704+
// SingleDelete when resolving these separated intents (since the only
705+
// separated intents being written are by 21.2 nodes).
706+
OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool
682707
}
683708

684709
// ReadWriter is the read/write interface to an engine's data.

pkg/storage/mvcc.go

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,8 +1075,7 @@ func (b *putBuffer) putIntentMeta(
10751075
ctx context.Context,
10761076
writer Writer,
10771077
key MVCCKey,
1078-
state PrecedingIntentState,
1079-
txnDidNotUpdateMeta bool,
1078+
helper txnDidNotUpdateMetaHelper,
10801079
meta *enginepb.MVCCMetadata,
10811080
) (keyBytes, valBytes int64, separatedIntentCountDelta int, err error) {
10821081
if meta.Txn != nil && meta.Timestamp.ToTimestamp() != meta.Txn.WriteTimestamp {
@@ -1085,32 +1084,62 @@ func (b *putBuffer) putIntentMeta(
10851084
return 0, 0, 0, errors.AssertionFailedf(
10861085
"meta.Timestamp != meta.Txn.WriteTimestamp: %s != %s", meta.Timestamp, meta.Txn.WriteTimestamp)
10871086
}
1088-
// All nodes in this cluster understand separated intents, so can fiddle
1089-
// with TxnDidNotUpdateMeta, which is not understood by older nodes (which
1090-
// are no longer present, and will never again be present).
1091-
//
1092-
// NB: the parameter txnDidNotUpdateMeta is about what happened prior to
1093-
// this Put, and is passed through to writer below. The field
1094-
// TxnDidNotUpdateMeta, in the MVCCMetadata we are about to write,
1095-
// includes what happened in this Put.
1096-
if state == NoExistingIntent {
1097-
meta.TxnDidNotUpdateMeta = &trueValue
1098-
} else {
1099-
// Absence represents false.
1100-
meta.TxnDidNotUpdateMeta = nil
1101-
}
1102-
1087+
helper.populateMeta(ctx, meta)
11031088
bytes, err := b.marshalMeta(meta)
11041089
if err != nil {
11051090
return 0, 0, 0, err
11061091
}
11071092
if separatedIntentCountDelta, err = writer.PutIntent(
1108-
ctx, key.Key, bytes, state, txnDidNotUpdateMeta, meta.Txn.ID); err != nil {
1093+
ctx, key.Key, bytes, helper.state, helper.valueForPutIntent(), meta.Txn.ID); err != nil {
11091094
return 0, 0, 0, err
11101095
}
11111096
return int64(key.EncodedSize()), int64(len(bytes)), separatedIntentCountDelta, nil
11121097
}
11131098

1099+
// txnDidNotUpdateMetaHelper is used to decide what to put in the MVCCMetadata
1100+
// proto, and what value to pass in the txnDidNotUpdateMeta parameter of
1101+
// PutIntent.
1102+
//
1103+
// Note that all nodes in this cluster understand separated intents, so can
1104+
// fiddle with TxnDidNotUpdateMeta, which is not understood by older nodes
1105+
// (which are no longer present, and will never again be present). Our
1106+
// assumption is that 21.1 nodes will never be writing separated intents since
1107+
// it is disabled by default and the code is known to have a bug. However they
1108+
// can set MVCCMetadata.TxnDidNotUpdateMeta to true when writing interleaved
1109+
// intents (this is harmless since interleaved intents do not invoke the
1110+
// SingleDelete optimization).
1111+
// - The txnDidNotUpdateMeta field is about what happened prior to this Put,
1112+
// and is intended to be passed through to Writer.PutIntent. It is only used
1113+
// by intentDemuxWriter when there was an existing separated intent that was
1114+
// written once and the writer is writing interleaved intents, and the true
1115+
// value enables SingleDelete of the existing separated intent. This
1116+
// transition can happen when an intent written at a 21.2 node is rewritten
1117+
// on a 21.1 node. But since 21.2 nodes never set
1118+
// MVCCMetadata.TxnDidNotUpdateMeta to true in a mixed version cluster (see
1119+
// next bullet), this will always be false in such a situation.
1120+
// - The state field describes the preceding intent state. It is intended to
1121+
// be used for populating the MVCCMetadata.TxnDidNotUpdateMeta. This is
1122+
// where we use Writer.OverrideTxnDidNotUpdateMetaToFalse to override to
1123+
// false until there can never be 21.1 nodes.
1124+
type txnDidNotUpdateMetaHelper struct {
1125+
txnDidNotUpdateMeta bool
1126+
state PrecedingIntentState
1127+
w Writer
1128+
}
1129+
1130+
func (t txnDidNotUpdateMetaHelper) valueForPutIntent() bool {
1131+
return t.txnDidNotUpdateMeta
1132+
}
1133+
1134+
func (t txnDidNotUpdateMetaHelper) populateMeta(ctx context.Context, meta *enginepb.MVCCMetadata) {
1135+
if t.state == NoExistingIntent && !t.w.OverrideTxnDidNotUpdateMetaToFalse(ctx) {
1136+
meta.TxnDidNotUpdateMeta = &trueValue
1137+
} else {
1138+
// Absence represents false.
1139+
meta.TxnDidNotUpdateMeta = nil
1140+
}
1141+
}
1142+
11141143
// MVCCPut sets the value for a specified key. It will save the value
11151144
// with different versions according to its timestamp and update the
11161145
// key metadata. The timestamp must be passed as a parameter; using
@@ -1746,7 +1775,12 @@ func mvccPutInternal(
17461775
var separatedIntentCountDelta int
17471776
if newMeta.Txn != nil {
17481777
metaKeySize, metaValSize, separatedIntentCountDelta, err = buf.putIntentMeta(
1749-
ctx, writer, metaKey, precedingIntentState, txnDidNotUpdateMeta, newMeta)
1778+
ctx, writer, metaKey,
1779+
txnDidNotUpdateMetaHelper{
1780+
txnDidNotUpdateMeta: txnDidNotUpdateMeta,
1781+
state: precedingIntentState,
1782+
w: writer,
1783+
}, newMeta)
17501784
if err != nil {
17511785
return err
17521786
}
@@ -3176,7 +3210,13 @@ func mvccResolveWriteIntent(
31763210
// to do anything to update the intent but to move the timestamp forward,
31773211
// even if it can.
31783212
metaKeySize, metaValSize, separatedIntentCountDelta, err = buf.putIntentMeta(
3179-
ctx, rw, metaKey, precedingIntentState, canSingleDelHelper.v(), &buf.newMeta)
3213+
ctx, rw, metaKey,
3214+
txnDidNotUpdateMetaHelper{
3215+
txnDidNotUpdateMeta: canSingleDelHelper.v(),
3216+
state: precedingIntentState,
3217+
w: rw,
3218+
},
3219+
&buf.newMeta)
31803220
} else {
31813221
metaKeySize = int64(metaKey.EncodedSize())
31823222
separatedIntentCountDelta, err =

pkg/storage/mvcc_history_test.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ import (
2020
"strings"
2121
"testing"
2222

23+
"github.com/cockroachdb/cockroach/pkg/clusterversion"
2324
"github.com/cockroachdb/cockroach/pkg/keys"
2425
"github.com/cockroachdb/cockroach/pkg/roachpb"
26+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2527
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
2628
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
2729
"github.com/cockroachdb/cockroach/pkg/util/hlc"
@@ -109,7 +111,30 @@ func TestMVCCHistories(t *testing.T) {
109111
"randomly setting enableSeparated: %t", enabledSeparated)
110112
}
111113
// We start from a clean slate in every test file.
112-
engine, err := Open(ctx, InMemory(), CacheSize(1<<20 /* 1 MiB */), SetSeparatedIntents(!enabledSeparated))
114+
engine, err := Open(ctx, InMemory(), CacheSize(1<<20 /* 1 MiB */),
115+
SetSeparatedIntents(!enabledSeparated),
116+
func(cfg *engineConfig) error {
117+
if !overridden {
118+
// Latest cluster version, since these tests are not ones where we
119+
// are examining differences related to separated intents.
120+
cfg.Settings = cluster.MakeTestingClusterSettings()
121+
} else {
122+
if !enabledSeparated {
123+
// 21.1, which has the old code that is unaware about the changes
124+
// we have made for OverrideTxnDidNotUpdateMetaToFalse. By using
125+
// the latest cluster version, we effectively undo these changes.
126+
cfg.Settings = cluster.MakeTestingClusterSettings()
127+
} else if strings.Contains(path, "mixed_cluster") {
128+
v21_1 := clusterversion.ByKey(clusterversion.V21_1)
129+
cfg.Settings =
130+
cluster.MakeTestingClusterSettingsWithVersions(v21_1, v21_1, true)
131+
} else {
132+
// Latest cluster version.
133+
cfg.Settings = cluster.MakeTestingClusterSettings()
134+
}
135+
}
136+
return nil
137+
})
113138
if err != nil {
114139
t.Fatal(err)
115140
}

pkg/storage/pebble.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,11 @@ func (p *Pebble) PutEngineKey(key EngineKey, value []byte) error {
10101010
return p.db.Set(key.Encode(), value, pebble.Sync)
10111011
}
10121012

1013+
// OverrideTxnDidNotUpdateMetaToFalse implements the Engine interface.
1014+
func (p *Pebble) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
1015+
return overrideTxnDidNotUpdateMetaToFalse(ctx, p.settings)
1016+
}
1017+
10131018
// IsSeparatedIntentsEnabledForTesting implements the Engine interface.
10141019
func (p *Pebble) IsSeparatedIntentsEnabledForTesting(ctx context.Context) bool {
10151020
return !p.disableSeparatedIntents
@@ -1265,7 +1270,7 @@ func (p *Pebble) GetAuxiliaryDir() string {
12651270
func (p *Pebble) NewBatch() Batch {
12661271
return newPebbleBatch(
12671272
p.db, p.db.NewIndexedBatch(), false, /* writeOnly */
1268-
p.disableSeparatedIntents)
1273+
p.disableSeparatedIntents, overrideTxnDidNotUpdateMetaToFalse(context.TODO(), p.settings))
12691274
}
12701275

12711276
// NewReadOnly implements the Engine interface.
@@ -1275,7 +1280,8 @@ func (p *Pebble) NewReadOnly() ReadWriter {
12751280

12761281
// NewUnindexedBatch implements the Engine interface.
12771282
func (p *Pebble) NewUnindexedBatch(writeOnly bool) Batch {
1278-
return newPebbleBatch(p.db, p.db.NewBatch(), writeOnly, p.disableSeparatedIntents)
1283+
return newPebbleBatch(p.db, p.db.NewBatch(), writeOnly, p.disableSeparatedIntents,
1284+
overrideTxnDidNotUpdateMetaToFalse(context.TODO(), p.settings))
12791285
}
12801286

12811287
// NewSnapshot implements the Engine interface.
@@ -1806,6 +1812,10 @@ func (p *pebbleReadOnly) LogLogicalOp(op MVCCLogicalOpType, details MVCCLogicalO
18061812
panic("not implemented")
18071813
}
18081814

1815+
func (p *pebbleReadOnly) OverrideTxnDidNotUpdateMetaToFalse(ctx context.Context) bool {
1816+
panic("not implemented")
1817+
}
1818+
18091819
// pebbleSnapshot represents a snapshot created using Pebble.NewSnapshot().
18101820
type pebbleSnapshot struct {
18111821
snapshot *pebble.Snapshot
@@ -2097,3 +2107,11 @@ func pebbleExportToSst(
20972107

20982108
return rows.BulkOpSummary, MVCCKey{Key: resumeKey, Timestamp: resumeTS}, nil
20992109
}
2110+
2111+
// See the comment for Writer.OverrideTxnDidNotUpdateMetaToFalse.
2112+
func overrideTxnDidNotUpdateMetaToFalse(ctx context.Context, st *cluster.Settings) bool {
2113+
// The fix to the single delete bug in 21.2 has nothing to do with
2114+
// PebbleFormatVersioned, but both are part of the 21.2 beta, which will be
2115+
// the earliest production version of 21.2.
2116+
return !st.Version.ActiveVersionOrEmpty(ctx).IsActive(clusterversion.PebbleFormatVersioned)
2117+
}

pkg/storage/pebble_batch.go

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ type pebbleBatch struct {
4343
// engine state. This relies on the fact that all pebbleIterators created
4444
// here are marked as reusable, which causes pebbleIterator.Close to not
4545
// close iter. iter will be closed when pebbleBatch.Close is called.
46-
prefixIter pebbleIterator
47-
normalIter pebbleIterator
48-
prefixEngineIter pebbleIterator
49-
normalEngineIter pebbleIterator
50-
iter cloneableIter
51-
writeOnly bool
52-
closed bool
46+
prefixIter pebbleIterator
47+
normalIter pebbleIterator
48+
prefixEngineIter pebbleIterator
49+
normalEngineIter pebbleIterator
50+
iter cloneableIter
51+
writeOnly bool
52+
closed bool
53+
overrideTxnDidNotUpdateMetaToFalse bool
5354

5455
wrappedIntentWriter intentDemuxWriter
5556
// scratch space for wrappedIntentWriter.
@@ -66,7 +67,11 @@ var pebbleBatchPool = sync.Pool{
6667

6768
// Instantiates a new pebbleBatch.
6869
func newPebbleBatch(
69-
db *pebble.DB, batch *pebble.Batch, writeOnly bool, disableSeparatedIntents bool,
70+
db *pebble.DB,
71+
batch *pebble.Batch,
72+
writeOnly bool,
73+
disableSeparatedIntents bool,
74+
overrideTxnDidNotUpdateMetaToFalse bool,
7075
) *pebbleBatch {
7176
pb := pebbleBatchPool.Get().(*pebbleBatch)
7277
*pb = pebbleBatch{
@@ -94,6 +99,10 @@ func newPebbleBatch(
9499
reusable: true,
95100
},
96101
writeOnly: writeOnly,
102+
// A batch is not long-lived, so using the same value (which could be
103+
// slightly stale) is fine for the lifetime of the batch. Staleness is not
104+
// a correctness issue.
105+
overrideTxnDidNotUpdateMetaToFalse: overrideTxnDidNotUpdateMetaToFalse,
97106
}
98107
pb.wrappedIntentWriter = wrapIntentWriter(context.Background(), pb, disableSeparatedIntents)
99108
return pb
@@ -474,6 +483,11 @@ func (p *pebbleBatch) PutEngineKey(key EngineKey, value []byte) error {
474483
return p.batch.Set(p.buf, value, nil)
475484
}
476485

486+
// OverrideTxnDidNotUpdateMetaToFalse implements the Batch interface.
487+
func (p *pebbleBatch) OverrideTxnDidNotUpdateMetaToFalse(_ context.Context) bool {
488+
return p.overrideTxnDidNotUpdateMetaToFalse
489+
}
490+
477491
func (p *pebbleBatch) put(key MVCCKey, value []byte) error {
478492
if len(key.Key) == 0 {
479493
return emptyKeyError()

0 commit comments

Comments
 (0)