diff --git a/pkg/kv/kvserver/gc/gc_iterator.go b/pkg/kv/kvserver/gc/gc_iterator.go index a4c3fd5bd7af..04ec0a9496a4 100644 --- a/pkg/kv/kvserver/gc/gc_iterator.go +++ b/pkg/kv/kvserver/gc/gc_iterator.go @@ -35,6 +35,9 @@ type gcIterator struct { cachedRangeTombstoneKey roachpb.Key } +// TODO(sumeer): change gcIterator to use MVCCValueLenAndIsTombstone(). It +// needs to get the value only for intents. + func makeGCIterator( desc *roachpb.RangeDescriptor, snap storage.Reader, diff --git a/pkg/kv/kvserver/rangefeed/task_test.go b/pkg/kv/kvserver/rangefeed/task_test.go index ce8079a163f4..8e06346033b3 100644 --- a/pkg/kv/kvserver/rangefeed/task_test.go +++ b/pkg/kv/kvserver/rangefeed/task_test.go @@ -190,6 +190,16 @@ func (s *testIterator) UnsafeValue() []byte { return s.curKV().Value } +func (s *testIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + rawV := s.curKV().Value + v, err := storage.DecodeMVCCValue(rawV) + return len(rawV), v.IsTombstone(), err +} + +func (s *testIterator) ValueLen() int { + return len(s.curKV().Value) +} + func (s *testIterator) curKV() storage.MVCCKeyValue { return s.kvs[s.cur] } diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index a691a8684c55..c2ab2a99fd82 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -174,6 +174,16 @@ func (i *MVCCIterator) UnsafeValue() []byte { return i.i.UnsafeValue() } +// MVCCValueLenAndIsTombstone implements the MVCCIterator interface. +func (i *MVCCIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + return i.i.MVCCValueLenAndIsTombstone() +} + +// ValueLen implements the MVCCIterator interface. +func (i *MVCCIterator) ValueLen() int { + return i.i.ValueLen() +} + // HasPointAndRange implements SimpleMVCCIterator. func (i *MVCCIterator) HasPointAndRange() (bool, bool) { return i.i.HasPointAndRange() diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 5112438985d4..7533dd616c89 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -165,11 +165,26 @@ type SimpleMVCCIterator interface { UnsafeKey() MVCCKey // UnsafeValue returns the current point key value as a byte slice. // This must only be called when it is known that the iterator is positioned - // at a point value, i.e. HasPointAndRange has returned (true, *). + // at a point value, i.e. HasPointAndRange has returned (true, *). If + // possible, use MVCCValueLenAndIsTombstone() instead. // // The memory is invalidated on the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. // Use Value() if that is undesirable. UnsafeValue() []byte + // MVCCValueLenAndIsTombstone should be called only for MVCC (i.e., + // UnsafeKey().IsValue()) point values, when the actual point value is not + // needed, for example when updating stats and making GC decisions, and it + // is sufficient for the caller to know the length (len(UnsafeValue()), and + // whether the underlying MVCCValue is a tombstone + // (MVCCValue.IsTombstone()). This is an optimization that can allow the + // underlying storage layer to avoid retrieving the value. + // REQUIRES: HasPointAndRange() has returned (true, *). + MVCCValueLenAndIsTombstone() (int, bool, error) + // ValueLen can be called for MVCC or non-MVCC values, when only the value + // length is needed. This is an optimization that can allow the underlying + // storage layer to avoid retrieving the value. + // REQUIRES: HasPointAndRange() has returned (true, *). + ValueLen() int // HasPointAndRange returns whether the current iterator position has a point // key and/or a range key. Must check Valid() first. At least one of these // will always be true for a valid iterator. For details on range keys, see @@ -1559,6 +1574,24 @@ func assertSimpleMVCCIteratorInvariants(iter SimpleMVCCIterator) error { return errors.AssertionFailedf("hasRange=false but RangeKeys=%s", r) } } + if hasPoint { + value := iter.UnsafeValue() + valueLen := iter.ValueLen() + if len(value) != valueLen { + return errors.AssertionFailedf("length of UnsafeValue %d != ValueLen %d", len(value), valueLen) + } + if key.IsValue() { + valueLen2, _, err := iter.MVCCValueLenAndIsTombstone() + if err == nil { + if len(value) != valueLen2 { + return errors.AssertionFailedf("length of UnsafeValue %d != MVCCValueLenAndIsTombstone %d", + len(value), valueLen2) + } + } + // Else err != nil. Ignore, since SimpleMVCCIterator is not to be held + // responsible for data corruption or tests writing non-MVCCValues. + } + } return nil } diff --git a/pkg/storage/intent_interleaving_iter.go b/pkg/storage/intent_interleaving_iter.go index b0a116447477..2cd6491a5504 100644 --- a/pkg/storage/intent_interleaving_iter.go +++ b/pkg/storage/intent_interleaving_iter.go @@ -895,6 +895,20 @@ func (i *intentInterleavingIter) UnsafeValue() []byte { return i.iter.UnsafeValue() } +func (i *intentInterleavingIter) MVCCValueLenAndIsTombstone() (int, bool, error) { + if i.isCurAtIntentIter() { + return 0, false, errors.Errorf("not at MVCC value") + } + return i.iter.MVCCValueLenAndIsTombstone() +} + +func (i *intentInterleavingIter) ValueLen() int { + if i.isCurAtIntentIter() { + return i.intentIter.ValueLen() + } + return i.iter.ValueLen() +} + func (i *intentInterleavingIter) Key() MVCCKey { key := i.UnsafeKey() keyCopy := make([]byte, len(key.Key)) diff --git a/pkg/storage/multi_iterator.go b/pkg/storage/multi_iterator.go index c31cb23e5725..f2cd93fb1433 100644 --- a/pkg/storage/multi_iterator.go +++ b/pkg/storage/multi_iterator.go @@ -98,6 +98,16 @@ func (f *multiIterator) UnsafeValue() []byte { return f.iters[f.currentIdx].UnsafeValue() } +// MVCCValueLenAndIsTombstone implements the SimpleMVCCIterator interface. +func (f *multiIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + return f.iters[f.currentIdx].MVCCValueLenAndIsTombstone() +} + +// ValueLen implements the SimpleMVCCIterator interface. +func (f *multiIterator) ValueLen() int { + return f.iters[f.currentIdx].ValueLen() +} + // HasPointAndRange implements SimpleMVCCIterator. func (f *multiIterator) HasPointAndRange() (bool, bool) { panic("not implemented") diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index c528e84ecf08..4995142c4981 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -607,22 +607,16 @@ func updateStatsOnRangeKeyPutVersion( } // updateStatsOnRangeKeyCover updates MVCCStats for when an MVCC range key -// covers an MVCC point key at the given timestamp. -func updateStatsOnRangeKeyCover(ts hlc.Timestamp, key MVCCKey, valueRaw []byte) enginepb.MVCCStats { +// covers an MVCC point key at the given timestamp. The valueLen and +// isTombstone are attributes of the point key. +func updateStatsOnRangeKeyCover( + ts hlc.Timestamp, key MVCCKey, valueLen int, isTombstone bool, +) enginepb.MVCCStats { var ms enginepb.MVCCStats ms.AgeTo(ts.WallTime) - - // Determine whether the point key was a tombstone. If decoding fails, we - // assume the point key is live. - value, ok, err := tryDecodeSimpleMVCCValue(valueRaw) - if !ok && err == nil { - value, err = decodeExtendedMVCCValue(valueRaw) - } - isTombstone := err == nil && value.IsTombstone() - if !isTombstone { ms.LiveCount-- - ms.LiveBytes -= int64(key.EncodedSize()) + int64(len(valueRaw)) + ms.LiveBytes -= int64(key.EncodedSize()) + int64(valueLen) } return ms } @@ -1849,14 +1843,13 @@ func mvccPutInternal( // existing MVCC range tombstone. If it isn't, account for it. _, hasRange := iter.HasPointAndRange() if !hasRange || iter.RangeKeys().Versions[0].Timestamp.Less(prevUnsafeKey.Timestamp) { - prevValRaw := iter.UnsafeValue() - prevVal, err := DecodeMVCCValue(prevValRaw) + prevValLen, prevValIsTombstone, err := iter.MVCCValueLenAndIsTombstone() if err != nil { return err } - if prevVal.Value.IsPresent() { - prevIsValue = prevVal.Value.IsPresent() - prevValSize = int64(len(prevValRaw)) + if !prevValIsTombstone { + prevIsValue = !prevValIsTombstone + prevValSize = int64(prevValLen) } } } @@ -2670,8 +2663,7 @@ func MVCCClearTimeRange( } // Process point keys. - vRaw := iter.UnsafeValue() - v, err := DecodeMVCCValue(vRaw) + valueLen, valueIsTombstone, err := iter.MVCCValueLenAndIsTombstone() if err != nil { return nil, err } @@ -2683,8 +2675,8 @@ func MVCCClearTimeRange( // Since the key matches, our previous clear "restored" this revision of // the this key, so update the stats with this as the "restored" key. restoredMeta.KeyBytes = MVCCVersionTimestampSize - restoredMeta.ValBytes = int64(len(vRaw)) - restoredMeta.Deleted = v.IsTombstone() + restoredMeta.ValBytes = int64(valueLen) + restoredMeta.Deleted = valueIsTombstone restoredMeta.Timestamp = k.Timestamp.ToLegacyTimestamp() // If there was an MVCC range tombstone between this version and the @@ -2730,7 +2722,7 @@ func MVCCClearTimeRange( // previous point key above. We must also check that the clear actually // revealed the key, since it may have been covered by the point key that // we cleared or a different range tombstone below the one we cleared. - if !v.IsTombstone() { + if !valueIsTombstone { if v, ok := clearRangeKeys.FirstAtOrAbove(k.Timestamp); ok { if !clearedMetaKey.Key.Equal(k.Key) || !clearedMeta.Timestamp.ToTimestamp().LessEq(v.Timestamp) { @@ -2739,7 +2731,7 @@ func MVCCClearTimeRange( ms.Add(enginepb.MVCCStats{ LastUpdateNanos: v.Timestamp.WallTime, LiveCount: 1, - LiveBytes: int64(k.EncodedSize()) + int64(len(vRaw)), + LiveBytes: int64(k.EncodedSize()) + int64(valueLen), }) } } @@ -2752,8 +2744,8 @@ func MVCCClearTimeRange( clearMatchingKey(k) clearedMetaKey.Key = append(clearedMetaKey.Key[:0], k.Key...) clearedMeta.KeyBytes = MVCCVersionTimestampSize - clearedMeta.ValBytes = int64(len(vRaw)) - clearedMeta.Deleted = v.IsTombstone() + clearedMeta.ValBytes = int64(valueLen) + clearedMeta.Deleted = valueIsTombstone clearedMeta.Timestamp = k.Timestamp.ToLegacyTimestamp() // Move the iterator to the next key/value in linear iteration even if it @@ -3344,7 +3336,11 @@ func MVCCDeleteRangeUsingTombstone( return errors.Errorf("can't write range tombstone across inline key %s", key) } if ms != nil { - ms.Add(updateStatsOnRangeKeyCover(timestamp, key, iter.UnsafeValue())) + valueLen, isTombstone, err := iter.MVCCValueLenAndIsTombstone() + if err != nil { + return err + } + ms.Add(updateStatsOnRangeKeyCover(timestamp, key, valueLen, isTombstone)) } iter.NextKey() } @@ -3922,6 +3918,7 @@ type iterForKeyVersions interface { Next() UnsafeKey() MVCCKey UnsafeValue() []byte + MVCCValueLenAndIsTombstone() (int, bool, error) ValueProto(msg protoutil.Message) error RangeKeys() MVCCRangeKeyStack } @@ -4032,6 +4029,13 @@ func (s *separatedIntentAndVersionIter) UnsafeValue() []byte { return s.engineIter.UnsafeValue() } +func (s *separatedIntentAndVersionIter) MVCCValueLenAndIsTombstone() (int, bool, error) { + if !s.atMVCCIter { + return 0, false, errors.AssertionFailedf("not at MVCC value") + } + return s.mvccIter.MVCCValueLenAndIsTombstone() +} + func (s *separatedIntentAndVersionIter) ValueProto(msg protoutil.Message) error { if s.atMVCCIter { return s.mvccIter.ValueProto(msg) @@ -4404,16 +4408,12 @@ func mvccResolveWriteIntent( if hasPoint, hasRange := iter.HasPointAndRange(); hasPoint { if unsafeKey := iter.UnsafeKey(); unsafeKey.Key.Equal(oldKey.Key) { if !hasRange || iter.RangeKeys().Versions[0].Timestamp.Less(unsafeKey.Timestamp) { - unsafeValRaw := iter.UnsafeValue() - prevVal, prevValOK, err := tryDecodeSimpleMVCCValue(unsafeValRaw) - if !prevValOK && err == nil { - prevVal, err = decodeExtendedMVCCValue(unsafeValRaw) - } + prevValLen, prevValIsTombstone, err := iter.MVCCValueLenAndIsTombstone() if err != nil { return false, err } - prevIsValue = prevVal.Value.IsPresent() - prevValSize = int64(len(iter.UnsafeValue())) + prevIsValue = !prevValIsTombstone + prevValSize = int64(prevValLen) } } } @@ -4482,8 +4482,13 @@ func mvccResolveWriteIntent( }) ok = false + + // These variables containing the next key-value information are initialized + // in the following if-block when ok is set to true. These are only read + // after the if-block when ok is true (i.e., they were initialized). var unsafeNextKey MVCCKey - var unsafeNextValueRaw []byte + var nextValueLen int + var nextValueIsTombstone bool if nextKey := latestKey.Next(); nextKey.IsValue() { // The latestKey was not the smallest possible timestamp {WallTime: 0, // Logical: 1}. Practically, this is the only case that will occur in @@ -4511,21 +4516,18 @@ func mvccResolveWriteIntent( // particular timestamp. return false, errors.Errorf("expected an MVCC value key: %s", unsafeNextKey) } - unsafeNextValueRaw = iter.UnsafeValue() - // If a non-tombstone point key is covered by a range tombstone, then - // synthesize a point tombstone at the lowest range tombstone covering it. - // This is where the point key ceases to exist, contributing to GCBytesAge. - unsafeNextValue, unsafeNextValueOK, err := tryDecodeSimpleMVCCValue(unsafeNextValueRaw) - if !unsafeNextValueOK && err == nil { - unsafeNextValue, err = decodeExtendedMVCCValue(unsafeNextValueRaw) - } + nextValueLen, nextValueIsTombstone, err = iter.MVCCValueLenAndIsTombstone() if err != nil { return false, err } - if !unsafeNextValue.IsTombstone() && hasRange { + // If a non-tombstone point key is covered by a range tombstone, then + // synthesize a point tombstone at the lowest range tombstone covering it. + // This is where the point key ceases to exist, contributing to GCBytesAge. + if !nextValueIsTombstone && hasRange { if v, found := iter.RangeKeys().FirstAtOrAbove(unsafeNextKey.Timestamp); found { unsafeNextKey.Timestamp = v.Timestamp - unsafeNextValueRaw = []byte{} + nextValueIsTombstone = true + nextValueLen = 0 } } } @@ -4546,16 +4548,11 @@ func mvccResolveWriteIntent( return true, nil } - // Get the bytes for the next version so we have size for stat counts. - unsafeNextValue, err := DecodeMVCCValue(unsafeNextValueRaw) - if err != nil { - return false, err - } // Update the keyMetadata with the next version. buf.newMeta = enginepb.MVCCMetadata{ - Deleted: unsafeNextValue.IsTombstone(), + Deleted: nextValueIsTombstone, KeyBytes: MVCCVersionTimestampSize, - ValBytes: int64(len(unsafeNextValueRaw)), + ValBytes: int64(nextValueLen), } if err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onAbortIntent(), meta.Txn.ID); err != nil { return false, err @@ -5002,22 +4999,17 @@ func MVCCGarbageCollect( break } if ms != nil { - unsafeValRaw := iter.UnsafeValue() - unsafeVal, unsafeValOK, err := tryDecodeSimpleMVCCValue(unsafeValRaw) - if !unsafeValOK && err == nil { - unsafeVal, err = decodeExtendedMVCCValue(unsafeValRaw) - } + valLen, valIsTombstone, err := iter.MVCCValueLenAndIsTombstone() if err != nil { return err } - keySize := MVCCVersionTimestampSize - valSize := int64(len(unsafeValRaw)) + valSize := int64(valLen) // A non-deletion becomes non-live when its newer neighbor shows up. // A deletion tombstone becomes non-live right when it is created. fromNS := prevNanos - if unsafeVal.IsTombstone() { + if valIsTombstone { fromNS = unsafeIterKey.Timestamp.WallTime } else if !rangeTombstones.IsEmpty() { // For non deletions, we need to find if we had a range tombstone @@ -5541,10 +5533,11 @@ func computeStatsForIterWithVisitors( } unsafeKey := iter.UnsafeKey() - unsafeValue := iter.UnsafeValue() if pointKeyVisitor != nil { - if err := pointKeyVisitor(unsafeKey, unsafeValue); err != nil { + // NB: pointKeyVisitor is typically nil, so we will typically not call + // iter.UnsafeValue(). + if err := pointKeyVisitor(unsafeKey, iter.UnsafeValue()); err != nil { return enginepb.MVCCStats{}, err } } @@ -5588,24 +5581,25 @@ func computeStatsForIterWithVisitors( } } + var valueLen int + var mvccValueIsTombstone bool + if isValue { + // MVCC value + var err error + valueLen, mvccValueIsTombstone, err = iter.MVCCValueLenAndIsTombstone() + if err != nil { + return enginepb.MVCCStats{}, errors.Wrap(err, "unable to decode MVCCValue") + } + } else { + valueLen = iter.ValueLen() + } if implicitMeta { + // INVARIANT: implicitMeta => isValue. // No MVCCMetadata entry for this series of keys. - var isTombstone bool - { - mvccValue, ok, err := tryDecodeSimpleMVCCValue(unsafeValue) - if !ok && err == nil { - mvccValue, err = decodeExtendedMVCCValue(unsafeValue) - } - if err != nil { - return ms, errors.Wrap(err, "unable to decode MVCCValue") - } - isTombstone = mvccValue.IsTombstone() - } - meta.Reset() meta.KeyBytes = MVCCVersionTimestampSize - meta.ValBytes = int64(len(unsafeValue)) - meta.Deleted = isTombstone + meta.ValBytes = int64(valueLen) + meta.Deleted = mvccValueIsTombstone meta.Timestamp.WallTime = unsafeKey.Timestamp.WallTime } @@ -5613,13 +5607,13 @@ func computeStatsForIterWithVisitors( metaKeySize := int64(len(unsafeKey.Key)) + 1 var metaValSize int64 if !implicitMeta { - metaValSize = int64(len(unsafeValue)) + metaValSize = int64(valueLen) } totalBytes := metaKeySize + metaValSize first = true if !implicitMeta { - if err := protoutil.Unmarshal(unsafeValue, &meta); err != nil { + if err := protoutil.Unmarshal(iter.UnsafeValue(), &meta); err != nil { return ms, errors.Wrap(err, "unable to decode MVCCMetadata") } } @@ -5654,7 +5648,7 @@ func computeStatsForIterWithVisitors( } } - totalBytes := int64(len(unsafeValue)) + MVCCVersionTimestampSize + totalBytes := int64(valueLen) + MVCCVersionTimestampSize if isSys { ms.SysBytes += totalBytes } else { @@ -5680,26 +5674,14 @@ func computeStatsForIterWithVisitors( return ms, errors.Errorf("expected mvcc metadata key bytes to equal %d; got %d "+ "(meta: %s)", MVCCVersionTimestampSize, meta.KeyBytes, &meta) } - if meta.ValBytes != int64(len(unsafeValue)) { + if meta.ValBytes != int64(valueLen) { return ms, errors.Errorf("expected mvcc metadata val bytes to equal %d; got %d "+ - "(meta: %s)", len(unsafeValue), meta.ValBytes, &meta) + "(meta: %s)", valueLen, meta.ValBytes, &meta) } accrueGCAgeNanos = meta.Timestamp.WallTime } else { // Overwritten value. Is it a deletion tombstone? - var isTombstone bool - { - mvccValue, ok, err := tryDecodeSimpleMVCCValue(unsafeValue) - if !ok && err == nil { - mvccValue, err = decodeExtendedMVCCValue(unsafeValue) - } - if err != nil { - return ms, errors.Wrap(err, "unable to decode MVCCValue") - } - isTombstone = mvccValue.IsTombstone() - } - - if isTombstone { + if mvccValueIsTombstone { // The contribution of the tombstone picks up GCByteAge from its own timestamp on. ms.GCBytesAge += totalBytes * (nowNanos/1e9 - unsafeKey.Timestamp.WallTime/1e9) } else if nextRangeTombstone.IsSet() && nextRangeTombstone.WallTime < accrueGCAgeNanos { @@ -5715,7 +5697,7 @@ func computeStatsForIterWithVisitors( accrueGCAgeNanos = unsafeKey.Timestamp.WallTime } ms.KeyBytes += MVCCVersionTimestampSize - ms.ValBytes += int64(len(unsafeValue)) + ms.ValBytes += int64(valueLen) ms.ValCount++ } } @@ -6288,10 +6270,11 @@ func ReplacePointTombstonesWithRangeTombstones( } // Skip non-tombstone values. - valueRaw := iter.UnsafeValue() - if value, err := DecodeMVCCValue(valueRaw); err != nil { + valueLen, isTombstone, err := iter.MVCCValueLenAndIsTombstone() + if err != nil { return err - } else if !value.IsTombstone() { + } + if !isTombstone { iter.NextKey() continue } @@ -6307,7 +6290,7 @@ func ReplacePointTombstonesWithRangeTombstones( // Clear the point key, and construct a meta record for stats. clearedMeta := &enginepb.MVCCMetadata{ KeyBytes: MVCCVersionTimestampSize, - ValBytes: int64(len(valueRaw)), + ValBytes: int64(valueLen), Deleted: true, Timestamp: key.Timestamp.ToLegacyTimestamp(), } @@ -6326,15 +6309,14 @@ func ReplacePointTombstonesWithRangeTombstones( return err } else if ok { if key = iter.UnsafeKey(); key.Key.Equal(clearedKey.Key) { - valueRaw = iter.UnsafeValue() - value, err := DecodeMVCCValue(valueRaw) + valueLen, isTombstone, err = iter.MVCCValueLenAndIsTombstone() if err != nil { return err } restoredMeta = &enginepb.MVCCMetadata{ KeyBytes: MVCCVersionTimestampSize, - ValBytes: int64(len(valueRaw)), - Deleted: value.IsTombstone(), + ValBytes: int64(valueLen), + Deleted: isTombstone, Timestamp: key.Timestamp.ToLegacyTimestamp(), } if _, hasRange := iter.HasPointAndRange(); hasRange { diff --git a/pkg/storage/mvcc_history_metamorphic_iterator_test.go b/pkg/storage/mvcc_history_metamorphic_iterator_test.go index f5d99eaba741..692fc04b027c 100644 --- a/pkg/storage/mvcc_history_metamorphic_iterator_test.go +++ b/pkg/storage/mvcc_history_metamorphic_iterator_test.go @@ -320,6 +320,14 @@ func (m *metamorphicIterator) UnsafeValue() []byte { return m.it.UnsafeValue() } +func (m *metamorphicIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + return m.it.MVCCValueLenAndIsTombstone() +} + +func (m *metamorphicIterator) ValueLen() int { + return m.it.ValueLen() +} + func (m *metamorphicIterator) HasPointAndRange() (bool, bool) { return m.it.HasPointAndRange() } diff --git a/pkg/storage/mvcc_incremental_iterator.go b/pkg/storage/mvcc_incremental_iterator.go index 38a97f0f75f3..559052ddc605 100644 --- a/pkg/storage/mvcc_incremental_iterator.go +++ b/pkg/storage/mvcc_incremental_iterator.go @@ -653,6 +653,16 @@ func (i *MVCCIncrementalIterator) UnsafeValue() []byte { return i.iter.UnsafeValue() } +// MVCCValueLenAndIsTombstone implements the SimpleMVCCIterator interface. +func (i *MVCCIncrementalIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + return i.iter.MVCCValueLenAndIsTombstone() +} + +// ValueLen implements the SimpleMVCCIterator interface. +func (i *MVCCIncrementalIterator) ValueLen() int { + return i.iter.ValueLen() +} + // updateIgnoreTime updates the iterator's metadata and handles intents depending on the iterator's // intent policy. func (i *MVCCIncrementalIterator) updateIgnoreTime() { diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index 451fa44bcd14..f7adb7b3f329 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -550,6 +550,26 @@ func (p *pebbleIterator) UnsafeValue() []byte { return p.iter.Value() } +// MVCCValueLenAndIsTombstone implements the MVCCIterator interface. +func (p *pebbleIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + val := p.iter.Value() + // NB: don't move the following code into a helper since we desire inlining + // of tryDecodeSimpleMVCCValue. + v, ok, err := tryDecodeSimpleMVCCValue(val) + if err == nil && !ok { + v, err = decodeExtendedMVCCValue(val) + } + if err != nil { + return 0, false, err + } + return len(val), v.IsTombstone(), nil +} + +// ValueLen implements the MVCCIterator interface. +func (p *pebbleIterator) ValueLen() int { + return len(p.iter.Value()) +} + // SeekLT implements the MVCCIterator interface. func (p *pebbleIterator) SeekLT(key MVCCKey) { p.mvccDirIsReverse = true @@ -856,7 +876,7 @@ func findSplitKeyUsingIterator( bestSplitKey.Key = append(bestSplitKey.Key[:0], prevKey.Key...) } - sizeSoFar += int64(len(iter.UnsafeValue())) + sizeSoFar += int64(iter.ValueLen()) if mvccKey.IsValue() && bytes.Equal(prevKey.Key, mvccKey.Key) { // We only advanced timestamps, but not new mvcc keys. sizeSoFar += timestampLen diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index 4e625a8e3aa5..bb736d790310 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -643,6 +643,31 @@ func (i *PointSynthesizingIter) UnsafeValue() []byte { return i.rangeKeys[i.rangeKeysIdx].Value } +// MVCCValueLenAndIsTombstone implements the MVCCIterator interface. +func (i *PointSynthesizingIter) MVCCValueLenAndIsTombstone() (int, bool, error) { + if i.atPoint { + return i.iter.MVCCValueLenAndIsTombstone() + } + if i.rangeKeysIdx >= len(i.rangeKeys) || i.rangeKeysIdx < 0 { + return 0, false, errors.Errorf("iter is not Valid") + } + val := i.rangeKeys[i.rangeKeysIdx].Value + // All range keys are tombstones + return len(val), true, nil +} + +// ValueLen implements the MVCCIterator interface. +func (i *PointSynthesizingIter) ValueLen() int { + if i.atPoint { + return i.iter.ValueLen() + } + if i.rangeKeysIdx >= len(i.rangeKeys) || i.rangeKeysIdx < 0 { + // Caller has violated invariant! + return 0 + } + return len(i.rangeKeys[i.rangeKeysIdx].Value) +} + // ValueProto implements MVCCIterator. func (i *PointSynthesizingIter) ValueProto(msg protoutil.Message) error { return protoutil.Unmarshal(i.UnsafeValue(), msg) diff --git a/pkg/storage/read_as_of_iterator.go b/pkg/storage/read_as_of_iterator.go index 316992515401..18f494463e77 100644 --- a/pkg/storage/read_as_of_iterator.go +++ b/pkg/storage/read_as_of_iterator.go @@ -109,6 +109,16 @@ func (f *ReadAsOfIterator) UnsafeValue() []byte { return f.iter.UnsafeValue() } +// MVCCValueLenAndIsTombstone implements the SimpleMVCCIterator interface. +func (f *ReadAsOfIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + return f.iter.MVCCValueLenAndIsTombstone() +} + +// ValueLen implements the SimpleMVCCIterator interface. +func (f *ReadAsOfIterator) ValueLen() int { + return f.iter.ValueLen() +} + // HasPointAndRange implements SimpleMVCCIterator. func (f *ReadAsOfIterator) HasPointAndRange() (bool, bool) { return true, false diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index b67209c2d79e..e912d3f32028 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -337,6 +337,12 @@ func CheckSSTConflicts( continue } + // TODO(sumeer): extValueRaw is not always needed below. In many cases + // MVCCValueLenAndIsTombstone() suffices. This will require some + // rearrangement of the logic in compareForCollision. This is not a + // pressing optimization since currently the value is cheap to retrieve + // for the latest version of a key, and we are seeing the latest version + // because of the extIter.SeekGE call above. extKey, extValueRaw := extIter.UnsafeKey(), extIter.UnsafeValue() sstKey, sstValueRaw := sstIter.UnsafeKey(), sstIter.UnsafeValue() @@ -427,10 +433,7 @@ func CheckSSTConflicts( sstBottomTombstone := sstRangeKeys.Versions[len(sstRangeKeys.Versions)-1] sstTopTombstone := sstRangeKeys.Versions[0] extKey := extIter.UnsafeKey() - extValue, ok, err := tryDecodeSimpleMVCCValue(extIter.UnsafeValue()) - if !ok && err == nil { - extValue, err = decodeExtendedMVCCValue(extIter.UnsafeValue()) - } + extValueLen, extValueIsTombstone, err := extIter.MVCCValueLenAndIsTombstone() if err != nil { return enginepb.MVCCStats{}, err } @@ -444,7 +447,7 @@ func CheckSSTConflicts( // Check if shadowing a live key is allowed. Deleting a live key counts // as a shadow. extValueDeleted := extHasRange && extRangeKeys.Covers(extKey) - if !extValue.IsTombstone() && !extValueDeleted && (!disallowShadowingBelow.IsEmpty() || disallowShadowing) { + if !extValueIsTombstone && !extValueDeleted && (!disallowShadowingBelow.IsEmpty() || disallowShadowing) { // Note that we don't check for value equality here, unlike in the // point key shadow case. This is because a range key and a point key // by definition have different values. @@ -458,12 +461,14 @@ func CheckSSTConflicts( } sstPointShadowsExtPoint := sstHasPoint && sstIter.UnsafeKey().Key.Equal(extKey.Key) if (extKeyChanged || sstRangeKeysChanged) && !sstPointShadowsExtPoint { - statsDiff.Add(updateStatsOnRangeKeyCover(sstRangeKeyVersion.Timestamp, extKey, extIter.UnsafeValue())) + statsDiff.Add(updateStatsOnRangeKeyCover( + sstRangeKeyVersion.Timestamp, extKey, extValueLen, extValueIsTombstone)) } else if !extKeyChanged && sstPointShadowsExtPoint { // This is either a conflict, shadow, or idempotent operation. // Subtract the RangeKeyCover stats diff from the last iteration, as // compareForCollision will account for the shadow. - statsDiff.Subtract(updateStatsOnRangeKeyCover(sstRangeKeyVersion.Timestamp, extKey, extIter.UnsafeValue())) + statsDiff.Subtract(updateStatsOnRangeKeyCover( + sstRangeKeyVersion.Timestamp, extKey, extValueLen, extValueIsTombstone)) } } } @@ -725,7 +730,7 @@ func CheckSSTConflicts( extPrevRangeKeys = extRangeKeys.Clone() } - extKey, extValueRaw := extIter.UnsafeKey(), extIter.UnsafeValue() + extKey := extIter.UnsafeKey() sstKey, sstValueRaw := sstIter.UnsafeKey(), sstIter.UnsafeValue() // Keep seeking the iterators until both keys are equal. @@ -761,6 +766,10 @@ func CheckSSTConflicts( extValueDeletedByRange := extHasRange && extHasPoint && extRangeKeys.Covers(extKey) if sstHasPoint && extHasPoint && !extValueDeletedByRange { + // TODO(sumeer): extValueRaw is not always needed below. In many cases + // MVCCValueLenAndIsTombstone() suffices. This will require some + // rearrangement of the logic in compareForCollision. + extValueRaw := extIter.UnsafeValue() if err := compareForCollision(sstKey, extKey, sstValueRaw, extValueRaw); err != nil { return enginepb.MVCCStats{}, err } diff --git a/pkg/storage/sst_iterator.go b/pkg/storage/sst_iterator.go index 54feb32c888a..7be1514928ce 100644 --- a/pkg/storage/sst_iterator.go +++ b/pkg/storage/sst_iterator.go @@ -201,6 +201,26 @@ func (r *legacySSTIterator) UnsafeValue() []byte { return r.value } +// MVCCValueLenAndIsTombstone implements the SimpleMVCCIterator interface. +func (r *legacySSTIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + // NB: don't move the following code into a helper since we desire inlining + // of tryDecodeSimpleMVCCValue. + v, ok, err := tryDecodeSimpleMVCCValue(r.value) + if err == nil && !ok { + v, err = decodeExtendedMVCCValue(r.value) + } + if err != nil { + return 0, false, err + } + return len(r.value), v.IsTombstone(), nil + +} + +// ValueLen implements the SimpleMVCCIterator interface. +func (r *legacySSTIterator) ValueLen() int { + return len(r.value) +} + // verifyValue verifies the checksum of the current value. func (r *legacySSTIterator) verifyValue() { mvccValue, ok, err := tryDecodeSimpleMVCCValue(r.value) diff --git a/pkg/storage/verifying_iterator.go b/pkg/storage/verifying_iterator.go index ab64384072fa..24e2c8bbc469 100644 --- a/pkg/storage/verifying_iterator.go +++ b/pkg/storage/verifying_iterator.go @@ -106,6 +106,25 @@ func (i *verifyingMVCCIterator) UnsafeValue() []byte { return i.value } +// MVCCValueLenAndIsTombstone implements MVCCIterator. +func (i *verifyingMVCCIterator) MVCCValueLenAndIsTombstone() (int, bool, error) { + // NB: don't move the following code into a helper since we desire inlining + // of tryDecodeSimpleMVCCValue. + v, ok, err := tryDecodeSimpleMVCCValue(i.value) + if err == nil && !ok { + v, err = decodeExtendedMVCCValue(i.value) + } + if err != nil { + return 0, false, err + } + return len(i.value), v.IsTombstone(), nil +} + +// ValueLen implements MVCCIterator. +func (i *verifyingMVCCIterator) ValueLen() int { + return len(i.value) +} + // Valid implements MVCCIterator. func (i *verifyingMVCCIterator) Valid() (bool, error) { return i.valid, i.err