Skip to content

Commit

Permalink
storage,kvserver: support lazy value fetching in SimpleMVCCIterator
Browse files Browse the repository at this point in the history
SimpleMVCCIterator already separates the methods to retrieve the
key and value, and callers who do not need any information about the
value do not call UnsafeValue(). However, there are callers (like
stats calculation) who need to know the length of the value and
in some cases whether an MVCC value is a tombstone. The
MVCCValueLenAndIsTombstone() and ValueLen() methods are introduced
for these cases. This is preparation for the near future where
retrieving the value in Pebble will have actual additional cost,
since the value may be in a different part of the file or a different
file.

Non-test callers that can use these new interfaces have been
modified. There are some todos to modify some non-trivial callers,
related to deciding what to GC, and checking sstable conflicts,
which will happen in future PRs.

When new methods are introduced to retrieve these value attributes
in pebble.Iterator, only pebbleIterator.{MVCCValueLenAndIsTombstone,
ValueLen} will need to be modified to make use of them.

Release note: None
  • Loading branch information
sumeerbhola committed Oct 4, 2022
1 parent d270005 commit 491de61
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 112 deletions.
3 changes: 3 additions & 0 deletions pkg/kv/kvserver/gc/gc_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type gcIterator struct {
cachedRangeTombstoneKey roachpb.Key
}

// TODO(sumeer): change gcIterator to use MVCCValueLenAndIsTombstone(). It
// needs to get the value only for intents.

func makeGCIterator(
desc *roachpb.RangeDescriptor,
snap storage.Reader,
Expand Down
10 changes: 10 additions & 0 deletions pkg/kv/kvserver/rangefeed/task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@ func (s *testIterator) UnsafeValue() []byte {
return s.curKV().Value
}

func (s *testIterator) MVCCValueLenAndIsTombstone() (int, bool, error) {
rawV := s.curKV().Value
v, err := storage.DecodeMVCCValue(rawV)
return len(rawV), v.IsTombstone(), err
}

func (s *testIterator) ValueLen() int {
return len(s.curKV().Value)
}

func (s *testIterator) curKV() storage.MVCCKeyValue {
return s.kvs[s.cur]
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/kv/kvserver/spanset/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,16 @@ func (i *MVCCIterator) UnsafeValue() []byte {
return i.i.UnsafeValue()
}

// MVCCValueLenAndIsTombstone implements the MVCCIterator interface.
func (i *MVCCIterator) MVCCValueLenAndIsTombstone() (int, bool, error) {
return i.i.MVCCValueLenAndIsTombstone()
}

// ValueLen implements the MVCCIterator interface.
func (i *MVCCIterator) ValueLen() int {
return i.i.ValueLen()
}

// HasPointAndRange implements SimpleMVCCIterator.
func (i *MVCCIterator) HasPointAndRange() (bool, bool) {
return i.i.HasPointAndRange()
Expand Down
35 changes: 34 additions & 1 deletion pkg/storage/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,26 @@ type SimpleMVCCIterator interface {
UnsafeKey() MVCCKey
// UnsafeValue returns the current point key value as a byte slice.
// This must only be called when it is known that the iterator is positioned
// at a point value, i.e. HasPointAndRange has returned (true, *).
// at a point value, i.e. HasPointAndRange has returned (true, *). If
// possible, use MVCCValueLenAndIsTombstone() instead.
//
// The memory is invalidated on the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}.
// Use Value() if that is undesirable.
UnsafeValue() []byte
// MVCCValueLenAndIsTombstone should be called only for MVCC (i.e.,
// UnsafeKey().IsValue()) point values, when the actual point value is not
// needed, for example when updating stats and making GC decisions, and it
// is sufficient for the caller to know the length (len(UnsafeValue()), and
// whether the underlying MVCCValue is a tombstone
// (MVCCValue.IsTombstone()). This is an optimization that can allow the
// underlying storage layer to avoid retrieving the value.
// REQUIRES: HasPointAndRange() has returned (true, *).
MVCCValueLenAndIsTombstone() (int, bool, error)
// ValueLen can be called for MVCC or non-MVCC values, when only the value
// length is needed. This is an optimization that can allow the underlying
// storage layer to avoid retrieving the value.
// REQUIRES: HasPointAndRange() has returned (true, *).
ValueLen() int
// HasPointAndRange returns whether the current iterator position has a point
// key and/or a range key. Must check Valid() first. At least one of these
// will always be true for a valid iterator. For details on range keys, see
Expand Down Expand Up @@ -1559,6 +1574,24 @@ func assertSimpleMVCCIteratorInvariants(iter SimpleMVCCIterator) error {
return errors.AssertionFailedf("hasRange=false but RangeKeys=%s", r)
}
}
if hasPoint {
value := iter.UnsafeValue()
valueLen := iter.ValueLen()
if len(value) != valueLen {
return errors.AssertionFailedf("length of UnsafeValue %d != ValueLen %d", len(value), valueLen)
}
if key.IsValue() {
valueLen2, _, err := iter.MVCCValueLenAndIsTombstone()
if err == nil {
if len(value) != valueLen2 {
return errors.AssertionFailedf("length of UnsafeValue %d != MVCCValueLenAndIsTombstone %d",
len(value), valueLen2)
}
}
// Else err != nil. Ignore, since SimpleMVCCIterator is not to be held
// responsible for data corruption or tests writing non-MVCCValues.
}
}

return nil
}
Expand Down
14 changes: 14 additions & 0 deletions pkg/storage/intent_interleaving_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,20 @@ func (i *intentInterleavingIter) UnsafeValue() []byte {
return i.iter.UnsafeValue()
}

func (i *intentInterleavingIter) MVCCValueLenAndIsTombstone() (int, bool, error) {
if i.isCurAtIntentIter() {
return 0, false, errors.Errorf("not at MVCC value")
}
return i.iter.MVCCValueLenAndIsTombstone()
}

func (i *intentInterleavingIter) ValueLen() int {
if i.isCurAtIntentIter() {
return i.intentIter.ValueLen()
}
return i.iter.ValueLen()
}

func (i *intentInterleavingIter) Key() MVCCKey {
key := i.UnsafeKey()
keyCopy := make([]byte, len(key.Key))
Expand Down
10 changes: 10 additions & 0 deletions pkg/storage/multi_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ func (f *multiIterator) UnsafeValue() []byte {
return f.iters[f.currentIdx].UnsafeValue()
}

// MVCCValueLenAndIsTombstone implements the SimpleMVCCIterator interface.
func (f *multiIterator) MVCCValueLenAndIsTombstone() (int, bool, error) {
return f.iters[f.currentIdx].MVCCValueLenAndIsTombstone()
}

// ValueLen implements the SimpleMVCCIterator interface.
func (f *multiIterator) ValueLen() int {
return f.iters[f.currentIdx].ValueLen()
}

// HasPointAndRange implements SimpleMVCCIterator.
func (f *multiIterator) HasPointAndRange() (bool, bool) {
panic("not implemented")
Expand Down
Loading

0 comments on commit 491de61

Please sign in to comment.