Skip to content

Commit 84267c4

Browse files
Make skiplist grow (#1695)
This change lets the skiplist grow when it is created through the sorted skiplist builder. The normal skiplist still cannot grow. Note: The growing skiplist is not thread-safe. Co-authored-by: Ahsan Barkati <ahsanbarkati@gmail.com>
1 parent dc42de2 commit 84267c4

File tree

3 files changed

+107
-69
lines changed

3 files changed

+107
-69
lines changed

skl/arena.go

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ const (
3535

3636
// Arena should be lock-free.
3737
type Arena struct {
38-
n uint32
39-
buf []byte
38+
n uint32
39+
shouldGrow bool
40+
buf []byte
4041
}
4142

4243
// newArena returns a new arena.
@@ -50,6 +51,33 @@ func newArena(n int64) *Arena {
5051
return out
5152
}
5253

54+
func (s *Arena) allocate(sz uint32) uint32 {
55+
offset := atomic.AddUint32(&s.n, sz)
56+
if !s.shouldGrow {
57+
y.AssertTrue(int(offset) <= len(s.buf))
58+
return offset - sz
59+
}
60+
61+
// We keep extra bytes at the end so that checkptr doesn't fail. We apply some
62+
// intelligence to reduce the size of the node by only keeping towers up to the valid height and not
63+
// maxHeight. This reduces the node's size, but checkptr doesn't know about its reduced size.
64+
// checkptr tries to verify that a node of size MaxNodeSize resides on a single heap
65+
// allocation, which causes this error: checkptr: converted pointer straddles multiple allocations
66+
if int(offset) > len(s.buf)-MaxNodeSize {
67+
growBy := uint32(len(s.buf))
68+
if growBy > 1<<30 {
69+
growBy = 1 << 30
70+
}
71+
if growBy < sz {
72+
growBy = sz
73+
}
74+
newBuf := make([]byte, len(s.buf)+int(growBy))
75+
y.AssertTrue(len(s.buf) == copy(newBuf, s.buf))
76+
s.buf = newBuf
77+
}
78+
return offset - sz
79+
}
80+
5381
func (s *Arena) size() int64 {
5482
return int64(atomic.LoadUint32(&s.n))
5583
}
@@ -63,11 +91,10 @@ func (s *Arena) putNode(height int) uint32 {
6391

6492
// Pad the allocation with enough bytes to ensure pointer alignment.
6593
l := uint32(MaxNodeSize - unusedSize + nodeAlign)
66-
n := atomic.AddUint32(&s.n, l)
67-
y.AssertTrue(int(n) <= len(s.buf))
94+
n := s.allocate(l)
6895

6996
// Return the aligned offset.
70-
m := (n - l + uint32(nodeAlign)) & ^uint32(nodeAlign)
97+
m := (n + uint32(nodeAlign)) & ^uint32(nodeAlign)
7198
return m
7299
}
73100

@@ -77,23 +104,17 @@ func (s *Arena) putNode(height int) uint32 {
77104
// decoding will incur some overhead.
78105
func (s *Arena) putVal(v y.ValueStruct) uint32 {
79106
l := uint32(v.EncodedSize())
80-
n := atomic.AddUint32(&s.n, l)
81-
y.AssertTrue(int(n) <= len(s.buf))
82-
m := n - l
83-
v.Encode(s.buf[m:])
84-
return m
107+
offset := s.allocate(l)
108+
v.Encode(s.buf[offset:])
109+
return offset
85110
}
86111

87112
func (s *Arena) putKey(key []byte) uint32 {
88-
l := uint32(len(key))
89-
n := atomic.AddUint32(&s.n, l)
90-
y.AssertTrue(int(n) <= len(s.buf))
91-
// m is the offset where you should write.
92-
// n = new len - key len give you the offset at which you should write.
93-
m := n - l
94-
// Copy to buffer from m:n
95-
y.AssertTrue(len(key) == copy(s.buf[m:n], key))
96-
return m
113+
keySz := uint32(len(key))
114+
offset := s.allocate(keySz)
115+
buf := s.buf[offset : offset+keySz]
116+
y.AssertTrue(len(key) == copy(buf, key))
117+
return offset
97118
}
98119

99120
// getNode returns a pointer to the node located at offset. If the offset is
@@ -102,7 +123,6 @@ func (s *Arena) getNode(offset uint32) *node {
102123
if offset == 0 {
103124
return nil
104125
}
105-
106126
return (*node)(unsafe.Pointer(&s.buf[offset]))
107127
}
108128

skl/skl.go

Lines changed: 65 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ type node struct {
7575
}
7676

7777
type Skiplist struct {
78-
height int32 // Current height. 1 <= height <= kMaxHeight. CAS.
79-
head *node
80-
ref int32
81-
arena *Arena
82-
OnClose func()
78+
height int32 // Current height. 1 <= height <= kMaxHeight. CAS.
79+
headOffset uint32
80+
ref int32
81+
arena *Arena
82+
OnClose func()
8383
}
8484

8585
// IncrRef increases the refcount
@@ -100,19 +100,19 @@ func (s *Skiplist) DecrRef() {
100100
// Indicate we are closed. Good for testing. Also, lets GC reclaim memory. Race condition
101101
// here would suggest we are accessing skiplist when we are supposed to have no reference!
102102
s.arena = nil
103-
// Since the head references the arena's buf, as long as the head is kept around
104-
// GC can't release the buf.
105-
s.head = nil
106103
}
107104

108105
func newNode(arena *Arena, key []byte, v y.ValueStruct, height int) *node {
109106
// The base level is already allocated in the node struct.
110-
offset := arena.putNode(height)
111-
node := arena.getNode(offset)
112-
node.keyOffset = arena.putKey(key)
107+
nodeOffset := arena.putNode(height)
108+
keyOffset := arena.putKey(key)
109+
val := encodeValue(arena.putVal(v), v.EncodedSize())
110+
111+
node := arena.getNode(nodeOffset)
112+
node.keyOffset = keyOffset
113113
node.keySize = uint16(len(key))
114114
node.height = uint16(height)
115-
node.value = encodeValue(arena.putVal(v), v.EncodedSize())
115+
node.value = val
116116
return node
117117
}
118118

@@ -130,14 +130,23 @@ func decodeValue(value uint64) (valOffset uint32, valSize uint32) {
130130
func NewSkiplist(arenaSize int64) *Skiplist {
131131
arena := newArena(arenaSize)
132132
head := newNode(arena, nil, y.ValueStruct{}, maxHeight)
133+
ho := arena.getNodeOffset(head)
133134
return &Skiplist{
134-
height: 1,
135-
head: head,
136-
arena: arena,
137-
ref: 1,
135+
height: 1,
136+
headOffset: ho,
137+
arena: arena,
138+
ref: 1,
138139
}
139140
}
140141

142+
// NewGrowingSkiplist returns a new skiplist which can grow. Note that this skiplist is not thread
143+
// safe and must be used for serial operations only.
144+
func NewGrowingSkiplist(arenaSize int64) *Skiplist {
145+
s := NewSkiplist(arenaSize)
146+
s.arena.shouldGrow = true
147+
return s
148+
}
149+
141150
func (s *node) getValueOffset() (uint32, uint32) {
142151
value := atomic.LoadUint64(&s.value)
143152
return decodeValue(value)
@@ -147,10 +156,8 @@ func (s *node) key(arena *Arena) []byte {
147156
return arena.getKey(s.keyOffset, s.keySize)
148157
}
149158

150-
func (s *node) setValue(arena *Arena, v y.ValueStruct) {
151-
valOffset := arena.putVal(v)
152-
value := encodeValue(valOffset, v.EncodedSize())
153-
atomic.StoreUint64(&s.value, value)
159+
func (s *node) setValue(arena *Arena, vo uint64) {
160+
atomic.StoreUint64(&s.value, vo)
154161
}
155162

156163
func (s *node) getNextOffset(h int) uint32 {
@@ -180,14 +187,18 @@ func (s *Skiplist) getNext(nd *node, height int) *node {
180187
return s.arena.getNode(nd.getNextOffset(height))
181188
}
182189

190+
func (s *Skiplist) getHead() *node {
191+
return s.arena.getNode(s.headOffset)
192+
}
193+
183194
// findNear finds the node near to key.
184195
// If less=true, it finds rightmost node such that node.key < key (if allowEqual=false) or
185196
// node.key <= key (if allowEqual=true).
186197
// If less=false, it finds leftmost node such that node.key > key (if allowEqual=false) or
187198
// node.key >= key (if allowEqual=true).
188199
// Returns the node found. The bool returned is true if the node has key equal to given key.
189200
func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool) {
190-
x := s.head
201+
x := s.getHead()
191202
level := int(s.getHeight() - 1)
192203
for {
193204
// Assume x.key < key.
@@ -204,7 +215,7 @@ func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool
204215
return nil, false
205216
}
206217
// Try to return x. Make sure it is not a head node.
207-
if x == s.head {
218+
if x == s.getHead() {
208219
return nil, false
209220
}
210221
return x, false
@@ -232,7 +243,7 @@ func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool
232243
continue
233244
}
234245
// On base level. Return x.
235-
if x == s.head {
246+
if x == s.getHead() {
236247
return nil, false
237248
}
238249
return x, false
@@ -247,7 +258,7 @@ func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool
247258
return next, false
248259
}
249260
// Try to return x. Make sure it is not a head node.
250-
if x == s.head {
261+
if x == s.getHead() {
251262
return nil, false
252263
}
253264
return x, false
@@ -258,14 +269,16 @@ func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool
258269
// The input "before" tells us where to start looking.
259270
// If we found a node with the same key, then we return outBefore = outAfter.
260271
// Otherwise, outBefore.key < key < outAfter.key.
261-
func (s *Skiplist) findSpliceForLevel(key []byte, before *node, level int) (*node, *node) {
272+
func (s *Skiplist) findSpliceForLevel(key []byte, before uint32, level int) (uint32, uint32) {
262273
for {
263274
// Assume before.key < key.
264-
next := s.getNext(before, level)
265-
if next == nil {
275+
beforeNode := s.arena.getNode(before)
276+
next := beforeNode.getNextOffset(level)
277+
nextNode := s.arena.getNode(next)
278+
if nextNode == nil {
266279
return before, next
267280
}
268-
nextKey := next.key(s.arena)
281+
nextKey := nextNode.key(s.arena)
269282
cmp := y.CompareKeys(key, nextKey)
270283
if cmp == 0 {
271284
// Equality case.
@@ -289,15 +302,17 @@ func (s *Skiplist) Put(key []byte, v y.ValueStruct) {
289302
// increase the height. Let's defer these actions.
290303

291304
listHeight := s.getHeight()
292-
var prev [maxHeight + 1]*node
293-
var next [maxHeight + 1]*node
294-
prev[listHeight] = s.head
295-
next[listHeight] = nil
305+
var prev [maxHeight + 1]uint32
306+
var next [maxHeight + 1]uint32
307+
prev[listHeight] = s.headOffset
296308
for i := int(listHeight) - 1; i >= 0; i-- {
297309
// Use higher level to speed up for current level.
298310
prev[i], next[i] = s.findSpliceForLevel(key, prev[i+1], i)
299311
if prev[i] == next[i] {
300-
prev[i].setValue(s.arena, v)
312+
vo := s.arena.putVal(v)
313+
encValue := encodeValue(vo, v.EncodedSize())
314+
prevNode := s.arena.getNode(prev[i])
315+
prevNode.setValue(s.arena, encValue)
301316
return
302317
}
303318
}
@@ -320,18 +335,18 @@ func (s *Skiplist) Put(key []byte, v y.ValueStruct) {
320335
// create a node in the level above because it would have discovered the node in the base level.
321336
for i := 0; i < height; i++ {
322337
for {
323-
if prev[i] == nil {
338+
if s.arena.getNode(prev[i]) == nil {
324339
y.AssertTrue(i > 1) // This cannot happen in base level.
325340
// We haven't computed prev, next for this level because height exceeds old listHeight.
326341
// For these levels, we expect the lists to be sparse, so we can just search from head.
327-
prev[i], next[i] = s.findSpliceForLevel(key, s.head, i)
342+
prev[i], next[i] = s.findSpliceForLevel(key, s.headOffset, i)
328343
// Someone adds the exact same key before we are able to do so. This can only happen on
329344
// the base level. But we know we are not on the base level.
330345
y.AssertTrue(prev[i] != next[i])
331346
}
332-
nextOffset := s.arena.getNodeOffset(next[i])
333-
x.tower[i] = nextOffset
334-
if prev[i].casNextOffset(i, nextOffset, s.arena.getNodeOffset(x)) {
347+
x.tower[i] = next[i]
348+
pnode := s.arena.getNode(prev[i])
349+
if pnode.casNextOffset(i, next[i], s.arena.getNodeOffset(x)) {
335350
// Managed to insert x between prev[i] and next[i]. Go to the next level.
336351
break
337352
}
@@ -341,7 +356,10 @@ func (s *Skiplist) Put(key []byte, v y.ValueStruct) {
341356
prev[i], next[i] = s.findSpliceForLevel(key, prev[i], i)
342357
if prev[i] == next[i] {
343358
y.AssertTruef(i == 0, "Equality can happen only on base level: %d", i)
344-
prev[i].setValue(s.arena, v)
359+
vo := s.arena.putVal(v)
360+
encValue := encodeValue(vo, v.EncodedSize())
361+
prevNode := s.arena.getNode(prev[i])
362+
prevNode.setValue(s.arena, encValue)
345363
return
346364
}
347365
}
@@ -356,7 +374,7 @@ func (s *Skiplist) Empty() bool {
356374
// findLast returns the last element. If head (empty list), we return nil. All the find functions
357375
// will NEVER return the head nodes.
358376
func (s *Skiplist) findLast() *node {
359-
n := s.head
377+
n := s.getHead()
360378
level := int(s.getHeight()) - 1
361379
for {
362380
next := s.getNext(n, level)
@@ -365,7 +383,7 @@ func (s *Skiplist) findLast() *node {
365383
continue
366384
}
367385
if level == 0 {
368-
if n == s.head {
386+
if n == s.getHead() {
369387
return nil
370388
}
371389
return n
@@ -460,7 +478,7 @@ func (s *Iterator) SeekForPrev(target []byte) {
460478
// SeekToFirst seeks position at the first entry in list.
461479
// Final state of iterator is Valid() iff list is not empty.
462480
func (s *Iterator) SeekToFirst() {
463-
s.n = s.list.getNext(s.list.head, 0)
481+
s.n = s.list.getNext(s.list.getHead(), 0)
464482
}
465483

466484
// SeekToLast seeks position at the last entry in list.
@@ -528,15 +546,15 @@ func (s *UniIterator) Close() error { return s.iter.Close() }
528546
// sorted order.
529547
type Builder struct {
530548
s *Skiplist
531-
prev [maxHeight + 1]*node
549+
prev [maxHeight + 1]uint32
532550
prevKey []byte
533551
}
534552

535553
func NewBuilder(arenaSize int64) *Builder {
536-
s := NewSkiplist(arenaSize)
554+
s := NewGrowingSkiplist(arenaSize)
537555
b := &Builder{s: s}
538556
for i := 0; i < maxHeight+1; i++ {
539-
b.prev[i] = s.head
557+
b.prev[i] = s.headOffset
540558
}
541559
return b
542560
}
@@ -561,8 +579,8 @@ func (b *Builder) Add(k []byte, v y.ValueStruct) {
561579
x := newNode(s.arena, k, v, height)
562580
nodeOffset := s.arena.getNodeOffset(x)
563581
for i := 0; i < height; i++ {
564-
node := b.prev[i]
582+
node := s.arena.getNode(b.prev[i])
565583
node.tower[i] = nodeOffset
566-
b.prev[i] = x
584+
b.prev[i] = nodeOffset
567585
}
568586
}

skl/skl_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func newValue(v int) []byte {
4242

4343
// length iterates over skiplist to give exact size.
4444
func length(s *Skiplist) int {
45-
x := s.getNext(s.head, 0)
45+
x := s.getNext(s.getHead(), 0)
4646
count := 0
4747
for x != nil {
4848
count++
@@ -460,7 +460,7 @@ func randomKey(rng *rand.Rand) []byte {
460460

461461
func TestBuilder(t *testing.T) {
462462
N := 1 << 16
463-
b := NewBuilder(32 << 20)
463+
b := NewBuilder(32 << 10)
464464
buf := make([]byte, 8)
465465
for i := 0; i < N; i++ {
466466
binary.BigEndian.PutUint64(buf, uint64(i))

0 commit comments

Comments
 (0)