-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Remove a long-standing TODO item in the code - let's use the great loser tree implementation by Bryan. It is faster than the heap because less comparisons are needed. Should be a nice improvement given that the heap is used in a lot of hot paths. Since Prometheus also uses this library, it's tricky to import the "any" version. I tried doing bboreham/go-loser#3 but it's still impossible to do that. Let's just copy/paste the code, it's not a lot. Bench: ``` goos: linux goarch: amd64 pkg: github.com/thanos-io/thanos/pkg/store cpu: Intel(R) Core(TM) i9-10885H CPU @ 2.40GHz │ oldkway │ newkway │ │ sec/op │ sec/op vs base │ KWayMerge-16 2.292m ± 3% 2.075m ± 15% -9.47% (p=0.023 n=10) │ oldkway │ newkway │ │ B/op │ B/op vs base │ KWayMerge-16 1.553Mi ± 0% 1.585Mi ± 0% +2.04% (p=0.000 n=10) │ oldkway │ newkway │ │ allocs/op │ allocs/op vs base │ KWayMerge-16 27.26k ± 0% 26.27k ± 0% -3.66% (p=0.000 n=10) ``` Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
- Loading branch information
Showing
9 changed files
with
402 additions
and
140 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
// Copyright (c) The Thanos Authors. | ||
// Licensed under the Apache License 2.0. | ||
|
||
// Original version copyright Bryan Boreham, 2024. | ||
// https://github.com/bboreham/go-loser/tree/any. | ||
// Loser tree, from https://en.wikipedia.org/wiki/K-way_merge_algorithm#Tournament_Tree | ||
|
||
package losertree | ||
|
||
type Sequence interface { | ||
Next() bool // Advances and returns true if there is a value at this new position. | ||
} | ||
|
||
func New[E any, S Sequence](sequences []S, maxVal E, at func(S) E, less func(E, E) bool, close func(S)) *Tree[E, S] { | ||
nSequences := len(sequences) | ||
t := Tree[E, S]{ | ||
maxVal: maxVal, | ||
at: at, | ||
less: less, | ||
close: close, | ||
nodes: make([]node[E, S], nSequences*2), | ||
} | ||
for i, s := range sequences { | ||
t.nodes[i+nSequences].items = s | ||
t.moveNext(i + nSequences) // Must call Next on each item so that At() has a value. | ||
} | ||
if nSequences > 0 { | ||
t.nodes[0].index = -1 // flag to be initialized on first call to Next(). | ||
} | ||
return &t | ||
} | ||
|
||
// Call the close function on all sequences that are still open. | ||
func (t *Tree[E, S]) Close() { | ||
for _, e := range t.nodes[len(t.nodes)/2 : len(t.nodes)] { | ||
if e.index == -1 { | ||
continue | ||
} | ||
t.close(e.items) | ||
} | ||
} | ||
|
||
// A loser tree is a binary tree laid out such that nodes N and N+1 have parent N/2. | ||
// We store M leaf nodes in positions M...2M-1, and M-1 internal nodes in positions 1..M-1. | ||
// Node 0 is a special node, containing the winner of the contest. | ||
type Tree[E any, S Sequence] struct { | ||
maxVal E | ||
at func(S) E | ||
less func(E, E) bool | ||
close func(S) // Called when Next() returns false. | ||
nodes []node[E, S] | ||
} | ||
|
||
type node[E any, S Sequence] struct { | ||
index int // This is the loser for all nodes except the 0th, where it is the winner. | ||
value E // Value copied from the loser node, or winner for node 0. | ||
items S // Only populated for leaf nodes. | ||
} | ||
|
||
func (t *Tree[E, S]) moveNext(index int) bool { | ||
n := &t.nodes[index] | ||
if n.items.Next() { | ||
n.value = t.at(n.items) | ||
return true | ||
} | ||
t.close(n.items) // Next() returned false; close it and mark as finished. | ||
n.value = t.maxVal | ||
n.index = -1 | ||
return false | ||
} | ||
|
||
func (t *Tree[E, S]) Winner() S { | ||
return t.nodes[t.nodes[0].index].items | ||
} | ||
|
||
func (t *Tree[E, S]) At() E { | ||
return t.nodes[0].value | ||
} | ||
|
||
func (t *Tree[E, S]) Next() bool { | ||
nodes := t.nodes | ||
if len(nodes) == 0 { | ||
return false | ||
} | ||
if nodes[0].index == -1 { // If tree has not been initialized yet, do that. | ||
t.initialize() | ||
return nodes[nodes[0].index].index != -1 | ||
} | ||
if nodes[nodes[0].index].index == -1 { // already exhausted. | ||
return false | ||
} | ||
t.moveNext(nodes[0].index) | ||
t.replayGames(nodes[0].index) | ||
return nodes[nodes[0].index].index != -1 | ||
} | ||
|
||
// Current winner has been advanced independently; fix up the loser tree. | ||
func (t *Tree[E, S]) Fix(closed bool) { | ||
nodes := t.nodes | ||
cur := &nodes[nodes[0].index] | ||
if closed { | ||
cur.value = t.maxVal | ||
cur.index = -1 | ||
} else { | ||
cur.value = t.at(cur.items) | ||
} | ||
t.replayGames(nodes[0].index) | ||
} | ||
|
||
func (t *Tree[E, S]) IsEmpty() bool { | ||
nodes := t.nodes | ||
if nodes[0].index == -1 { // If tree has not been initialized yet, do that. | ||
t.initialize() | ||
} | ||
return nodes[nodes[0].index].index == -1 | ||
} | ||
|
||
func (t *Tree[E, S]) initialize() { | ||
winner := t.playGame(1) | ||
t.nodes[0].index = winner | ||
t.nodes[0].value = t.nodes[winner].value | ||
} | ||
|
||
// Find the winner at position pos; if it is a non-leaf node, store the loser. | ||
// pos must be >= 1 and < len(t.nodes). | ||
func (t *Tree[E, S]) playGame(pos int) int { | ||
nodes := t.nodes | ||
if pos >= len(nodes)/2 { | ||
return pos | ||
} | ||
left := t.playGame(pos * 2) | ||
right := t.playGame(pos*2 + 1) | ||
var loser, winner int | ||
if t.less(nodes[left].value, nodes[right].value) { | ||
loser, winner = right, left | ||
} else { | ||
loser, winner = left, right | ||
} | ||
nodes[pos].index = loser | ||
nodes[pos].value = nodes[loser].value | ||
return winner | ||
} | ||
|
||
// Starting at pos, which is a winner, re-consider all values up to the root. | ||
func (t *Tree[E, S]) replayGames(pos int) { | ||
nodes := t.nodes | ||
winningValue := nodes[pos].value | ||
for n := parent(pos); n != 0; n = parent(n) { | ||
node := &nodes[n] | ||
if t.less(node.value, winningValue) { | ||
// Record pos as the loser here, and the old loser is the new winner. | ||
node.index, pos = pos, node.index | ||
node.value, winningValue = winningValue, node.value | ||
} | ||
} | ||
// pos is now the winner; store it in node 0. | ||
nodes[0].index = pos | ||
nodes[0].value = winningValue | ||
} | ||
|
||
func parent(i int) int { return i >> 1 } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Copyright (c) The Thanos Authors. | ||
// Licensed under the Apache License 2.0. | ||
|
||
// Original version copyright Bryan Boreham, 2024. | ||
// https://github.com/bboreham/go-loser/tree/any. | ||
package losertree | ||
|
||
import ( | ||
"math" | ||
"testing" | ||
) | ||
|
||
type List struct { | ||
list []uint64 | ||
cur uint64 | ||
} | ||
|
||
func NewList(list ...uint64) *List { | ||
return &List{list: list} | ||
} | ||
|
||
func (it *List) At() uint64 { | ||
return it.cur | ||
} | ||
|
||
func (it *List) Next() bool { | ||
if len(it.list) > 0 { | ||
it.cur = it.list[0] | ||
it.list = it.list[1:] | ||
return true | ||
} | ||
it.cur = 0 | ||
return false | ||
} | ||
|
||
func (it *List) Seek(val uint64) bool { | ||
for it.cur < val && len(it.list) > 0 { | ||
it.cur = it.list[0] | ||
it.list = it.list[1:] | ||
} | ||
return len(it.list) > 0 | ||
} | ||
|
||
func checkIterablesEqual[E any, S1 Sequence, S2 Sequence](t *testing.T, a S1, b S2, at1 func(S1) E, at2 func(S2) E, less func(E, E) bool) { | ||
t.Helper() | ||
count := 0 | ||
for a.Next() { | ||
count++ | ||
if !b.Next() { | ||
t.Fatalf("b ended before a after %d elements", count) | ||
} | ||
if less(at1(a), at2(b)) || less(at2(b), at1(a)) { | ||
t.Fatalf("position %d: %v != %v", count, at1(a), at2(b)) | ||
} | ||
} | ||
if b.Next() { | ||
t.Fatalf("a ended before b after %d elements", count) | ||
} | ||
} | ||
|
||
var testCases = []struct { | ||
name string | ||
args []*List | ||
want *List | ||
}{ | ||
{ | ||
name: "empty input", | ||
want: NewList(), | ||
}, | ||
{ | ||
name: "one list", | ||
args: []*List{NewList(1, 2, 3, 4)}, | ||
want: NewList(1, 2, 3, 4), | ||
}, | ||
{ | ||
name: "two lists", | ||
args: []*List{NewList(3, 4, 5), NewList(1, 2)}, | ||
want: NewList(1, 2, 3, 4, 5), | ||
}, | ||
{ | ||
name: "two lists, first empty", | ||
args: []*List{NewList(), NewList(1, 2)}, | ||
want: NewList(1, 2), | ||
}, | ||
{ | ||
name: "two lists, second empty", | ||
args: []*List{NewList(1, 2), NewList()}, | ||
want: NewList(1, 2), | ||
}, | ||
{ | ||
name: "two lists b", | ||
args: []*List{NewList(1, 2), NewList(3, 4, 5)}, | ||
want: NewList(1, 2, 3, 4, 5), | ||
}, | ||
{ | ||
name: "two lists c", | ||
args: []*List{NewList(1, 3), NewList(2, 4, 5)}, | ||
want: NewList(1, 2, 3, 4, 5), | ||
}, | ||
{ | ||
name: "three lists", | ||
args: []*List{NewList(1, 3), NewList(2, 4), NewList(5)}, | ||
want: NewList(1, 2, 3, 4, 5), | ||
}, | ||
} | ||
|
||
func TestMerge(t *testing.T) { | ||
at := func(s *List) uint64 { return s.At() } | ||
less := func(a, b uint64) bool { return a < b } | ||
at2 := func(s *Tree[uint64, *List]) uint64 { return s.Winner().At() } | ||
for _, tt := range testCases { | ||
t.Run(tt.name, func(t *testing.T) { | ||
numCloses := 0 | ||
closeFn := func(_ *List) { | ||
numCloses++ | ||
} | ||
lt := New(tt.args, math.MaxUint64, at, less, closeFn) | ||
checkIterablesEqual(t, tt.want, lt, at, at2, less) | ||
if numCloses != len(tt.args) { | ||
t.Errorf("Expected %d closes, got %d", len(tt.args), numCloses) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.