Skip to content
This repository was archived by the owner on Jun 27, 2023. It is now read-only.

Commit dafe0c3

Browse files
committed
WIP
1 parent b0a25f8 commit dafe0c3

File tree

5 files changed

+337
-23
lines changed

5 files changed

+337
-23
lines changed

hamt/hamt.go

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@ const (
3737
HashMurmur3 uint64 = 0x22
3838
)
3939

40-
func (ds *Shard) isValueNode() bool {
40+
// HAMTHashFunction is the hash function used to build hashBits. It is declared as a global variable only for testing purposes.
41+
// FIXME: We should have a cleaner way to replace this during tests.
42+
var HAMTHashFunction = murmur3Hash
43+
44+
func (ds *Shard) IsValueNode() bool {
4145
return ds.key != "" && ds.val != nil
4246
}
4347

@@ -47,13 +51,22 @@ type Shard struct {
4751

4852
childer *childer
4953

54+
// Entries per node (number of possible children indexed by the partial key).
5055
tableSize int
56+
// Bits needed to encode child indexes (log2 of number of entries). This is
57+
// the number of bits taken from the hash key on each level of the tree.
5158
tableSizeLg2 int
5259

5360
builder cid.Builder
5461
hashFunc uint64
5562

63+
// String format with number of zeros that will be present in the hexadecimal
64+
// encoding of the child index to always reach the fixed maxpadlen chars.
65+
// Example: maxpadlen = 4 => prefixPadStr: "%04X" (print number in hexadecimal
66+
// format padding with zeros to always reach 4 characters).
5667
prefixPadStr string
68+
// Length in chars of string that encodes child indexes. We encode indexes
69+
// as hexadecimal strings, so this is log16 of the number of entries.
5770
maxpadlen int
5871

5972
dserv ipld.DAGService
@@ -70,6 +83,7 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
7083
return nil, err
7184
}
7285

86+
// FIXME: Make this at least a static configuration for testing.
7387
ds.hashFunc = HashMurmur3
7488
return ds, nil
7589
}
@@ -214,7 +228,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
214228
// name key in this Shard or its children. It also returns the previous link
215229
// under that name key (if any).
216230
func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) {
217-
hv := &hashBits{b: hash([]byte(name))}
231+
hv := newHashBits(name)
218232
err := ds.dserv.Add(ctx, node)
219233
if err != nil {
220234
return nil, err
@@ -224,6 +238,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node
224238
if err != nil {
225239
return nil, err
226240
}
241+
242+
// FIXME: We don't need to set the name here, it will get overwritten.
243+
// This is confusing, confirm and remove this line.
227244
lnk.Name = ds.linkNamePrefix(0) + name
228245

229246
return ds.setValue(ctx, hv, name, lnk)
@@ -239,13 +256,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error {
239256
// RemoveAndPrevious is similar to the public Remove but also returns the
240257
// old removed link (if it exists).
241258
func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) {
242-
hv := &hashBits{b: hash([]byte(name))}
259+
hv := newHashBits(name)
243260
return ds.setValue(ctx, hv, name, nil)
244261
}
245262

246263
// Find searches for a child node by 'name' within this hamt
247264
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
248-
hv := &hashBits{b: hash([]byte(name))}
265+
hv := newHashBits(name)
249266

250267
var out *ipld.Link
251268
err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
@@ -279,7 +296,7 @@ func (ds *Shard) childLinkType(lnk *ipld.Link) (linkType, error) {
279296

280297
// Link returns a merklelink to this shard node
281298
func (ds *Shard) Link() (*ipld.Link, error) {
282-
if ds.isValueNode() {
299+
if ds.IsValueNode() {
283300
return ds.val, nil
284301
}
285302

@@ -308,7 +325,7 @@ func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func
308325
return err
309326
}
310327

311-
if child.isValueNode() {
328+
if child.IsValueNode() {
312329
if child.key == key {
313330
return cb(child)
314331
}
@@ -335,6 +352,20 @@ func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
335352
return links, nil
336353
}
337354

355+
func (ds *Shard) EnumAll(ctx context.Context) ([]*ipld.Link, error) {
356+
var links []*ipld.Link
357+
358+
linkResults := ds.EnumAllAsync(ctx)
359+
360+
for linkResult := range linkResults {
361+
if linkResult.Err != nil {
362+
return links, linkResult.Err
363+
}
364+
links = append(links, linkResult.Link)
365+
}
366+
return links, nil
367+
}
368+
338369
// ForEachLink walks the Shard and calls the given function.
339370
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
340371
return ds.walkTrie(ctx, func(sv *Shard) error {
@@ -348,6 +379,26 @@ func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) erro
348379
// EnumLinksAsync returns a channel which will receive Links in the directory
349380
// as they are enumerated, where order is not guaranteed
350381
func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult {
382+
linkResults := make(chan format.LinkResult)
383+
ctx, cancel := context.WithCancel(ctx)
384+
go func() {
385+
defer close(linkResults)
386+
defer cancel()
387+
getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults)
388+
cset := cid.NewSet()
389+
// FIXME: Make concurrency an option for testing.
390+
//err := dag.Walk(ctx, getLinks, ds.cid, cset.Visit, dag.Concurrent())
391+
err := dag.Walk(ctx, getLinks, ds.cid, cset.Visit)
392+
if err != nil {
393+
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
394+
}
395+
}()
396+
return linkResults
397+
}
398+
399+
// EnumAllAsync returns a channel which will receive Links in the directory
400+
// as they are enumerated, where order is not guaranteed
401+
func (ds *Shard) EnumAllAsync(ctx context.Context) <-chan format.LinkResult {
351402
linkResults := make(chan format.LinkResult)
352403
ctx, cancel := context.WithCancel(ctx)
353404
go func() {
@@ -403,6 +454,39 @@ func makeAsyncTrieGetLinks(dagService ipld.DAGService, linkResults chan<- format
403454
}
404455
}
405456

457+
// makeAsyncTrieGetAll is the same as makeAsyncTrieGetLinks but emits all links, regardless of their type (shard or value).
458+
// FIXME: Check how to abstract this.
459+
func makeAsyncTrieGetAll(dagService ipld.DAGService, linkResults chan<- format.LinkResult) dag.GetLinks {
460+
461+
return func(ctx context.Context, currentCid cid.Cid) ([]*ipld.Link, error) {
462+
node, err := dagService.Get(ctx, currentCid)
463+
if err != nil {
464+
return nil, err
465+
}
466+
directoryShard, err := NewHamtFromDag(dagService, node)
467+
if err != nil {
468+
return nil, err
469+
}
470+
471+
childShards := make([]*ipld.Link, 0, directoryShard.childer.length())
472+
links := directoryShard.childer.links
473+
for idx := range directoryShard.childer.children {
474+
lnk := links[idx]
475+
// We don't care about the link type (shard or value), just count
476+
// *all* nodes in this HAMT.
477+
emitResult(ctx, linkResults, format.LinkResult{Link: lnk, Err: nil})
478+
lnkLinkType, err := directoryShard.childLinkType(lnk)
479+
if err != nil {
480+
return nil, err
481+
}
482+
if lnkLinkType == shardLink {
483+
childShards = append(childShards, lnk)
484+
}
485+
}
486+
return childShards, nil
487+
}
488+
}
489+
406490
func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r format.LinkResult) {
407491
// make sure that context cancel is processed first
408492
// the reason is due to the concurrency of EnumerateChildrenAsync
@@ -421,7 +505,7 @@ func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r for
421505

422506
func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error {
423507
return ds.childer.each(ctx, func(s *Shard) error {
424-
if s.isValueNode() {
508+
if s.IsValueNode() {
425509
if err := cb(s); err != nil {
426510
return err
427511
}
@@ -453,7 +537,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
453537
return
454538
}
455539

456-
if child.isValueNode() {
540+
if child.IsValueNode() {
457541
// Leaf node. This is the base case of this recursive function.
458542
// FIXME: Misleading: the base case also includes a recursive call
459543
// in the case both keys share the same slot in the new child shard
@@ -493,10 +577,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
493577
return nil, err
494578
}
495579
child.builder = ds.builder
496-
chhv := &hashBits{
497-
b: hash([]byte(grandChild.key)),
498-
consumed: hv.consumed,
499-
}
580+
chhv := newConsumedHashBits(grandChild.key, hv.consumed)
500581

501582
// We explicitly ignore the oldValue returned by the next two insertions
502583
// (which will be nil) to highlight there is no overwrite here: they are
@@ -545,7 +626,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
545626
// Have we loaded the child? Prefer that.
546627
schild := child.childer.child(0)
547628
if schild != nil {
548-
if schild.isValueNode() {
629+
if schild.IsValueNode() {
549630
ds.childer.set(schild, i)
550631
}
551632
return

hamt/util.go

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
package hamt
22

33
import (
4+
"context"
5+
"encoding/binary"
46
"fmt"
5-
7+
ipld "github.com/ipfs/go-ipld-format"
8+
"github.com/ipfs/go-unixfs"
69
"github.com/spaolacci/murmur3"
10+
"math"
711
"math/bits"
812
)
913

@@ -13,6 +17,16 @@ type hashBits struct {
1317
consumed int
1418
}
1519

20+
func newHashBits(val string) *hashBits {
21+
return &hashBits{b: HAMTHashFunction([]byte(val))}
22+
}
23+
24+
func newConsumedHashBits(val string, consumed int) *hashBits {
25+
hv := &hashBits{b: HAMTHashFunction([]byte(val))}
26+
hv.consumed = consumed
27+
return hv
28+
}
29+
1630
func mkmask(n int) byte {
1731
return (1 << uint(n)) - 1
1832
}
@@ -61,8 +75,66 @@ func logtwo(v int) (int, error) {
6175
return lg2, nil
6276
}
6377

64-
func hash(val []byte) []byte {
78+
func murmur3Hash(val []byte) []byte {
6579
h := murmur3.New64()
6680
h.Write(val)
6781
return h.Sum(nil)
6882
}
83+
84+
// IdHash returns its input unchanged, i.e. the value is its own hash. ONLY FOR TESTING.
85+
func IdHash(val []byte) []byte {
86+
return val
87+
}
88+
89+
// TESTING ONLY. Creates a full HAMT tree with ID hashes. This uses very
90+
// low-level functions. Currently used for Basic-HAMT transition testing,
91+
// not for production.
92+
// FIXME: HAMTHashFunction needs to be set to IdHash for it to work.
93+
// We don't store hasher information in the node as the murmur3 is
94+
// hard-coded at the moment.
95+
func CreateFullShard(ds ipld.DAGService, treeHeight int) (rootNode ipld.Node, err error) {
96+
// FIXME: We need to review how to do this.
97+
oldHashFunc := HAMTHashFunction
98+
defer func() { HAMTHashFunction = oldHashFunc }()
99+
HAMTHashFunction = IdHash
100+
// FIXME: If we didn't rehash inside setValue then we could just generate
101+
// the fake hash as in SetAndPrevious and pass it as an argument making
102+
// the hash independent of the tree manipulation (which should be the
103+
// correct way to go) and we wouldn't need this.
104+
105+
// FIXME: for now leave the childsPerNode at the default of 256 (1 byte per
106+
// level)
107+
//childsPerNode := io.DefaultShardWidth
108+
childsPerNode := 256 // FIXME: 'import cycle not allowed' from io package
109+
if treeHeight < 1 {
110+
panic("treeHeight < 1")
111+
}
112+
if treeHeight > 8 {
113+
panic("treeHeight > 8: we don't allow a key larger than what can be enconded in a 64-bit word")
114+
}
115+
root, err := NewShard(ds, childsPerNode)
116+
if err != nil {
117+
return
118+
}
119+
// FIXME: Do we need to set the CID builder? Not part of the NewShard
120+
// interface so it shouldn't be mandatory.
121+
122+
// Use set value with the IdHash and the bitfield.
123+
// FIXME: Set the biggest value possible within fixed sizes, we don't allow
124+
// trees bigger than that.
125+
totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
126+
for i:= 0; i < totalChildren; i++ {
127+
var hashbuf [8]byte
128+
binary.LittleEndian.PutUint64(hashbuf[:], uint64(i))
129+
_, err = root.SetAndPrevious(context.Background(), string(hashbuf[:treeHeight]), unixfs.EmptyFileNode())
130+
if err != nil {
131+
return
132+
}
133+
}
134+
135+
rootNode, err = root.Node()
136+
if err != nil {
137+
return
138+
}
139+
return
140+
}

hamt/util_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package hamt
22

33
import (
4+
"context"
5+
mdtest "github.com/ipfs/go-merkledag/test"
6+
"github.com/stretchr/testify/assert"
7+
"math"
48
"testing"
59
)
610

@@ -62,3 +66,22 @@ func TestHashBitsUneven(t *testing.T) {
6266
t.Fatalf("expected 20269, but got %b (%d)", v, v)
6367
}
6468
}
69+
70+
func TestCreateFullShard(t *testing.T) {
71+
ds := mdtest.Mock()
72+
childsPerNode := 256
73+
treeHeight := 2 // This is the limit of what we can fastly generate,
74+
// the default width is too big (256). We may need to refine
75+
// CreateFullShard encoding of the key to reduce the tableSize.
76+
node, err := CreateFullShard(ds, treeHeight)
77+
assert.NoError(t, err)
78+
79+
shard, err := NewHamtFromDag(ds, node)
80+
links, err := shard.EnumAll(context.Background())
81+
assert.NoError(t, err)
82+
83+
childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
84+
internalNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight-1)))
85+
totalNodes := childNodes + internalNodes
86+
assert.Equal(t, totalNodes, len(links))
87+
}

0 commit comments

Comments
 (0)