Skip to content
This repository was archived by the owner on Jun 27, 2023. It is now read-only.

create internal and private packages #108

Merged
10 changes: 6 additions & 4 deletions hamt/hamt.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@ import (
"fmt"
"os"

format "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/internal"

bitfield "github.com/ipfs/go-bitfield"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
)

const (
// HashMurmur3 is the multiformats identifier for Murmur3
HashMurmur3 uint64 = 0x22
)

// Hash function declared as global variable only for testing purposes.
// FIXME: We should have a cleaner way to replace this during tests.
var HAMTHashFunction = murmur3Hash
// init registers this package's default hash (Murmur3) in the shared
// internal configuration, from which the HAMT code reads it. Keeping the
// function pointer in the internal package lets tests replace it (e.g.
// with an identity hash) without an import cycle.
func init() {
	internal.HAMTHashFunction = murmur3Hash
}

func (ds *Shard) IsValueNode() bool {
return ds.key != "" && ds.val != nil
Expand Down
77 changes: 6 additions & 71 deletions hamt/util.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package hamt

import (
"context"
"encoding/binary"
"fmt"
ipld "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-unixfs"
"github.com/spaolacci/murmur3"
"math"
"math/bits"

"github.com/ipfs/go-unixfs/internal"

"github.com/spaolacci/murmur3"
)

// hashBits is a helper that allows the reading of the 'next n bits' as an integer.
Expand All @@ -18,11 +16,11 @@ type hashBits struct {
}

func newHashBits(val string) *hashBits {
return &hashBits{b: HAMTHashFunction([]byte(val))}
return &hashBits{b: internal.HAMTHashFunction([]byte(val))}
}

func newConsumedHashBits(val string, consumed int) *hashBits {
hv := &hashBits{b: HAMTHashFunction([]byte(val))}
hv := &hashBits{b: internal.HAMTHashFunction([]byte(val))}
hv.consumed = consumed
return hv
}
Expand Down Expand Up @@ -80,66 +78,3 @@ func murmur3Hash(val []byte) []byte {
h.Write(val)
return h.Sum(nil)
}

// IdHash is a testing-only stand-in for a hash function: it echoes its
// input back unchanged, which makes the placement of keys inside a HAMT
// fully predictable.
func IdHash(val []byte) []byte {
	out := val
	return out
}

// CreateCompleteHAMT creates a HAMT with the following properties:
//   - its height (distance/edges from root to deepest node) is specified by
//     treeHeight.
//   - all leaf Shard nodes have the same depth (and have only 'value' links).
//   - all internal Shard nodes point only to other Shards (and hence have
//     zero 'value' links).
//   - the total number of 'value' links (directory entries) is:
//     io.DefaultShardWidth ^ (treeHeight + 1).
//
// FIXME: HAMTHashFunction needs to be set to IdHash by the caller; the key
// generation below depends on that identity mapping. (HAMTHashFunction is a
// global setting of the package, hard-coded in the serialized Shard node and
// not changeable on a per HAMT/Shard basis.)
// (If we didn't rehash inside setValue then we could just generate the fake
// hash as in io.SetAndPrevious through `newHashBits()` and pass it as an
// argument, making the hash independent of tree manipulation; that sounds
// like the correct way to go in general and we wouldn't need this.)
func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) {
	if treeHeight < 1 {
		panic("treeHeight < 1")
	}
	if treeHeight > 8 {
		panic("treeHeight > 8: we don't allow a key larger than what can be enconded in a 64-bit word")
	}
	// FIXME: Any clean and simple way to assert the hash function is the
	//  identity one? Otherwise remove this check entirely.

	rootShard, err := NewShard(ds, childsPerNode)
	if err != nil {
		return nil, err
	}
	// FIXME: Do we need to set the CID builder? Not part of the NewShard
	//  interface so it shouldn't be mandatory.

	// Under the identity hash every key is its own "hash", so enumerating
	// all byte strings of length treeHeight fills the tree completely.
	totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
	for i := 0; i < totalChildren; i++ {
		var keyBuf [8]byte
		binary.LittleEndian.PutUint64(keyBuf[:], uint64(i))
		// FIXME: This key encoding is wrong for childsPerNode/DefaultShardWidth
		//  different than 256 (i.e., one byte of key per level).
		prev, err := rootShard.SetAndPrevious(context.Background(), string(keyBuf[:treeHeight]), unixfs.EmptyFileNode())
		if err != nil {
			return nil, err
		}
		if prev != nil {
			// A non-nil previous link means an entry was overwritten, in
			// which case the resulting tree cannot be complete.
			return nil, fmt.Errorf("we have overwritten entry %s",
				prev.Cid)
		}
	}
	// FIXME: Check depth of every Shard to be sure?

	return rootShard.Node()
}
24 changes: 0 additions & 24 deletions hamt/util_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
package hamt

import (
"context"
mdtest "github.com/ipfs/go-merkledag/test"
"github.com/stretchr/testify/assert"
"math"
"testing"
)

Expand Down Expand Up @@ -66,23 +62,3 @@ func TestHashBitsUneven(t *testing.T) {
t.Fatalf("expected 20269, but got %b (%d)", v, v)
}
}

// TestCreateCompleteShard checks that CreateCompleteHAMT builds a complete
// tree: enumerating it must yield exactly childsPerNode^treeHeight value
// links (directory entries).
func TestCreateCompleteShard(t *testing.T) {
	ds := mdtest.Mock()
	childsPerNode := 256
	treeHeight := 2 // This is the limit of what we can quickly generate,
	// the default width is too big (256). We may need to refine
	// CreateCompleteHAMT encoding of the key to reduce the tableSize.
	// Pass the variable (not a duplicated literal) so the expected-count
	// computation below cannot drift from the tree actually built.
	node, err := CreateCompleteHAMT(ds, treeHeight, childsPerNode)
	assert.NoError(t, err)

	shard, err := NewHamtFromDag(ds, node)
	assert.NoError(t, err)
	links, err := shard.EnumAll(context.Background())
	assert.NoError(t, err)

	childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
	//internalNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight-1)))
	//totalNodes := childNodes + internalNodes
	assert.Equal(t, childNodes, len(links))
}
3 changes: 3 additions & 0 deletions internal/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package internal holds shared mutable configuration for the go-unixfs
// packages; keeping it internal restricts who can modify it.
package internal

// HAMTHashFunction hashes a key into the byte string that determines its
// position in a HAMT. The hamt package's init sets it to Murmur3; it is a
// variable (rather than a fixed function) only so tests can substitute a
// different hash, e.g. an identity function.
var HAMTHashFunction func(val []byte) []byte
30 changes: 17 additions & 13 deletions io/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@ package io
import (
"context"
"fmt"
mdag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/hamt"
"os"

"github.com/ipfs/go-unixfs/hamt"
"github.com/ipfs/go-unixfs/private/linksize"

"github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log"
mdag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
)

var log = logging.Logger("unixfs")
Expand Down Expand Up @@ -79,7 +81,9 @@ func productionLinkSize(linkName string, linkCid cid.Cid) int {
return len(linkName) + linkCid.ByteLen()
}

var estimatedLinkSize = productionLinkSize
// init publishes the production link-size estimator through the private
// linksize package so other packages (and tests) share a single,
// swappable size function without creating an import cycle.
func init() {
	linksize.LinkSizeFunction = productionLinkSize
}

// BasicDirectory is the basic implementation of `Directory`. All the entries
// are stored in a single node.
Expand Down Expand Up @@ -191,11 +195,11 @@ func (d *BasicDirectory) computeEstimatedSize() {
}

func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) {
d.estimatedSize += estimatedLinkSize(name, linkCid)
d.estimatedSize += linksize.LinkSizeFunction(name, linkCid)
}

func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) {
d.estimatedSize -= estimatedLinkSize(name, linkCid)
d.estimatedSize -= linksize.LinkSizeFunction(name, linkCid)
if d.estimatedSize < 0 {
// Something has gone very wrong. Log an error and recompute the
// size from scratch.
Expand Down Expand Up @@ -232,10 +236,10 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node
if err != nil {
return false, err
}
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
}
if nodeToAdd != nil {
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
}

return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil
Expand Down Expand Up @@ -461,11 +465,11 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err
}

func (d *HAMTDirectory) addToSizeChange(name string, linkCid cid.Cid) {
d.sizeChange += estimatedLinkSize(name, linkCid)
d.sizeChange += linksize.LinkSizeFunction(name, linkCid)
}

func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) {
d.sizeChange -= estimatedLinkSize(name, linkCid)
d.sizeChange -= linksize.LinkSizeFunction(name, linkCid)
}

// Evaluate a switch from HAMTDirectory to BasicDirectory in case the size will
Expand All @@ -488,12 +492,12 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string
if err != nil {
return false, err
}
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
}

// For the AddEntry case compute the size addition of the new entry.
if nodeToAdd != nil {
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
}

if d.sizeChange+operationSizeChange >= 0 {
Expand Down Expand Up @@ -530,7 +534,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int)
return false, linkResult.Err
}

partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid)
partialSize += linksize.LinkSizeFunction(linkResult.Link.Name, linkResult.Link.Cid)
if partialSize+sizeChange >= HAMTShardingSize {
// We have already fetched enough shards to assert we are
// above the threshold, so no need to keep fetching.
Expand Down
26 changes: 14 additions & 12 deletions io/directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ import (
mdtest "github.com/ipfs/go-merkledag/test"

ft "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/hamt"
"github.com/ipfs/go-unixfs/internal"
"github.com/ipfs/go-unixfs/private/completehamt"
"github.com/ipfs/go-unixfs/private/linksize"

"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -134,15 +136,15 @@ func TestHAMTDirectory_sizeChange(t *testing.T) {
func fullSizeEnumeration(dir Directory) int {
size := 0
dir.ForEachLink(context.Background(), func(l *ipld.Link) error {
size += estimatedLinkSize(l.Name, l.Cid)
size += linksize.LinkSizeFunction(l.Name, l.Cid)
return nil
})
return size
}

func testDirectorySizeEstimation(t *testing.T, dir Directory, ds ipld.DAGService, size func(Directory) int) {
estimatedLinkSize = mockLinkSizeFunc(1)
defer func() { estimatedLinkSize = productionLinkSize }()
linksize.LinkSizeFunction = mockLinkSizeFunc(1)
defer func() { linksize.LinkSizeFunction = productionLinkSize }()

ctx := context.Background()
child := ft.EmptyFileNode()
Expand Down Expand Up @@ -241,8 +243,8 @@ func TestUpgradeableDirectorySwitch(t *testing.T) {
oldHamtOption := HAMTShardingSize
defer func() { HAMTShardingSize = oldHamtOption }()
HAMTShardingSize = 0 // Disable automatic switch at the start.
estimatedLinkSize = mockLinkSizeFunc(1)
defer func() { estimatedLinkSize = productionLinkSize }()
linksize.LinkSizeFunction = mockLinkSizeFunc(1)
defer func() { linksize.LinkSizeFunction = productionLinkSize }()

ds := mdtest.Mock()
dir := NewDirectory(ds)
Expand Down Expand Up @@ -327,15 +329,15 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) {
// Set all link sizes to a uniform 1 so the estimated directory size
// is just the count of its entry links (in HAMT/Shard terminology these
// are the "value" links pointing to anything that is *not* another Shard).
estimatedLinkSize = mockLinkSizeFunc(1)
defer func() { estimatedLinkSize = productionLinkSize }()
linksize.LinkSizeFunction = mockLinkSizeFunc(1)
defer func() { linksize.LinkSizeFunction = productionLinkSize }()

// Use an identity hash function to ease the construction of "complete" HAMTs
// (see CreateCompleteHAMT below for more details). (Ideally this should be
// a parameter we pass and not a global option we modify in the caller.)
oldHashFunc := hamt.HAMTHashFunction
defer func() { hamt.HAMTHashFunction = oldHashFunc }()
hamt.HAMTHashFunction = hamt.IdHash
oldHashFunc := internal.HAMTHashFunction
defer func() { internal.HAMTHashFunction = oldHashFunc }()
internal.HAMTHashFunction = completehamt.IdHash

oldHamtOption := HAMTShardingSize
defer func() { HAMTShardingSize = oldHamtOption }()
Expand All @@ -358,7 +360,7 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) {
// will need to fetch in order to reach the HAMTShardingSize threshold in
// sizeBelowThreshold (assuming a sequential DAG walk function).
ds := mdtest.Mock()
completeHAMTRoot, err := hamt.CreateCompleteHAMT(ds, treeHeight, shardWidth)
completeHAMTRoot, err := completehamt.CreateCompleteHAMT(ds, treeHeight, shardWidth)
assert.NoError(t, err)

// With this structure and a BFS traversal (from `parallelWalkDepth`) then
Expand Down
Loading