This repository has been archived by the owner on Jun 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
feat: switch to HAMT based on size #91
Merged
Merged
Changes from 1 commit
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
51ba1e2
feat: switch to HAMT based on size
schomatis 7237e57
fix doc
schomatis 0d6f81a
fix: make basic dir constructors private
schomatis 2f8cfa7
fix: if underflow log instead of panic
schomatis 13a7f61
fix: don't ignore RemoveChild error
schomatis 53f2df1
fix: resolve fixmes
schomatis 06ef3e8
fix: sort imports
schomatis 6a50780
fix: use (Cid).ByteLen()
schomatis 527eae4
fix: estimatedSize comment
schomatis File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next
Next commit
feat: switch to HAMT based on size
- Loading branch information
commit 51ba1e2de961619a5f9eafbba62bb867549215d0
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,13 @@ import ( | |
ipld "github.com/ipfs/go-ipld-format" | ||
) | ||
|
||
// UseHAMTSharding is a global flag that signifies whether or not to use the | ||
// HAMT sharding scheme for directory creation | ||
var UseHAMTSharding = false | ||
// UseHAMTSharding is a global option that allows switching to a HAMTDirectory | ||
// when the BasicDirectory grows above the size (in bytes) signalled by this | ||
// flag. The default size of 0 disables the option. | ||
// The size is not the *exact* block size of the encoded BasicDirectory but just | ||
// the estimated size based byte length of links name and CID (BasicDirectory's | ||
// ProtoNode doesn't use the Data field so this estimate is pretty accurate). | ||
var HAMTShardingSize = 0 | ||
|
||
// DefaultShardWidth is the default value used for hamt sharding width. | ||
var DefaultShardWidth = 256 | ||
|
@@ -72,6 +76,12 @@ type Directory interface { | |
type BasicDirectory struct { | ||
node *mdag.ProtoNode | ||
dserv ipld.DAGService | ||
|
||
// Internal variable used to cache the estimated size used for the | ||
// HAMTShardingSize option. We maintain this value even if the | ||
// HAMTShardingSize is off since potentially the option could be activated | ||
// on the fly. | ||
estimatedSize int | ||
} | ||
|
||
// HAMTDirectory is the HAMT implementation of `Directory`. | ||
|
@@ -81,26 +91,29 @@ type HAMTDirectory struct { | |
dserv ipld.DAGService | ||
} | ||
|
||
func NewEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory { | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return NewBasicDirectoryFromNode(dserv, format.EmptyDirNode()) | ||
} | ||
|
||
func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory { | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
basicDir := new(BasicDirectory) | ||
basicDir.node = node | ||
basicDir.dserv = dserv | ||
|
||
// Scan node links (if any) to restore estimated size. | ||
basicDir.ForEachLink(nil, func(l *ipld.Link) error { | ||
basicDir.addToEstimatedSize(l.Name, l.Cid) | ||
return nil | ||
}) | ||
return basicDir | ||
} | ||
|
||
// NewDirectory returns a Directory that can either be a HAMTDirectory if the | ||
// UseHAMTSharding is set, or otherwise an UpgradeableDirectory containing a | ||
// BasicDirectory that can be converted to a HAMTDirectory if the option is | ||
// set in the future. | ||
func NewDirectory(dserv ipld.DAGService) Directory { | ||
if UseHAMTSharding { | ||
dir := new(HAMTDirectory) | ||
s, err := hamt.NewShard(dserv, DefaultShardWidth) | ||
if err != nil { | ||
panic(err) // will only panic if DefaultShardWidth is a bad value | ||
} | ||
dir.shard = s | ||
dir.dserv = dserv | ||
return dir | ||
} | ||
|
||
basicDir := new(BasicDirectory) | ||
basicDir.node = format.EmptyDirNode() | ||
basicDir.dserv = dserv | ||
return &UpgradeableDirectory{basicDir} | ||
return &UpgradeableDirectory{NewEmptyBasicDirectory(dserv)} | ||
} | ||
|
||
// ErrNotADir implies that the given node was not a unixfs directory | ||
|
@@ -121,10 +134,7 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err | |
|
||
switch fsNode.Type() { | ||
case format.TDirectory: | ||
return &BasicDirectory{ | ||
dserv: dserv, | ||
node: protoBufNode.Copy().(*mdag.ProtoNode), | ||
}, nil | ||
return NewBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode)), nil | ||
case format.THAMTShard: | ||
shard, err := hamt.NewHamtFromDag(dserv, node) | ||
if err != nil { | ||
|
@@ -139,6 +149,19 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err | |
return nil, ErrNotADir | ||
} | ||
|
||
func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) { | ||
d.estimatedSize += len(name) + len(linkCid.Bytes()) | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// FIXME: Ideally we may want to track the Link size as well but it is | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's remove this FIXME, we don't want someone to actually come along and fix this in code (it's a spec change, effectively. |
||
// minor in comparison with the other two. | ||
} | ||
|
||
func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { | ||
d.estimatedSize -= len(name) + len(linkCid.Bytes()) | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if d.estimatedSize < 0 { | ||
panic("BasicDirectory's estimatedSize went below 0") | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
// SetCidBuilder implements the `Directory` interface. | ||
func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { | ||
d.node.SetCidBuilder(builder) | ||
|
@@ -147,10 +170,15 @@ func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) { | |
// AddChild implements the `Directory` interface. It adds (or replaces) | ||
// a link to the given `node` under `name`. | ||
func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.Node) error { | ||
d.node.RemoveNodeLink(name) | ||
// Remove old link (if it existed), don't check a potential `ErrNotFound`. | ||
d.RemoveChild(ctx, name) | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return d.node.AddNodeLink(name, node) | ||
err := d.node.AddNodeLink(name, node) | ||
if err != nil { | ||
return err | ||
} | ||
d.addToEstimatedSize(name, node.Cid()) | ||
return nil | ||
} | ||
|
||
// EnumLinksAsync returns a channel which will receive Links in the directory | ||
|
@@ -203,11 +231,26 @@ func (d *BasicDirectory) Find(ctx context.Context, name string) (ipld.Node, erro | |
|
||
// RemoveChild implements the `Directory` interface. | ||
func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { | ||
err := d.node.RemoveNodeLink(name) | ||
// We need to *retrieve* the link before removing it to update the estimated | ||
// size. | ||
// FIXME: If this is too much of a potential penalty we could leave a fixed | ||
// CID size estimation based on the most common one used (normally SHA-256). | ||
// Alternatively we could add a GetAndRemoveLink method in `merkledag` to | ||
// iterate node links slice only once. | ||
link, err := d.node.GetNodeLink(name) | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err == mdag.ErrLinkNotFound { | ||
err = os.ErrNotExist | ||
return os.ErrNotExist | ||
} | ||
return err | ||
if err != nil { | ||
return err // at the moment there is no other error besides ErrLinkNotFound | ||
} | ||
|
||
// The name actually existed so we should update the estimated size. | ||
d.removeFromEstimatedSize(link.Name, link.Cid) | ||
|
||
return d.node.RemoveNodeLink(name) | ||
// GetNodeLink didn't return ErrLinkNotFound so this won't fail with that | ||
// and we don't need to convert the error again. | ||
} | ||
|
||
// GetNode implements the `Directory` interface. | ||
|
@@ -309,15 +352,31 @@ var _ Directory = (*UpgradeableDirectory)(nil) | |
// AddChild implements the `Directory` interface. We check when adding new entries | ||
// if we should switch to HAMTDirectory according to global option(s). | ||
func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { | ||
if UseHAMTSharding { | ||
if basicDir, ok := d.Directory.(*BasicDirectory); ok { | ||
hamtDir, err := basicDir.SwitchToSharding(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
d.Directory = hamtDir | ||
err := d.Directory.AddChild(ctx, name, nd) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Evaluate possible HAMT upgrade. | ||
if HAMTShardingSize == 0 { | ||
return nil | ||
} | ||
basicDir, ok := d.Directory.(*BasicDirectory) | ||
if !ok { | ||
return nil | ||
} | ||
if basicDir.estimatedSize >= HAMTShardingSize { | ||
// FIXME: Ideally to minimize performance we should check if this last | ||
// `AddChild` call would bring the directory size over the threshold | ||
// *before* executing it since we would end up switching anyway and | ||
// that call would be "wasted". This is a minimal performance impact | ||
// and we prioritize a simple code base. | ||
schomatis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
hamtDir, err := basicDir.SwitchToSharding(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
d.Directory = hamtDir | ||
} | ||
|
||
return d.Directory.AddChild(ctx, name, nd) | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's:
This "estimate" needs to accurately follow the algorithm to achieve convergence. While this is more of a "SHOULD" from a spec standpoint, I'd like to be as accurate as possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The full documentation is in the exported
HAMTShardingSize
(referenced here) which is what the user has access to; this is just an internal cache. I'd like to retain this name (or something similar to this effect) to better represent what we want to achieve (estimation of directory size) rather than the how (traversing links).Also not following the talk about a spec (even if figuratively) and a reference to an algorithm which I'm not sure what part of the code it is designating (is it the
addToEstimatedSize
/removeFromEstimatedSize
functions?).All this said, if this is a blocking change feel free to push any documentation you want or rename however you prefer. Not very opinionated on this and it won't change how this PR works anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess I just want something to indicate that this isn't just some random estimate that someone can just change, but that's not critical or merge blocking.