@@ -14,9 +14,13 @@ import (
14
14
ipld "github.com/ipfs/go-ipld-format"
15
15
)
16
16
17
- // UseHAMTSharding is a global flag that signifies whether or not to use the
18
- // HAMT sharding scheme for directory creation
19
- var UseHAMTSharding = false
17
// HAMTShardingSize is a global option that allows switching to a
// HAMTDirectory once the estimated size (in bytes) of a BasicDirectory
// reaches the value of this variable. The default size of 0 disables the
// option.
// The size is not the *exact* block size of the encoded BasicDirectory but
// just an estimate based on the byte length of each link's name and CID
// (BasicDirectory's ProtoNode doesn't use the Data field so this estimate is
// pretty accurate).
var HAMTShardingSize = 0
20
24
21
25
// DefaultShardWidth is the default value used for hamt sharding width.
22
26
var DefaultShardWidth = 256
@@ -72,6 +76,12 @@ type Directory interface {
72
76
type BasicDirectory struct {
73
77
node * mdag.ProtoNode
74
78
dserv ipld.DAGService
79
+
80
+ // Internal variable used to cache the estimated size used for the
81
+ // HAMTShardingSize option. We maintain this value even if the
82
+ // HAMTShardingSize is off since potentially the option could be activated
83
+ // on the fly.
84
+ estimatedSize int
75
85
}
76
86
77
87
// HAMTDirectory is the HAMT implementation of `Directory`.
@@ -81,26 +91,29 @@ type HAMTDirectory struct {
81
91
dserv ipld.DAGService
82
92
}
83
93
94
+ func NewEmptyBasicDirectory (dserv ipld.DAGService ) * BasicDirectory {
95
+ return NewBasicDirectoryFromNode (dserv , format .EmptyDirNode ())
96
+ }
97
+
98
+ func NewBasicDirectoryFromNode (dserv ipld.DAGService , node * mdag.ProtoNode ) * BasicDirectory {
99
+ basicDir := new (BasicDirectory )
100
+ basicDir .node = node
101
+ basicDir .dserv = dserv
102
+
103
+ // Scan node links (if any) to restore estimated size.
104
+ basicDir .ForEachLink (nil , func (l * ipld.Link ) error {
105
+ basicDir .addToEstimatedSize (l .Name , l .Cid )
106
+ return nil
107
+ })
108
+ return basicDir
109
+ }
110
+
84
111
// NewDirectory returns a Directory that can either be a HAMTDirectory if the
85
112
// UseHAMTSharding is set, or otherwise an UpgradeableDirectory containing a
86
113
// BasicDirectory that can be converted to a HAMTDirectory if the option is
87
114
// set in the future.
88
115
func NewDirectory (dserv ipld.DAGService ) Directory {
89
- if UseHAMTSharding {
90
- dir := new (HAMTDirectory )
91
- s , err := hamt .NewShard (dserv , DefaultShardWidth )
92
- if err != nil {
93
- panic (err ) // will only panic if DefaultShardWidth is a bad value
94
- }
95
- dir .shard = s
96
- dir .dserv = dserv
97
- return dir
98
- }
99
-
100
- basicDir := new (BasicDirectory )
101
- basicDir .node = format .EmptyDirNode ()
102
- basicDir .dserv = dserv
103
- return & UpgradeableDirectory {basicDir }
116
+ return & UpgradeableDirectory {NewEmptyBasicDirectory (dserv )}
104
117
}
105
118
106
119
// ErrNotADir implies that the given node was not a unixfs directory
@@ -121,10 +134,7 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err
121
134
122
135
switch fsNode .Type () {
123
136
case format .TDirectory :
124
- return & BasicDirectory {
125
- dserv : dserv ,
126
- node : protoBufNode .Copy ().(* mdag.ProtoNode ),
127
- }, nil
137
+ return NewBasicDirectoryFromNode (dserv , protoBufNode .Copy ().(* mdag.ProtoNode )), nil
128
138
case format .THAMTShard :
129
139
shard , err := hamt .NewHamtFromDag (dserv , node )
130
140
if err != nil {
@@ -139,6 +149,19 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err
139
149
return nil , ErrNotADir
140
150
}
141
151
152
+ func (d * BasicDirectory ) addToEstimatedSize (name string , linkCid cid.Cid ) {
153
+ d .estimatedSize += len (name ) + len (linkCid .Bytes ())
154
+ // FIXME: Ideally we may want to track the Link size as well but it is
155
+ // minor in comparison with the other two.
156
+ }
157
+
158
+ func (d * BasicDirectory ) removeFromEstimatedSize (name string , linkCid cid.Cid ) {
159
+ d .estimatedSize -= len (name ) + len (linkCid .Bytes ())
160
+ if d .estimatedSize < 0 {
161
+ panic ("BasicDirectory's estimatedSize went below 0" )
162
+ }
163
+ }
164
+
142
165
// SetCidBuilder implements the `Directory` interface.
143
166
func (d * BasicDirectory ) SetCidBuilder (builder cid.Builder ) {
144
167
d .node .SetCidBuilder (builder )
@@ -147,10 +170,15 @@ func (d *BasicDirectory) SetCidBuilder(builder cid.Builder) {
147
170
// AddChild implements the `Directory` interface. It adds (or replaces)
148
171
// a link to the given `node` under `name`.
149
172
func (d * BasicDirectory ) AddChild (ctx context.Context , name string , node ipld.Node ) error {
150
- d .node .RemoveNodeLink (name )
151
173
// Remove old link (if it existed), don't check a potential `ErrNotFound`.
174
+ d .RemoveChild (ctx , name )
152
175
153
- return d .node .AddNodeLink (name , node )
176
+ err := d .node .AddNodeLink (name , node )
177
+ if err != nil {
178
+ return err
179
+ }
180
+ d .addToEstimatedSize (name , node .Cid ())
181
+ return nil
154
182
}
155
183
156
184
// EnumLinksAsync returns a channel which will receive Links in the directory
@@ -203,11 +231,26 @@ func (d *BasicDirectory) Find(ctx context.Context, name string) (ipld.Node, erro
203
231
204
232
// RemoveChild implements the `Directory` interface.
205
233
func (d * BasicDirectory ) RemoveChild (ctx context.Context , name string ) error {
206
- err := d .node .RemoveNodeLink (name )
234
+ // We need to *retrieve* the link before removing it to update the estimated
235
+ // size.
236
+ // FIXME: If this is too much of a potential penalty we could leave a fixed
237
+ // CID size estimation based on the most common one used (normally SHA-256).
238
+ // Alternatively we could add a GetAndRemoveLink method in `merkledag` to
239
+ // iterate node links slice only once.
240
+ link , err := d .node .GetNodeLink (name )
207
241
if err == mdag .ErrLinkNotFound {
208
- err = os .ErrNotExist
242
+ return os .ErrNotExist
209
243
}
210
- return err
244
+ if err != nil {
245
+ return err // at the moment there is no other error besides ErrLinkNotFound
246
+ }
247
+
248
+ // The name actually existed so we should update the estimated size.
249
+ d .removeFromEstimatedSize (link .Name , link .Cid )
250
+
251
+ return d .node .RemoveNodeLink (name )
252
+ // GetNodeLink didn't return ErrLinkNotFound so this won't fail with that
253
+ // and we don't need to convert the error again.
211
254
}
212
255
213
256
// GetNode implements the `Directory` interface.
@@ -309,15 +352,31 @@ var _ Directory = (*UpgradeableDirectory)(nil)
309
352
// AddChild implements the `Directory` interface. We check when adding new entries
310
353
// if we should switch to HAMTDirectory according to global option(s).
311
354
func (d * UpgradeableDirectory ) AddChild (ctx context.Context , name string , nd ipld.Node ) error {
312
- if UseHAMTSharding {
313
- if basicDir , ok := d .Directory .(* BasicDirectory ); ok {
314
- hamtDir , err := basicDir .SwitchToSharding (ctx )
315
- if err != nil {
316
- return err
317
- }
318
- d .Directory = hamtDir
355
+ err := d .Directory .AddChild (ctx , name , nd )
356
+ if err != nil {
357
+ return err
358
+ }
359
+
360
+ // Evaluate possible HAMT upgrade.
361
+ if HAMTShardingSize == 0 {
362
+ return nil
363
+ }
364
+ basicDir , ok := d .Directory .(* BasicDirectory )
365
+ if ! ok {
366
+ return nil
367
+ }
368
+ if basicDir .estimatedSize >= HAMTShardingSize {
369
+ // FIXME: Ideally to minimize performance we should check if this last
370
+ // `AddChild` call would bring the directory size over the threshold
371
+ // *before* executing it since we would end up switching anyway and
372
+ // that call would be "wasted". This is a minimal performance impact
373
+ // and we prioritize a simple code base.
374
+ hamtDir , err := basicDir .SwitchToSharding (ctx )
375
+ if err != nil {
376
+ return err
319
377
}
378
+ d .Directory = hamtDir
320
379
}
321
380
322
- return d . Directory . AddChild ( ctx , name , nd )
381
+ return nil
323
382
}
0 commit comments