Skip to content

feat: partial DAG provides with Reprovider.Strategy=mfs|pinned+mfs #10754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/node/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ func FetcherConfig(bs blockservice.BlockService) FetchersOut {
// path resolution should not fetch new blocks via exchange.
offlineBs := blockservice.New(bs.Blockstore(), offline.Exchange(bs.Blockstore()))
offlineIpldFetcher := bsfetcher.NewFetcherConfig(offlineBs)
offlineIpldFetcher.SkipNotFound = true // carries onto the UnixFSFetcher below
offlineIpldFetcher.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser)
offlineUnixFSFetcher := offlineIpldFetcher.WithReifier(unixfsnode.Reify)

Expand Down
88 changes: 60 additions & 28 deletions core/node/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (

"github.com/ipfs/boxo/blockstore"
"github.com/ipfs/boxo/fetcher"
"github.com/ipfs/boxo/mfs"
pin "github.com/ipfs/boxo/pinning/pinner"
provider "github.com/ipfs/boxo/provider"
"github.com/ipfs/go-cid"
"github.com/ipfs/kubo/repo"
irouting "github.com/ipfs/kubo/routing"
"go.uber.org/fx"
Expand Down Expand Up @@ -136,14 +138,8 @@ func OnlineProviders(useStrategicProviding bool, reprovideStrategy string, repro

var keyProvider fx.Option
switch reprovideStrategy {
case "all", "":
keyProvider = fx.Provide(newProvidingStrategy(false, false))
case "roots":
keyProvider = fx.Provide(newProvidingStrategy(true, true))
case "pinned":
keyProvider = fx.Provide(newProvidingStrategy(true, false))
case "flat":
keyProvider = fx.Provide(provider.NewBlockstoreProvider)
case "all", "", "roots", "pinned", "mfs", "pinned+mfs", "flat":
keyProvider = fx.Provide(newProvidingStrategy(reprovideStrategy))
default:
return fx.Error(fmt.Errorf("unknown reprovider strategy %q", reprovideStrategy))
}
Expand All @@ -159,32 +155,68 @@ func OfflineProviders() fx.Option {
return fx.Provide(provider.NewNoopProvider)
}

func newProvidingStrategy(onlyPinned, onlyRoots bool) interface{} {
func mfsProvider(mfsRoot *mfs.Root, fetcher fetcher.Factory) provider.KeyChanFunc {
return func(ctx context.Context) (<-chan cid.Cid, error) {
err := mfsRoot.FlushMemFree(ctx)
if err != nil {
return nil, fmt.Errorf("error flushing mfs, cannot provide MFS: %w", err)
}
rootNode, err := mfsRoot.GetDirectory().GetNode()
if err != nil {
return nil, fmt.Errorf("error loading mfs root, cannot provide MFS: %w", err)
}

kcf := provider.NewDAGProvider(rootNode.Cid(), fetcher)
return kcf(ctx)
}

}

func mfsRootProvider(mfsRoot *mfs.Root) provider.KeyChanFunc {
return func(ctx context.Context) (<-chan cid.Cid, error) {
rootNode, err := mfsRoot.GetDirectory().GetNode()
if err != nil {
return nil, fmt.Errorf("error loading mfs root, cannot provide MFS: %w", err)
}
ch := make(chan cid.Cid, 1)
ch <- rootNode.Cid()
close(ch)
return ch, nil
}
}

func newProvidingStrategy(strategy string) interface{} {
type input struct {
fx.In
Pinner pin.Pinner
Blockstore blockstore.Blockstore
IPLDFetcher fetcher.Factory `name:"ipldFetcher"`
Pinner pin.Pinner
Blockstore blockstore.Blockstore
OfflineIPLDFetcher fetcher.Factory `name:"offlineIpldFetcher"`
OfflineUnixFSFetcher fetcher.Factory `name:"offlineUnixfsFetcher"`
MFSRoot *mfs.Root
}
return func(in input) provider.KeyChanFunc {
// Pinner-related CIDs will be buffered in memory to avoid
// deadlocking the pinner when the providing process is slow.

if onlyRoots {
return provider.NewBufferedProvider(
provider.NewPinnedProvider(true, in.Pinner, in.IPLDFetcher),
switch strategy {
case "roots":
return provider.NewBufferedProvider(provider.NewPinnedProvider(true, in.Pinner, in.OfflineIPLDFetcher))
case "pinned":
return provider.NewBufferedProvider(provider.NewPinnedProvider(false, in.Pinner, in.OfflineIPLDFetcher))
case "pinned+mfs":
return provider.NewPrioritizedProvider(
provider.NewBufferedProvider(provider.NewPinnedProvider(false, in.Pinner, in.OfflineIPLDFetcher)),
mfsProvider(in.MFSRoot, in.OfflineUnixFSFetcher),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The downside here is that there is no strategy that only provide "recursive-pinned" content, and no mfs.

If we don't provide mfs here, there will be no policy that provides only "recursive-pinned+mfs" content... but it could be added as pinned+mfs or so...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MFS is implicitly pinned (protected from GC) so I think it is fine to keep it under pinned.
But things get hairy with roots (we dont have ability to only announce dir and file roots as we walk MFS, right?).

Maybe keeping pinned and roots as-is, and adding separate pinned+mfs for now is the right way of handling this? (we dont change behavior for existing users, and new users can choose pinned+mfs).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a problem to only announce the MFS-root? (consider it a big file)... Or you mean this could cause misunderstandings?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in the meantime I restored "pinned" and "roots" and added "pinned+mfs"

Copy link
Member

@lidel lidel Mar 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking:

  • if you pin something then that pin is usually recursive
    • pinned announces recursive pins by walking the entire DAG behind each recursively pinned CID
    • roots announces only root cids of pins (no DAG walk at all)
  • pinned+mfs should do the same as pinned – walk the MFS DAG and announce MFS recursively (only local blocks).
  • roots
    • easy: could also announce only MFS root (no need to create roots+mfs just for one CID, fine to announce MFS root with other pin roots)
    • harder: make MFS walk smart enough to only yield root CIDs of files and directories (imo this comes with extra overhead, and no longer can be roots and requires separate profile roots+mfs)

I think its fine to do "easy" roots for now – announce the MFS root for now (without walking dag and discovering sub-entity roots).
We can add roots+mfs if users ask for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could also announce only MFS root (no need to create roots+mfs just for one CID, fine to announce MFS root with other pin roots)

This is how it was.

)
}

if onlyPinned {
return provider.NewBufferedProvider(
provider.NewPinnedProvider(false, in.Pinner, in.IPLDFetcher),
case "mfs":
return mfsProvider(in.MFSRoot, in.OfflineUnixFSFetcher)
case "flat":
return provider.NewBlockstoreProvider(in.Blockstore)
default: // "all", ""
return provider.NewPrioritizedProvider(
provider.NewPrioritizedProvider(
provider.NewBufferedProvider(provider.NewPinnedProvider(true, in.Pinner, in.OfflineIPLDFetcher)),
mfsRootProvider(in.MFSRoot),
),
provider.NewBlockstoreProvider(in.Blockstore),
)
}

return provider.NewPrioritizedProvider(
provider.NewBufferedProvider(provider.NewPinnedProvider(true, in.Pinner, in.IPLDFetcher)),
provider.NewBlockstoreProvider(in.Blockstore),
)
}
}
11 changes: 11 additions & 0 deletions docs/changelogs/v0.35.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This release was brought to you by the [Shipyard](http://ipshipyard.com/) team.

- [Overview](#overview)
- [🔦 Highlights](#-highlights)
- [Dedicated `Reprovider.Strategy` for MFS](#dedicated-reproviderstrategy-for-mfs)
- [📦️ Important dependency updates](#-important-dependency-updates)
- [📝 Changelog](#-changelog)
- [👨‍👩‍👧‍👦 Contributors](#-contributors)
Expand All @@ -18,6 +19,16 @@ This release was brought to you by the [Shipyard](http://ipshipyard.com/) team.

### 🔦 Highlights

#### Dedicated `Reprovider.Strategy` for MFS

The [Mutable File System (MFS)](https://docs.ipfs.tech/concepts/glossary/#mfs) in Kubo is a UnixFS filesystem managed with [`ipfs files`](https://docs.ipfs.tech/reference/kubo/cli/#ipfs-files) commands. It supports familiar file operations like cp and mv within a folder-tree structure, automatically updating a MerkleDAG and a "root CID" that reflects the current MFS state. Files in MFS are protected from garbage collection, offering a simpler alternative to `ipfs pin`. This makes it a popular choice for tools like [IPFS Desktop](https://docs.ipfs.tech/install/ipfs-desktop/) and the [WebUI](https://github.com/ipfs/ipfs-webui/#readme).

Previously, the `pinned` reprovider strategy required manual pin management: each dataset update meant pinning the new version and unpinning the old one. Now, new strategies—`mfs` and `pinned+mfs`—let users limit announcements to data explicitly placed in MFS. This simplifies updating datasets and announcing only the latest version to the Amino DHT.

Users relying on the `pinned` strategy can switch to `pinned+mfs` and use MFS alone to manage updates and announcements, eliminating the need for manual pinning and unpinning. We hope this makes it easier to publish just the data that matters to you.

See [`Reprovider.Strategy`](https://github.com/ipfs/kubo/blob/master/docs/config.md#reproviderstrategy) for more details.

##### `Routing.IgnoreProviders`

This new option allows ignoring specific peer IDs when returned by the content
Expand Down
13 changes: 9 additions & 4 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -1587,21 +1587,26 @@ Type: `optionalDuration` (unset for the default)
Tells reprovider what should be announced. Valid strategies are:

- `"all"` - announce all CIDs of stored blocks
- Order: root blocks of direct and recursive pins are announced first, then the rest of blockstore
- `"pinned"` - only announce pinned CIDs recursively (both roots and child blocks)
- Order: root blocks of direct and recursive pins and MFS root are announced first, then the rest of blockstore
- `"pinned"` - only announce recursively pinned CIDs (`ipfs pin add -r`, both roots and child blocks)
- Order: root blocks of direct and recursive pins are announced first, then the child blocks of recursive pins
- `"roots"` - only announce the root block of explicitly pinned CIDs
- `"roots"` - only announce the root block of explicitly pinned CIDs (`ipfs pin add`)
- **⚠️ BE CAREFUL:** node with `roots` strategy will not announce child blocks.
It makes sense only for use cases where the entire DAG is fetched in full,
and a graceful resume does not have to be guaranteed: the lack of child
announcements means an interrupted retrieval won't be able to find
providers for the missing block in the middle of a file, unless the peer
happens to already be connected to a provider and ask for child CID over
bitswap.
- `"mfs"` - announce only the local CIDs that are part of the MFS (`ipfs files`)
- Note: MFS is lazy-loaded. Only the MFS blocks present in local datastore are announced.
- `"pinned+mfs"` - a combination of the `pinned` and `mfs` strategies.
- **ℹ️ NOTE:** This is the suggested strategy for users who run without GC and don't want to provide everything in cache.
- Order: first `pinned` and then the locally available part of `mfs`.
- `"flat"` - same as `all`, announce all CIDs of stored blocks, but without prioritizing anything.

> [!IMPORTANT]
> Reproviding larger pinsets using the `all`, `pinned`, or `roots` strategies requires additional memory, with an estimated ~1 GiB of RAM per 20 million items for reproviding to the Amino DHT.
> Reproviding larger pinsets using the `all`, `mfs`, `pinned`, `pinned+mfs` or `roots` strategies requires additional memory, with an estimated ~1 GiB of RAM per 20 million items for reproviding to the Amino DHT.
> This is due to the use of a buffered provider, which avoids holding a lock on the entire pinset during the reprovide cycle.
> The `flat` strategy can be used to lower memory requirements, but only recommended if memory utilization is too high, prioritization of pins is not necessary, and it is acceptable to announce every block cached in the local repository.

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go 1.24
replace github.com/ipfs/kubo => ./../../..

require (
github.com/ipfs/boxo v0.29.1
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb
github.com/ipfs/kubo v0.0.0-00010101000000-000000000000
github.com/libp2p/go-libp2p v0.41.1
github.com/multiformats/go-multiaddr v0.15.0
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/kubo-as-a-library/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc=
github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb h1:kA7c3CF6/d8tUwGJR/SwIfaRz7Xk7Fbyoh2ePZAFMlw=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ=
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ require (
github.com/hashicorp/go-version v1.7.0
github.com/ipfs-shipyard/nopfs v0.0.14
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0
github.com/ipfs/boxo v0.29.1
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb
github.com/ipfs/go-block-format v0.2.0
github.com/ipfs/go-cid v0.5.0
github.com/ipfs/go-cidutil v0.1.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc=
github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb h1:kA7c3CF6/d8tUwGJR/SwIfaRz7Xk7Fbyoh2ePZAFMlw=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ=
Expand Down
2 changes: 1 addition & 1 deletion test/dependencies/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ require (
github.com/huin/goupnp v1.3.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/ipfs/bbloom v0.0.4 // indirect
github.com/ipfs/boxo v0.29.1 // indirect
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb // indirect
github.com/ipfs/go-block-format v0.2.0 // indirect
github.com/ipfs/go-cid v0.5.0 // indirect
github.com/ipfs/go-datastore v0.8.2 // indirect
Expand Down
4 changes: 2 additions & 2 deletions test/dependencies/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.29.1 h1:z61ZT4YDfTHLjXTsu/+3wvJ8aJlExthDSOCpx6Nh8xc=
github.com/ipfs/boxo v0.29.1/go.mod h1:MkDJStXiJS9U99cbAijHdcmwNfVn5DKYBmQCOgjY2NU=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb h1:kA7c3CF6/d8tUwGJR/SwIfaRz7Xk7Fbyoh2ePZAFMlw=
github.com/ipfs/boxo v0.29.2-0.20250409154342-bbaf2e146dfb/go.mod h1:omQZmLS7LegSpBy3m4CrAB9/SO7Fq3pfv+5y1FOd+gI=
github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs=
github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM=
github.com/ipfs/go-cid v0.5.0 h1:goEKKhaGm0ul11IHA7I6p1GmKz8kEYniqFopaB5Otwg=
Expand Down
1 change: 1 addition & 0 deletions test/sharness/t0119-prometheus-data/prometheus_metrics
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
exchange_bitswap_requests_in_flight
exchange_bitswap_response_bytes_bucket
exchange_bitswap_response_bytes_count
exchange_bitswap_response_bytes_sum
Expand Down
Loading