Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests to verify CARv2 blockstore doesn't put duplicates and has the required de-duped blocks #125

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions v2/blockstore/readwrite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,33 @@ import (
"context"
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"reflect"
"sync"
"testing"
"time"

"github.com/ipfs/go-blockservice"
ds "github.com/ipfs/go-datastore"
dssync "github.com/ipfs/go-datastore/sync"
bstore "github.com/ipfs/go-ipfs-blockstore"
chunk "github.com/ipfs/go-ipfs-chunker"
offline "github.com/ipfs/go-ipfs-exchange-offline"
files "github.com/ipfs/go-ipfs-files"
ipldformat "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-merkledag"
"github.com/ipfs/go-unixfs/importer/balanced"
"github.com/ipfs/go-unixfs/importer/helpers"
"github.com/ipld/go-car"
carv2 "github.com/ipld/go-car/v2"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/traversal/selector"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
"github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"

"github.com/ipld/go-car/v2/blockstore"

Expand All @@ -20,6 +39,9 @@ import (
"github.com/ipld/go-car/v2/internal/carv1"
)

// Parameters used by the tests in this file when importing a file into a UnixFS DAG.
const (
	// unixfsChunkSize is the size, in bytes, passed to the size splitter.
	unixfsChunkSize uint64 = 1 << 10
	// unixfsLinksPerLevel is the Maxlinks value passed to the DAG builder.
	unixfsLinksPerLevel = 1024
)

func TestBlockstore(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
Expand Down Expand Up @@ -211,3 +233,146 @@ func TestBlockstoreConcurrentUse(t *testing.T) {
}
wg.Wait()
}

// TestDeDup checks that the CARv2 file generated for a UnixFS DAG that has duplicate blocks does NOT
// itself contain duplicates, and that it contains exactly the de-duplicated set of blocks of that DAG.
func TestDeDup(t *testing.T) {
	// Generate a CARv2 file from "testdata/duplicate_blocks.txt" and also get the "inmemory" blockstore that was
	// used to create the UnixFS DAG in the first pass, when we didn't have its root.
	root, CARv2Path, inmemory := GenCARv2FromNormalFile(t, "testdata/duplicate_blocks.txt")
	require.NotEmpty(t, CARv2Path)
	defer os.Remove(CARv2Path)

	// Get a reader over the CARv1 payload of the CARv2 file
	// and iterate over that payload to ensure there are no duplicates in it.
	v2r, err := carv2.NewReaderMmap(CARv2Path)
	require.NoError(t, err)
	defer v2r.Close()
	cr, err := car.NewCarReader(v2r.CarV1Reader())
	require.NoError(t, err)
	seen := make(map[cid.Cid]struct{})
	for {
		b, err := cr.Next()
		if err == io.EOF {
			break
		}
		require.NoError(t, err)

		_, ok := seen[b.Cid()]
		// TODO This line fails as CARv2 has duplicate blocks.
		require.Falsef(t, ok, "already seen cid %s", b.Cid())
		seen[b.Cid()] = struct{}{}
	}

	// A CARv1 traversal over the UnixFS DAG using the inmemory blockstore will return all the de-duped blocks ->
	// should be the same as what the CARv1 reader above returned.
	seen2 := make(map[cid.Cid]struct{})
	var mu sync.Mutex
	ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
	sel := ssb.ExploreRecursive(selector.RecursionLimitNone(),
		ssb.ExploreAll(ssb.ExploreRecursiveEdge())).
		Node()

	sc := car.NewSelectiveCar(context.Background(), inmemory, []car.Dag{
		{
			Root:     root,
			Selector: sel,
		},
	})

	require.NoError(t, sc.Write(ioutil.Discard, func(b car.Block) error {
		// mu guards seen2 should the callback be invoked from another goroutine.
		mu.Lock()
		defer mu.Unlock()

		if _, ok := seen2[b.BlockCID]; ok {
			// Report the duplicate through the callback's own return value instead of
			// stashing it in a captured variable and carrying on with the traversal.
			return xerrors.Errorf("already seen cid %s", b.BlockCID)
		}

		seen2[b.BlockCID] = struct{}{}

		return nil
	}))

	// both maps should have the same blocks
	require.True(t, reflect.DeepEqual(seen, seen2))
}

// GenCARv2FromNormalFile imports a "normal" i.e. non-CAR file into a UnixFS DAG held in an in-memory
// blockstore in order to learn the DAG root, and then generates a CARv2 file for the same input with
// genWithCARv2Blockstore. It returns the DAG root, the path of the generated CARv2 file and the
// in-memory blockstore that holds the DAG from the first pass.
func GenCARv2FromNormalFile(t *testing.T, normalFilePath string) (root cid.Cid, carV2FilePath string, blockstore bstore.Blockstore) {
	src, err := os.Open(normalFilePath)
	require.NoError(t, err)
	input := files.NewReaderFile(src)

	// First pass: an in-memory blockstore wrapped in an offline block service.
	memStore := bstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore()))
	exchange := offline.Exchange(memStore)
	dagSvc := merkledag.NewDAGService(blockservice.New(memStore, exchange))

	// import to UnixFS
	buffered := ipldformat.NewBufferedDAG(context.Background(), dagSvc)
	cfg := helpers.DagBuilderParams{
		Maxlinks:   unixfsLinksPerLevel,
		RawLeaves:  true,
		CidBuilder: nil,
		Dagserv:    buffered,
	}

	splitter := chunk.NewSizeSplitter(input, int64(unixfsChunkSize))
	helper, err := cfg.New(splitter)
	require.NoError(t, err)

	rootNode, err := balanced.Layout(helper)
	require.NoError(t, err)

	require.NoError(t, buffered.Commit())
	require.NoError(t, input.Close())

	// Second pass: build the UnixFS DAG again, this time through a CARv2 read-write blockstore,
	// which is possible now that the root is known.
	rootCid := rootNode.Cid()
	generated := genWithCARv2Blockstore(t, normalFilePath, rootCid)

	return rootCid, generated, memStore
}

// genWithCARv2Blockstore imports the file at fPath into a UnixFS DAG whose blocks are written to a
// CARv2 read-write blockstore created over a new temporary file with the given root. It finalizes the
// blockstore, asserts that the root of the built DAG equals root, and returns the path of the
// temporary CARv2 file. The file is not removed here; that is left to the caller.
func genWithCARv2Blockstore(t *testing.T, fPath string, root cid.Cid) string {
	ctx := context.Background()
	// Only the name of the temporary file is needed; the handle is closed right away.
	tmp, err := os.CreateTemp("", "rand")
	require.NoError(t, err)
	require.NoError(t, tmp.Close())

	rw, err := blockstore.NewReadWrite(tmp.Name(), []cid.Cid{root})
	require.NoError(t, err)

	bsvc := blockservice.New(rw, offline.Exchange(rw))
	dag := merkledag.NewDAGService(bsvc)
	// import to UnixFS
	bufferedDS := ipldformat.NewBufferedDAG(ctx, dag)

	params := helpers.DagBuilderParams{
		Maxlinks:   unixfsLinksPerLevel,
		RawLeaves:  true,
		CidBuilder: nil,
		Dagserv:    bufferedDS,
	}

	f, err := os.Open(fPath)
	require.NoError(t, err)
	// Release the input file on every exit path, including a failed require below.
	// The file is only read, so the Close error carries no useful information.
	defer f.Close()

	db, err := params.New(chunk.NewSizeSplitter(f, int64(unixfsChunkSize)))
	require.NoError(t, err)

	// TODO: The below lines fail with "not found".
	nd, err := balanced.Layout(db)
	require.NoError(t, err)

	err = bufferedDS.Commit()
	require.NoError(t, err)

	require.NoError(t, rw.Finalize())
	require.Equal(t, root, nd.Cid())

	// return the path of the CARv2 file.
	return tmp.Name()
}
1 change: 1 addition & 0 deletions v2/blockstore/testdata/duplicate_blocks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

22 changes: 21 additions & 1 deletion v2/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,37 @@ module github.com/ipld/go-car/v2
go 1.15

require (
github.com/davidlazar/go-crypto v0.0.0-20190912175916-7055855a373f // indirect
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f // indirect
github.com/ipfs/go-block-format v0.0.3
github.com/ipfs/go-blockservice v0.1.4-0.20200624145336-a978cec6e834
github.com/ipfs/go-cid v0.0.7
github.com/ipfs/go-datastore v0.4.5
github.com/ipfs/go-ipfs-blockstore v1.0.3
github.com/ipfs/go-ipfs-chunker v0.0.5
github.com/ipfs/go-ipfs-exchange-offline v0.0.1
github.com/ipfs/go-ipfs-files v0.0.8
github.com/ipfs/go-ipld-cbor v0.0.5
github.com/ipfs/go-ipld-format v0.2.0
github.com/ipfs/go-merkledag v0.3.2
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/ipfs/go-peertaskqueue v0.2.0 // indirect
github.com/ipfs/go-unixfs v0.2.6
github.com/ipld/go-car v0.3.1
github.com/ipld/go-ipld-prime v0.9.0
github.com/libp2p/go-libp2p v0.12.0 // indirect
github.com/libp2p/go-libp2p-record v0.1.1 // indirect
github.com/libp2p/go-sockaddr v0.1.0 // indirect
github.com/multiformats/go-multihash v0.0.15
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9
github.com/smartystreets/assertions v1.0.1 // indirect
github.com/stretchr/testify v1.7.0
github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11
github.com/whyrusleeping/cbor-gen v0.0.0-20210219115102-f37d292932f2 // indirect
golang.org/x/exp v0.0.0-20210615023648-acb5c1269671
golang.org/x/lint v0.0.0-20200130185559-910be7a94367 // indirect
golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
google.golang.org/protobuf v1.25.0 // indirect
gopkg.in/yaml.v2 v2.3.0 // indirect
)
Loading