From db3425341fcea51ecd1c697a264b0002ea1efe2b Mon Sep 17 00:00:00 2001 From: anjor Date: Fri, 7 Jul 2023 11:38:47 +0100 Subject: [PATCH 1/6] add docker file --- Dockerfile | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cd01a24 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM golang:1.19-alpine + +WORKDIR $GOPATH + +RUN go install github.com/anjor/go-fil-dataprep/cmd/data-prep@277ca0e7f83bb3ad3cd05d9e62e5d140fc409a51 + +RUN mkdir /app +WORKDIR /app + +ENTRYPOINT ["data-prep"] + From 2ce2f489b1b58764685f89ba82445039f29f982e Mon Sep 17 00:00:00 2001 From: Anjor Kanekar Date: Sat, 8 Jul 2023 22:32:33 +0100 Subject: [PATCH 2/6] fix wrap dir (#28) --- cmd/data-prep/fil-data-prep/fil-data-prep.go | 15 +++++++++------ cmd/data-prep/fil-data-prep/tree_utils.go | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cmd/data-prep/fil-data-prep/fil-data-prep.go b/cmd/data-prep/fil-data-prep/fil-data-prep.go index a100f7d..4a7cb6c 100644 --- a/cmd/data-prep/fil-data-prep/fil-data-prep.go +++ b/cmd/data-prep/fil-data-prep/fil-data-prep.go @@ -100,8 +100,11 @@ func filDataPrep(c *cli.Context) error { rcid = nodes[0].Cid() // use fake root directory if multiple args writeNode(nodes, wout) } else { - rcid = nodes[1].Cid() // otherwise use the first node (which should work) - writeNode(nodes[1:], wout) + path := paths[0] + splitPath := strings.Split(path, "/") + idx := len(splitPath) + rcid = nodes[idx].Cid() // otherwise use the first node (which should work) + writeNode(nodes[idx:], wout) } }() @@ -151,15 +154,15 @@ func filDataPrep(c *cli.Context) error { } func writeNode(nodes []*merkledag.ProtoNode, wout *io.PipeWriter) { - var cid, sizeVi []byte + var c, sizeVi []byte for _, nd := range nodes { - cid = []byte(nd.Cid().KeyString()) + c = []byte(nd.Cid().KeyString()) d := nd.RawData() - sizeVi = appendVarint(sizeVi[:0], uint64(len(cid))+uint64(len(d))) + sizeVi = appendVarint(sizeVi[:0], uint64(len(c))+uint64(len(d))) if _, err := wout.Write(sizeVi); err == nil { - if _, err := wout.Write(cid); err == nil { + if _, err := wout.Write(c); err == nil { if _, err := wout.Write(d); err != nil { fmt.Printf("failed to write car: %s\n", err) } diff --git a/cmd/data-prep/fil-data-prep/tree_utils.go b/cmd/data-prep/fil-data-prep/tree_utils.go index a70c667..7a58079 100644 --- a/cmd/data-prep/fil-data-prep/tree_utils.go +++ b/cmd/data-prep/fil-data-prep/tree_utils.go @@ -64,11 +64,11 @@ func (n *node) constructNode() { n.size = size } -func constructTree(paths []string, rs []roots) *node { +func constructTree(files []string, rs []roots) *node { root := newNode("root") - for i, path := range paths { - parts := strings.Split(path, "/") + for i, file := range files { + parts := strings.Split(file, "/") currentNode := root for _, part := range parts { From 234c8fbb60af2924f91a09613a4d92fdcae8a3b9 Mon Sep 17 00:00:00 2001 From: anjor Date: Sat, 8 Jul 2023 22:46:47 +0100 Subject: [PATCH 3/6] comments --- cmd/data-prep/fil-data-prep/fil-data-prep.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmd/data-prep/fil-data-prep/fil-data-prep.go b/cmd/data-prep/fil-data-prep/fil-data-prep.go index 4a7cb6c..f6c4538 100644 --- a/cmd/data-prep/fil-data-prep/fil-data-prep.go +++ b/cmd/data-prep/fil-data-prep/fil-data-prep.go @@ -97,13 +97,19 @@ func filDataPrep(c *cli.Context) error { nodes := getDirectoryNodes(tr) if len(nodes) == 1 || len(paths) > 1 { // len(nodes) = 1 means a file was passed as input - rcid = nodes[0].Cid() // use fake root directory if multiple args + // use fake root directory if multiple args. + // If there are nested paths it will wrap all the intermediate directories up in the fake root + rcid = nodes[0].Cid() writeNode(nodes, wout) } else { path := paths[0] + + // Need to do this to handle nested paths, where the root cid should be the actual final directory + // for example, if the input is /opt/data/data_dir, the root cid should correspond to data_dir and not to / splitPath := strings.Split(path, "/") idx := len(splitPath) - rcid = nodes[idx].Cid() // otherwise use the first node (which should work) + rcid = nodes[idx].Cid() + writeNode(nodes[idx:], wout) } }() From 7a5338c06903193e595bd3ce8687c2aed413d41b Mon Sep 17 00:00:00 2001 From: Jason Cihelka Date: Sun, 16 Jul 2023 15:01:31 -0700 Subject: [PATCH 4/6] patch: modify output filename prefix behavior (#30) Co-authored-by: Jason Cihelka --- README.md | 6 +++-- cmd/data-prep/fil-data-prep/fil-data-prep.go | 27 ++++++++++++------- .../split-and-commp/split-and-commp.go | 21 ++++++++++----- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 6c13bf1..3758c16 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ This command transforms data into a bunch of car files sized "correctly" (target provided as an input), calculates commP and saves all of this data in a metadata file. It also prints out the root cid for the IPLD dag to stdout. +The `--output` flag will optionally prefix resulting car filenames with the provided string + ``` $data-prep fil-data-prep --size 100000000000 --metadata meta.csv --output test 5gb-filecoin-payload.bin root cid = bafybeihsshuadcxukrkye76kfeci5mbs7v7o5iq32d2xhzygxnj6s7asw4 @@ -31,8 +33,8 @@ root cid = bafybeihsshuadcxukrkye76kfeci5mbs7v7o5iq32d2xhzygxnj6s7asw4 ``` $ cat meta.csv -timestamp,original data,car file,root_cid,piece cid,padded piece size -2023-05-10T13:04:47Z,test,test-baga6ea4seaqnxhbabidowdpd6pl3bombnh2jw3r2uu2s37ippoam5vergcxmyny-0.car,bafybeihsshuadcxukrkye76kfeci5mbs7v7o5iq32d2xhzygxnj6s7asw4,baga6ea4seaqnxhbabidowdpd6pl3bombnh2jw3r2uu2s37ippoam5vergcxmyny,8589934592 +timestamp,car file,root_cid,piece cid,padded piece size +2023-05-10T13:04:47Z,test-baga6ea4seaqnxhbabidowdpd6pl3bombnh2jw3r2uu2s37ippoam5vergcxmyny.car,bafybeihsshuadcxukrkye76kfeci5mbs7v7o5iq32d2xhzygxnj6s7asw4,baga6ea4seaqnxhbabidowdpd6pl3bombnh2jw3r2uu2s37ippoam5vergcxmyny,8589934592 ``` ### split-and-commp diff --git a/cmd/data-prep/fil-data-prep/fil-data-prep.go b/cmd/data-prep/fil-data-prep/fil-data-prep.go index f6c4538..1393a3b 100644 --- a/cmd/data-prep/fil-data-prep/fil-data-prep.go +++ b/cmd/data-prep/fil-data-prep/fil-data-prep.go @@ -4,17 +4,18 @@ import ( "encoding/csv" "encoding/json" "fmt" - "github.com/anjor/anelace" - "github.com/anjor/carlet" - "github.com/ipfs/go-cid" - "github.com/ipfs/go-merkledag" - "github.com/urfave/cli/v2" "io" "os" "strconv" "strings" "sync" "time" + + "github.com/anjor/anelace" + "github.com/anjor/carlet" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-merkledag" + "github.com/urfave/cli/v2" ) var Cmd = &cli.Command{ @@ -27,7 +28,7 @@ var Cmd = &cli.Command{ Name: "output", Aliases: []string{"o"}, Required: false, - Usage: "optional output name for car files. Defaults to filename (stdin if streamed in from stdin).", + Usage: "optional output filename prefix for car filename.", }, &cli.IntFlag{ Name: "size", @@ -118,10 +119,17 @@ func filDataPrep(c *cli.Context) error { m := c.String("metadata") s := c.Int("size") + var filenamePrefix string + if o != "" { + // Add a dash to separate prefix from filename + // note: we only do this when prefix specified, otherwise filename will begin with "-", which can cause problem with some fs operations as it is interpreted as a flag + filenamePrefix = fmt.Sprintf("%s-", o) + } + go func() { defer wg.Done() - carFiles, err := carlet.SplitAndCommp(rout, s, o) + carFiles, err := carlet.SplitAndCommp(rout, s, filenamePrefix) if err != nil { fmt.Printf("split and commp failed : %s\n", err) return @@ -134,7 +142,7 @@ func filDataPrep(c *cli.Context) error { return } w := csv.NewWriter(f) - err = w.Write([]string{"timestamp", "original data", "car file", "root_cid", "piece cid", "padded piece size"}) + err = w.Write([]string{"timestamp", "car file", "root_cid", "piece cid", "padded piece size"}) if err != nil { fmt.Printf("failed to write csv header\n") return @@ -143,8 +151,7 @@ func filDataPrep(c *cli.Context) error { for _, c := range carFiles { err = w.Write([]string{ time.Now().UTC().Format(time.RFC3339), - o, - c.CarName, + c.Name, rcid.String(), c.CommP.String(), strconv.FormatUint(c.PaddedSize, 10), diff --git a/cmd/data-prep/split-and-commp/split-and-commp.go b/cmd/data-prep/split-and-commp/split-and-commp.go index 49df864..1d4f5ff 100644 --- a/cmd/data-prep/split-and-commp/split-and-commp.go +++ b/cmd/data-prep/split-and-commp/split-and-commp.go @@ -2,12 +2,14 @@ package split_and_commp import ( "encoding/csv" - "github.com/anjor/carlet" - "github.com/urfave/cli/v2" + "fmt" "io" "os" "strconv" "time" + + "github.com/anjor/carlet" + "github.com/urfave/cli/v2" ) var Cmd = &cli.Command{ @@ -30,7 +32,7 @@ var splitAndCommpFlags = []cli.Flag{ Name: "output", Aliases: []string{"o"}, Required: true, - Usage: "optional output name for car files. Defaults to filename (stdin if streamed in from stdin).", + Usage: "optional output filename prefix for car files.", }, &cli.StringFlag{ Name: "metadata", @@ -52,7 +54,13 @@ func splitAndCommpAction(c *cli.Context) error { output := c.String("output") meta := c.String("metadata") - carFiles, err := carlet.SplitAndCommp(fi, size, output) + var filenamePrefix string + + if output != "" { + filenamePrefix = fmt.Sprintf("%s-", output) + } + + carFiles, err := carlet.SplitAndCommp(fi, size, filenamePrefix) if err != nil { return err } @@ -64,7 +72,7 @@ func splitAndCommpAction(c *cli.Context) error { } w := csv.NewWriter(f) - err = w.Write([]string{"timestamp", "original data", "car file", "piece cid", "padded piece size"}) + err = w.Write([]string{"timestamp", "car file", "piece cid", "padded piece size"}) if err != nil { return err } @@ -72,8 +80,7 @@ func splitAndCommpAction(c *cli.Context) error { for _, c := range carFiles { err = w.Write([]string{ time.Now().Format(time.RFC3339), - output, - c.CarName, + c.Name, c.CommP.String(), strconv.FormatUint(c.PaddedSize, 10), }) From bf61394b8cd7ddb05fb9c652e8c55312ada118e1 Mon Sep 17 00:00:00 2001 From: anjor Date: Wed, 19 Jul 2023 16:25:19 +0100 Subject: [PATCH 5/6] bump up carlet --- cmd/data-prep/go.mod | 2 +- cmd/data-prep/go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/data-prep/go.mod b/cmd/data-prep/go.mod index d5f741c..1991987 100644 --- a/cmd/data-prep/go.mod +++ b/cmd/data-prep/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/anjor/anelace v0.0.0-20230330084912-e7a70b075964 - github.com/anjor/carlet v0.0.0-20230606223552-f3beca82dfa0 + github.com/anjor/carlet v0.0.0-20230716220206-4f4b6bc5eed6 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-ipld-format v0.2.0 github.com/ipfs/go-merkledag v0.5.1 diff --git a/cmd/data-prep/go.sum b/cmd/data-prep/go.sum index d1f28ab..aa70a8e 100644 --- a/cmd/data-prep/go.sum +++ b/cmd/data-prep/go.sum @@ -31,6 +31,8 @@ github.com/anjor/carlet v0.0.0-20230511201044-6605618601a5 h1:fQ5/r5dju88baq1lqt github.com/anjor/carlet v0.0.0-20230511201044-6605618601a5/go.mod h1:sTpcN668YLQ1cvCT2RcIxifc/soFheI9wbNeDGCJX74= github.com/anjor/carlet v0.0.0-20230606223552-f3beca82dfa0 h1:o7qYsmyzBnQ072cDltv+xtz9Sqhi9pF/qLe11FM38PY= github.com/anjor/carlet v0.0.0-20230606223552-f3beca82dfa0/go.mod h1:sTpcN668YLQ1cvCT2RcIxifc/soFheI9wbNeDGCJX74= +github.com/anjor/carlet v0.0.0-20230716220206-4f4b6bc5eed6 h1:uLi1JJ/wPzj/YIG0HOZDfwpUDeh0zR4hXAQrl8zj//4= +github.com/anjor/carlet v0.0.0-20230716220206-4f4b6bc5eed6/go.mod h1:sTpcN668YLQ1cvCT2RcIxifc/soFheI9wbNeDGCJX74= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= From a77e61a91e9d30651fae37f04b1116785ee085fc Mon Sep 17 00:00:00 2001 From: Anjor Kanekar Date: Sat, 27 Jul 2024 20:59:00 +0100 Subject: [PATCH 6/6] Update FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 51a3fe9..543aab3 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,6 +1,7 @@ # These are supported funding model platforms github: anjor +buy_me_a_coffee: anjor # patreon: # Replace with a single Patreon username # open_collective: # Replace with a single Open Collective username # ko_fi: # Replace with a single Ko-fi username