From e10d379e67b5837ca4f1d9dc57607b01fb0b5074 Mon Sep 17 00:00:00 2001 From: Anjor Kanekar Date: Wed, 9 Oct 2024 06:00:02 +0100 Subject: [PATCH] sort car files (#166) --- .gitignore | 2 ++ cmd-car-split.go | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/.gitignore b/.gitignore index 9bac135a..f8f31572 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ _site /.cargo /target + +.zed diff --git a/cmd-car-split.go b/cmd-car-split.go index 5097fcd1..ef7c086c 100644 --- a/cmd-car-split.go +++ b/cmd-car-split.go @@ -12,6 +12,7 @@ import ( "io/fs" "os" "path/filepath" + "sort" "strconv" "github.com/anjor/carlet" @@ -530,3 +531,75 @@ func readHeader(streamBuf *bufio.Reader) ([]byte, int64, error) { return headerBuf.Bytes(), streamLen, nil } + +func SortCarFiles(carFiles []string) ([]string, error) { + type carFileInfo struct { + path string + firstSlot int64 + } + + var fileInfos []carFileInfo + + for _, path := range carFiles { + file, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open CAR file %s: %w", path, err) + } + defer file.Close() + + // Create a new CarReader + cr, err := carreader.New(file) + if err != nil { + return nil, fmt.Errorf("failed to create CarReader for %s: %w", path, err) + } + + // Get the root CID + if len(cr.Header.Roots) != 1 { + return nil, fmt.Errorf("expected 1 root CID, got %d in file %s", len(cr.Header.Roots), path) + } + rootCid := cr.Header.Roots[0] + + // Read nodes until we find the one matching the root CID + var subset *ipldbindcode.Subset + for { + c, _, blockData, err := cr.NextNodeBytes() + if err != nil { + if err == io.EOF { + return nil, fmt.Errorf("reached end of file without finding root node in %s", path) + } + return nil, fmt.Errorf("failed to read node in file %s: %w", path, err) + } + + if c == rootCid { + // Parse the block as a Subset object + subset, err = iplddecoders.DecodeSubset(blockData) + if err != nil { + return nil, fmt.Errorf("failed to decode Subset from block in file %s: %w", path, err) + } + break + } + } + + if subset == nil { + return nil, fmt.Errorf("failed to find root node in file %s", path) + } + + fileInfos = append(fileInfos, carFileInfo{ + path: path, + firstSlot: int64(subset.First), + }) + } + + // Sort the file infos based on the firstSlot + sort.Slice(fileInfos, func(i, j int) bool { + return fileInfos[i].firstSlot < fileInfos[j].firstSlot + }) + + // Extract the sorted file paths + sortedFiles := make([]string, len(fileInfos)) + for i, info := range fileInfos { + sortedFiles[i] = info.path + } + + return sortedFiles, nil +}