From e2261a1bb41f126f6de5f6052301f2b0749a0caf Mon Sep 17 00:00:00 2001 From: Drew Stinnett Date: Thu, 31 Aug 2023 02:09:51 -0400 Subject: [PATCH 01/19] fs: Skip files named . during walk (#384) This happens from tar files being created in their target directory, apparently. Avoid infinite walk. Fix #383. * Cleaning up directories containing dots * Cleaning up some debug bits * More descriptive tests * Update fs.go Updating per suggestion, looks great! Co-authored-by: Matt Holt * Update fs.go --------- Co-authored-by: Matt Holt --- fs.go | 3 +++ fs_test.go | 29 +++++++++++++++++++++++++++++ testdata/self-tar.tar | Bin 0 -> 6144 bytes 3 files changed, 32 insertions(+) create mode 100644 testdata/self-tar.tar diff --git a/fs.go b/fs.go index a0ed59bc..eee7e607 100644 --- a/fs.go +++ b/fs.go @@ -567,6 +567,9 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { ) handler := func(_ context.Context, file File) error { file.NameInArchive = strings.Trim(file.NameInArchive, "/") + if file.NameInArchive == "." 
{ + return nil + } files = append(files, file) if file.NameInArchive == name && !file.IsDir() { foundFile = true diff --git a/fs_test.go b/fs_test.go index 6262b161..9180fbf3 100644 --- a/fs_test.go +++ b/fs_test.go @@ -8,6 +8,7 @@ import ( "io/fs" "log" "net/http" + "os" "path" "reflect" "sort" @@ -53,6 +54,34 @@ var ( unorderZip []byte ) +func TestSelfTar(t *testing.T) { + fn := "testdata/self-tar.tar" + fh, err := os.Open(fn) + if err != nil { + t.Fatalf("Could not load test tar: %v", fn) + } + fstat, err := os.Stat(fn) + if err != nil { + t.Fatalf("Could not stat test tar: %v", fn) + } + fsys := ArchiveFS{ + Stream: io.NewSectionReader(fh, 0, fstat.Size()), + Format: Tar{}, + } + var count int + err = fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { + if count > 10 { + t.Error("walking test tar appears to be recursing in error") + return fmt.Errorf("recursing tar: %v", fn) + } + count++ + return nil + }) + if err != nil { + t.Fatal(err) + } +} + func ExampleArchiveFS_Stream() { fsys := ArchiveFS{ Stream: io.NewSectionReader(bytes.NewReader(testZIP), 0, int64(len(testZIP))), diff --git a/testdata/self-tar.tar b/testdata/self-tar.tar new file mode 100644 index 0000000000000000000000000000000000000000..15c2b1d995a92ec62523001fc2f5f645a9052d74 GIT binary patch literal 6144 zcmeHL%T59@6s;!4h-*#U^$R*}r!B*R#Yjvr!KgqCu5bcHNgN(SQGUTcaPP+W0WSPH zJAi?p8HT4u+T>>Xxc9a_?U^|lR7POc1!J7#9D?vE22V^`zpcBluG^3>mb3-61?CVF zBDeq$uXkrluL92tump!C z>;rq+sJ6DYu>GPfMsbV9{Ln3oc-FQ*5wiUiu>S`FqfuAH2(>n;6GDWzv2A1G4xg4 zmuVtWww+=QC5U0+I1D>9!ILm|dhF&i+0&~N)bo@I&`)$tyO*xp^)s}6=Qr!6+XPF& zf)2`EM+J>>JhALMXY$2E09o<;7V( Date: Fri, 1 Sep 2023 09:56:34 -0600 Subject: [PATCH 02/19] Fix checkName for FileFS on Windows Using path.Base() doesn't parse correctly for Windows filepaths. Use filepath.Base() instead. 
--- fs.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs.go b/fs.go index eee7e607..2aa5737a 100644 --- a/fs.go +++ b/fs.go @@ -192,11 +192,15 @@ func (f FileFS) Stat(name string) (fs.FileInfo, error) { return os.Stat(f.Path) } +// checkName ensures the name is a valid path and also, in the case of +// the FileFS, that it is either ".", the filename originally passed in +// to create the FileFS, or the base of the filename (name without path). +// Other names do not make sense for a FileFS since the FS is only 1 file. func (f FileFS) checkName(name, op string) error { if !fs.ValidPath(name) { return &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} } - if name != "." && name != path.Base(f.Path) { + if name != "." && name != f.Path && name != filepath.Base(f.Path) { return &fs.PathError{Op: op, Path: name, Err: fs.ErrNotExist} } return nil From 24fa33e9b6a0b17e8418ffc90a94a06ab79bd5c2 Mon Sep 17 00:00:00 2001 From: Dan Garrick <59327926+dpgarrick@users.noreply.github.com> Date: Tue, 12 Sep 2023 14:00:37 +1200 Subject: [PATCH 03/19] zlib: More precise matching (#386) * demo bug * check 2 bytes of zlib header * add .zz test --- formats_test.go | 35 +++++++++++++++++++++++++++++++++++ zlib.go | 30 +++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/formats_test.go b/formats_test.go index 2531ceb1..106f48a0 100644 --- a/formats_test.go +++ b/formats_test.go @@ -3,6 +3,7 @@ package archiver import ( "bytes" "context" + "errors" "io" "io/fs" "math/rand" @@ -370,6 +371,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) { compressorName: "", wantFormatName: ".rar", }, + { + name: "should recognize zz", + openCompressionWriter: Zlib{}.OpenWriter, + content: []byte("this is text"), + compressorName: ".zz", + wantFormatName: ".zz", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -410,3 +418,30 @@ func TestIdentifyAndOpenZip(t *testing.T) { }) checkErr(t, err, 
"extracting zip") } + +func TestIdentifyASCIIFileStartingWithX(t *testing.T) { + // Create a temporary file starting with the letter 'x' + tmpFile, err := os.CreateTemp("", "TestIdentifyASCIIFileStartingWithX-tmp-*.txt") + if err != nil { + t.Fatalf("fail to create tmp test file for archive tests: err=%v", err) + } + + _, err = tmpFile.Write([]byte("xThis is a test file")) + if err != nil { + t.Fatalf("Failed to write to temp file: %v", err) + } + tmpFile.Close() + + // Open the file and use the Identify function + file, err := os.Open(tmpFile.Name()) + if err != nil { + t.Fatalf("Failed to open temp file: %v", err) + } + defer file.Close() + + _, _, err = Identify(tmpFile.Name(), file) + if !errors.Is(err, ErrNoMatch) { + t.Fatalf("Identify failed: %v", err) + } + +} diff --git a/zlib.go b/zlib.go index ce07890d..84275186 100644 --- a/zlib.go +++ b/zlib.go @@ -1,7 +1,6 @@ package archiver import ( - "bytes" "io" "strings" @@ -28,11 +27,13 @@ func (zz Zlib) Match(filename string, stream io.Reader) (MatchResult, error) { } // match file header - buf, err := readAtMost(stream, len(ZlibHeader)) - if err != nil { + buf, err := readAtMost(stream, 2) + // If an error occurred or buf is not 2 bytes we can't check the header + if err != nil || len(buf) < 2 { return mr, err } - mr.ByStream = bytes.Equal(buf, ZlibHeader) + + mr.ByStream = isValidZlibHeader(buf[0], buf[1]) return mr, nil } @@ -49,4 +50,23 @@ func (Zlib) OpenReader(r io.Reader) (io.ReadCloser, error) { return zlib.NewReader(r) } -var ZlibHeader = []byte{0x78} +func isValidZlibHeader(first, second byte) bool { + // Define all 32 valid zlib headers, see https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like/54915442#54915442 + validHeaders := map[uint16]struct{}{ + 0x081D: {}, 0x085B: {}, 0x0899: {}, 0x08D7: {}, + 0x1819: {}, 0x1857: {}, 0x1895: {}, 0x18D3: {}, + 0x2815: {}, 0x2853: {}, 0x2891: {}, 0x28CF: {}, + 0x3811: {}, 0x384F: {}, 0x388D: {}, 0x38CB: {}, + 0x480D: {}, 0x484B: {}, 
0x4889: {}, 0x48C7: {}, + 0x5809: {}, 0x5847: {}, 0x5885: {}, 0x58C3: {}, + 0x6805: {}, 0x6843: {}, 0x6881: {}, 0x68DE: {}, + 0x7801: {}, 0x785E: {}, 0x789C: {}, 0x78DA: {}, + } + + // Combine the first and second bytes into a single 16-bit, big-endian value + header := uint16(first)<<8 | uint16(second) + + // Check if the header is in the map of valid headers + _, isValid := validHeaders[header] + return isValid +} From 1de21189c6c94972341654d537394d2985973b18 Mon Sep 17 00:00:00 2001 From: breezerider Date: Thu, 14 Sep 2023 23:52:48 +0200 Subject: [PATCH 04/19] Option to use number for user and/or group names (#385) * Option to use number for user and/or group names * add NumericUid and NumericGid to Tar struct * writeFileToArchive: set Uname and Gname to empty string if NumericUid and NumericGid are true, respectively. * Join options to use numeric user and group id * join NumericUid and NumericGid in Tar struct * name the new option NumericUidGid * Apply suggestions from code review --------- Co-authored-by: Matt Holt --- tar.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tar.go b/tar.go index ce719695..c07efb4f 100644 --- a/tar.go +++ b/tar.go @@ -17,6 +17,9 @@ func init() { } type Tar struct { + // If true, preserve only numeric user and group id + NumericUIDGID bool + // If true, errors encountered during reading or writing // a file within an archive will be logged and the // operation will continue on remaining files. 
@@ -71,7 +74,7 @@ func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan Arc return nil } -func (Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error { +func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error { if err := ctx.Err(); err != nil { return err // honor context cancellation } @@ -81,6 +84,10 @@ func (Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) er return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err) } hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name + if t.NumericUIDGID { + hdr.Uname = "" + hdr.Gname = "" + } if err := tw.WriteHeader(hdr); err != nil { return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err) From aa12f39dc27c2f2d34a52439f7c192cc72bc781e Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 15 Sep 2023 13:34:10 -0600 Subject: [PATCH 05/19] fs: Leave decompressor open until archive close (fix #365) I don't like this solution, but it's all I could think of while preserving the API. Anyone using CompressedArchive to Extract files without using archiver.FS will need to figure it out on their own if they don't close files before the call to Extract returns. 
--- .github/workflows/macos-latest.yml | 6 +-- .github/workflows/ubuntu-latest.yml | 6 +-- .github/workflows/windows-latest.yml | 6 +-- formats.go | 18 ++++++- fs.go | 66 +++++++++++++++++------ go.mod | 25 +++++---- go.sum | 80 +++++++++++++++++----------- 7 files changed, 138 insertions(+), 69 deletions(-) diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index f11420b5..a33681f1 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,16 +8,16 @@ jobs: strategy: matrix: - go-version: [1.18] + go-version: [1.21] runs-on: macos-latest steps: - name: Install Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build run: go build cmd/arc/main.go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index 475379c8..af8ebcb6 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,16 +8,16 @@ jobs: strategy: matrix: - go-version: [1.18] + go-version: [1.21] runs-on: ubuntu-latest steps: - name: Install Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build run: go build cmd/arc/main.go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index c1b401bd..90fbfb61 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,16 +8,16 @@ jobs: strategy: matrix: - go-version: [1.18] + go-version: [1.21] runs-on: windows-latest steps: - name: Install Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v4 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build run: go build cmd/arc/main.go diff --git 
a/formats.go b/formats.go index db22811b..546d3122 100644 --- a/formats.go +++ b/formats.go @@ -253,16 +253,30 @@ func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, } // Extract reads files out of an archive while decompressing the results. +// If Extract is not called from ArchiveFS.Open, then the FileHandler passed +// in must close all opened files by the time the Extract walk finishes. func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { if caf.Compression != nil { rc, err := caf.Compression.OpenReader(sourceArchive) if err != nil { return err } - defer rc.Close() + // I don't like this solution, but we have to close the decompressor. + // The problem is that if we simply defer rc.Close(), we potentially + // close it before the caller is done using files it opened. Ideally + // it should be closed when the sourceArchive is also closed. But since + // we don't originate sourceArchive, we can't close it when it closes. + // The best I can think of for now is this hack where we tell a type + // that supports this to close another reader when itself closes. + // See issue #365. 
+ if cc, ok := sourceArchive.(compressorCloser); ok { + cc.closeCompressor(rc) + } else { + defer rc.Close() + } sourceArchive = rc } - return caf.Archival.(Extractor).Extract(ctx, sourceArchive, pathsInArchive, handleFile) + return caf.Archival.Extract(ctx, sourceArchive, pathsInArchive, handleFile) } // MatchResult returns true if the format was matched either diff --git a/fs.go b/fs.go index 2aa5737a..684fdf5e 100644 --- a/fs.go +++ b/fs.go @@ -329,8 +329,12 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { return nil } - var inputStream io.Reader = archiveFile - if f.Stream != nil { + var inputStream io.Reader + if f.Stream == nil { + // when the archive file is closed, any (soon-to-be) associated decompressor should also be closed; see #365 + archiveFile = &closeBoth{File: archiveFile} + inputStream = archiveFile + } else { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } @@ -368,13 +372,13 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { // implicit files files = fillImplicit(files) - file := search(name, files) - if file == nil { + file, foundFile := search(name, files) + if !foundFile { return nil, fs.ErrNotExist } if file.IsDir() { - return &dirFile{extractedFile: extractedFile{File: *file}, entries: openReadDir(name, files)}, nil + return &dirFile{extractedFile: extractedFile{File: file}, entries: openReadDir(name, files)}, nil } // very unlikely @@ -383,7 +387,7 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { // if named file is not a regular file, it can't be opened if !file.Mode().IsRegular() { - return extractedFile{File: *file}, nil + return extractedFile{File: file}, nil } // regular files can be read, so open it for reading @@ -391,7 +395,7 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { if err != nil { return nil, err } - return extractedFile{File: *file, ReadCloser: rc, parentArchive: archiveFile}, nil + return extractedFile{File: file, ReadCloser: rc, parentArchive: archiveFile}, nil } // 
copy of the same function from zip @@ -414,7 +418,7 @@ func split(name string) (dir, elem string, isDir bool) { func fillImplicit(files []File) []File { dirs := make(map[string]bool) knownDirs := make(map[string]bool) - entries := make([]File, 0, 0) + entries := make([]File, 0) for _, file := range files { for dir := path.Dir(file.NameInArchive); dir != "."; dir = path.Dir(dir) { dirs[dir] = true @@ -444,7 +448,7 @@ func fillImplicit(files []File) []File { } // modified from zip.Reader openLookup -func search(name string, entries []File) *File { +func search(name string, entries []File) (File, bool) { dir, elem, _ := split(name) i := sort.Search(len(entries), func(i int) bool { idir, ielem, _ := split(entries[i].NameInArchive) @@ -453,10 +457,10 @@ func search(name string, entries []File) *File { if i < len(entries) { fname := entries[i].NameInArchive if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { - return &entries[i] + return entries[i], true } } - return nil + return File{}, false } // modified from zip.Reader openReadDir @@ -538,8 +542,8 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { } files = fillImplicit(files) - file := search(name, files) - if file == nil { + file, found := search(name, files) + if !found { return nil, fs.ErrNotExist } return file.FileInfo, nil @@ -608,8 +612,8 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { return openReadDir(name, files), nil } - file := search(name, files) - if file == nil { + file, foundFile := search(name, files) + if !foundFile { return nil, fs.ErrNotExist } @@ -799,6 +803,36 @@ func (ef extractedFile) Close() error { return nil } +// compressorCloser is a type that closes two closers at the same time. +// It only exists to fix #365. If a better solution can be found, I'd +// likely prefer it. 
+type compressorCloser interface { + io.Closer + closeCompressor(io.Closer) +} + +// closeBoth closes both the file and an associated +// closer, such as a (de)compressor that wraps the +// reading/writing of the file. See issue #365. If a +// better solution is found, I'd probably prefer that. +type closeBoth struct { + fs.File + c io.Closer +} + +// closeCompressor will have the closer closed when the associated File closes. +func (dc *closeBoth) closeCompressor(c io.Closer) { dc.c = c } + +// Close closes both the file and the associated closer. It always calls +// Close() on both, but returns only the first error, if any. +func (dc closeBoth) Close() error { + err1, err2 := dc.File.Close(), dc.c.Close() + if err1 != nil { + return err1 + } + return err2 +} + // implicitDirEntry represents a directory that does // not actually exist in the archive but is inferred // from the paths of actual files in the archive. @@ -840,4 +874,6 @@ var ( _ fs.ReadDirFS = (*ArchiveFS)(nil) _ fs.StatFS = (*ArchiveFS)(nil) _ fs.SubFS = (*ArchiveFS)(nil) + + _ compressorCloser = (*closeBoth)(nil) ) diff --git a/go.mod b/go.mod index 9ab9cf88..96490a87 100644 --- a/go.mod +++ b/go.mod @@ -1,29 +1,28 @@ module github.com/mholt/archiver/v4 -go 1.18 +go 1.20 require ( - github.com/andybalholm/brotli v1.0.4 + github.com/andybalholm/brotli v1.0.5 github.com/dsnet/compress v0.0.1 - github.com/klauspost/compress v1.15.9 - github.com/klauspost/pgzip v1.2.5 + github.com/klauspost/compress v1.16.7 + github.com/klauspost/pgzip v1.2.6 github.com/nwaples/rardecode/v2 v2.0.0-beta.2 github.com/therootcompany/xz v1.0.1 - github.com/ulikunitz/xz v0.5.10 + github.com/ulikunitz/xz v0.5.11 ) require ( - github.com/bodgit/sevenzip v1.3.0 + github.com/bodgit/sevenzip v1.4.3 github.com/golang/snappy v0.0.4 - github.com/pierrec/lz4/v4 v4.1.15 - golang.org/x/text v0.3.8 + github.com/pierrec/lz4/v4 v4.1.18 + golang.org/x/text v0.13.0 ) require ( - github.com/bodgit/plumbing v1.2.0 // indirect - 
github.com/bodgit/windows v1.0.0 // indirect - github.com/connesc/cipherio v0.2.1 // indirect - github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/bodgit/plumbing v1.3.0 // indirect + github.com/bodgit/windows v1.0.1 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect - go4.org v0.0.0-20200411211856-f5505b9728dd // indirect + go4.org v0.0.0-20230225012048-214862532bf5 // indirect ) diff --git a/go.sum b/go.sum index 80834a65..3df54f5a 100644 --- a/go.sum +++ b/go.sum @@ -17,23 +17,22 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/bodgit/plumbing v1.2.0 h1:gg4haxoKphLjml+tgnecR4yLBV5zo4HAZGCtAh3xCzM= -github.com/bodgit/plumbing v1.2.0/go.mod h1:b9TeRi7Hvc6Y05rjm8VML3+47n4XTZPtQ/5ghqic2n8= -github.com/bodgit/sevenzip v1.3.0 h1:1ljgELgtHqvgIp8W8kgeEGHIWP4ch3xGI8uOBZgLVKY= -github.com/bodgit/sevenzip v1.3.0/go.mod h1:omwNcgZTEooWM8gA/IJ2Nk/+ZQ94+GsytRzOJJ8FBlM= -github.com/bodgit/windows v1.0.0 h1:rLQ/XjsleZvx4fR1tB/UxQrK+SJ2OFHzfPjLWWOhDIA= -github.com/bodgit/windows v1.0.0/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU= +github.com/bodgit/plumbing v1.3.0/go.mod 
h1:JOTb4XiRu5xfnmdnDJo6GmSbSbtSyufrsyZFByMtKEs= +github.com/bodgit/sevenzip v1.4.3 h1:46Rb9vCYdpceC1U+GIR0bS3hP2/Xv8coKFDeLJySV/A= +github.com/bodgit/sevenzip v1.4.3/go.mod h1:F8n3+0CwbdxqmNy3wFeOAtanza02Ur66AGfs/hbYblI= +github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4= +github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/connesc/cipherio v0.2.1 h1:FGtpTPMbKNNWByNrr9aEBtaJtXjqOzkIXNYJp6OEycw= -github.com/connesc/cipherio v0.2.1/go.mod h1:ukY0MWJDFnJEbXMQtOcn2VmTpRfzcTz4OoVrWGGJZcA= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= @@ -49,8 +48,6 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= 
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3 h1:GV+pQPG/EUUbkh47niozDcADz6go/dUwhVzdUQHIVRw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -70,8 +67,9 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -81,42 +79,48 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= 
-github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= -github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= -github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= +github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= -github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= -github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= +github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod 
h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= -github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= -github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= +github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go4.org v0.0.0-20200411211856-f5505b9728dd h1:BNJlw5kRTzdmyfh5U8F93HA2OwkP7ZGwA51eJ/0wKOU= -go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod 
h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= +go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc= +go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -142,6 +146,7 @@ golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKG golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -155,7 +160,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -167,6 +174,7 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -179,13 +187,23 @@ golang.org/x/sys 
v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -212,6 +230,7 @@ golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -252,8 +271,9 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 09bbccc36c86ca5404a4be1c15639c363151dbfa Mon Sep 17 00:00:00 2001 From: halfcrazy Date: Wed, 13 Dec 2023 11:06:09 +0800 Subject: [PATCH 06/19] Fix create archive to a continuous writing source file failed (#388) * Fix create archive to a continuous writing source file failed https://github.com/mholt/archiver/issues/387 Signed-off-by: Yan Zhu * Update archiver.go --------- Signed-off-by: Yan Zhu Co-authored-by: Matt Holt --- archiver.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/archiver.go b/archiver.go index 73ec00d4..1701b078 100644 --- a/archiver.go +++ b/archiver.go @@ -221,8 +221,14 @@ func openAndCopyFile(file File, w io.Writer) error { return err } defer fileReader.Close() - _, err = io.Copy(w, fileReader) - return err + // When file is in use and size is being written to, creating the compressed + // file will fail with "archive/tar: write too long." Using CopyN gracefully + // handles this. 
+ _, err = io.CopyN(w, fileReader, file.Size()) + if err != nil && err != io.EOF { + return err + } + return nil } // fileIsIncluded returns true if filename is included according to From 81f9e06b11ad6ba424f8311c0bc18ceb01f2b67a Mon Sep 17 00:00:00 2001 From: Shelton Zhu <498220739@qq.com> Date: Tue, 23 Jan 2024 11:00:47 +0800 Subject: [PATCH 07/19] 7z: Copy f for loop closure (#394) fix: fix iteration bug --- 7z.go | 1 + 1 file changed, 1 insertion(+) diff --git a/7z.go b/7z.go index 44f8a00b..d57bd452 100644 --- a/7z.go +++ b/7z.go @@ -82,6 +82,7 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA skipDirs := skipList{} for i, f := range zr.File { + f := f // make a copy for the Open closure if err := ctx.Err(); err != nil { return err // honor context cancellation } From 43a073ede7796a604dc6f4b1acabcfb4725c43be Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Tue, 13 Feb 2024 13:22:52 -0700 Subject: [PATCH 08/19] zip: Implement Insert (append files) Close #397 Also minor tweaks --- README.md | 8 ++++--- archiver.go | 7 +++++- go.mod | 5 +++- go.sum | 3 +++ interfaces.go | 1 + tar.go | 3 +++ zip.go | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 87 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 90937eb6..3f0024ff 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility an - Create and extract archive files - Walk or traverse into archive files - Extract only specific files from archives -- Insert (append) into .tar files +- Insert (append) into .tar and .zip archives - Read from password-protected 7-Zip files - Numerous archive and compression formats supported - Extensible (add more formats just by registering them) @@ -301,9 +301,9 @@ defer decompressor.Close() // reads from decompressor will be decompressed ``` -### Append to tarball +### Append to tarball and zip archives -Tar archives can be appended 
to without creating a whole new archive by calling `Insert()` on a tar stream. However, this requires that the tarball is not compressed (due to complexities with modifying compression dictionaries). +Tar and Zip archives can be appended to without creating a whole new archive by calling `Insert()` on a tar or zip stream. However, for tarballs, this requires that the tarball is not compressed (due to complexities with modifying compression dictionaries). Here is an example that appends a file to a tarball on disk: @@ -325,3 +325,5 @@ if err != nil { } ``` +The code is similar for inserting into a Zip archive, except you'll call `Insert()` on the `Zip` type instead. + diff --git a/archiver.go b/archiver.go index 1701b078..c968e1f7 100644 --- a/archiver.go +++ b/archiver.go @@ -27,6 +27,11 @@ type File struct { // it is such a common field and we want to preserve // format-agnosticism (no type assertions) for basic // operations. + // + // EXPERIMENTAL: If inserting a file into an archive, + // and this is left blank, the implementation of the + // archive format can default to using the file's base + // name. NameInArchive string // For symbolic and hard links, the target of the link. @@ -224,7 +229,7 @@ func openAndCopyFile(file File, w io.Writer) error { // When file is in use and size is being written to, creating the compressed // file will fail with "archive/tar: write too long." Using CopyN gracefully // handles this. 
- _, err = io.CopyN(w, fileReader, file.Size()) + _, err = io.Copy(w, fileReader) if err != nil && err != io.EOF { return err } diff --git a/go.mod b/go.mod index 96490a87..d1e0d85e 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/mholt/archiver/v4 -go 1.20 +go 1.21.3 + +toolchain go1.22.0 require ( github.com/andybalholm/brotli v1.0.5 @@ -13,6 +15,7 @@ require ( ) require ( + github.com/STARRY-S/zip v0.1.0 github.com/bodgit/sevenzip v1.4.3 github.com/golang/snappy v0.0.4 github.com/pierrec/lz4/v4 v4.1.18 diff --git a/go.sum b/go.sum index 3df54f5a..c352a829 100644 --- a/go.sum +++ b/go.sum @@ -17,6 +17,8 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/STARRY-S/zip v0.1.0 h1:eUER3jKmHKXjv+iy3BekLa+QnNSo1Lqz4eTzYBcGDqo= +github.com/STARRY-S/zip v0.1.0/go.mod h1:qj/mTZkvb3AvfGQ2e775/3AODRvB4peSw8KNMvrM8/I= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU= @@ -104,6 +106,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/therootcompany/xz v1.0.1 
h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= diff --git a/interfaces.go b/interfaces.go index bfc53163..9a41e1f1 100644 --- a/interfaces.go +++ b/interfaces.go @@ -94,6 +94,7 @@ type Extractor interface { } // Inserter can insert files into an existing archive. +// EXPERIMENTAL: This API is subject to change. type Inserter interface { // Insert inserts the files into archive. // diff --git a/tar.go b/tar.go index c07efb4f..16116917 100644 --- a/tar.go +++ b/tar.go @@ -84,6 +84,9 @@ func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err) } hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name + if hdr.Name == "" { + hdr.Name = file.Name() // assume base name of file I guess + } if t.NumericUIDGID { hdr.Uname = "" hdr.Gname = "" diff --git a/zip.go b/zip.go index 421fe6ec..0694d7e1 100644 --- a/zip.go +++ b/zip.go @@ -11,6 +11,8 @@ import ( "path" "strings" + szip "github.com/STARRY-S/zip" + "github.com/dsnet/compress/bzip2" "github.com/klauspost/compress/zip" "github.com/klauspost/compress/zstd" @@ -137,6 +139,9 @@ func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file F return fmt.Errorf("getting info for file %d: %s: %w", idx, file.Name(), err) } hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name + if hdr.Name == "" { + hdr.Name = file.Name() // assume base name of file I guess + } // customize header based on file properties if file.IsDir() { @@ -256,6 +261,66 @@ func (z Zip) decodeText(hdr *zip.FileHeader) { } } +// Insert appends the listed files into the provided Zip archive stream. 
+func (z Zip) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) error { + // following very simple example at https://github.com/STARRY-S/zip?tab=readme-ov-file#usage + zu, err := szip.NewUpdater(into) + if err != nil { + return err + } + defer zu.Close() + + for idx, file := range files { + if err := ctx.Err(); err != nil { + return err // honor context cancellation + } + + hdr, err := szip.FileInfoHeader(file) + if err != nil { + return fmt.Errorf("getting info for file %d: %s: %w", idx, file.NameInArchive, err) + } + hdr.Name = file.NameInArchive // complete path, since FileInfoHeader() only has base name + if hdr.Name == "" { + hdr.Name = file.Name() // assume base name of file I guess + } + + // customize header based on file properties + if file.IsDir() { + if !strings.HasSuffix(hdr.Name, "/") { + hdr.Name += "/" // required + } + hdr.Method = zip.Store + } else if z.SelectiveCompression { + // only enable compression on compressible files + ext := strings.ToLower(path.Ext(hdr.Name)) + if _, ok := compressedFormats[ext]; ok { + hdr.Method = zip.Store + } else { + hdr.Method = z.Compression + } + } + + w, err := zu.AppendHeaderAt(hdr, -1) + if err != nil { + return fmt.Errorf("inserting file header: %d: %s: %w", idx, file.Name(), err) + } + + // directories have no file body; keep going so the remaining files are still inserted + if file.IsDir() { + continue + } + if err := openAndCopyFile(file, w); err != nil { + if z.ContinueOnError && ctx.Err() == nil { + log.Printf("[ERROR] appending file %d into archive: %s: %v", idx, file.Name(), err) + continue + } + return fmt.Errorf("copying inserted file %d: %s: %w", idx, file.Name(), err) + } + } + + return nil +} + type seekReaderAt interface { io.ReaderAt io.Seeker From de08bfa4c5588130c63dfcede2da42f26abccbb4 Mon Sep 17 00:00:00 2001 From: Shun Sakai Date: Tue, 9 Apr 2024 03:30:22 +0900 Subject: [PATCH 09/19] Add lzip support (#401) * Add lzip support * Make the extension comparison to strict --- .github/workflows/macos-latest.yml | 2 +-
.github/workflows/ubuntu-latest.yml | 2 +- .github/workflows/windows-latest.yml | 2 +- README.md | 1 + formats_test.go | 7 ++++ go.mod | 7 ++-- go.sum | 6 ++-- lzip.go | 53 ++++++++++++++++++++++++++++ 8 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 lzip.go diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index a33681f1..70b0aa57 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.21] + go-version: [1.22] runs-on: macos-latest steps: - name: Install Go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index af8ebcb6..0502d0d8 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.21] + go-version: [1.22] runs-on: ubuntu-latest steps: - name: Install Go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index 90fbfb61..d27e28d9 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.21] + go-version: [1.22] runs-on: windows-latest steps: - name: Install Go diff --git a/README.md b/README.md index 3f0024ff..2d7dcf7c 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility an - flate (.zip) - gzip (.gz) - lz4 (.lz4) +- lzip (.lz) - snappy (.sz) - xz (.xz) - zlib (.zz) diff --git a/formats_test.go b/formats_test.go index 106f48a0..ec5e3bb8 100644 --- a/formats_test.go +++ b/formats_test.go @@ -315,6 +315,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) { compressorName: ".lz4", wantFormatName: ".lz4", }, + { + name: "should recognize lz", + openCompressionWriter: Lzip{}.OpenWriter, + content: []byte("this is text"), + compressorName: ".lz", + wantFormatName: ".lz", + }, { 
name: "should recognize sz", openCompressionWriter: Sz{}.OpenWriter, diff --git a/go.mod b/go.mod index d1e0d85e..748b3618 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/mholt/archiver/v4 -go 1.21.3 +go 1.22 -toolchain go1.22.0 +toolchain go1.22.2 require ( github.com/andybalholm/brotli v1.0.5 @@ -11,7 +11,7 @@ require ( github.com/klauspost/pgzip v1.2.6 github.com/nwaples/rardecode/v2 v2.0.0-beta.2 github.com/therootcompany/xz v1.0.1 - github.com/ulikunitz/xz v0.5.11 + github.com/ulikunitz/xz v0.5.12 ) require ( @@ -19,6 +19,7 @@ require ( github.com/bodgit/sevenzip v1.4.3 github.com/golang/snappy v0.0.4 github.com/pierrec/lz4/v4 v4.1.18 + github.com/sorairolake/lzip-go v0.3.1 golang.org/x/text v0.13.0 ) diff --git a/go.sum b/go.sum index c352a829..42ffe66f 100644 --- a/go.sum +++ b/go.sum @@ -98,6 +98,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= +github.com/sorairolake/lzip-go v0.3.1 h1:/v5NxPwhyEV/0NxdniSOPt0zTTZweQfIr2d/f9cE0Uk= +github.com/sorairolake/lzip-go v0.3.1/go.mod h1:sGvZv/ZFQzR0DSbXsjCyA6Nmb84TIPT8QbmhFBfVRlI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -110,8 +112,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= github.com/ulikunitz/xz 
v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= -github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= -github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= +github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= diff --git a/lzip.go b/lzip.go new file mode 100644 index 00000000..a861a487 --- /dev/null +++ b/lzip.go @@ -0,0 +1,53 @@ +package archiver + +import ( + "bytes" + "io" + "path/filepath" + "strings" + + "github.com/sorairolake/lzip-go" +) + +func init() { + RegisterFormat(Lzip{}) +} + +// Lzip facilitates lzip compression. +type Lzip struct{} + +func (Lzip) Name() string { return ".lz" } + +func (lz Lzip) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if filepath.Ext(strings.ToLower(filename)) == lz.Name() { + mr.ByName = true + } + + // match file header + buf, err := readAtMost(stream, len(lzipHeader)) + if err != nil { + return mr, err + } + mr.ByStream = bytes.Equal(buf, lzipHeader) + + return mr, nil +} + +func (Lzip) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return lzip.NewWriter(w), nil +} + +func (Lzip) OpenReader(r io.Reader) (io.ReadCloser, error) { + lzr, err := lzip.NewReader(r) + if err != nil { + return nil, err + } + return io.NopCloser(lzr), err +} + +// magic number at the beginning of lzip files +// https://datatracker.ietf.org/doc/html/draft-diaz-lzip-09#section-2 +var lzipHeader = []byte("LZIP") From 07b70a61b711e8a1400760a438f23220d3933322 Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Thu, 13 
Jun 2024 15:08:31 -0400 Subject: [PATCH 10/19] gzip: expose gzip.Multistream (#407) --- gz.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/gz.go b/gz.go index e747d030..b9873f19 100644 --- a/gz.go +++ b/gz.go @@ -20,6 +20,10 @@ type Gz struct { // than no compression. CompressionLevel int + // DisableMultistream controls whether the reader supports multistream files. + // See https://pkg.go.dev/compress/gzip#example-Reader.Multistream + DisableMultistream bool + // Use a fast parallel Gzip implementation. This is only // effective for large streams (about 1 MB or greater). Multithreaded bool @@ -65,14 +69,19 @@ func (gz Gz) OpenWriter(w io.Writer) (io.WriteCloser, error) { } func (gz Gz) OpenReader(r io.Reader) (io.ReadCloser, error) { - var rc io.ReadCloser - var err error if gz.Multithreaded { - rc, err = pgzip.NewReader(r) - } else { - rc, err = gzip.NewReader(r) + gzR, err := pgzip.NewReader(r) + if gzR != nil && gz.DisableMultistream { + gzR.Multistream(false) + } + return gzR, err + } + + gzR, err := gzip.NewReader(r) + if gzR != nil && gz.DisableMultistream { + gzR.Multistream(false) } - return rc, err + return gzR, err } // magic number at the beginning of gzip files From c341cc08a3eadf3b9859e34034709616e186ff1a Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Sat, 8 Jun 2024 07:47:16 -0600 Subject: [PATCH 11/19] Upgrade dependencies; honor fs.SkipAll when walking --- go.mod | 15 ++++++++------- go.sum | 39 ++++++++++++++++++++++----------------- rar.go | 4 +++- tar.go | 4 +++- zip.go | 4 +++- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/go.mod b/go.mod index 748b3618..c81ae494 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,9 @@ go 1.22 toolchain go1.22.2 require ( - github.com/andybalholm/brotli v1.0.5 - github.com/dsnet/compress v0.0.1 - github.com/klauspost/compress v1.16.7 + github.com/andybalholm/brotli v1.1.0 + github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 + 
github.com/klauspost/compress v1.17.8 github.com/klauspost/pgzip v1.2.6 github.com/nwaples/rardecode/v2 v2.0.0-beta.2 github.com/therootcompany/xz v1.0.1 @@ -16,11 +16,11 @@ require ( require ( github.com/STARRY-S/zip v0.1.0 - github.com/bodgit/sevenzip v1.4.3 + github.com/bodgit/sevenzip v1.5.1 github.com/golang/snappy v0.0.4 - github.com/pierrec/lz4/v4 v4.1.18 - github.com/sorairolake/lzip-go v0.3.1 - golang.org/x/text v0.13.0 + github.com/pierrec/lz4/v4 v4.1.21 + github.com/sorairolake/lzip-go v0.3.4 + golang.org/x/text v0.16.0 ) require ( @@ -28,5 +28,6 @@ require ( github.com/bodgit/windows v1.0.1 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect ) diff --git a/go.sum b/go.sum index 42ffe66f..a706d93d 100644 --- a/go.sum +++ b/go.sum @@ -19,12 +19,12 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/STARRY-S/zip v0.1.0 h1:eUER3jKmHKXjv+iy3BekLa+QnNSo1Lqz4eTzYBcGDqo= github.com/STARRY-S/zip v0.1.0/go.mod h1:qj/mTZkvb3AvfGQ2e775/3AODRvB4peSw8KNMvrM8/I= -github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= -github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU= github.com/bodgit/plumbing v1.3.0/go.mod h1:JOTb4XiRu5xfnmdnDJo6GmSbSbtSyufrsyZFByMtKEs= -github.com/bodgit/sevenzip v1.4.3 h1:46Rb9vCYdpceC1U+GIR0bS3hP2/Xv8coKFDeLJySV/A= -github.com/bodgit/sevenzip v1.4.3/go.mod 
h1:F8n3+0CwbdxqmNy3wFeOAtanza02Ur66AGfs/hbYblI= +github.com/bodgit/sevenzip v1.5.1 h1:rVj0baZsooZFy64DJN0zQogPzhPrT8BQ8TTRd1H4WHw= +github.com/bodgit/sevenzip v1.5.1/go.mod h1:Q3YMySuVWq6pyGEolyIE98828lOfEoeWg5zeH6x22rc= github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4= github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -35,8 +35,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= -github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= +github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 h1:2tV76y6Q9BB+NEBasnqvs7e49aEBFI8ejC89PSnWH+4= +github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= @@ -62,6 +62,7 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp 
v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -76,13 +77,15 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.6 
h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= @@ -91,15 +94,15 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= -github.com/sorairolake/lzip-go v0.3.1 h1:/v5NxPwhyEV/0NxdniSOPt0zTTZweQfIr2d/f9cE0Uk= -github.com/sorairolake/lzip-go v0.3.1/go.mod h1:sGvZv/ZFQzR0DSbXsjCyA6Nmb84TIPT8QbmhFBfVRlI= +github.com/sorairolake/lzip-go v0.3.4 h1:588FEACmBj0SFhS4SsbiePLxx24ENrqsbt8OiIAoESA= +github.com/sorairolake/lzip-go v0.3.4/go.mod h1:N0KYq5iWrMXI0ZEXKXaS9hCyOjZUQdBDEIbXfoUwbdk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ 
-107,11 +110,11 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= -github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= +github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -180,6 +183,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -207,8 +212,8 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/rar.go b/rar.go index 61e55e57..ed0099f6 100644 --- a/rar.go +++ b/rar.go @@ -106,7 +106,9 @@ func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv } err = handleFile(ctx, file) - if errors.Is(err, fs.SkipDir) { + if errors.Is(err, fs.SkipAll) { + break + } else if errors.Is(err, fs.SkipDir) { // if a directory, skip this path; if a file, skip the folder path dirPath := hdr.Name if !hdr.IsDir { diff --git a/tar.go b/tar.go index 16116917..0db0a665 100644 --- a/tar.go +++ b/tar.go @@ -221,7 +221,9 @@ func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv } err = handleFile(ctx, file) - if errors.Is(err, fs.SkipDir) { + if errors.Is(err, fs.SkipAll) { + break + } else if errors.Is(err, 
fs.SkipDir) { // if a directory, skip this path; if a file, skip the folder path dirPath := hdr.Name if hdr.Typeflag != tar.TypeDir { diff --git a/zip.go b/zip.go index 0694d7e1..7d11ea4d 100644 --- a/zip.go +++ b/zip.go @@ -224,7 +224,9 @@ func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv } err := handleFile(ctx, file) - if errors.Is(err, fs.SkipDir) { + if errors.Is(err, fs.SkipAll) { + break + } else if errors.Is(err, fs.SkipDir) { // if a directory, skip this path; if a file, skip the folder path dirPath := f.Name if !file.IsDir() { From 743ede3881bb34d43b9ca063232367cbc237daea Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Wed, 21 Aug 2024 11:43:08 -0600 Subject: [PATCH 12/19] fs: Fix bug with ArchiveFS stat method I think this is what @WeidiDeng intended from #354 --- fs.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs.go b/fs.go index 684fdf5e..bceda446 100644 --- a/fs.go +++ b/fs.go @@ -537,7 +537,8 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { return nil, err } - if (len(files) == 0 && files[0].NameInArchive == name) || found { + // exactly one or exact file found, test name match to detect implicit dir name https://github.com/mholt/archiver/issues/340 + if (len(files) == 1 && files[0].NameInArchive == name) || found { return files[len(files)-1].FileInfo, nil } From 700715b82e7d81885287ff00f7ac1f58c2ba1bea Mon Sep 17 00:00:00 2001 From: Petr Ilin Date: Thu, 22 Aug 2024 00:32:06 +0300 Subject: [PATCH 13/19] zip: Fix compression method not set without SelectiveCompression enabled (#419) --- zip.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zip.go b/zip.go index 7d11ea4d..0a4d04a8 100644 --- a/zip.go +++ b/zip.go @@ -157,6 +157,8 @@ func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file F } else { hdr.Method = z.Compression } + } else { + hdr.Method = z.Compression } w, err := zw.CreateHeader(hdr) From a902fcc00a188fb940b75bb3db65a2cb44636a58 Mon Sep 
17 00:00:00 2001 From: Matthew Holt Date: Thu, 22 Aug 2024 21:48:44 -0600 Subject: [PATCH 14/19] go.mod: Use copy of lzip library that disappeared (close #421) --- go.mod | 3 ++- go.sum | 6 ++++-- lzip.go | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index c81ae494..688939e3 100644 --- a/go.mod +++ b/go.mod @@ -18,8 +18,8 @@ require ( github.com/STARRY-S/zip v0.1.0 github.com/bodgit/sevenzip v1.5.1 github.com/golang/snappy v0.0.4 + github.com/mholt/lzip-go v0.3.6 github.com/pierrec/lz4/v4 v4.1.21 - github.com/sorairolake/lzip-go v0.3.4 golang.org/x/text v0.16.0 ) @@ -29,5 +29,6 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/sorairolake/lzip-go v0.3.5 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect ) diff --git a/go.sum b/go.sum index a706d93d..909854a4 100644 --- a/go.sum +++ b/go.sum @@ -92,6 +92,8 @@ github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mholt/lzip-go v0.3.6 h1:M32wuPUoU7OWXMFukNnK/OZRXdPQyZsdn9ls/Gqivts= +github.com/mholt/lzip-go v0.3.6/go.mod h1:E5mRuN06hLueqwJepeuv1xf2X5GnW9xd+Qs+JJBDGBM= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -101,8 +103,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= -github.com/sorairolake/lzip-go v0.3.4 h1:588FEACmBj0SFhS4SsbiePLxx24ENrqsbt8OiIAoESA= -github.com/sorairolake/lzip-go v0.3.4/go.mod h1:N0KYq5iWrMXI0ZEXKXaS9hCyOjZUQdBDEIbXfoUwbdk= +github.com/sorairolake/lzip-go v0.3.5 h1:ms5Xri9o1JBIWvOFAorYtUNik6HI3HgBTkISiqu0Cwg= +github.com/sorairolake/lzip-go v0.3.5/go.mod h1:N0KYq5iWrMXI0ZEXKXaS9hCyOjZUQdBDEIbXfoUwbdk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/lzip.go b/lzip.go index a861a487..958c6e6f 100644 --- a/lzip.go +++ b/lzip.go @@ -6,7 +6,7 @@ import ( "path/filepath" "strings" - "github.com/sorairolake/lzip-go" + "github.com/mholt/lzip-go" ) func init() { From 275fd2ee42701bbce945d304784a6d0a460062f8 Mon Sep 17 00:00:00 2001 From: Shun Sakai Date: Wed, 25 Sep 2024 08:07:22 +0900 Subject: [PATCH 15/19] chore(go.mod): Switch to upstream lzip package (#422) This reverts commit a902fcc00a188fb940b75bb3db65a2cb44636a58. 
--- go.mod | 3 +-- go.sum | 2 -- lzip.go | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 688939e3..a6e217a2 100644 --- a/go.mod +++ b/go.mod @@ -18,8 +18,8 @@ require ( github.com/STARRY-S/zip v0.1.0 github.com/bodgit/sevenzip v1.5.1 github.com/golang/snappy v0.0.4 - github.com/mholt/lzip-go v0.3.6 github.com/pierrec/lz4/v4 v4.1.21 + github.com/sorairolake/lzip-go v0.3.5 golang.org/x/text v0.16.0 ) @@ -29,6 +29,5 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect - github.com/sorairolake/lzip-go v0.3.5 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect ) diff --git a/go.sum b/go.sum index 909854a4..205feb38 100644 --- a/go.sum +++ b/go.sum @@ -92,8 +92,6 @@ github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/mholt/lzip-go v0.3.6 h1:M32wuPUoU7OWXMFukNnK/OZRXdPQyZsdn9ls/Gqivts= -github.com/mholt/lzip-go v0.3.6/go.mod h1:E5mRuN06hLueqwJepeuv1xf2X5GnW9xd+Qs+JJBDGBM= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= diff --git a/lzip.go b/lzip.go index 958c6e6f..a861a487 100644 --- a/lzip.go +++ b/lzip.go @@ -6,7 +6,7 @@ import ( "path/filepath" "strings" - "github.com/mholt/lzip-go" + "github.com/sorairolake/lzip-go" ) func init() { From 264c9016644b6044d963957748730c421404b90d Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:20:43 
-0400 Subject: [PATCH 16/19] build: upgrade rardecode to v2.0.0-beta.3 (#423) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a6e217a2..3adbffad 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 github.com/klauspost/compress v1.17.8 github.com/klauspost/pgzip v1.2.6 - github.com/nwaples/rardecode/v2 v2.0.0-beta.2 + github.com/nwaples/rardecode/v2 v2.0.0-beta.3 github.com/therootcompany/xz v1.0.1 github.com/ulikunitz/xz v0.5.12 ) diff --git a/go.sum b/go.sum index 205feb38..e1803b1c 100644 --- a/go.sum +++ b/go.sum @@ -92,8 +92,8 @@ github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= -github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= +github.com/nwaples/rardecode/v2 v2.0.0-beta.3 h1:evQTW0IjM2GAL5AaPHiQrT+laWohkt5zHKA3yCsGQGU= +github.com/nwaples/rardecode/v2 v2.0.0-beta.3/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= From e310539bed5a26da3ebd604bd83f652d50012818 Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Thu, 7 Nov 2024 21:01:39 -0700 Subject: [PATCH 17/19] Refactor FS types; improve performance (#426) * WIP * More WIP * Finish improvements (probably) --- .github/workflows/macos-latest.yml | 2 +- .github/workflows/ubuntu-latest.yml | 2 +- 
.github/workflows/windows-latest.yml | 2 +- 7z.go | 21 +- README.md | 16 +- archiver.go | 33 +- archiver_test.go | 3 +- brotli.go | 19 +- bz2.go | 7 +- formats.go | 119 ++-- formats_test.go | 119 ++-- fs.go | 862 +++++++++++++-------------- fs_test.go | 21 +- go.mod | 14 +- go.sum | 26 +- gz.go | 7 +- interfaces.go | 26 +- lz4.go | 7 +- lzip.go | 7 +- rar.go | 19 +- sz.go | 7 +- tar.go | 27 +- xz.go | 7 +- zip.go | 25 +- zlib.go | 7 +- zstd.go | 7 +- 26 files changed, 716 insertions(+), 696 deletions(-) diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index 70b0aa57..5f2bdf3d 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.22] + go-version: [1.23] runs-on: macos-latest steps: - name: Install Go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index 0502d0d8..d25b72d2 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.22] + go-version: [1.23] runs-on: ubuntu-latest steps: - name: Install Go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index d27e28d9..b53e3eed 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.22] + go-version: [1.23] runs-on: windows-latest steps: - name: Install Go diff --git a/7z.go b/7z.go index d57bd452..4a3dbd4a 100644 --- a/7z.go +++ b/7z.go @@ -31,13 +31,13 @@ type SevenZip struct { Password string } -func (z SevenZip) Name() string { return ".7z" } +func (z SevenZip) Extension() string { return ".7z" } -func (z SevenZip) Match(filename string, stream io.Reader) (MatchResult, error) { +func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if 
strings.Contains(strings.ToLower(filename), z.Name()) { + if strings.Contains(strings.ToLower(filename), z.Extension()) { mr.ByName = true } @@ -52,7 +52,7 @@ func (z SevenZip) Match(filename string, stream io.Reader) (MatchResult, error) } // Archive is not implemented for 7z, but the method exists so that SevenZip satisfies the ArchiveFormat interface. -func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []File) error { +func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error { return fmt.Errorf("not implemented for 7z because there is no pure Go implementation found") } @@ -94,11 +94,18 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA continue } - file := File{ - FileInfo: f.FileInfo(), + fi := f.FileInfo() + file := FileInfo{ + FileInfo: fi, Header: f.FileHeader, NameInArchive: f.Name, - Open: func() (io.ReadCloser, error) { return f.Open() }, + Open: func() (fs.File, error) { + openedFile, err := f.Open() + if err != nil { + return nil, err + } + return fileInArchive{openedFile, fi}, nil + }, } err := handleFile(ctx, file) diff --git a/README.md b/README.md index 2d7dcf7c..9b1cdc78 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # archiver [![Go Reference](https://pkg.go.dev/badge/github.com/mholt/archiver/v4.svg)](https://pkg.go.dev/github.com/mholt/archiver/v4) [![Ubuntu-latest](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml) [![Macos-latest](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml) [![Windows-latest](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml) -Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility and Go library. 
A powerful and flexible library meets an elegant CLI in this generic replacement for several platform-specific or format-specific archive utilities. +Introducing **Archiver 4.0 (alpha)** - a cross-platform, multi-format archive utility and Go library. A powerful and flexible library meets an elegant CLI in this generic replacement for several platform-specific or format-specific archive utilities. **:warning: v4 is in ALPHA. The core library APIs work pretty well but the command has not been implemented yet, nor have most automated tests. If you need the `arc` command, stick with v3 for now.** @@ -11,8 +11,8 @@ Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility an - By file name - By header - Traverse directories, archive files, and any other file uniformly as [`io/fs`](https://pkg.go.dev/io/fs) file systems: - - [`DirFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#DirFS) - [`FileFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#FileFS) + - [`DirFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#DirFS) - [`ArchiveFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#ArchiveFS) - Compress and decompress files - Create and extract archive files @@ -117,7 +117,7 @@ If you want all the files, pass in a nil list of file paths. ```go // the type that will be used to read the input stream -format := archiver.Zip{} +var format archiver.Zip // the list of files we want out of the archive; any // directories will include all their contents unless @@ -141,7 +141,7 @@ if err != nil { Have an input stream with unknown contents? No problem, archiver can identify it for you. 
It will try matching based on filename and/or the header (which peeks at the stream): ```go -format, input, err := archiver.Identify("filename.tar.zst", input) +format, input, err := archiver.Identify(ctx, "filename.tar.zst", input) if err != nil { return err } @@ -165,7 +165,7 @@ if decom, ok := format.(archiver.Decompressor); ok { } ``` -`Identify()` works by reading an arbitrary number of bytes from the beginning of the stream (just enough to check for file headers). It buffers them and returns a new reader that lets you re-read them anew. +`Identify()` works by reading an arbitrary number of bytes from the beginning of the stream (just enough to check for file headers). It buffers them and returns a new reader that lets you re-read them anew. If your input stream is `io.Seeker` however, no buffer is created (it uses `Seek()` instead). ### Virtual file systems @@ -212,7 +212,7 @@ if dir, ok := f.(fs.ReadDirFile); ok { return err } for _, e := range entries { - fmt.Println(e.Name()) + fmt.Println(e.Extension()) } } ``` @@ -225,7 +225,7 @@ if err != nil { return err } for _, e := range entries { - fmt.Println(e.Name()) + fmt.Println(e.Extension()) } ``` @@ -247,6 +247,8 @@ if err != nil { } ``` +**Important .tar note:** Tar files do not efficiently implement file system semantics due to their roots in sequential-access design for tapes. File systems inherently assume random access, but tar files need to be read from the beginning to access something at the end. This is especially slow when the archive is compressed. Optimizations have been implemented to amortize `ReadDir()` calls so that `fs.WalkDir()` only has to scan the archive once, but they use more memory. Open calls require another scan to find the file. It may be more efficient to use `Tar.Extract()` directly if file system semantics are not important to you. + #### Use with `http.FileServer` It can be used with http.FileServer to browse archives and directories in a browser. 
However, due to how http.FileServer works, don't directly use http.FileServer with compressed files; instead wrap it like following: diff --git a/archiver.go b/archiver.go index c968e1f7..7e68f30d 100644 --- a/archiver.go +++ b/archiver.go @@ -12,14 +12,14 @@ import ( "time" ) -// File is a virtualized, generalized file abstraction for interacting with archives. -type File struct { +// FileInfo is a virtualized, generalized file abstraction for interacting with archives. +type FileInfo struct { fs.FileInfo // The file header as used/provided by the archive format. // Typically, you do not need to set this field when creating // an archive. - Header interface{} + Header any // The path of the file as it appears in the archive. // This is equivalent to Header.Name (for most Header @@ -28,6 +28,10 @@ type File struct { // format-agnosticism (no type assertions) for basic // operations. // + // When extracting, this name or path may not have + // been sanitized; it should not be trusted at face + // value. Consider using path.Clean() before using. + // // EXPERIMENTAL: If inserting a file into an archive, // and this is left blank, the implementation of the // archive format can default to using the file's base @@ -40,12 +44,11 @@ type File struct { // A callback function that opens the file to read its // contents. The file must be closed when reading is - // complete. Nil for files that don't have content - // (such as directories and links). - Open func() (io.ReadCloser, error) + // complete. + Open func() (fs.File, error) } -func (f File) Stat() (fs.FileInfo, error) { return f.FileInfo, nil } +func (f FileInfo) Stat() (fs.FileInfo, error) { return f.FileInfo, nil } // FilesFromDisk returns a list of files by walking the directories in the // given filenames map. 
The keys are the names on disk, and the values are @@ -68,8 +71,8 @@ func (f File) Stat() (fs.FileInfo, error) { return f.FileInfo, nil } // // This function is used primarily when preparing a list of files to add to // an archive. -func FilesFromDisk(options *FromDiskOptions, filenames map[string]string) ([]File, error) { - var files []File +func FilesFromDisk(options *FromDiskOptions, filenames map[string]string) ([]FileInfo, error) { + var files []FileInfo for rootOnDisk, rootInArchive := range filenames { walkErr := filepath.WalkDir(rootOnDisk, func(filename string, d fs.DirEntry, err error) error { if err != nil { @@ -114,11 +117,11 @@ func FilesFromDisk(options *FromDiskOptions, filenames map[string]string) ([]Fil info = noAttrFileInfo{info} } - file := File{ + file := FileInfo{ FileInfo: info, NameInArchive: nameInArchive, LinkTarget: linkTarget, - Open: func() (io.ReadCloser, error) { + Open: func() (fs.File, error) { return os.Open(filename) }, } @@ -191,7 +194,7 @@ func (no noAttrFileInfo) Mode() fs.FileMode { return no.FileInfo.Mode() & (fs.ModeType | fs.ModePerm) } func (noAttrFileInfo) ModTime() time.Time { return time.Time{} } -func (noAttrFileInfo) Sys() interface{} { return nil } +func (noAttrFileInfo) Sys() any { return nil } // FromDiskOptions specifies various options for gathering files from disk. type FromDiskOptions struct { @@ -215,12 +218,12 @@ type FromDiskOptions struct { // archive contents are not necessarily ordered, skipping directories requires // memory, and skipping lots of directories may run up your memory bill. // -// Any other returned error will terminate a walk. -type FileHandler func(ctx context.Context, f File) error +// Any other returned error will terminate a walk and be returned to the caller. +type FileHandler func(ctx context.Context, info FileInfo) error // openAndCopyFile opens file for reading, copies its // contents to w, then closes file. 
-func openAndCopyFile(file File, w io.Writer) error { +func openAndCopyFile(file FileInfo, w io.Writer) error { fileReader, err := file.Open() if err != nil { return err diff --git a/archiver_test.go b/archiver_test.go index e4355540..0cf8124c 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -245,7 +245,8 @@ func TestNameOnDiskToNameInArchive(t *testing.T) { }, } { if !strings.HasPrefix(tc.nameOnDisk, tc.rootOnDisk) { - t.Fatalf("Test %d: Invalid test case! Filename (on disk) will have rootOnDisk as a prefix according to the fs.WalkDirFunc godoc.", i) + t.Errorf("Test %d: Invalid test case! Filename (on disk) will have rootOnDisk as a prefix according to the fs.WalkDirFunc godoc.", i) + continue } if tc.windows && runtime.GOOS != "windows" { t.Logf("Test %d: Skipping test that is only compatible with Windows", i) diff --git a/brotli.go b/brotli.go index 5d17fae7..c650f40e 100644 --- a/brotli.go +++ b/brotli.go @@ -1,6 +1,7 @@ package archiver import ( + "context" "io" "strings" @@ -16,19 +17,25 @@ type Brotli struct { Quality int } -func (Brotli) Name() string { return ".br" } +func (Brotli) Extension() string { return ".br" } -func (br Brotli) Match(filename string, stream io.Reader) (MatchResult, error) { +func (br Brotli) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), br.Name()) { + if strings.Contains(strings.ToLower(filename), br.Extension()) { mr.ByName = true } - // brotli does not have well-defined file headers; the - // best way to match the stream would be to try decoding - // part of it, and this is not implemented for now + // brotli does not have well-defined file headers or a magic number; + // the best way to match the stream is probably to try decoding part + // of it, but we'll just have to guess a large-enough size that is + // still small enough for the smallest streams we'll encounter + r := brotli.NewReader(stream) + 
buf := make([]byte, 16) + if _, err := io.ReadFull(r, buf); err == nil { + mr.ByStream = true + } return mr, nil } diff --git a/bz2.go b/bz2.go index 57a278f4..a2a5f05e 100644 --- a/bz2.go +++ b/bz2.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "strings" @@ -17,13 +18,13 @@ type Bz2 struct { CompressionLevel int } -func (Bz2) Name() string { return ".bz2" } +func (Bz2) Extension() string { return ".bz2" } -func (bz Bz2) Match(filename string, stream io.Reader) (MatchResult, error) { +func (bz Bz2) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), bz.Name()) { + if strings.Contains(strings.ToLower(filename), bz.Extension()) { mr.ByName = true } diff --git a/formats.go b/formats.go index 546d3122..24865fea 100644 --- a/formats.go +++ b/formats.go @@ -12,7 +12,7 @@ import ( // RegisterFormat registers a format. It should be called during init. // Duplicate formats by name are not allowed and will panic. func RegisterFormat(format Format) { - name := strings.Trim(strings.ToLower(format.Name()), ".") + name := strings.Trim(strings.ToLower(format.Extension()), ".") if _, ok := formats[name]; ok { panic("format " + name + " is already registered") } @@ -32,14 +32,21 @@ func RegisterFormat(format Format) { // // If stream is non-nil then the returned io.Reader will always be // non-nil and will read from the same point as the reader which was -// passed in; it should be used in place of the input stream after +// passed in. If the input stream is not an io.Seeker, the returned +// io.Reader value should be used in place of the input stream after // calling Identify() because it preserves and re-reads the bytes that // were already read during the identification process. 
-func Identify(filename string, stream io.Reader) (Format, io.Reader, error) { +// +// If the input stream is an io.Seeker, Seek() must work, and the +// original input value will be returned instead of a wrapper value. +func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) { var compression Compression var archival Archival - rewindableStream := newRewindReader(stream) + rewindableStream, err := newRewindReader(stream) + if err != nil { + return nil, nil, err + } // try compression format first, since that's the outer "layer" for name, format := range formats { @@ -48,7 +55,7 @@ func Identify(filename string, stream io.Reader) (Format, io.Reader, error) { continue } - matchResult, err := identifyOne(format, filename, rewindableStream, nil) + matchResult, err := identifyOne(ctx, format, filename, rewindableStream, nil) if err != nil { return nil, rewindableStream.reader(), fmt.Errorf("matching %s: %w", name, err) } @@ -68,7 +75,7 @@ func Identify(filename string, stream io.Reader) (Format, io.Reader, error) { continue } - matchResult, err := identifyOne(format, filename, rewindableStream, compression) + matchResult, err := identifyOne(ctx, format, filename, rewindableStream, compression) if err != nil { return nil, rewindableStream.reader(), fmt.Errorf("matching %s: %w", name, err) } @@ -89,13 +96,17 @@ func Identify(filename string, stream io.Reader) (Format, io.Reader, error) { case compression != nil && archival != nil: return CompressedArchive{compression, archival}, bufferedStream, nil default: - return nil, bufferedStream, ErrNoMatch + return nil, bufferedStream, NoMatch } } -func identifyOne(format Format, filename string, stream *rewindReader, comp Compression) (mr MatchResult, err error) { +func identifyOne(ctx context.Context, format Format, filename string, stream *rewindReader, comp Compression) (mr MatchResult, err error) { defer stream.rewind() + if filename == "." 
{ + filename = "" + } + // if looking within a compressed format, wrap the stream in a // reader that can decompress it so we can match the "inner" format // (yes, we have to make a new reader every time we do a match, @@ -107,14 +118,14 @@ func identifyOne(format Format, filename string, stream *rewindReader, comp Comp return MatchResult{}, openErr } defer decompressedStream.Close() - mr, err = format.Match(filename, decompressedStream) + mr, err = format.Match(ctx, filename, decompressedStream) } else { // Make sure we pass a nil io.Reader not a *rewindReader(nil) var r io.Reader if stream != nil { r = stream } - mr, err = format.Match(filename, r) + mr, err = format.Match(ctx, filename, r) } // if the error is EOF, we can just ignore it. @@ -168,26 +179,26 @@ type CompressedArchive struct { // Name returns a concatenation of the archive format name // and the compression format name. -func (caf CompressedArchive) Name() string { +func (caf CompressedArchive) Extension() string { if caf.Compression == nil && caf.Archival == nil { panic("missing both compression and archive formats") } var name string if caf.Archival != nil { - name += caf.Archival.Name() + name += caf.Archival.Extension() } if caf.Compression != nil { - name += caf.Compression.Name() + name += caf.Compression.Extension() } return name } // Match matches if the input matches both the compression and archive format. 
-func (caf CompressedArchive) Match(filename string, stream io.Reader) (MatchResult, error) { +func (caf CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) { var conglomerate MatchResult if caf.Compression != nil { - matchResult, err := caf.Compression.Match(filename, stream) + matchResult, err := caf.Compression.Match(ctx, filename, stream) if err != nil { return MatchResult{}, err } @@ -208,7 +219,7 @@ func (caf CompressedArchive) Match(filename string, stream io.Reader) (MatchResu } if caf.Archival != nil { - matchResult, err := caf.Archival.Match(filename, stream) + matchResult, err := caf.Archival.Match(ctx, filename, stream) if err != nil { return MatchResult{}, err } @@ -223,7 +234,7 @@ func (caf CompressedArchive) Match(filename string, stream io.Reader) (MatchResu } // Archive adds files to the output archive while compressing the result. -func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []File) error { +func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { if caf.Compression != nil { wc, err := caf.Compression.OpenWriter(output) if err != nil { @@ -239,7 +250,7 @@ func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, file func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { do, ok := caf.Archival.(ArchiverAsync) if !ok { - return fmt.Errorf("%s archive does not support async writing", caf.Name()) + return fmt.Errorf("%s archive does not support async writing", caf.Extension()) } if caf.Compression != nil { wc, err := caf.Compression.OpenWriter(output) @@ -253,27 +264,13 @@ func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, } // Extract reads files out of an archive while decompressing the results. 
-// If Extract is not called from ArchiveFS.Open, then the FileHandler passed -// in must close all opened files by the time the Extract walk finishes. func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { if caf.Compression != nil { rc, err := caf.Compression.OpenReader(sourceArchive) if err != nil { return err } - // I don't like this solution, but we have to close the decompressor. - // The problem is that if we simply defer rc.Close(), we potentially - // close it before the caller is done using files it opened. Ideally - // it should be closed when the sourceArchive is also closed. But since - // we don't originate sourceArchive, we can't close it when it closes. - // The best I can think of for now is this hack where we tell a type - // that supports this to close another reader when itself closes. - // See issue #365. - if cc, ok := sourceArchive.(compressorCloser); ok { - cc.closeCompressor(rc) - } else { - defer rc.Close() - } + defer rc.Close() sourceArchive = rc } return caf.Archival.Extract(ctx, sourceArchive, pathsInArchive, handleFile) @@ -299,26 +296,42 @@ func (mr MatchResult) Matched() bool { return mr.ByName || mr.ByStream } // read from the stream. This is useful for "peeking" a stream an // arbitrary number of bytes. Loosely based on the Connection type // from https://github.com/mholt/caddy-l4. +// +// If the reader is also an io.Seeker, no buffer is used, and instead +// the stream seeks back to the starting position. 
type rewindReader struct { io.Reader + start int64 buf *bytes.Buffer bufReader io.Reader } -func newRewindReader(r io.Reader) *rewindReader { +func newRewindReader(r io.Reader) (*rewindReader, error) { if r == nil { - return nil + return nil, nil } - return &rewindReader{ - Reader: r, - buf: new(bytes.Buffer), + + rr := &rewindReader{Reader: r} + + // avoid buffering if we have a seeker we can use + if seeker, ok := r.(io.Seeker); ok { + var err error + rr.start, err = seeker.Seek(0, io.SeekCurrent) + if err != nil { + return nil, fmt.Errorf("seek to determine current position: %w", err) + } + } else { + rr.buf = new(bytes.Buffer) } + + return rr, nil } func (rr *rewindReader) Read(p []byte) (n int, err error) { if rr == nil { - panic("internal error: reading from nil rewindReader") + panic("reading from nil rewindReader") } + // if there is a buffer we should read from, start // with that; we only read from the underlying stream // after the buffer has been "depleted" @@ -333,13 +346,13 @@ func (rr *rewindReader) Read(p []byte) (n int, err error) { } } - // buffer has been "depleted" so read from - // underlying connection + // buffer has been depleted or we are not using one, + // so read from underlying stream nr, err := rr.Reader.Read(p[n:]) // anything that was read needs to be written to - // the buffer, even if there was an error - if nr > 0 { + // the buffer (if used), even if there was an error + if nr > 0 && rr.buf != nil { if nw, errw := rr.buf.Write(p[n : n+nr]); errw != nil { return nw, errw } @@ -355,18 +368,24 @@ func (rr *rewindReader) Read(p []byte) (n int, err error) { // rewind resets the stream to the beginning by causing // Read() to start reading from the beginning of the -// buffered bytes. +// stream, or, if buffering, the buffered bytes. 
func (rr *rewindReader) rewind() { if rr == nil { return } + if ras, ok := rr.Reader.(io.Seeker); ok { + if _, err := ras.Seek(rr.start, io.SeekStart); err == nil { + return + } + } rr.bufReader = bytes.NewReader(rr.buf.Bytes()) } // reader returns a reader that reads first from the buffered -// bytes, then from the underlying stream. After calling this, -// no more rewinding is allowed since reads from the stream are -// not recorded, so rewinding properly is impossible. +// bytes (if buffering), then from the underlying stream; if a +// Seeker, the stream will be seeked back to the start. After +// calling this, no more rewinding is allowed since reads from +// the stream are not recorded, so rewinding properly is impossible. // If the underlying reader implements io.Seeker, then the // underlying reader will be used directly. func (rr *rewindReader) reader() io.Reader { @@ -374,15 +393,15 @@ func (rr *rewindReader) reader() io.Reader { return nil } if ras, ok := rr.Reader.(io.Seeker); ok { - if _, err := ras.Seek(0, io.SeekStart); err == nil { + if _, err := ras.Seek(rr.start, io.SeekStart); err == nil { return rr.Reader } } return io.MultiReader(bytes.NewReader(rr.buf.Bytes()), rr.Reader) } -// ErrNoMatch is returned if there are no matching formats. -var ErrNoMatch = fmt.Errorf("no formats matched") +// NoMatch is a special error returned if there are no matching formats. +var NoMatch = fmt.Errorf("no formats matched") // Registered formats. 
var formats = make(map[string]Format) diff --git a/formats_test.go b/formats_test.go index ec5e3bb8..6c8d621f 100644 --- a/formats_test.go +++ b/formats_test.go @@ -16,7 +16,10 @@ import ( func TestRewindReader(t *testing.T) { data := "the header\nthe body\n" - r := newRewindReader(strings.NewReader(data)) + r, err := newRewindReader(strings.NewReader(data)) + if err != nil { + t.Errorf("creating rewindReader: %v", err) + } buf := make([]byte, 10) // enough for 'the header' @@ -25,10 +28,10 @@ func TestRewindReader(t *testing.T) { r.rewind() n, err := r.Read(buf) if err != nil { - t.Fatalf("Read failed: %s", err) + t.Errorf("Read failed: %s", err) } if string(buf[:n]) != "the header" { - t.Fatalf("iteration %d: expected 'the header' but got '%s' (n=%d)", i, string(buf[:n]), n) + t.Errorf("iteration %d: expected 'the header' but got '%s' (n=%d)", i, string(buf[:n]), n) } } @@ -38,10 +41,10 @@ func TestRewindReader(t *testing.T) { buf = make([]byte, len(data)) n, err := io.ReadFull(finalReader, buf) if err != nil { - t.Fatalf("ReadFull failed: %s (n=%d)", err, n) + t.Errorf("ReadFull failed: %s (n=%d)", err, n) } if string(buf) != data { - t.Fatalf("expected '%s' but got '%s'", string(data), string(buf)) + t.Errorf("expected '%s' but got '%s'", string(data), string(buf)) } } @@ -65,24 +68,24 @@ func TestCompression(t *testing.T) { checkErr(t, wc.Close(), "closing writer") // make sure Identify correctly chooses this compression method - format, stream, err := Identify(testFilename, compressed) + format, stream, err := Identify(context.Background(), testFilename, compressed) checkErr(t, err, "identifying") - if format.Name() != comp.Name() { - t.Fatalf("expected format %s but got %s", comp.Name(), format.Name()) + if format.Extension() != comp.Extension() { + t.Errorf("expected format %s but got %s", comp.Extension(), format.Extension()) } // read the contents back out and compare decompReader, err := format.(Decompressor).OpenReader(stream) - checkErr(t, err, 
"opening with decompressor '%s'", format.Name()) + checkErr(t, err, "opening with decompressor '%s'", format.Extension()) data, err := io.ReadAll(decompReader) checkErr(t, err, "reading decompressed data") checkErr(t, decompReader.Close(), "closing decompressor") if !bytes.Equal(data, contents) { - t.Fatalf("not equal to original") + t.Errorf("not equal to original") } } - var cannotIdentifyFromStream = map[string]bool{Brotli{}.Name(): true} + var cannotIdentifyFromStream = map[string]bool{Brotli{}.Extension(): true} for _, f := range formats { // only test compressors @@ -91,24 +94,24 @@ func TestCompression(t *testing.T) { continue } - t.Run(f.Name()+"_with_extension", func(t *testing.T) { - testOK(t, comp, "file"+f.Name()) + t.Run(f.Extension()+"_with_extension", func(t *testing.T) { + testOK(t, comp, "file"+f.Extension()) }) - if !cannotIdentifyFromStream[f.Name()] { - t.Run(f.Name()+"_without_extension", func(t *testing.T) { + if !cannotIdentifyFromStream[f.Extension()] { + t.Run(f.Extension()+"_without_extension", func(t *testing.T) { testOK(t, comp, "") }) } } } -func checkErr(t *testing.T, err error, msgFmt string, args ...interface{}) { +func checkErr(t *testing.T, err error, msgFmt string, args ...any) { t.Helper() if err == nil { return } args = append(args, err) - t.Fatalf(msgFmt+": %s", args...) + t.Errorf(msgFmt+": %s", args...) 
} func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testing.T) { @@ -142,13 +145,13 @@ func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testi } headerTrimmed := tt.header[:headerLen-1] stream := bytes.NewReader(headerTrimmed) - got, _, err := Identify("", stream) + got, _, err := Identify(context.Background(), "", stream) if got != nil { - t.Errorf("no Format expected for trimmed know %s header: found Format= %v", tt.name, got.Name()) + t.Errorf("no Format expected for trimmed know %s header: found Format= %v", tt.name, got.Extension()) return } - if ErrNoMatch != err { - t.Fatalf("ErrNoMatch expected for for trimmed know %s header: err :=%#v", tt.name, err) + if !errors.Is(err, NoMatch) { + t.Errorf("NoMatch expected for for trimmed know %s header: err :=%#v", tt.name, err) return } @@ -185,13 +188,13 @@ func TestIdentifyCanAssessSmallOrNoContent(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, _, err := Identify("", tt.args.stream) + got, _, err := Identify(context.Background(), "", tt.args.stream) if got != nil { - t.Errorf("no Format expected for non archive and not compressed stream: found Format= %v", got.Name()) + t.Errorf("no Format expected for non archive and not compressed stream: found Format=%#v", got) return } - if ErrNoMatch != err { - t.Fatalf("ErrNoMatch expected for non archive and not compressed stream: err :=%#v", err) + if !errors.Is(err, NoMatch) { + t.Errorf("NoMatch expected for non archive and not compressed stream: %#v", err) return } @@ -206,36 +209,36 @@ func compress( buf := bytes.NewBuffer(make([]byte, 0, 128)) cwriter, err := openwriter(buf) if err != nil { - t.Fatalf("fail to open compression writer: compression-name=%s, err=%#v", compName, err) + t.Errorf("fail to open compression writer: compression-name=%s, err=%#v", compName, err) return nil } _, err = cwriter.Write(content) if err != nil { cerr := cwriter.Close() - t.Fatalf( + t.Errorf( 
"fail to write using compression writer: compression-name=%s, err=%#v, close-err=%#v", compName, err, cerr) return nil } err = cwriter.Close() if err != nil { - t.Fatalf("fail to close compression writer: compression-name=%s, err=%#v", compName, err) + t.Errorf("fail to close compression writer: compression-name=%s, err=%#v", compName, err) return nil } return buf.Bytes() } func archive(t *testing.T, arch Archiver, fname string, fileInfo fs.FileInfo) []byte { - files := []File{ + files := []FileInfo{ {FileInfo: fileInfo, NameInArchive: "tmp.txt", - Open: func() (io.ReadCloser, error) { + Open: func() (fs.File, error) { return os.Open(fname) }}, } buf := bytes.NewBuffer(make([]byte, 0, 128)) err := arch.Archive(context.TODO(), buf, files) if err != nil { - t.Fatalf("fail to create archive: err=%#v", err) + t.Errorf("fail to create archive: err=%#v", err) return nil } return buf.Bytes() @@ -251,29 +254,24 @@ func newWriteNopCloser(w io.Writer) (io.WriteCloser, error) { } func newTmpTextFile(t *testing.T, content string) (string, fs.FileInfo) { - tmpTxtFile, err := os.CreateTemp("", "TestIdentifyFindFormatByStreamContent-tmp-*.txt") if err != nil { - t.Fatalf("fail to create tmp test file for archive tests: err=%v", err) + t.Errorf("fail to create tmp test file for archive tests: err=%v", err) return "", nil } fname := tmpTxtFile.Name() if _, err = tmpTxtFile.Write([]byte(content)); err != nil { - tmpTxtFile.Close() - os.Remove(fname) - t.Fatalf("fail to write content to tmp-txt-file: err=%#v", err) + t.Errorf("fail to write content to tmp-txt-file: err=%#v", err) return "", nil } if err = tmpTxtFile.Close(); err != nil { - os.Remove(fname) - t.Fatalf("fail to close tmp-txt-file: err=%#v", err) + t.Errorf("fail to close tmp-txt-file: err=%#v", err) return "", nil } fi, err := os.Stat(fname) if err != nil { - os.Remove(fname) - t.Fatalf("fail to get tmp-txt-file stats: err=%v", err) + t.Errorf("fail to get tmp-txt-file stats: err=%v", err) return "", nil } @@ -281,9 
+279,9 @@ func newTmpTextFile(t *testing.T, content string) (string, fs.FileInfo) { } func TestIdentifyFindFormatByStreamContent(t *testing.T) { - tmpTxtFileName, tmpTxtFileInfo := newTmpTextFile(t, "this is text") + tmpTxtFileName, tmpTxtFileInfo := newTmpTextFile(t, "this is text that has to be long enough for brotli to match") t.Cleanup(func() { - os.Remove(tmpTxtFileName) + os.RemoveAll(tmpTxtFileName) }) tests := []struct { @@ -293,7 +291,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) { compressorName string wantFormatName string }{ - //TODO add test case for brotli when Brotli.Match() by stream content is implemented + { + name: "should recognize brotli", + openCompressionWriter: Brotli{}.OpenWriter, + content: []byte("this is text, but it has to be long enough to match brotli which doesn't have a magic number"), + compressorName: ".br", + wantFormatName: ".br", + }, { name: "should recognize bz2", openCompressionWriter: Bz2{}.OpenWriter, @@ -389,13 +393,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { stream := bytes.NewReader(compress(t, tt.compressorName, tt.content, tt.openCompressionWriter)) - got, _, err := Identify("", stream) + got, _, err := Identify(context.Background(), "", stream) if err != nil { - t.Fatalf("should have found a corresponding Format: err :=%+v", err) + t.Errorf("should have found a corresponding Format, but got err=%+v", err) return } - if tt.wantFormatName != got.Name() { - t.Errorf("unexpected format found: expected=%s actual:%s", tt.wantFormatName, got.Name()) + if tt.wantFormatName != got.Extension() { + t.Errorf("unexpected format found: expected=%s actual=%s", tt.wantFormatName, got.Extension()) return } @@ -408,13 +412,13 @@ func TestIdentifyAndOpenZip(t *testing.T) { checkErr(t, err, "opening zip") defer f.Close() - format, reader, err := Identify("test.zip", f) + format, reader, err := Identify(context.Background(), "test.zip", 
f) checkErr(t, err, "identifying zip") - if format.Name() != ".zip" { - t.Fatalf("unexpected format found: expected=.zip actual:%s", format.Name()) + if format.Extension() != ".zip" { + t.Errorf("unexpected format found: expected=.zip actual=%s", format.Extension()) } - err = format.(Extractor).Extract(context.Background(), reader, nil, func(ctx context.Context, f File) error { + err = format.(Extractor).Extract(context.Background(), reader, nil, func(ctx context.Context, f FileInfo) error { rc, err := f.Open() if err != nil { return err @@ -430,25 +434,26 @@ func TestIdentifyASCIIFileStartingWithX(t *testing.T) { // Create a temporary file starting with the letter 'x' tmpFile, err := os.CreateTemp("", "TestIdentifyASCIIFileStartingWithX-tmp-*.txt") if err != nil { - t.Fatalf("fail to create tmp test file for archive tests: err=%v", err) + t.Errorf("fail to create tmp test file for archive tests: err=%v", err) } + defer os.Remove(tmpFile.Name()) _, err = tmpFile.Write([]byte("xThis is a test file")) if err != nil { - t.Fatalf("Failed to write to temp file: %v", err) + t.Errorf("Failed to write to temp file: %v", err) } tmpFile.Close() // Open the file and use the Identify function file, err := os.Open(tmpFile.Name()) if err != nil { - t.Fatalf("Failed to open temp file: %v", err) + t.Errorf("Failed to open temp file: %v", err) } defer file.Close() - _, _, err = Identify(tmpFile.Name(), file) - if !errors.Is(err, ErrNoMatch) { - t.Fatalf("Identify failed: %v", err) + _, _, err = Identify(context.Background(), tmpFile.Name(), file) + if !errors.Is(err, NoMatch) { + t.Errorf("Identify failed: %v", err) } } diff --git a/fs.go b/fs.go index bceda446..560727b5 100644 --- a/fs.go +++ b/fs.go @@ -9,132 +9,121 @@ import ( "os" "path" "path/filepath" - "runtime" - "sort" + "slices" "strings" "time" - - "github.com/klauspost/compress/zip" ) -// FileSystem opens the file at root as a read-only file system. 
The root may be a -// path to a directory, archive file, compressed archive file, compressed file, or -// any other file on disk. +// FileSystem identifies the format of the input and returns a read-only file system. +// The input can be a filename, stream, or both. // -// If root is a directory, its contents are accessed directly from the disk's file system. -// If root is an archive file, its contents can be accessed like a normal directory; -// compressed archive files are transparently decompressed as contents are accessed. -// And if root is any other file, it is the only file in the file system; if the file -// is compressed, it is transparently decompressed when read from. +// If only a filename is specified, it may be a path to a directory, archive file, +// compressed archive file, compressed regular file, or any other regular file on +// disk. If the filename is a directory, its contents are accessed directly from +// the device's file system. If the filename is an archive file, the contents can +// be accessed like a normal directory; compressed archive files are transparently +// decompressed as contents are accessed. And if the filename is any other file, it +// is the only file in the returned file system; if the file is compressed, it is +// transparently decompressed when read from. // -// This method essentially offers uniform read access to various kinds of files: -// directories, archives, compressed archives, and individual files are all treated -// the same way. +// If a stream is specified, the filename (if available) is used as a hint to help +// identify its format. Streams of archive files must be able to be made into an +// io.SectionReader (for safe concurrency) which requires io.ReaderAt and io.Seeker +// (to efficiently determine size). The automatic format identification requires +// io.Reader and will use io.Seeker if supported to avoid buffering. 
// -// Except for zip files, the returned FS values are guaranteed to be fs.ReadDirFS and -// fs.StatFS types, and may also be fs.SubFS. -func FileSystem(ctx context.Context, root string) (fs.FS, error) { - info, err := os.Stat(root) - if err != nil { - return nil, err - } +// Whether the data comes from disk or a stream, it is peeked at to automatically +// detect which format to use. +// +// This function essentially offers uniform read access to various kinds of files: +// directories, archives, compressed archives, individual files, and file streams +// are all treated the same way. +// +// NOTE: The performance of compressed tar archives is not great due to overhead +// with decompression. However, the fs.WalkDir() use case has been optimized to +// create an index on first call to ReadDir(). +func FileSystem(ctx context.Context, filename string, stream ReaderAtSeeker) (fs.FS, error) { + if filename == "" && stream == nil { + return nil, errors.New("no input") + } + + // if an input stream is specified, we'll use that for identification + // and for ArchiveFS (if it's an archive); but if not, we'll open the + // file and read it for identification, but in that case we won't want + // to also use it for the ArchiveFS (because we need to close what we + // opened, and ArchiveFS opens its own files), hence this separate var + idStream := stream + + // if input is only a filename (no stream), check if it's a directory; + // if not, open it so we can determine which format to use (filename + // is not always a good indicator of file format) + if filename != "" && stream == nil { + info, err := os.Stat(filename) + if err != nil { + return nil, err + } - // real folders can be accessed easily - if info.IsDir() { - return DirFS(root), nil - } + // real folders can be accessed easily + if info.IsDir() { + return os.DirFS(filename), nil + } - // if any archive formats recognize this file, access it like a folder - file, err := os.Open(root) - if err != nil { - return 
nil, err + // if any archive formats recognize this file, access it like a folder + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + idStream = file // use file for format identification only } - defer file.Close() - format, _, err := Identify(filepath.Base(root), file) - if err != nil && !errors.Is(err, ErrNoMatch) { - return nil, err + // normally, callers should use the Reader value returned from Identify, but + // our input is a Seeker, so we know the original input value gets returned + format, _, err := Identify(ctx, filepath.Base(filename), idStream) + if errors.Is(err, NoMatch) { + return FileFS{Path: filename}, nil // must be an ordinary file + } + if err != nil { + return nil, fmt.Errorf("identify format: %w", err) } - if format != nil { - switch ff := format.(type) { - case Zip: - // zip.Reader is more performant than ArchiveFS, because zip.Reader caches content information - // and zip.Reader can open several content files concurrently because of io.ReaderAt requirement - // while ArchiveFS can't. - // zip.Reader doesn't suffer from issue #330 and #310 according to local test (but they should be fixed anyway) - - // open the file anew, as our original handle will be closed when we return - file, err := os.Open(root) - if err != nil { - return nil, err - } - return zip.NewReader(file, info.Size()) - case Archival: - // TODO: we only really need Extractor and Decompressor here, not the combined interfaces... 
- return ArchiveFS{Path: root, Format: ff, Context: ctx}, nil - case Compression: - return FileFS{Path: root, Compression: ff}, nil + switch fileFormat := format.(type) { + case Extractor: + // if no stream was input, return an ArchiveFS that relies on the filepath + if stream == nil { + return &ArchiveFS{Path: filename, Format: fileFormat, Context: ctx}, nil } - } - // otherwise consider it an ordinary file; make a file system with it as its only file - return FileFS{Path: root}, nil -} + // otherwise, if a stream was input, return an ArchiveFS that relies on that -// DirFS allows accessing a directory on disk with a consistent file system interface. -// It is almost the same as os.DirFS, except for some reason os.DirFS only implements -// Open() and Stat(), but we also need ReadDir(). Seems like an obvious miss (as of Go 1.17) -// and I have questions: https://twitter.com/mholt6/status/1476058551432876032 -type DirFS string + // determine size -- we know that the stream value we get back from + // Identify is the same type as what we input because it is a Seeker + size, err := stream.Seek(0, io.SeekEnd) + if err != nil { + return nil, fmt.Errorf("seeking for size: %w", err) + } + _, err = stream.Seek(0, io.SeekStart) + if err != nil { + return nil, fmt.Errorf("seeking back to beginning: %w", err) + } -// Open opens the named file. -func (f DirFS) Open(name string) (fs.File, error) { - if err := f.checkName(name, "open"); err != nil { - return nil, err - } - return os.Open(filepath.Join(string(f), name)) -} + sr := io.NewSectionReader(stream, 0, size) -// ReadDir returns a listing of all the files in the named directory. -func (f DirFS) ReadDir(name string) ([]fs.DirEntry, error) { - if err := f.checkName(name, "readdir"); err != nil { - return nil, err - } - return os.ReadDir(filepath.Join(string(f), name)) -} + return &ArchiveFS{Stream: sr, Format: fileFormat, Context: ctx}, nil -// Stat returns info about the named file. 
-func (f DirFS) Stat(name string) (fs.FileInfo, error) { - if err := f.checkName(name, "stat"); err != nil { - return nil, err + case Compression: + return FileFS{Path: filename, Compression: fileFormat}, nil } - return os.Stat(filepath.Join(string(f), name)) -} -// Sub returns an FS corresponding to the subtree rooted at dir. -func (f DirFS) Sub(dir string) (fs.FS, error) { - if err := f.checkName(dir, "sub"); err != nil { - return nil, err - } - info, err := f.Stat(dir) - if err != nil { - return nil, err - } - if !info.IsDir() { - return nil, fmt.Errorf("%s is not a directory", dir) - } - return DirFS(filepath.Join(string(f), dir)), nil + return nil, fmt.Errorf("unable to create file system rooted at %s due to unsupported file or folder type", filename) } -// checkName returns an error if name is not a valid path according to the docs of -// the io/fs package, with an extra cue taken from the standard lib's implementation -// of os.dirFS.Open(), which checks for invalid characters in Windows paths. -func (f DirFS) checkName(name, op string) error { - if !fs.ValidPath(name) || runtime.GOOS == "windows" && strings.ContainsAny(name, `\:`) { - return &fs.PathError{Op: op, Path: name, Err: fs.ErrInvalid} - } - return nil +// ReaderAtSeeker is a type that can read, read at, and seek. +// os.File and io.SectionReader both implement this interface. +type ReaderAtSeeker interface { + io.Reader + io.ReaderAt + io.Seeker } // FileFS allows accessing a file on disk using a consistent file system interface. @@ -169,7 +158,15 @@ func (f FileFS) Open(name string) (fs.File, error) { if err != nil { return nil, err } - return compressedFile{file, r}, nil + return compressedFile{r, closeBoth{file, r}}, nil +} + +// Stat stats the named file, which must be the file used to create the file system. 
+func (f FileFS) Stat(name string) (fs.FileInfo, error) { + if err := f.checkName(name, "stat"); err != nil { + return nil, err + } + return os.Stat(f.Path) } // ReadDir returns a directory listing with the file as the singular entry. @@ -184,23 +181,18 @@ func (f FileFS) ReadDir(name string) ([]fs.DirEntry, error) { return []fs.DirEntry{fs.FileInfoToDirEntry(info)}, nil } -// Stat stats the named file, which must be the file used to create the file system. -func (f FileFS) Stat(name string) (fs.FileInfo, error) { - if err := f.checkName(name, "stat"); err != nil { - return nil, err - } - return os.Stat(f.Path) -} - // checkName ensures the name is a valid path and also, in the case of // the FileFS, that it is either ".", the filename originally passed in // to create the FileFS, or the base of the filename (name without path). // Other names do not make sense for a FileFS since the FS is only 1 file. func (f FileFS) checkName(name, op string) error { + if name == f.Path { + return nil + } if !fs.ValidPath(name) { - return &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + return &fs.PathError{Op: op, Path: name, Err: fs.ErrInvalid} } - if name != "." && name != f.Path && name != filepath.Base(f.Path) { + if name != "." && name != filepath.Base(f.Path) { return &fs.PathError{Op: op, Path: name, Err: fs.ErrNotExist} } return nil @@ -210,50 +202,66 @@ func (f FileFS) checkName(name, op string) error { // from a decompression reader, and which closes both // that reader and the underlying file. 
type compressedFile struct { - *os.File - decomp io.ReadCloser + io.Reader // decompressor + closeBoth // file and decompressor } -func (cf compressedFile) Read(p []byte) (int, error) { return cf.decomp.Read(p) } -func (cf compressedFile) Close() error { - err := cf.File.Close() - err2 := cf.decomp.Close() - if err2 != nil && err == nil { - err = err2 - } - return err -} - -// ArchiveFS allows accessing an archive (or a compressed archive) using a +// ArchiveFS allows reading an archive (or a compressed archive) using a // consistent file system interface. Essentially, it allows traversal and // reading of archive contents the same way as any normal directory on disk. // The contents of compressed archives are transparently decompressed. // -// A valid ArchiveFS value must set either Path or Stream. If Path is set, -// a literal file will be opened from the disk. If Stream is set, new -// SectionReaders will be implicitly created to access the stream, enabling -// safe, concurrent access. +// A valid ArchiveFS value must set either Path or Stream, but not both. +// If Path is set, a literal file will be opened from the disk. +// If Stream is set, new SectionReaders will be implicitly created to +// access the stream, enabling safe, concurrent access. // // NOTE: Due to Go's file system APIs (see package io/fs), the performance -// of ArchiveFS when used with fs.WalkDir() is poor for archives with lots -// of files (see issue #326). The fs.WalkDir() API requires listing each -// directory's contents in turn, and the only way to ensure we return the -// complete list of folder contents is to traverse the whole archive and -// build a slice; so if this is done for the root of an archive with many -// files, performance tends toward O(n^2) as the entire archive is walked -// for every folder that is enumerated (WalkDir calls ReadDir recursively). 
-// If you do not need each directory's contents walked in order, please -// prefer calling Extract() from an archive type directly; this will perform -// a O(n) walk of the contents in archive order, rather than the slower -// directory tree order. +// of ArchiveFS can suffer when using fs.WalkDir(). To mitigate this, +// an optimized fs.ReadDirFS has been implemented that indexes the entire +// archive on the first call to ReadDir() (since the entire archive needs +// to be walked for every call to ReadDir() anyway, as archive contents are +// often unordered). The first call to ReadDir(), i.e. near the start of the +// walk, will be slow for large archives, but should be instantaneous after. +// If you don't care about walking a file system in directory order, consider +// calling Extract() on the underlying archive format type directly, which +// walks the archive in entry order, without needing to do any sorting. +// +// Note that fs.FS implementations, including this one, reject paths starting +// with "./". This can be problematic sometimes, as it is not uncommon for +// tarballs to contain a top-level/root directory literally named ".", which +// can happen if a tarball is created in the same directory it is archiving. +// The underlying Extract() calls are faithful to entries with this name, +// but file systems have certain semantics around "." that restrict its use. +// For example, a file named "." cannot be created on a real file system +// because it is a special name that means "current directory". +// +// We had to decide whether to honor the true name in the archive, or honor +// file system semantics. Given that this is a virtual file system and other +// code using the fs.FS APIs will trip over a literal directory named ".", +// we choose to honor file system semantics. Files named "." are ignored; +// directories with this name are effectively transparent; their contents +// get promoted up a directory/level. This means a file at "./x" where "." 
+// is a literal directory name, its name will be passed in as "x" in +// WalkDir callbacks. If you need the raw, uninterpreted values from an +// archive, use the formats' Extract() method directly. See +// https://github.com/golang/go/issues/70155 for a little more background. +// +// This does have one negative edge case... a tar containing contents like +// [x . ./x] will have a conflict on the file named "x" because "./x" will +// also be accessed with the name of "x". type ArchiveFS struct { // set one of these Path string // path to the archive file on disk, or... Stream *io.SectionReader // ...stream from which to read archive - Format Archival // the archive format + Format Extractor // the archive format Prefix string // optional subdirectory in which to root the fs - Context context.Context // optional + Context context.Context // optional; mainly for cancellation + + // amortizing cache speeds up walks (esp. ReadDir) + contents map[string]fs.FileInfo + dirs map[string][]fs.DirEntry } // context always return a context, preferring f.Context if not nil. @@ -268,12 +276,33 @@ func (f ArchiveFS) context() context.Context { // the archive file itself will be opened as a directory file. 
func (f ArchiveFS) Open(name string) (fs.File, error) { if !fs.ValidPath(name) { - return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("%w: %s", fs.ErrInvalid, name)} + } + + // apply prefix if fs is rooted in a subtree + name = path.Join(f.Prefix, name) + + // if we've already indexed the archive, we can know quickly if the file doesn't exist, + // and we can also return directory files with their entries instantly + if f.contents != nil { + if info, found := f.contents[name]; found { + if info.IsDir() { + if entries, ok := f.dirs[name]; ok { + return &dirFile{info: info, entries: entries}, nil + } + } + } else { + if entries, found := f.dirs[name]; found { + return &dirFile{info: implicitDirInfo{implicitDirEntry{name}}, entries: entries}, nil + } + return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("open %s: %w", name, fs.ErrNotExist)} + } } - var archiveFile fs.File + // if a filename is specified, open the archive file + var archiveFile *os.File var err error - if f.Path != "" { + if f.Stream == nil { archiveFile, err = os.Open(f.Path) if err != nil { return nil, err @@ -286,210 +315,133 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { archiveFile.Close() } }() - } else if f.Stream != nil { - archiveFile = fakeArchiveFile{} + } else if f.Stream == nil { + return nil, fmt.Errorf("no input; one of Path or Stream must be set") } - // apply prefix if fs is rooted in a subtree - name = path.Join(f.Prefix, name) - // handle special case of opening the archive root - if name == "." && archiveFile != nil { - archiveInfo, err := archiveFile.Stat() + if name == "." 
{ + var archiveInfo fs.FileInfo + if archiveFile != nil { + archiveInfo, err = archiveFile.Stat() + if err != nil { + return nil, err + } + } else { + archiveInfo = implicitDirInfo{ + implicitDirEntry{"."}, + } + } + var entries []fs.DirEntry + entries, err = f.ReadDir(name) if err != nil { return nil, err } - entries, err := f.ReadDir(name) - if err != nil { + if err := archiveFile.Close(); err != nil { return nil, err } return &dirFile{ - extractedFile: extractedFile{ - File: File{ - FileInfo: dirFileInfo{archiveInfo}, - NameInArchive: ".", - }, - }, + info: dirFileInfo{archiveInfo}, entries: entries, }, nil } - var ( - files []File - found bool - ) - // collect them all or stop at exact file match, note we don't stop at folder match - handler := func(_ context.Context, file File) error { - file.NameInArchive = strings.Trim(file.NameInArchive, "/") - files = append(files, file) - if file.NameInArchive == name && !file.IsDir() { - found = true - return errStopWalk - } - return nil - } - var inputStream io.Reader if f.Stream == nil { - // when the archive file is closed, any (soon-to-be) associated decompressor should also be closed; see #365 - archiveFile = &closeBoth{File: archiveFile} inputStream = archiveFile } else { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } - err = f.Format.Extract(f.context(), inputStream, []string{name}, handler) - if found { - err = nil - } - if err != nil { - return nil, err - } - - if len(files) == 0 { - return nil, fs.ErrNotExist + var decompressor io.ReadCloser + if caf, ok := f.Format.(CompressedArchive); ok { + if caf.Compression != nil { + decompressor, err = caf.Compression.OpenReader(inputStream) + if err != nil { + return nil, err + } + inputStream = decompressor + } } - // exactly one or exact file found, test name match to detect implicit dir name https://github.com/mholt/archiver/issues/340 - if (len(files) == 1 && files[0].NameInArchive == name) || found { - file := files[len(files)-1] - if 
file.IsDir() { - return &dirFile{extractedFile: extractedFile{File: file}}, nil + // prepare the handler that we'll need if we have to iterate the + // archive to find the file being requested + var fsFile fs.File + handler := func(ctx context.Context, file FileInfo) error { + if err := ctx.Err(); err != nil { + return err } - // if named file is not a regular file, it can't be opened - if !file.Mode().IsRegular() { - return extractedFile{File: file}, nil + // paths in archives can't necessarily be trusted; also clean up any "./" prefix + file.NameInArchive = path.Clean(file.NameInArchive) + + if !strings.HasPrefix(file.NameInArchive, name) { + return nil } - // regular files can be read, so open it for reading - rc, err := file.Open() - if err != nil { - return nil, err + // if this is the requested file, and it's a directory, set up the dirFile, + // which will include a listing of all its contents as we continue the walk + if file.NameInArchive == name && file.IsDir() { + fsFile = &dirFile{info: file} // will fill entries slice as we continue the walk + return nil } - return extractedFile{File: file, ReadCloser: rc, parentArchive: archiveFile}, nil - } - // implicit files - files = fillImplicit(files) - file, foundFile := search(name, files) - if !foundFile { - return nil, fs.ErrNotExist - } + // if the named file was a directory and we are filling its entries, + // add this entry to the list + if df, ok := fsFile.(*dirFile); ok { + df.entries = append(df.entries, fs.FileInfoToDirEntry(file)) - if file.IsDir() { - return &dirFile{extractedFile: extractedFile{File: file}, entries: openReadDir(name, files)}, nil - } + // don't traverse into subfolders + if file.IsDir() { + return fs.SkipDir + } - // very unlikely - // maybe just panic, because extractor already walk through all the entries, file is impossible to read - // unless it's from a zip file. 
+ return nil + } - // if named file is not a regular file, it can't be opened - if !file.Mode().IsRegular() { - return extractedFile{File: file}, nil - } + innerFile, err := file.Open() + if err != nil { + return err + } - // regular files can be read, so open it for reading - rc, err := file.Open() - if err != nil { - return nil, err - } - return extractedFile{File: file, ReadCloser: rc, parentArchive: archiveFile}, nil -} + fsFile = closeBoth{File: innerFile, c: archiveFile} -// copy of the same function from zip -func split(name string) (dir, elem string, isDir bool) { - if name[len(name)-1] == '/' { - isDir = true - name = name[:len(name)-1] - } - i := len(name) - 1 - for i >= 0 && name[i] != '/' { - i-- - } - if i < 0 { - return ".", name, isDir + if decompressor != nil { + fsFile = closeBoth{fsFile, decompressor} + } + + return fs.SkipAll } - return name[:i], name[i+1:], isDir -} -// modified from zip.Reader initFileList, it's used to find all implicit dirs -func fillImplicit(files []File) []File { - dirs := make(map[string]bool) - knownDirs := make(map[string]bool) - entries := make([]File, 0) - for _, file := range files { - for dir := path.Dir(file.NameInArchive); dir != "."; dir = path.Dir(dir) { - dirs[dir] = true - } - entries = append(entries, file) - if file.IsDir() { - knownDirs[file.NameInArchive] = true - } + // when we start the walk, we pass in a nil list of files to extract, since + // files may have a "." component in them, and the underlying format doesn't + // know about our file system semantics, so we need to filter ourselves (it's + // not significantly less efficient). + if caf, ok := f.Format.(CompressedArchive); ok { + // bypass the CompressedArchive format's opening of the decompressor, since + // we already did it, since we need to keep it open after returning + // "I BYPASSED THE COMPRESSOR!" 
-Rey + err = caf.Archival.Extract(f.context(), inputStream, nil, handler) + } else { + err = f.Format.Extract(f.context(), inputStream, nil, handler) } - for dir := range dirs { - if !knownDirs[dir] { - entries = append(entries, File{FileInfo: implicitDirInfo{implicitDirEntry{path.Base(dir)}}, NameInArchive: dir}) - } + if err != nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("extract: %w", err)} } - - sort.Slice(entries, func(i, j int) bool { - fi, fj := entries[i], entries[j] - di, ei, _ := split(fi.NameInArchive) - dj, ej, _ := split(fj.NameInArchive) - - if di != dj { - return di < dj - } - return ei < ej - }) - return entries -} - -// modified from zip.Reader openLookup -func search(name string, entries []File) (File, bool) { - dir, elem, _ := split(name) - i := sort.Search(len(entries), func(i int) bool { - idir, ielem, _ := split(entries[i].NameInArchive) - return idir > dir || idir == dir && ielem >= elem - }) - if i < len(entries) { - fname := entries[i].NameInArchive - if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { - return entries[i], true - } + if fsFile == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("open %s: %w", name, fs.ErrNotExist)} } - return File{}, false -} -// modified from zip.Reader openReadDir -func openReadDir(dir string, entries []File) []fs.DirEntry { - i := sort.Search(len(entries), func(i int) bool { - idir, _, _ := split(entries[i].NameInArchive) - return idir >= dir - }) - j := sort.Search(len(entries), func(j int) bool { - jdir, _, _ := split(entries[j].NameInArchive) - return jdir > dir - }) - dirs := make([]fs.DirEntry, j-i) - for idx := range dirs { - dirs[idx] = fs.FileInfoToDirEntry(entries[i+idx]) - } - return dirs + return fsFile, nil } // Stat stats the named file from within the archive. If name is "." then // the archive file itself is statted and treated as a directory file. 
func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { if !fs.ValidPath(name) { - return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrInvalid} + return nil, &fs.PathError{Op: "stat", Path: name, Err: fmt.Errorf("%s: %w", name, fs.ErrInvalid)} } - // apply prefix if fs is rooted in a subtree - name = path.Join(f.Prefix, name) - if name == "." { if f.Path != "" { fileInfo, err := os.Stat(f.Path) @@ -502,6 +454,17 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { } } + // apply prefix if fs is rooted in a subtree + name = path.Join(f.Prefix, name) + + // if archive has already been indexed, simply use it + if f.contents != nil { + if info, ok := f.contents[name]; ok { + return info, nil + } + return nil, &fs.PathError{Op: "stat", Path: name, Err: fmt.Errorf("stat %s: %w", name, fs.ErrNotExist)} + } + var archiveFile *os.File var err error if f.Stream == nil { @@ -512,16 +475,14 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { defer archiveFile.Close() } - var ( - files []File - found bool - ) - handler := func(_ context.Context, file File) error { - file.NameInArchive = strings.Trim(file.NameInArchive, "/") - files = append(files, file) - if file.NameInArchive == name { - found = true - return errStopWalk + var result FileInfo + handler := func(ctx context.Context, file FileInfo) error { + if err := ctx.Err(); err != nil { + return err + } + if path.Clean(file.NameInArchive) == name { + result = file + return fs.SkipAll } return nil } @@ -529,33 +490,38 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { if f.Stream != nil { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } - err = f.Format.Extract(f.context(), inputStream, []string{name}, handler) - if found { - err = nil - } - if err != nil { + err = f.Format.Extract(f.context(), inputStream, nil, handler) + if err != nil && result.FileInfo == nil { return nil, err } - - // exactly one or exact file found, test name match to detect implicit dir name 
https://github.com/mholt/archiver/issues/340 - if (len(files) == 1 && files[0].NameInArchive == name) || found { - return files[len(files)-1].FileInfo, nil - } - - files = fillImplicit(files) - file, found := search(name, files) - if !found { + if result.FileInfo == nil { return nil, fs.ErrNotExist } - return file.FileInfo, nil + return result.FileInfo, nil } -// ReadDir reads the named directory from within the archive. -func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { +// ReadDir reads the named directory from within the archive. If name is "." +// then the root of the archive content is listed. +func (f *ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { if !fs.ValidPath(name) { return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrInvalid} } + // apply prefix if fs is rooted in a subtree + name = path.Join(f.Prefix, name) + + // fs.WalkDir() calls ReadDir() once per directory, and for archives with + // lots of directories, that is very slow, since we have to traverse the + // entire archive in order to ensure that we got all the entries for a + // directory -- so we can fast-track this lookup if we've done the + // traversal already + if len(f.dirs) > 0 { + return f.dirs[name], nil + } + + f.contents = make(map[string]fs.FileInfo) + f.dirs = make(map[string][]fs.DirEntry) + var archiveFile *os.File var err error if f.Stream == nil { @@ -566,31 +532,72 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { defer archiveFile.Close() } - // apply prefix if fs is rooted in a subtree - name = path.Join(f.Prefix, name) + handler := func(ctx context.Context, file FileInfo) error { + if err := ctx.Err(); err != nil { + return err + } - // collect all files with prefix - var ( - files []File - foundFile bool - ) - handler := func(_ context.Context, file File) error { - file.NameInArchive = strings.Trim(file.NameInArchive, "/") + // can't always trust path names + file.NameInArchive = path.Clean(file.NameInArchive) + + // 
avoid infinite walk; apparently, creating a tar file in the target + // directory may result in an entry called "." in the archive; see #384 if file.NameInArchive == "." { return nil } - files = append(files, file) + + // if the name being requested isn't a directory, return an error similar to + // what most OSes return from the readdir system call when given a non-dir if file.NameInArchive == name && !file.IsDir() { - foundFile = true - return errStopWalk + return &fs.PathError{Op: "readdir", Path: name, Err: errors.New("not a directory")} } - return nil - } - // handle special case of reading from root of archive - var filter []string - if name != "." { - filter = []string{name} + // index this file info for quick access + f.contents[file.NameInArchive] = file + + // this is a real directory; prefer its DirEntry over an implicit/fake one we may have created earlier; + // first try to find if it exists, and if so, replace the value; otherwise insert it in sorted position + if file.IsDir() { + dirEntry := fs.FileInfoToDirEntry(file) + idx, found := slices.BinarySearchFunc(f.dirs[path.Dir(file.NameInArchive)], dirEntry, func(a, b fs.DirEntry) int { + return strings.Compare(a.Name(), b.Name()) + }) + if found { + f.dirs[path.Dir(file.NameInArchive)][idx] = dirEntry + } else { + f.dirs[path.Dir(file.NameInArchive)] = slices.Insert(f.dirs[path.Dir(file.NameInArchive)], idx, dirEntry) + } + } + + // this loop looks like an abomination, but it's really quite simple: we're + // just iterating the directories of the path up to the root; i.e. we lob off + // the base (last component) of the path until no separators remain, i.e. 
only + // one component remains -- then loop again to make sure it's not a duplicate + for dir, base := path.Dir(file.NameInArchive), path.Base(file.NameInArchive); ; dir, base = path.Dir(dir), path.Base(dir) { + if err := ctx.Err(); err != nil { + return err + } + + var dirInfo fs.DirEntry = implicitDirInfo{implicitDirEntry{base}} + + // we are "filling in" any directories that could potentially be only implicit, + // and since a nested directory can have more than 1 item, we need to prevent + // duplication; for example: given a/b/c and a/b/d, we need to avoid adding + // an entry for "b" twice within "a" -- hence we search for it first, and if + // it doesn't already exist, we insert it in sorted position + idx, found := slices.BinarySearchFunc(f.dirs[dir], dirInfo, func(a, b fs.DirEntry) int { + return strings.Compare(a.Name(), b.Name()) + }) + if !found { + f.dirs[dir] = slices.Insert(f.dirs[dir], idx, dirInfo) + } + + if dir == "." { + break + } + } + + return nil } var inputStream io.Reader = archiveFile @@ -598,30 +605,18 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } - err = f.Format.Extract(f.context(), inputStream, filter, handler) - if foundFile { - return nil, &fs.PathError{Op: "readdir", Path: name, Err: errors.New("not a dir")} - } + err = f.Format.Extract(f.context(), inputStream, nil, handler) if err != nil { - return nil, err + // these being non-nil implies that we have indexed the archive, + // but if an error occurred, we likely only got part of the way + // through and our index is incomplete, and we'd have to re-walk + // the whole thing anyway; so reset these to nil to avoid bugs + f.dirs = nil + f.contents = nil + return nil, fmt.Errorf("extract: %w", err) } - // always find all implicit directories - files = fillImplicit(files) - // and return early for dot file - if name == "." 
{ - return openReadDir(name, files), nil - } - - file, foundFile := search(name, files) - if !foundFile { - return nil, fs.ErrNotExist - } - - if !file.IsDir() { - return nil, &fs.PathError{Op: "readdir", Path: name, Err: errors.New("not a dir")} - } - return openReadDir(name, files), nil + return f.dirs[name], nil } // Sub returns an FS corresponding to the subtree rooted at dir. @@ -636,6 +631,11 @@ func (f *ArchiveFS) Sub(dir string) (fs.FS, error) { if !info.IsDir() { return nil, fmt.Errorf("%s is not a directory", dir) } + // result is the same as what we're starting with, except + // we indicate a path prefix to be used for all operations; + // the reason we don't append to the Path field directly + // is because the input might be a stream rather than a + // path on disk, and the Prefix field is applied on both result := f result.Prefix = dir return result, nil @@ -705,44 +705,18 @@ func pathWithoutTopDir(fpath string) string { return fpath[slashIdx+1:] } -// errStopWalk is an arbitrary error value, since returning -// any error (other than fs.SkipDir) will stop a walk. We -// use this as we may only want 1 file from an extraction, -// even if that file is a directory and would otherwise be -// traversed during the walk. -var errStopWalk = fmt.Errorf("stop walk") - -type fakeArchiveFile struct{} - -func (f fakeArchiveFile) Stat() (fs.FileInfo, error) { - return implicitDirInfo{ - implicitDirEntry{name: "."}, - }, nil -} -func (f fakeArchiveFile) Read([]byte) (int, error) { return 0, io.EOF } -func (f fakeArchiveFile) Close() error { return nil } - // dirFile implements the fs.ReadDirFile interface. type dirFile struct { - extractedFile - - // TODO: We could probably be more memory-efficient by not loading - // all the entries at once and then "faking" the paging for ReadDir(). 
- // Instead, we could maybe store a reference to the parent archive FS, - // then walk it each time ReadDir is called, skipping entriesRead - // files, then continuing the listing, until n are listed. But that - // might be kinda messy and a lot of work, so I leave it for a future - // optimization if needed. + info fs.FileInfo entries []fs.DirEntry - entriesRead int + entriesRead int // used for paging with ReadDir(n) } -// If this represents the root of the archive, we use the archive's -// FileInfo which says it's a file, not a directory; the whole point -// of this package is to treat the archive as a directory, so always -// return true in our case. -func (dirFile) IsDir() bool { return true } +func (dirFile) Read([]byte) (int, error) { return 0, errors.New("cannot read a directory file") } +func (df dirFile) Stat() (fs.FileInfo, error) { return df.info, nil } +func (dirFile) Close() error { return nil } +// ReadDir implements [fs.ReadDirFile]. func (df *dirFile) ReadDir(n int) ([]fs.DirEntry, error) { if n <= 0 { return df.entries, nil @@ -771,46 +745,14 @@ func (dirFileInfo) Size() int64 { return 0 } func (info dirFileInfo) Mode() fs.FileMode { return info.FileInfo.Mode() | fs.ModeDir } func (dirFileInfo) IsDir() bool { return true } -// extractedFile implements fs.File, thus it represents an "opened" file, -// which is slightly different from our File type which represents a file -// that possibly may be opened. If the file is actually opened, this type -// ensures that the parent archive is closed when this file from within it -// is also closed. -type extractedFile struct { - File - - // Set these fields if a "regular file" which has actual content - // that can be read, i.e. a file that is open for reading. - // ReadCloser should be the file's reader, and parentArchive is - // a reference to the archive the files comes out of. - // If parentArchive is set, it will also be closed along with - // the file when Close() is called. 
+// fileInArchive represents a file that is opened from within an archive. +// It implements fs.File. +type fileInArchive struct { io.ReadCloser - parentArchive io.Closer -} - -// Close closes the the current file if opened and -// the parent archive if specified. This is a no-op -// for directories which do not set those fields. -func (ef extractedFile) Close() error { - if ef.parentArchive != nil { - if err := ef.parentArchive.Close(); err != nil { - return err - } - } - if ef.ReadCloser != nil { - return ef.ReadCloser.Close() - } - return nil + info fs.FileInfo } -// compressorCloser is a type that closes two closers at the same time. -// It only exists to fix #365. If a better solution can be found, I'd -// likely prefer it. -type compressorCloser interface { - io.Closer - closeCompressor(io.Closer) -} +func (af fileInArchive) Stat() (fs.FileInfo, error) { return af.info, nil } // closeBoth closes both the file and an associated // closer, such as a (de)compressor that wraps the @@ -818,28 +760,34 @@ type compressorCloser interface { // better solution is found, I'd probably prefer that. type closeBoth struct { fs.File - c io.Closer + c io.Closer // usually the archive or the decompressor } -// closeCompressor will have the closer closed when the associated File closes. -func (dc *closeBoth) closeCompressor(c io.Closer) { dc.c = c } - // Close closes both the file and the associated closer. It always calls -// Close() on both, but returns only the first error, if any. +// Close() on both, but if multiple errors occur they are wrapped together. 
func (dc closeBoth) Close() error { - err1, err2 := dc.File.Close(), dc.c.Close() - if err1 != nil { - return err1 + var err error + if dc.File != nil { + if err2 := dc.File.Close(); err2 != nil { + err = fmt.Errorf("closing file: %w", err2) + } + } + if dc.c != nil { + if err2 := dc.c.Close(); err2 != nil { + if err == nil { + err = fmt.Errorf("closing closer: %w", err2) + } else { + err = fmt.Errorf("%w; additionally, closing closer: %w", err, err2) + } + } } - return err2 + return err } // implicitDirEntry represents a directory that does // not actually exist in the archive but is inferred // from the paths of actual files in the archive. -type implicitDirEntry struct { - name string -} +type implicitDirEntry struct{ name string } func (e implicitDirEntry) Name() string { return e.name } func (implicitDirEntry) IsDir() bool { return true } @@ -853,28 +801,20 @@ func (e implicitDirEntry) Info() (fs.FileInfo, error) { // not contain actual entries for a directory, but we need to // pretend it exists so its contents can be discovered and // traversed. 
-type implicitDirInfo struct { - implicitDirEntry -} +type implicitDirInfo struct{ implicitDirEntry } func (d implicitDirInfo) Name() string { return d.name } func (implicitDirInfo) Size() int64 { return 0 } func (d implicitDirInfo) Mode() fs.FileMode { return d.Type() } func (implicitDirInfo) ModTime() time.Time { return time.Time{} } -func (implicitDirInfo) Sys() interface{} { return nil } +func (implicitDirInfo) Sys() any { return nil } // Interface guards var ( - _ fs.ReadDirFS = (*DirFS)(nil) - _ fs.StatFS = (*DirFS)(nil) - _ fs.SubFS = (*DirFS)(nil) - _ fs.ReadDirFS = (*FileFS)(nil) _ fs.StatFS = (*FileFS)(nil) _ fs.ReadDirFS = (*ArchiveFS)(nil) _ fs.StatFS = (*ArchiveFS)(nil) _ fs.SubFS = (*ArchiveFS)(nil) - - _ compressorCloser = (*closeBoth)(nil) ) diff --git a/fs_test.go b/fs_test.go index 9180fbf3..5d6a8bd3 100644 --- a/fs_test.go +++ b/fs_test.go @@ -58,13 +58,13 @@ func TestSelfTar(t *testing.T) { fn := "testdata/self-tar.tar" fh, err := os.Open(fn) if err != nil { - t.Fatalf("Could not load test tar: %v", fn) + t.Errorf("Could not load test tar: %v", fn) } fstat, err := os.Stat(fn) if err != nil { - t.Fatalf("Could not stat test tar: %v", fn) + t.Errorf("Could not stat test tar: %v", fn) } - fsys := ArchiveFS{ + fsys := &ArchiveFS{ Stream: io.NewSectionReader(fh, 0, fstat.Size()), Format: Tar{}, } @@ -78,12 +78,12 @@ func TestSelfTar(t *testing.T) { return nil }) if err != nil { - t.Fatal(err) + t.Error(err) } } func ExampleArchiveFS_Stream() { - fsys := ArchiveFS{ + fsys := &ArchiveFS{ Stream: io.NewSectionReader(bytes.NewReader(testZIP), 0, int64(len(testZIP))), Format: Zip{}, } @@ -158,9 +158,7 @@ func TestArchiveFS_ReadDir(t *testing.T) { t.Parallel() fsys := tc.archive for baseDir, wantLS := range tc.want { - baseDir := baseDir - wantLS := wantLS - t.Run(fmt.Sprintf("ReadDir(%s)", baseDir), func(t *testing.T) { + t.Run(fmt.Sprintf("ReadDir(%q)", baseDir), func(t *testing.T) { dis, err := fsys.ReadDir(baseDir) if err != nil { t.Error(err) @@ 
-183,17 +181,18 @@ func TestArchiveFS_ReadDir(t *testing.T) { t.Run(fmt.Sprintf("Open(%s)", baseDir), func(t *testing.T) { f, err := fsys.Open(baseDir) if err != nil { - t.Error(err) + t.Errorf("fsys.Open(%q): %#v %s", baseDir, err, err) + return } rdf, ok := f.(fs.ReadDirFile) if !ok { - t.Fatalf("'%s' did not return a fs.ReadDirFile, %+v", baseDir, rdf) + t.Errorf("fsys.Open(%q) did not return a fs.ReadDirFile, got: %#v", baseDir, f) } dis, err := rdf.ReadDir(-1) if err != nil { - t.Fatal(err) + t.Error(err) } dirs := []string{} diff --git a/go.mod b/go.mod index 3adbffad..0dacae9b 100644 --- a/go.mod +++ b/go.mod @@ -1,26 +1,26 @@ module github.com/mholt/archiver/v4 -go 1.22 +go 1.22.2 -toolchain go1.22.2 +toolchain go1.23.2 require ( - github.com/andybalholm/brotli v1.1.0 + github.com/andybalholm/brotli v1.1.1 github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 - github.com/klauspost/compress v1.17.8 + github.com/klauspost/compress v1.17.11 github.com/klauspost/pgzip v1.2.6 - github.com/nwaples/rardecode/v2 v2.0.0-beta.3 + github.com/nwaples/rardecode/v2 v2.0.0-beta.4 github.com/therootcompany/xz v1.0.1 github.com/ulikunitz/xz v0.5.12 ) require ( github.com/STARRY-S/zip v0.1.0 - github.com/bodgit/sevenzip v1.5.1 + github.com/bodgit/sevenzip v1.5.2 github.com/golang/snappy v0.0.4 github.com/pierrec/lz4/v4 v4.1.21 github.com/sorairolake/lzip-go v0.3.5 - golang.org/x/text v0.16.0 + golang.org/x/text v0.19.0 ) require ( diff --git a/go.sum b/go.sum index e1803b1c..5e844566 100644 --- a/go.sum +++ b/go.sum @@ -19,12 +19,12 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/STARRY-S/zip v0.1.0 h1:eUER3jKmHKXjv+iy3BekLa+QnNSo1Lqz4eTzYBcGDqo= github.com/STARRY-S/zip v0.1.0/go.mod h1:qj/mTZkvb3AvfGQ2e775/3AODRvB4peSw8KNMvrM8/I= -github.com/andybalholm/brotli v1.1.0 
h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU= github.com/bodgit/plumbing v1.3.0/go.mod h1:JOTb4XiRu5xfnmdnDJo6GmSbSbtSyufrsyZFByMtKEs= -github.com/bodgit/sevenzip v1.5.1 h1:rVj0baZsooZFy64DJN0zQogPzhPrT8BQ8TTRd1H4WHw= -github.com/bodgit/sevenzip v1.5.1/go.mod h1:Q3YMySuVWq6pyGEolyIE98828lOfEoeWg5zeH6x22rc= +github.com/bodgit/sevenzip v1.5.2 h1:acMIYRaqoHAdeu9LhEGGjL9UzBD4RNf9z7+kWDNignI= +github.com/bodgit/sevenzip v1.5.2/go.mod h1:gTGzXA67Yko6/HLSD0iK4kWaWzPlPmLfDO73jTjSRqc= github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4= github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -84,16 +84,16 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= -github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.6 
h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/nwaples/rardecode/v2 v2.0.0-beta.3 h1:evQTW0IjM2GAL5AaPHiQrT+laWohkt5zHKA3yCsGQGU= -github.com/nwaples/rardecode/v2 v2.0.0-beta.3/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= +github.com/nwaples/rardecode/v2 v2.0.0-beta.4 h1:sdiJxQdPjECn2lh9nLFFhgLCf+0ulDU5rODbtERTlUY= +github.com/nwaples/rardecode/v2 v2.0.0-beta.4/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -117,6 +117,8 @@ github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0B github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -183,8 +185,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -212,8 +214,8 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/gz.go b/gz.go index b9873f19..e8b3f98d 100644 --- a/gz.go +++ b/gz.go @@ -2,6 +2,7 @@ package archiver import ( 
"bytes" + "context" "io" "strings" @@ -29,13 +30,13 @@ type Gz struct { Multithreaded bool } -func (Gz) Name() string { return ".gz" } +func (Gz) Extension() string { return ".gz" } -func (gz Gz) Match(filename string, stream io.Reader) (MatchResult, error) { +func (gz Gz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), gz.Name()) { + if strings.Contains(strings.ToLower(filename), gz.Extension()) { mr.ByName = true } diff --git a/interfaces.go b/interfaces.go index 9a41e1f1..f675f0e2 100644 --- a/interfaces.go +++ b/interfaces.go @@ -5,10 +5,12 @@ import ( "io" ) -// Format represents either an archive or compression format. +// Format represents a way of getting data out of something else. +// A format usually represents compression or an archive (or both). type Format interface { - // Name returns the name of the format. - Name() string + // Extension returns the conventional file extension for this + // format. + Extension() string // Match returns true if the given name/stream is recognized. // One of the arguments is optional: filename might be empty @@ -21,7 +23,7 @@ type Format interface { // preserve the stream through matching, you should either // buffer what is read by Match, or seek to the last position // before Match was called. - Match(filename string, stream io.Reader) (MatchResult, error) + Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) } // Compression is a compression format with both compress and decompress methods. @@ -57,13 +59,13 @@ type Archiver interface { // Archive writes an archive file to output with the given files. // // Context cancellation must be honored. 
- Archive(ctx context.Context, output io.Writer, files []File) error + Archive(ctx context.Context, output io.Writer, files []FileInfo) error } // ArchiveAsyncJob contains a File to be archived and a channel that // the result of the archiving should be returned on. type ArchiveAsyncJob struct { - File File + File FileInfo Result chan<- error } @@ -83,14 +85,20 @@ type ArchiverAsync interface { // Extractor can extract files from an archive. type Extractor interface { - // Extract reads the files at pathsInArchive from sourceArchive. + // Extract walks entries in the archive and calls handleFile for each + // entry that matches the pathsInArchive filter by path/name. + // // If pathsInArchive is nil, all files are extracted without discretion. // If pathsInArchive is empty, no files are extracted. // If a path refers to a directory, all files within it are extracted. // Extracted files are passed to the handleFile callback for handling. // + // Any files opened in the FileHandler should be closed when it returns, + // as there is no guarantee the files can be read outside the handler + // or after the walk has proceeded to the next file. + // // Context cancellation must be honored. - Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error + Extract(ctx context.Context, archive io.Reader, pathsInArchive []string, handleFile FileHandler) error } // Inserter can insert files into an existing archive. @@ -99,5 +107,5 @@ type Inserter interface { // Insert inserts the files into archive. // // Context cancellation must be honored. 
- Insert(ctx context.Context, archive io.ReadWriteSeeker, files []File) error + Insert(ctx context.Context, archive io.ReadWriteSeeker, files []FileInfo) error } diff --git a/lz4.go b/lz4.go index aaa22a54..7425ad2a 100644 --- a/lz4.go +++ b/lz4.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "strings" @@ -17,13 +18,13 @@ type Lz4 struct { CompressionLevel int } -func (Lz4) Name() string { return ".lz4" } +func (Lz4) Extension() string { return ".lz4" } -func (lz Lz4) Match(filename string, stream io.Reader) (MatchResult, error) { +func (lz Lz4) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), lz.Name()) { + if strings.Contains(strings.ToLower(filename), lz.Extension()) { mr.ByName = true } diff --git a/lzip.go b/lzip.go index a861a487..1cbffa50 100644 --- a/lzip.go +++ b/lzip.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "path/filepath" "strings" @@ -16,13 +17,13 @@ func init() { // Lzip facilitates lzip compression. 
type Lzip struct{} -func (Lzip) Name() string { return ".lz" } +func (Lzip) Extension() string { return ".lz" } -func (lz Lzip) Match(filename string, stream io.Reader) (MatchResult, error) { +func (lz Lzip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if filepath.Ext(strings.ToLower(filename)) == lz.Name() { + if filepath.Ext(strings.ToLower(filename)) == lz.Extension() { mr.ByName = true } diff --git a/rar.go b/rar.go index ed0099f6..bece6071 100644 --- a/rar.go +++ b/rar.go @@ -30,13 +30,13 @@ type Rar struct { Password string } -func (Rar) Name() string { return ".rar" } +func (Rar) Extension() string { return ".rar" } -func (r Rar) Match(filename string, stream io.Reader) (MatchResult, error) { +func (r Rar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), r.Name()) { + if strings.Contains(strings.ToLower(filename), r.Extension()) { mr.ByName = true } @@ -57,7 +57,7 @@ func (r Rar) Match(filename string, stream io.Reader) (MatchResult, error) { } // Archive is not implemented for RAR, but the method exists so that Rar satisfies the ArchiveFormat interface. 
-func (r Rar) Archive(_ context.Context, _ io.Writer, _ []File) error { +func (r Rar) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error { return fmt.Errorf("not implemented because RAR is a proprietary format") } @@ -98,11 +98,14 @@ func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv continue } - file := File{ - FileInfo: rarFileInfo{hdr}, + info := rarFileInfo{hdr} + file := FileInfo{ + FileInfo: info, Header: hdr, NameInArchive: hdr.Name, - Open: func() (io.ReadCloser, error) { return io.NopCloser(rr), nil }, + Open: func() (fs.File, error) { + return fileInArchive{io.NopCloser(rr), info}, nil + }, } err = handleFile(ctx, file) @@ -133,7 +136,7 @@ func (rfi rarFileInfo) Size() int64 { return rfi.fh.UnPackedSize } func (rfi rarFileInfo) Mode() os.FileMode { return rfi.fh.Mode() } func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } -func (rfi rarFileInfo) Sys() interface{} { return nil } +func (rfi rarFileInfo) Sys() any { return nil } var ( rarHeaderV1_5 = []byte("Rar!\x1a\x07\x00") // v1.5 diff --git a/sz.go b/sz.go index 9d10604a..8a926b7f 100644 --- a/sz.go +++ b/sz.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "strings" @@ -15,13 +16,13 @@ func init() { // Sz facilitates Snappy compression. 
type Sz struct{} -func (sz Sz) Name() string { return ".sz" } +func (sz Sz) Extension() string { return ".sz" } -func (sz Sz) Match(filename string, stream io.Reader) (MatchResult, error) { +func (sz Sz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), sz.Name()) { + if strings.Contains(strings.ToLower(filename), sz.Extension()) { mr.ByName = true } diff --git a/tar.go b/tar.go index 0db0a665..d4106257 100644 --- a/tar.go +++ b/tar.go @@ -26,13 +26,13 @@ type Tar struct { ContinueOnError bool } -func (Tar) Name() string { return ".tar" } +func (Tar) Extension() string { return ".tar" } -func (t Tar) Match(filename string, stream io.Reader) (MatchResult, error) { +func (t Tar) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), t.Name()) { + if strings.Contains(strings.ToLower(filename), t.Extension()) { mr.ByName = true } @@ -46,7 +46,7 @@ func (t Tar) Match(filename string, stream io.Reader) (MatchResult, error) { return mr, nil } -func (t Tar) Archive(ctx context.Context, output io.Writer, files []File) error { +func (t Tar) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { tw := tar.NewWriter(output) defer tw.Close() @@ -74,7 +74,7 @@ func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan Arc return nil } -func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error { +func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file FileInfo) error { if err := ctx.Err(); err != nil { return err // honor context cancellation } @@ -109,7 +109,7 @@ func (t Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) return nil } -func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) error { +func (t Tar) Insert(ctx 
context.Context, into io.ReadWriteSeeker, files []FileInfo) error { // Tar files may end with some, none, or a lot of zero-byte padding. The spec says // it should end with two 512-byte trailer records consisting solely of null/0 // bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However, @@ -212,16 +212,25 @@ func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv continue } - file := File{ - FileInfo: hdr.FileInfo(), + info := hdr.FileInfo() + file := FileInfo{ + FileInfo: info, Header: hdr, NameInArchive: hdr.Name, LinkTarget: hdr.Linkname, - Open: func() (io.ReadCloser, error) { return io.NopCloser(tr), nil }, + Open: func() (fs.File, error) { + return fileInArchive{io.NopCloser(tr), info}, nil + }, } err = handleFile(ctx, file) if errors.Is(err, fs.SkipAll) { + // At first, I wasn't sure if fs.SkipAll implied that the rest of the entries + // should still be iterated and just "skipped" (i.e. no-ops) or if the walk + // should stop; both have the same net effect, one is just less efficient... + // apparently the name of fs.StopWalk was the preferred name, but it still + // became fs.SkipAll because of semantics with documentation; see + // https://github.com/golang/go/issues/47209 -- anyway, the walk should stop. break } else if errors.Is(err, fs.SkipDir) { // if a directory, skip this path; if a file, skip the folder path diff --git a/xz.go b/xz.go index 4e1b6b41..edb61373 100644 --- a/xz.go +++ b/xz.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "strings" @@ -16,13 +17,13 @@ func init() { // Xz facilitates xz compression. 
type Xz struct{} -func (Xz) Name() string { return ".xz" } +func (Xz) Extension() string { return ".xz" } -func (x Xz) Match(filename string, stream io.Reader) (MatchResult, error) { +func (x Xz) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), x.Name()) { + if strings.Contains(strings.ToLower(filename), x.Extension()) { mr.ByName = true } diff --git a/zip.go b/zip.go index 0a4d04a8..c012c080 100644 --- a/zip.go +++ b/zip.go @@ -83,13 +83,13 @@ type Zip struct { TextEncoding string } -func (z Zip) Name() string { return ".zip" } +func (z Zip) Extension() string { return ".zip" } -func (z Zip) Match(filename string, stream io.Reader) (MatchResult, error) { +func (z Zip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), z.Name()) { + if strings.Contains(strings.ToLower(filename), z.Extension()) { mr.ByName = true } @@ -103,7 +103,7 @@ func (z Zip) Match(filename string, stream io.Reader) (MatchResult, error) { return mr, nil } -func (z Zip) Archive(ctx context.Context, output io.Writer, files []File) error { +func (z Zip) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { zw := zip.NewWriter(output) defer zw.Close() @@ -129,7 +129,7 @@ func (z Zip) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan Arc return nil } -func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file File) error { +func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file FileInfo) error { if err := ctx.Err(); err != nil { return err // honor context cancellation } @@ -218,11 +218,18 @@ func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv continue } - file := File{ - FileInfo: f.FileInfo(), + info := f.FileInfo() + file := FileInfo{ + FileInfo: info, 
Header: f.FileHeader, NameInArchive: f.Name, - Open: func() (io.ReadCloser, error) { return f.Open() }, + Open: func() (fs.File, error) { + openedFile, err := f.Open() + if err != nil { + return nil, err + } + return fileInArchive{openedFile, info}, nil + }, } err := handleFile(ctx, file) @@ -266,7 +273,7 @@ func (z Zip) decodeText(hdr *zip.FileHeader) { } // Insert appends the listed files into the provided Zip archive stream. -func (z Zip) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) error { +func (z Zip) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileInfo) error { // following very simple example at https://github.com/STARRY-S/zip?tab=readme-ov-file#usage zu, err := szip.NewUpdater(into) if err != nil { diff --git a/zlib.go b/zlib.go index 84275186..485991e6 100644 --- a/zlib.go +++ b/zlib.go @@ -1,6 +1,7 @@ package archiver import ( + "context" "io" "strings" @@ -16,13 +17,13 @@ type Zlib struct { CompressionLevel int } -func (Zlib) Name() string { return ".zz" } +func (Zlib) Extension() string { return ".zz" } -func (zz Zlib) Match(filename string, stream io.Reader) (MatchResult, error) { +func (zz Zlib) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if strings.Contains(strings.ToLower(filename), zz.Name()) { + if strings.Contains(strings.ToLower(filename), zz.Extension()) { mr.ByName = true } diff --git a/zstd.go b/zstd.go index fe07b76f..cd0c2814 100644 --- a/zstd.go +++ b/zstd.go @@ -2,6 +2,7 @@ package archiver import ( "bytes" + "context" "io" "strings" @@ -18,13 +19,13 @@ type Zstd struct { DecoderOptions []zstd.DOption } -func (Zstd) Name() string { return ".zst" } +func (Zstd) Extension() string { return ".zst" } -func (zs Zstd) Match(filename string, stream io.Reader) (MatchResult, error) { +func (zs Zstd) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) { var mr MatchResult // match filename - if 
strings.Contains(strings.ToLower(filename), zs.Name()) { + if strings.Contains(strings.ToLower(filename), zs.Extension()) { mr.ByName = true } From 76ea0d6df8ba057642bad730bebe7665d657c30d Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Thu, 7 Nov 2024 21:12:04 -0700 Subject: [PATCH 18/19] Reuse code to determine stream size by seeking --- archiver.go | 20 ++++++++++++++++++++ fs.go | 6 +----- zip.go | 16 ---------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/archiver.go b/archiver.go index 7e68f30d..f89410e2 100644 --- a/archiver.go +++ b/archiver.go @@ -264,6 +264,26 @@ func isSymlink(info fs.FileInfo) bool { return info.Mode()&os.ModeSymlink != 0 } +// streamSizeBySeeking determines the size of the stream by +// seeking to the end, then back again, so the resulting +// seek position upon returning is the same as when called +// (assuming no errors). +func streamSizeBySeeking(s io.Seeker) (int64, error) { + currentPosition, err := s.Seek(0, io.SeekCurrent) + if err != nil { + return 0, fmt.Errorf("getting current offset: %w", err) + } + maxPosition, err := s.Seek(0, io.SeekEnd) + if err != nil { + return 0, fmt.Errorf("fast-forwarding to end: %w", err) + } + _, err = s.Seek(currentPosition, io.SeekStart) + if err != nil { + return 0, fmt.Errorf("returning to prior offset %d: %w", currentPosition, err) + } + return maxPosition, nil +} + // skipList keeps a list of non-intersecting paths // as long as its add method is used. 
Identical // elements are rejected, more specific paths are diff --git a/fs.go b/fs.go index 560727b5..a2c7510a 100644 --- a/fs.go +++ b/fs.go @@ -98,14 +98,10 @@ func FileSystem(ctx context.Context, filename string, stream ReaderAtSeeker) (fs // determine size -- we know that the stream value we get back from // Identify is the same type as what we input because it is a Seeker - size, err := stream.Seek(0, io.SeekEnd) + size, err := streamSizeBySeeking(stream) if err != nil { return nil, fmt.Errorf("seeking for size: %w", err) } - _, err = stream.Seek(0, io.SeekStart) - if err != nil { - return nil, fmt.Errorf("seeking back to beginning: %w", err) - } sr := io.NewSectionReader(stream, 0, size) diff --git a/zip.go b/zip.go index c012c080..73361d8b 100644 --- a/zip.go +++ b/zip.go @@ -337,22 +337,6 @@ type seekReaderAt interface { io.Seeker } -func streamSizeBySeeking(s io.Seeker) (int64, error) { - currentPosition, err := s.Seek(0, io.SeekCurrent) - if err != nil { - return 0, fmt.Errorf("getting current offset: %w", err) - } - maxPosition, err := s.Seek(0, io.SeekEnd) - if err != nil { - return 0, fmt.Errorf("fast-forwarding to end: %w", err) - } - _, err = s.Seek(currentPosition, io.SeekStart) - if err != nil { - return 0, fmt.Errorf("returning to prior offset %d: %w", currentPosition, err) - } - return maxPosition, nil -} - // Additional compression methods not offered by archive/zip. // See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.4.5. const ( From f9dfd58fd69108c2bd0857b734e84844170e888c Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 8 Nov 2024 10:34:42 -0700 Subject: [PATCH 19/19] Refactor and simplify interfaces Split Archival into Archival/Extraction since some archive formats can't do both. Rar is proprietary for creating, and there's no pure-Go 7z writing implementation that I know of. 
- Extractor no longer requires a filename filter (kind of pointless at best, confusing at worst) - CompressedArchive renamed to Archive - Archival is now just creating archives - New Extraction interface is for reading archives - Archive format can compose compression, archival, and extraction --- 7z.go | 13 +++--- formats.go | 116 +++++++++++++++++++++++++++--------------------- formats_test.go | 4 +- fs.go | 24 +++++----- interfaces.go | 16 +++---- rar.go | 13 +++--- tar.go | 5 +-- zip.go | 5 +-- 8 files changed, 98 insertions(+), 98 deletions(-) diff --git a/7z.go b/7z.go index 4a3dbd4a..06e4dd17 100644 --- a/7z.go +++ b/7z.go @@ -51,10 +51,7 @@ func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (M return mr, nil } -// Archive is not implemented for 7z, but the method exists so that SevenZip satisfies the ArchiveFormat interface. -func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error { - return fmt.Errorf("not implemented for 7z because there is no pure Go implementation found") -} +// Archive is not implemented for 7z because I do not know of a pure-Go 7z writer. // Extract extracts files from z, implementing the Extractor interface. Uniquely, however, // sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces @@ -62,7 +59,7 @@ func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error { // the interface because we figure you can Read() from anything you can ReadAt() or Seek() // with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker // and io.ReaderAt, an error is returned. 
-func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { +func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { sra, ok := sourceArchive.(seekReaderAt) if !ok { return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints") @@ -87,9 +84,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA return err // honor context cancellation } - if !fileIsIncluded(pathsInArchive, f.Name) { - continue - } if fileIsIncluded(skipDirs, f.Name) { continue } @@ -130,3 +124,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA // https://py7zr.readthedocs.io/en/latest/archive_format.html#signature var sevenZipHeader = []byte("7z\xBC\xAF\x27\x1C") + +// Interface guard +var _ Extractor = SevenZip{} diff --git a/formats.go b/formats.go index 24865fea..837114d9 100644 --- a/formats.go +++ b/formats.go @@ -42,13 +42,14 @@ func RegisterFormat(format Format) { func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) { var compression Compression var archival Archival + var extraction Extraction rewindableStream, err := newRewindReader(stream) if err != nil { return nil, nil, err } - // try compression format first, since that's the outer "layer" + // try compression format first, since that's the outer "layer" if combined for name, format := range formats { cf, isCompression := format.(Compression) if !isCompression { @@ -68,10 +69,11 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i } } - // try archive format next + // try archival and extraction format next for name, format := range formats { - af, isArchive := format.(Archival) - if !isArchive { + ar, isArchive := format.(Archival) + ex, isExtract := format.(Extraction) + if !isArchive && !isExtract { continue } @@ -81,20 +83,23 @@ func 
Identify(ctx context.Context, filename string, stream io.Reader) (Format, i } if matchResult.Matched() { - archival = af + archival = ar + extraction = ex break } } - // the stream should be rewound by identifyOne + // the stream should be rewound by identifyOne; then return the most specific type of match bufferedStream := rewindableStream.reader() switch { - case compression != nil && archival == nil: + case compression != nil && archival == nil && extraction == nil: return compression, bufferedStream, nil - case compression == nil && archival != nil: + case compression == nil && archival != nil && extraction == nil: return archival, bufferedStream, nil - case compression != nil && archival != nil: - return CompressedArchive{compression, archival}, bufferedStream, nil + case compression == nil && archival == nil && extraction != nil: + return extraction, bufferedStream, nil + case archival != nil || extraction != nil: + return Archive{compression, archival, extraction}, bufferedStream, nil default: return nil, bufferedStream, NoMatch } @@ -161,44 +166,44 @@ func readAtMost(stream io.Reader, n int) ([]byte, error) { return nil, err } -// CompressedArchive combines a compression format on top of an archive -// format (e.g. "tar.gz") and provides both functionalities in a single -// type. It ensures that archive functions are wrapped by compressors and +// Archive represents an archive which may be compressed at the outer layer. +// It combines a compression format on top of an archive/extraction +// format (e.g. ".tar.gz") and provides both functionalities in a single +// type. It ensures that archival functions are wrapped by compressors and // decompressors. However, compressed archives have some limitations; for // example, files cannot be inserted/appended because of complexities with // modifying existing compression state (perhaps this could be overcome, // but I'm not about to try it). 
// -// As this type is intended to compose compression and archive formats, -// both must be specified in order for this value to be valid, or its -// methods will return errors. -type CompressedArchive struct { +// The embedded Archival and Extraction values are used for writing and +// reading, respectively. Compression is optional and is only needed if the +// format is compressed externally (for example, tar archives). +type Archive struct { Compression Archival + Extraction } -// Name returns a concatenation of the archive format name -// and the compression format name. -func (caf CompressedArchive) Extension() string { - if caf.Compression == nil && caf.Archival == nil { - panic("missing both compression and archive formats") - } +// Extension returns a concatenation of the archive and compression format extensions. +func (ar Archive) Extension() string { var name string - if caf.Archival != nil { - name += caf.Archival.Extension() + if ar.Archival != nil { + name += ar.Archival.Extension() + } else if ar.Extraction != nil { + name += ar.Extraction.Extension() } - if caf.Compression != nil { - name += caf.Compression.Extension() + if ar.Compression != nil { + name += ar.Compression.Extension() } return name } -// Match matches if the input matches both the compression and archive format. -func (caf CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) { +// Match matches if the input matches both the compression and archival/extraction format.
+func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) { var conglomerate MatchResult - if caf.Compression != nil { - matchResult, err := caf.Compression.Match(ctx, filename, stream) + if ar.Compression != nil { + matchResult, err := ar.Compression.Match(ctx, filename, stream) if err != nil { return MatchResult{}, err } @@ -208,7 +213,7 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream // wrap the reader with the decompressor so we can // attempt to match the archive by reading the stream - rc, err := caf.Compression.OpenReader(stream) + rc, err := ar.Compression.OpenReader(stream) if err != nil { return matchResult, err } @@ -218,8 +223,8 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream conglomerate = matchResult } - if caf.Archival != nil { - matchResult, err := caf.Archival.Match(ctx, filename, stream) + if ar.Archival != nil { + matchResult, err := ar.Archival.Match(ctx, filename, stream) if err != nil { return MatchResult{}, err } @@ -234,26 +239,32 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream } // Archive adds files to the output archive while compressing the result. -func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { - if caf.Compression != nil { - wc, err := caf.Compression.OpenWriter(output) +func (ar Archive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error { + if ar.Archival == nil { + return fmt.Errorf("no archival format") + } + if ar.Compression != nil { + wc, err := ar.Compression.OpenWriter(output) if err != nil { return err } defer wc.Close() output = wc } - return caf.Archival.Archive(ctx, output, files) + return ar.Archival.Archive(ctx, output, files) } // ArchiveAsync adds files to the output archive while compressing the result asynchronously. 
-func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { - do, ok := caf.Archival.(ArchiverAsync) +func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error { + if ar.Archival == nil { + return fmt.Errorf("no archival format") + } + do, ok := ar.Archival.(ArchiverAsync) if !ok { - return fmt.Errorf("%s archive does not support async writing", caf.Extension()) + return fmt.Errorf("%T archive does not support async writing", ar.Archival) } - if caf.Compression != nil { - wc, err := caf.Compression.OpenWriter(output) + if ar.Compression != nil { + wc, err := ar.Compression.OpenWriter(output) if err != nil { return err } @@ -264,16 +275,19 @@ func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, } // Extract reads files out of an archive while decompressing the results. -func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { - if caf.Compression != nil { - rc, err := caf.Compression.OpenReader(sourceArchive) +func (ar Archive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { + if ar.Extraction == nil { + return fmt.Errorf("no extraction format") + } + if ar.Compression != nil { + rc, err := ar.Compression.OpenReader(sourceArchive) if err != nil { return err } defer rc.Close() sourceArchive = rc } - return caf.Archival.Extract(ctx, sourceArchive, pathsInArchive, handleFile) + return ar.Extraction.Extract(ctx, sourceArchive, handleFile) } // MatchResult returns true if the format was matched either @@ -408,8 +422,8 @@ var formats = make(map[string]Format) // Interface guards var ( - _ Format = (*CompressedArchive)(nil) - _ Archiver = (*CompressedArchive)(nil) - _ ArchiverAsync = (*CompressedArchive)(nil) - _ Extractor = (*CompressedArchive)(nil) + _ Format = (*Archive)(nil) + _ Archiver = (*Archive)(nil) + _ 
ArchiverAsync = (*Archive)(nil) + _ Extractor = (*Archive)(nil) ) diff --git a/formats_test.go b/formats_test.go index 6c8d621f..20349c2a 100644 --- a/formats_test.go +++ b/formats_test.go @@ -111,7 +111,7 @@ func checkErr(t *testing.T, err error, msgFmt string, args ...any) { return } args = append(args, err) - t.Errorf(msgFmt+": %s", args...) + t.Fatalf(msgFmt+": %s", args...) } func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testing.T) { @@ -418,7 +418,7 @@ func TestIdentifyAndOpenZip(t *testing.T) { t.Errorf("unexpected format found: expected=.zip actual=%s", format.Extension()) } - err = format.(Extractor).Extract(context.Background(), reader, nil, func(ctx context.Context, f FileInfo) error { + err = format.(Extractor).Extract(context.Background(), reader, func(ctx context.Context, f FileInfo) error { rc, err := f.Open() if err != nil { return err diff --git a/fs.go b/fs.go index a2c7510a..56042cf6 100644 --- a/fs.go +++ b/fs.go @@ -350,14 +350,12 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { } var decompressor io.ReadCloser - if caf, ok := f.Format.(CompressedArchive); ok { - if caf.Compression != nil { - decompressor, err = caf.Compression.OpenReader(inputStream) - if err != nil { - return nil, err - } - inputStream = decompressor + if decomp, ok := f.Format.(Decompressor); ok { + decompressor, err = decomp.OpenReader(inputStream) + if err != nil { + return nil, err } + inputStream = decompressor } // prepare the handler that we'll need if we have to iterate the @@ -413,13 +411,13 @@ func (f ArchiveFS) Open(name string) (fs.File, error) { // files may have a "." component in them, and the underlying format doesn't // know about our file system semantics, so we need to filter ourselves (it's // not significantly less efficient). 
- if caf, ok := f.Format.(CompressedArchive); ok { + if ar, ok := f.Format.(Archive); ok { // bypass the CompressedArchive format's opening of the decompressor, since - // we already did it, since we need to keep it open after returning + // we already did it because we need to keep it open after returning. // "I BYPASSED THE COMPRESSOR!" -Rey - err = caf.Archival.Extract(f.context(), inputStream, nil, handler) + err = ar.Extraction.Extract(f.context(), inputStream, handler) } else { - err = f.Format.Extract(f.context(), inputStream, nil, handler) + err = f.Format.Extract(f.context(), inputStream, handler) } if err != nil { return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("extract: %w", err)} @@ -486,7 +484,7 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { if f.Stream != nil { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } - err = f.Format.Extract(f.context(), inputStream, nil, handler) + err = f.Format.Extract(f.context(), inputStream, handler) if err != nil && result.FileInfo == nil { return nil, err } @@ -601,7 +599,7 @@ func (f *ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size()) } - err = f.Format.Extract(f.context(), inputStream, nil, handler) + err = f.Format.Extract(f.context(), inputStream, handler) if err != nil { // these being non-nil implies that we have indexed the archive, // but if an error occurred, we likely only got part of the way diff --git a/interfaces.go b/interfaces.go index f675f0e2..fd817864 100644 --- a/interfaces.go +++ b/interfaces.go @@ -33,10 +33,15 @@ type Compression interface { Decompressor } -// Archival is an archival format with both archive and extract methods. +// Archival is an archival format that can create/write archives. type Archival interface { Format Archiver +} + +// Extraction is an archival format that can extract from (read) archives.
+type Extraction interface { + Format Extractor } @@ -86,19 +91,14 @@ type ArchiverAsync interface { // Extractor can extract files from an archive. type Extractor interface { // Extract walks entries in the archive and calls handleFile for each - // entry that matches the pathsInArchive filter by path/name. - // - // If pathsInArchive is nil, all files are extracted without discretion. - // If pathsInArchive is empty, no files are extracted. - // If a path refers to a directory, all files within it are extracted. - // Extracted files are passed to the handleFile callback for handling. + // entry in the archive. // // Any files opened in the FileHandler should be closed when it returns, // as there is no guarantee the files can be read outside the handler // or after the walk has proceeded to the next file. // // Context cancellation must be honored. - Extract(ctx context.Context, archive io.Reader, pathsInArchive []string, handleFile FileHandler) error + Extract(ctx context.Context, archive io.Reader, handleFile FileHandler) error } // Inserter can insert files into an existing archive. diff --git a/rar.go b/rar.go index bece6071..8ca559e6 100644 --- a/rar.go +++ b/rar.go @@ -56,12 +56,9 @@ func (r Rar) Match(_ context.Context, filename string, stream io.Reader) (MatchR return mr, nil } -// Archive is not implemented for RAR, but the method exists so that Rar satisfies the ArchiveFormat interface. -func (r Rar) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error { - return fmt.Errorf("not implemented because RAR is a proprietary format") -} +// Archive is not implemented for RAR because it is patent-encumbered. 
-func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { +func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { var options []rardecode.Option if r.Password != "" { options = append(options, rardecode.Password(r.Password)) @@ -91,9 +88,6 @@ func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv } return err } - if !fileIsIncluded(pathsInArchive, hdr.Name) { - continue - } if fileIsIncluded(skipDirs, hdr.Name) { continue } @@ -142,3 +136,6 @@ var ( rarHeaderV1_5 = []byte("Rar!\x1a\x07\x00") // v1.5 rarHeaderV5_0 = []byte("Rar!\x1a\x07\x01\x00") // v5.0 ) + +// Interface guard +var _ Extractor = Rar{} diff --git a/tar.go b/tar.go index d4106257..d84fed43 100644 --- a/tar.go +++ b/tar.go @@ -179,7 +179,7 @@ func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []FileIn return nil } -func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { +func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { tr := tar.NewReader(sourceArchive) // important to initialize to non-nil, empty value due to how fileIsIncluded works @@ -201,9 +201,6 @@ func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv } return err } - if !fileIsIncluded(pathsInArchive, hdr.Name) { - continue - } if fileIsIncluded(skipDirs, hdr.Name) { continue } diff --git a/zip.go b/zip.go index 73361d8b..1de5b516 100644 --- a/zip.go +++ b/zip.go @@ -183,7 +183,7 @@ func (z Zip) archiveOneFile(ctx context.Context, zw *zip.Writer, idx int, file F // the interface because we figure you can Read() from anything you can ReadAt() or Seek() // with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker // and io.ReaderAt, an error is returned. 
-func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { +func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error { sra, ok := sourceArchive.(seekReaderAt) if !ok { return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints") @@ -211,9 +211,6 @@ func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchiv // ensure filename and comment are UTF-8 encoded (issue #147 and PR #305) z.decodeText(&f.FileHeader) - if !fileIsIncluded(pathsInArchive, f.Name) { - continue - } if fileIsIncluded(skipDirs, f.Name) { continue }