diff --git a/pkg/archiver/tar.go b/pkg/archiver/tar.go index b5b8762695..b5e1527b9b 100644 --- a/pkg/archiver/tar.go +++ b/pkg/archiver/tar.go @@ -6,18 +6,19 @@ package archiver import ( "archive/tar" + "bytes" "context" + "errors" "fmt" "io" "log" "os" + "syscall" multierror "github.com/hashicorp/go-multierror" ) -// Tar creates .tar archive and writes it to output for every item in paths channel -// -//nolint:gocyclo +// Tar creates .tar archive and writes it to output for every item in paths channel. func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { tw := tar.NewWriter(output) //nolint:errcheck @@ -25,6 +26,8 @@ func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { var multiErr *multierror.Error + buf := make([]byte, 4096) + for fi := range paths { if fi.Error != nil { multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, fi.Error)) @@ -32,83 +35,101 @@ func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { continue } - header, err := tar.FileInfoHeader(fi.FileInfo, fi.Link) + err := processFile(ctx, tw, fi, buf) if err != nil { - // not supported by tar multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, err)) - - continue } + } - header.Name = fi.RelPath - if fi.FileInfo.IsDir() { - header.Name += string(os.PathSeparator) - } + if err := tw.Close(); err != nil { + multiErr = multierror.Append(multiErr, err) + } + + return multiErr.ErrorOrNil() +} - skipData := false +//nolint:gocyclo +func processFile(ctx context.Context, tw *tar.Writer, fi FileItem, buf []byte) error { + header, err := tar.FileInfoHeader(fi.FileInfo, fi.Link) + if err != nil { + // not supported by tar + return err + } - switch header.Typeflag { - case tar.TypeLink, tar.TypeSymlink, tar.TypeChar, tar.TypeBlock, tar.TypeDir, tar.TypeFifo: - // no data for these types, move on - skipData = true - } + header.Name = fi.RelPath + if fi.FileInfo.IsDir() { + header.Name += string(os.PathSeparator) + } - if header.Size == 0 { - // skip files with zero length - // - // this might skip contents for special files in /proc, but - // anyways we can't archive them properly if we don't know size beforehand - skipData = true - } + skipData := false + + switch header.Typeflag { + case tar.TypeLink, tar.TypeSymlink, tar.TypeChar, tar.TypeBlock, tar.TypeDir, tar.TypeFifo: + // no data for these types, move on + skipData = true + } + var r io.Reader + + if !skipData { var fp *os.File - if !skipData { - fp, err = os.Open(fi.FullPath) - if err != nil { - multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, err)) - continue - } + fp, err = os.Open(fi.FullPath) + if err != nil { + return err } - err = tw.WriteHeader(header) - if err != nil { - //nolint:errcheck - fp.Close() + defer fp.Close() //nolint:errcheck - multiErr = multierror.Append(multiErr, err) + r = fp + } - return multiErr - } + if !skipData && header.Size == 0 { + // Linux reports /proc files as zero length, but they might have data, + // so we try to read limited amount of data from it to determine the size + var n int - if !skipData { - err = archiveFile(ctx, tw, fi, fp) - if err != nil { - multiErr = multierror.Append(multiErr, err) + n, err = r.Read(buf) - return multiErr + switch { + case err == io.EOF: + // file is empty for real + skipData = true + case err != nil: + // error reading from the file + if errors.Is(err, syscall.EINVAL) { + // some files are not supported by os.Open, e.g. /proc/sys/net/ipv4/conf/all/accept_local + skipData = true + } else { + return err } + case n < len(buf): + header.Size = int64(n) + r = bytes.NewReader(append([]byte(nil), buf[:n]...)) + default: + // none matched so the file is bigger than we expected, ignore it and copy as zero size + skipData = true } } - if err := tw.Close(); err != nil { - multiErr = multierror.Append(multiErr, err) + err = tw.WriteHeader(header) + if err != nil { + return err } - return multiErr.ErrorOrNil() -} - -func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, fp *os.File) error { - //nolint:errcheck - defer fp.Close() + if skipData { + return nil + } - buf := make([]byte, 4096) + return archiveFile(ctx, tw, fi, r, buf) +} +func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, r io.Reader, buf []byte) error { for { - n, err := fp.Read(buf) + n, err := r.Read(buf) if err != nil { if err == io.EOF { - break + return nil } return err @@ -131,6 +152,4 @@ func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, fp *os.File) er return err } } - - return fp.Close() } diff --git a/pkg/archiver/tar_test.go b/pkg/archiver/tar_test.go index b5beedeb03..91211a1f8a 100644 --- a/pkg/archiver/tar_test.go +++ b/pkg/archiver/tar_test.go @@ -111,6 +111,46 @@ func (suite *TarSuite) TestArchiveFile() { } } +func (suite *TarSuite) TestArchiveProcfs() { + ch, err := archiver.Walker(context.Background(), "/proc/self/", archiver.WithMaxRecurseDepth(0)) + suite.Require().NoError(err) + + var buf bytes.Buffer + + // it's okay to have some errors here, as some files are not readable + archiver.Tar(context.Background(), ch, &buf) //nolint:errcheck + + tr := tar.NewReader(&buf) + + expectedNonZeroFiles := map[string]struct{}{ + "cmdline": {}, + "environ": {}, + "limits": {}, + "io": {}, + "stat": {}, + } + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + + suite.Require().NoError(err) + + if _, expected := expectedNonZeroFiles[hdr.Name]; !expected { + continue + } + + suite.Assert().EqualValues(hdr.Typeflag, tar.TypeReg) + suite.Assert().NotZero(hdr.Size) + + delete(expectedNonZeroFiles, hdr.Name) + } + + suite.Assert().Empty(expectedNonZeroFiles) +} + func TestTarSuite(t *testing.T) { suite.Run(t, new(TarSuite)) }