From b92d9965fd7f3d8eaf5398d84ce031f324e19a42 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 20 Apr 2023 22:36:45 +0400 Subject: [PATCH] fix: allow `talosctl cp` to handle special files in `/proc` There is some refactoring to simplify things, but the main change is handling files which report size 0 in `stat` but actually contain data when read. We try to read up to one small buffer of data: if that reads the whole file, we use it as the contents; otherwise we still skip the file contents (the entry is archived with zero size), as we need to write the tar header with the size _before_ we read the whole file. Signed-off-by: Andrey Smirnov (cherry picked from commit f661d84877e6db5bc8856b982990926dcbfe949c) --- pkg/archiver/tar.go | 129 ++++++++++++++++++++++----------------- pkg/archiver/tar_test.go | 40 ++++++++++++ 2 files changed, 114 insertions(+), 55 deletions(-) diff --git a/pkg/archiver/tar.go b/pkg/archiver/tar.go index b5b8762695..b5e1527b9b 100644 --- a/pkg/archiver/tar.go +++ b/pkg/archiver/tar.go @@ -6,18 +6,19 @@ package archiver import ( "archive/tar" + "bytes" "context" + "errors" "fmt" "io" "log" "os" + "syscall" multierror "github.com/hashicorp/go-multierror" ) -// Tar creates .tar archive and writes it to output for every item in paths channel -// -//nolint:gocyclo +// Tar creates .tar archive and writes it to output for every item in paths channel. 
func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { tw := tar.NewWriter(output) //nolint:errcheck @@ -25,6 +26,8 @@ func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { var multiErr *multierror.Error + buf := make([]byte, 4096) + for fi := range paths { if fi.Error != nil { multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, fi.Error)) @@ -32,83 +35,101 @@ func Tar(ctx context.Context, paths <-chan FileItem, output io.Writer) error { continue } - header, err := tar.FileInfoHeader(fi.FileInfo, fi.Link) + err := processFile(ctx, tw, fi, buf) if err != nil { - // not supported by tar multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, err)) - - continue } + } - header.Name = fi.RelPath - if fi.FileInfo.IsDir() { - header.Name += string(os.PathSeparator) - } + if err := tw.Close(); err != nil { + multiErr = multierror.Append(multiErr, err) + } + + return multiErr.ErrorOrNil() +} - skipData := false +//nolint:gocyclo +func processFile(ctx context.Context, tw *tar.Writer, fi FileItem, buf []byte) error { + header, err := tar.FileInfoHeader(fi.FileInfo, fi.Link) + if err != nil { + // not supported by tar + return err + } - switch header.Typeflag { - case tar.TypeLink, tar.TypeSymlink, tar.TypeChar, tar.TypeBlock, tar.TypeDir, tar.TypeFifo: - // no data for these types, move on - skipData = true - } + header.Name = fi.RelPath + if fi.FileInfo.IsDir() { + header.Name += string(os.PathSeparator) + } - if header.Size == 0 { - // skip files with zero length - // - // this might skip contents for special files in /proc, but - // anyways we can't archive them properly if we don't know size beforehand - skipData = true - } + skipData := false + + switch header.Typeflag { + case tar.TypeLink, tar.TypeSymlink, tar.TypeChar, tar.TypeBlock, tar.TypeDir, tar.TypeFifo: + // no data for these types, move on + skipData = true + } + var r io.Reader + + if !skipData { var fp 
*os.File - if !skipData { - fp, err = os.Open(fi.FullPath) - if err != nil { - multiErr = multierror.Append(multiErr, fmt.Errorf("skipping %q: %s", fi.FullPath, err)) - continue - } + fp, err = os.Open(fi.FullPath) + if err != nil { + return err } - err = tw.WriteHeader(header) - if err != nil { - //nolint:errcheck - fp.Close() + defer fp.Close() //nolint:errcheck - multiErr = multierror.Append(multiErr, err) + r = fp + } - return multiErr - } + if !skipData && header.Size == 0 { + // Linux reports /proc files as zero length, but they might have data, + // so we try to read limited amount of data from it to determine the size + var n int - if !skipData { - err = archiveFile(ctx, tw, fi, fp) - if err != nil { - multiErr = multierror.Append(multiErr, err) + n, err = r.Read(buf) - return multiErr + switch { + case err == io.EOF: + // file is empty for real + skipData = true + case err != nil: + // error reading from the file + if errors.Is(err, syscall.EINVAL) { + // some files are not supported by os.Open, e.g. 
/proc/sys/net/ipv4/conf/all/accept_local + skipData = true + } else { + return err } + case n < len(buf): + header.Size = int64(n) + r = bytes.NewReader(append([]byte(nil), buf[:n]...)) + default: + // none matched so the file is bigger than we expected, ignore it and copy as zero size + skipData = true } } - if err := tw.Close(); err != nil { - multiErr = multierror.Append(multiErr, err) + err = tw.WriteHeader(header) + if err != nil { + return err } - return multiErr.ErrorOrNil() -} - -func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, fp *os.File) error { - //nolint:errcheck - defer fp.Close() + if skipData { + return nil + } - buf := make([]byte, 4096) + return archiveFile(ctx, tw, fi, r, buf) +} +func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, r io.Reader, buf []byte) error { for { - n, err := fp.Read(buf) + n, err := r.Read(buf) if err != nil { if err == io.EOF { - break + return nil } return err @@ -131,6 +152,4 @@ func archiveFile(ctx context.Context, tw io.Writer, fi FileItem, fp *os.File) er return err } } - - return fp.Close() } diff --git a/pkg/archiver/tar_test.go b/pkg/archiver/tar_test.go index b5beedeb03..91211a1f8a 100644 --- a/pkg/archiver/tar_test.go +++ b/pkg/archiver/tar_test.go @@ -111,6 +111,46 @@ func (suite *TarSuite) TestArchiveFile() { } } +func (suite *TarSuite) TestArchiveProcfs() { + ch, err := archiver.Walker(context.Background(), "/proc/self/", archiver.WithMaxRecurseDepth(0)) + suite.Require().NoError(err) + + var buf bytes.Buffer + + // it's okay to have some errors here, as some files are not readable + archiver.Tar(context.Background(), ch, &buf) //nolint:errcheck + + tr := tar.NewReader(&buf) + + expectedNonZeroFiles := map[string]struct{}{ + "cmdline": {}, + "environ": {}, + "limits": {}, + "io": {}, + "stat": {}, + } + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + + suite.Require().NoError(err) + + if _, expected := expectedNonZeroFiles[hdr.Name]; !expected { + continue + } + 
+ suite.Assert().EqualValues(hdr.Typeflag, tar.TypeReg) + suite.Assert().NotZero(hdr.Size) + + delete(expectedNonZeroFiles, hdr.Name) + } + + suite.Assert().Empty(expectedNonZeroFiles) +} + func TestTarSuite(t *testing.T) { suite.Run(t, new(TarSuite)) }