diff --git a/integration/dockerfiles/Dockerfile_test_issue_1039 b/integration/dockerfiles/Dockerfile_test_issue_1039
new file mode 100644
index 0000000000..f70ec01224
--- /dev/null
+++ b/integration/dockerfiles/Dockerfile_test_issue_1039
@@ -0,0 +1,10 @@
+FROM registry.access.redhat.com/ubi7/ubi:7.7-214
+
+# Install GCC, GCC-C++ and make libraries for build environment
+# Then clean caches
+RUN yum --disableplugin=subscription-manager update -y \
+    && yum --disableplugin=subscription-manager install -y \
+    gcc-4.8.5-39.el7 \
+    gcc-c++-4.8.5-39.el7 \
+    make-3.82-24.el7 \
+    && yum --disableplugin=subscription-manager clean all
diff --git a/pkg/filesystem/resolve.go b/pkg/filesystem/resolve.go
new file mode 100644
index 0000000000..94b62de942
--- /dev/null
+++ b/pkg/filesystem/resolve.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package filesystem
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/GoogleContainerTools/kaniko/pkg/util"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+// ResolvePaths takes a slice of file paths and a slice of whitelist entries. It resolves each
+// file path according to a set of rules and then returns a slice of resolved paths or an error.
+// File paths are resolved according to the following rules:
+// * If a path is whitelisted, skip it.
+// * If a path is a symlink, resolve its ancestor link and add it to the output set.
+// * If a path is a symlink, resolve its target. If the target is not whitelisted, add it to the
+//   output set.
+// * Add all ancestors of each path to the output set.
+func ResolvePaths(paths []string, wl []util.WhitelistEntry) (pathsToAdd []string, err error) {
+	logrus.Info("Resolving paths")
+
+	fileSet := make(map[string]bool)
+
+	for _, f := range paths {
+		// If the given path is part of the whitelist ignore it
+		if util.IsInProvidedWhitelist(f, wl) {
+			logrus.Debugf("path %s is whitelisted, ignoring it", f)
+			continue
+		}
+
+		link, e := resolveSymlinkAncestor(f)
+		if e != nil {
+			err = e
+			return
+		}
+
+		if f != link {
+			logrus.Tracef("updated link %s to %s", f, link)
+		}
+
+		if !fileSet[link] {
+			pathsToAdd = append(pathsToAdd, link)
+		}
+		fileSet[link] = true
+
+		var evaled string
+
+		// If the path is a symlink we need to also consider the target of that
+		// link
+		evaled, err = filepath.EvalSymlinks(f)
+		if err != nil {
+			if !os.IsNotExist(err) {
+				logrus.Errorf("couldn't eval %s with link %s", f, link)
+				return
+			}
+
+			logrus.Debugf("symlink path %s, target does not exist", f)
+		}
+
+		// If the given path is a symlink and the target is part of the whitelist
+		// ignore the target
+		if util.IsInProvidedWhitelist(evaled, wl) {
+			logrus.Debugf("path %s is whitelisted, ignoring it", evaled)
+			continue
+		}
+
+		if !fileSet[evaled] {
+			pathsToAdd = append(pathsToAdd, evaled)
+		}
+		fileSet[evaled] = true
+	}
+
+	// Also add parent directories to keep their permissions correct.
+	pathsToAdd = filesWithParentDirs(pathsToAdd)
+
+	return
+}
+
+// filesWithParentDirs returns each provided file path along with all of its ancestor directories.
+// E.g. /foo/bar/baz/boom.txt => [/, /foo, /foo/bar, /foo/bar/baz, /foo/bar/baz/boom.txt]
+func filesWithParentDirs(files []string) []string {
+	filesSet := map[string]bool{}
+
+	for _, file := range files {
+		file = filepath.Clean(file)
+		filesSet[file] = true
+
+		for _, dir := range util.ParentDirectories(file) {
+			dir = filepath.Clean(dir)
+			filesSet[dir] = true
+		}
+	}
+
+	newFiles := []string{}
+	for file := range filesSet {
+		newFiles = append(newFiles, file)
+	}
+
+	return newFiles
+}
+
+// resolveSymlinkAncestor returns the ancestor link of the provided symlink path, or returns the
+// path itself if it is not a link. The ancestor link is the file node whose type is a symlink.
+// E.g. /baz/boom/bar.txt links to /usr/bin/bar.txt but /baz/boom/bar.txt itself is not a link.
+// Instead /baz/boom is actually a link to /usr/bin. In this case resolveSymlinkAncestor would
+// return /baz/boom.
+func resolveSymlinkAncestor(path string) (string, error) {
+	if !filepath.IsAbs(path) {
+		return "", errors.New("dest path must be abs")
+	}
+	last := ""
+	newPath := path
+loop:
+	for newPath != "/" {
+		fi, err := os.Lstat(newPath)
+		if err != nil {
+			return "", errors.Wrap(err, "failed to lstat")
+		}
+
+		switch mode := fi.Mode(); {
+		case mode&os.ModeSymlink != 0:
+			last = filepath.Base(newPath)
+			newPath = filepath.Dir(newPath)
+		default:
+			target, err := filepath.EvalSymlinks(newPath)
+			if err != nil {
+				return "", err
+			}
+
+			if target != newPath {
+				last = filepath.Base(newPath)
+				newPath = filepath.Dir(newPath)
+			} else {
+				break loop
+			}
+		}
+	}
+
+	newPath = filepath.Join(newPath, last)
+	return filepath.Clean(newPath), nil
+}
diff --git a/pkg/filesystem/resolve_test.go b/pkg/filesystem/resolve_test.go
new file mode 100644
index 0000000000..a2a7be3a39
--- /dev/null
+++ b/pkg/filesystem/resolve_test.go
@@ -0,0 +1,185 @@
+/*
+Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package filesystem
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/GoogleContainerTools/kaniko/pkg/util"
+)
+
+func Test_ResolvePaths(t *testing.T) {
+	validateResults := func(
+		t *testing.T,
+		actualFiles,
+		expectedFiles []string,
+		err error,
+	) {
+		if err != nil {
+			t.Errorf("expected err to be nil but was %s", err)
+		}
+
+		// Sort so that comparison is against consistent order
+		sort.Strings(actualFiles)
+		sort.Strings(expectedFiles)
+
+		if !reflect.DeepEqual(actualFiles, expectedFiles) {
+			t.Errorf("expected files to equal %s but was %s",
+				expectedFiles, actualFiles,
+			)
+		}
+	}
+
+	t.Run("list of files", func(t *testing.T) {
+		dir, err := ioutil.TempDir("", "snapshot-test")
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		defer os.RemoveAll(dir)
+
+		files := []string{
+			"/foo/bar.txt",
+			"/baz/boom.txt",
+		}
+
+		t.Run("all are symlinks", func(t *testing.T) {
+			for _, f := range files {
+				fLink := filepath.Join(dir, "link", f)
+				fTarget := filepath.Join(dir, "target", f)
+
+				if err := os.MkdirAll(filepath.Dir(fTarget), 0777); err != nil {
+					t.Fatal(err)
+				}
+
+				if err := ioutil.WriteFile(fTarget, []byte{}, 0777); err != nil {
+					t.Fatal(err)
+				}
+
+				if err := os.MkdirAll(filepath.Dir(fLink), 0777); err != nil {
+					t.Fatal(err)
+				}
+
+				if err := os.Symlink(fTarget, fLink); err != nil {
+					t.Fatal(err)
+				}
+			}
+
+			t.Run("none are whitelisted", func(t *testing.T) {
+				wl := []util.WhitelistEntry{}
+
+				inputFiles := []string{}
+				expectedFiles := []string{}
+
+				for _, f := range files {
+					link := filepath.Join(dir, "link", f)
+					expectedFiles = append(expectedFiles, link)
+					inputFiles = append(inputFiles, link)
+
+					target := filepath.Join(dir, "target", f)
+					expectedFiles = append(expectedFiles, target)
+				}
+
+				expectedFiles = filesWithParentDirs(expectedFiles)
+
+				files, err := ResolvePaths(inputFiles, wl)
+
+				validateResults(t, files, expectedFiles, err)
+			})
+
+			t.Run("some are whitelisted", func(t *testing.T) {
+				wl := []util.WhitelistEntry{
+					{
+						Path: filepath.Join(dir, "link", "baz"),
+					},
+					{
+						Path: filepath.Join(dir, "target", "foo"),
+					},
+				}
+
+				expectedFiles := []string{}
+				inputFiles := []string{}
+
+				for _, f := range files {
+					link := filepath.Join(dir, "link", f)
+					inputFiles = append(inputFiles, link)
+
+					if util.IsInProvidedWhitelist(link, wl) {
+						t.Logf("skipping %s", link)
+						continue
+					}
+
+					expectedFiles = append(expectedFiles, link)
+
+					target := filepath.Join(dir, "target", f)
+
+					if util.IsInProvidedWhitelist(target, wl) {
+						t.Logf("skipping %s", target)
+						continue
+					}
+
+					expectedFiles = append(expectedFiles, target)
+				}
+
+				link := filepath.Join(dir, "link", "zoom/")
+
+				target := filepath.Join(dir, "target", "zaam/")
+				if err := os.MkdirAll(target, 0777); err != nil {
+					t.Fatal(err)
+				}
+
+				if err := ioutil.WriteFile(filepath.Join(target, "meow.txt"), []byte{}, 0777); err != nil {
+					t.Fatal(err)
+				}
+
+				if err := os.Symlink(target, link); err != nil {
+					t.Fatal(err)
+				}
+
+				file := filepath.Join(link, "meow.txt")
+				inputFiles = append(inputFiles, file)
+
+				expectedFiles = append(expectedFiles, link)
+
+				targetFile := filepath.Join(target, "meow.txt")
+				expectedFiles = append(expectedFiles, targetFile)
+
+				expectedFiles = filesWithParentDirs(expectedFiles)
+
+				files, err := ResolvePaths(inputFiles, wl)
+
+				validateResults(t, files, expectedFiles, err)
+			})
+		})
+	})
+
+	t.Run("empty set of files", func(t *testing.T) {
+		inputFiles := []string{}
+		expectedFiles := []string{}
+
+		wl := []util.WhitelistEntry{}
+
+		files, err := ResolvePaths(inputFiles, wl)
+
+		validateResults(t, files, expectedFiles, err)
+	})
+}
diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go
index 59be2f0e0c..90f06564ee 100644
--- a/pkg/snapshot/snapshot.go
+++ b/pkg/snapshot/snapshot.go
@@ -22,8 +22,10 @@ import (
 	"os"
 	"path/filepath"
 	"sort"
+	"strings"
 	"syscall"
 
+	"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
 	"github.com/GoogleContainerTools/kaniko/pkg/timing"
 	"github.com/karrick/godirwalk"
@@ -41,11 +43,12 @@ var snapshotPathPrefix = constants.KanikoDir
 type Snapshotter struct {
 	l         *LayeredMap
 	directory string
+	whitelist []util.WhitelistEntry
 }
 
 // NewSnapshotter creates a new snapshotter rooted at d
 func NewSnapshotter(l *LayeredMap, d string) *Snapshotter {
-	return &Snapshotter{l: l, directory: d}
+	return &Snapshotter{l: l, directory: d, whitelist: util.Whitelist()}
 }
 
 // Init initializes a new snapshotter
@@ -73,12 +76,15 @@ func (s *Snapshotter) TakeSnapshot(files []string) (string, error) {
 		logrus.Info("No files changed in this command, skipping snapshotting.")
 		return "", nil
 	}
+
+	filesToAdd, err := filesystem.ResolvePaths(files, s.whitelist)
+	if err != nil {
+		return "", err
+	}
+
 	logrus.Info("Taking snapshot of files...")
 	logrus.Debugf("Taking snapshot of files %v", files)
-	// Also add parent directories to keep the permission of them correctly.
-	filesToAdd := filesWithParentDirs(files)
-
 	sort.Strings(filesToAdd)
 
 	// Add files to the layered map
@@ -149,19 +155,42 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
 	)
 	timing.DefaultRun.Stop(timer)
 
+	filesToResolve := make([]string, 0, len(memFs))
+	for file := range memFs {
+		if strings.HasPrefix(file, "/tmp/dir") {
+			logrus.Infof("found %s", file)
+		}
+		filesToResolve = append(filesToResolve, file)
+	}
+
+	resolvedFiles, err := filesystem.ResolvePaths(filesToResolve, s.whitelist)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	resolvedMemFs := make(map[string]bool)
+	for _, f := range resolvedFiles {
+		if strings.HasPrefix(f, "/tmp/dir") {
+			logrus.Infof("found again %s", f)
+		}
+		resolvedMemFs[f] = true
+	}
+
 	// First handle whiteouts
 	// Get a list of all the files that existed before this layer
 	existingPaths := s.l.getFlattenedPathsForWhiteOut()
+	// Find the delta by removing everything left in this layer.
-	for p := range memFs {
+	for p := range resolvedMemFs {
 		delete(existingPaths, p)
 	}
+	// The paths left here are the ones that have been deleted in this layer.
 	filesToWhiteOut := []string{}
 	for path := range existingPaths {
 		// Only add the whiteout if the directory for the file still exists.
 		dir := filepath.Dir(path)
-		if _, ok := memFs[dir]; ok {
+		if _, ok := resolvedMemFs[dir]; ok {
 			if s.l.MaybeAddWhiteout(path) {
 				logrus.Debugf("Adding whiteout for %s", path)
 				filesToWhiteOut = append(filesToWhiteOut, path)
@@ -170,7 +199,7 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
 	}
 
 	filesToAdd := []string{}
-	for path := range memFs {
+	for path := range resolvedMemFs {
 		if util.CheckWhitelist(path) {
 			logrus.Tracef("Not adding %s to layer, as it's whitelisted", path)
 			continue
@@ -181,19 +210,11 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
 			return nil, nil, fmt.Errorf("could not check if file has changed %s %s", path, err)
 		}
 		if fileChanged {
-			// Get target file for symlinks so the symlink is not a dead link.
-			files, err := filesWithLinks(path)
-			if err != nil {
-				return nil, nil, err
-			}
-			logrus.Tracef("Adding files %s to layer, because it was changed.", files)
-			filesToAdd = append(filesToAdd, files...)
+			logrus.Tracef("Adding file %s to layer, because it was changed.", path)
+			filesToAdd = append(filesToAdd, path)
 		}
 	}
-	// Also add parent directories to keep their permissions correctly.
-	filesToAdd = filesWithParentDirs(filesToAdd)
-
 	sort.Strings(filesToAdd)
 
 	// Add files to the layered map
 	for _, file := range filesToAdd {
@@ -221,27 +242,6 @@ func writeToTar(t util.Tar, files, whiteouts []string) error {
 	return nil
 }
 
-func filesWithParentDirs(files []string) []string {
-	filesSet := map[string]bool{}
-
-	for _, file := range files {
-		file = filepath.Clean(file)
-		filesSet[file] = true
-
-		for _, dir := range util.ParentDirectories(file) {
-			dir = filepath.Clean(dir)
-			filesSet[dir] = true
-		}
-	}
-
-	newFiles := []string{}
-	for file := range filesSet {
-		newFiles = append(newFiles, file)
-	}
-
-	return newFiles
-}
-
 // filesWithLinks returns the symlink and the target path if its exists.
 func filesWithLinks(path string) ([]string, error) {
 	link, err := util.GetSymLink(path)
diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go
index 67b70b9c51..cd9a0a4ff0 100644
--- a/pkg/snapshot/snapshot_test.go
+++ b/pkg/snapshot/snapshot_test.go
@@ -63,19 +63,24 @@ func TestSnapshotFSFileChange(t *testing.T) {
 		fooPath: "newbaz1",
 		batPath: "baz",
 	}
-	for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(fooPath) {
-		snapshotFiles[dir] = ""
-	}
-	for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
-		snapshotFiles[dir] = ""
-	}
-	numFiles := 0
+
+	// Their parents didn't change so they shouldn't be included
+	//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(fooPath) {
+	//	snapshotFiles[dir] = ""
+	//}
+	//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
+	//	snapshotFiles[dir] = ""
+	//}
+
+	actualFiles := []string{}
 	for {
 		hdr, err := tr.Next()
 		if err == io.EOF {
 			break
 		}
-		numFiles++
+
+		actualFiles = append(actualFiles, hdr.Name)
+
 		if _, isFile := snapshotFiles[hdr.Name]; !isFile {
 			t.Fatalf("File %s unexpectedly in tar", hdr.Name)
 		}
@@ -84,8 +89,8 @@
 			t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
 		}
 	}
-	if numFiles != len(snapshotFiles) {
-		t.Fatalf("Incorrect number of files were added, expected: 2, actual: %v", numFiles)
+	if len(actualFiles) != len(snapshotFiles) {
+		t.Fatalf("Incorrect number of files were added, expected: %d, actual: %d", len(snapshotFiles), len(actualFiles))
 	}
 }
@@ -155,17 +160,20 @@ func TestSnapshotFSChangePermissions(t *testing.T) {
 	snapshotFiles := map[string]string{
 		batPathWithoutLeadingSlash: "baz2",
 	}
-	for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
-		snapshotFiles[dir] = ""
-	}
-	numFiles := 0
+
+	// The parents haven't changed so they shouldn't be in the tar
+	//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
+	//	snapshotFiles[dir] = ""
+	//}
+
+	foundFiles := []string{}
 	for {
 		hdr, err := tr.Next()
 		if err == io.EOF {
 			break
 		}
 		t.Logf("Info %s in tar", hdr.Name)
-		numFiles++
+		foundFiles = append(foundFiles, hdr.Name)
 		if _, isFile := snapshotFiles[hdr.Name]; !isFile {
 			t.Fatalf("File %s unexpectedly in tar", hdr.Name)
 		}
@@ -174,8 +182,11 @@
 			t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
 		}
 	}
-	if numFiles != len(snapshotFiles) {
-		t.Fatalf("Incorrect number of files were added, expected: 1, got: %v", numFiles)
+	if len(foundFiles) != len(snapshotFiles) {
+		t.Logf("expected\n%v\nto equal\n%v", foundFiles, snapshotFiles)
+		t.Fatalf("Incorrect number of files were added, expected: %d, got: %d",
+			len(snapshotFiles),
+			len(foundFiles))
 	}
 }
diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go
index afd20e65b8..39edc7d856 100644
--- a/pkg/util/fs_util.go
+++ b/pkg/util/fs_util.go
@@ -83,6 +83,10 @@ type FSConfig struct {
 
 type FSOpt func(*FSConfig)
 
+func Whitelist() []WhitelistEntry {
+	return whitelist
+}
+
 func IncludeWhiteout() FSOpt {
 	return func(opts *FSConfig) {
 		opts.includeWhiteout = true
@@ -358,8 +362,12 @@ func ExtractFile(dest string, hdr *tar.Header, tr io.Reader) error {
 }
 
 func IsInWhitelist(path string) bool {
-	for _, wl := range whitelist {
-		if !wl.PrefixMatchOnly && path == wl.Path {
+	return IsInProvidedWhitelist(path, whitelist)
+}
+
+func IsInProvidedWhitelist(path string, wl []WhitelistEntry) bool {
+	for _, entry := range wl {
+		if !entry.PrefixMatchOnly && path == entry.Path {
 			return true
 		}
 	}
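
Note (not part of the diff above): a minimal usage sketch of the API this change introduces. filesystem.ResolvePaths and util.Whitelist are the functions added by this diff; the main wrapper and the example paths below are hypothetical, and the paths must exist on the filesystem being scanned for the symlink resolution to succeed.

package main

import (
	"fmt"
	"log"

	"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
	"github.com/GoogleContainerTools/kaniko/pkg/util"
)

func main() {
	// Hypothetical paths reported as changed by a command. Symlinks among them
	// are resolved to their ancestor link and their target, parent directories
	// are appended, and whitelisted entries are skipped.
	changed := []string{
		"/etc/alternatives/java",
		"/usr/bin/gcc",
	}

	resolved, err := filesystem.ResolvePaths(changed, util.Whitelist())
	if err != nil {
		log.Fatal(err)
	}

	for _, p := range resolved {
		fmt.Println(p)
	}
}

This mirrors what Snapshotter.TakeSnapshot and scanFullFilesystem now do with the resolved path set before sorting it and writing it to the layer tarball.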