Skip to content
This repository was archived by the owner on Mar 27, 2024. It is now read-only.

Add rpmlayer differ #252

Merged
merged 2 commits into from
Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const (
RemotePrefix = "remote://"
)

var layerAnalyzers = [...]string{"layer", "aptlayer"}
var layerAnalyzers = [...]string{"layer", "aptlayer", "rpmlayer"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since you're in here, would you mind making these strings constants somewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will include the constants somewhere


var RootCmd = &cobra.Command{
Use: "container-diff",
Expand Down
1 change: 1 addition & 0 deletions differs/differs.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var Analyzers = map[string]Analyzer{
"apt": AptAnalyzer{},
"aptlayer": AptLayerAnalyzer{},
"rpm": RPMAnalyzer{},
"rpmlayer": RPMLayerAnalyzer{},
"pip": PipAnalyzer{},
"node": NodeAnalyzer{},
}
Expand Down
120 changes: 117 additions & 3 deletions differs/rpm_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import (
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/daemon"
"github.com/google/go-containerregistry/pkg/v1/mutate"
"github.com/google/go-containerregistry/pkg/v1/random"

pkgutil "github.com/GoogleContainerTools/container-diff/pkg/util"
"github.com/GoogleContainerTools/container-diff/util"
Expand Down Expand Up @@ -101,7 +103,7 @@ func (a RPMAnalyzer) getPackages(image pkgutil.Image) (map[string]util.PackageIn
packages, err := rpmDataFromImageFS(image)
if err != nil {
logrus.Info("Running RPM binary from image in a container")
return rpmDataFromContainer(image)
return rpmDataFromContainer(image.Image)
}
return packages, err
}
Expand Down Expand Up @@ -164,7 +166,7 @@ func rpmDBPath(rootFSPath string) (string, error) {

// rpmDataFromContainer runs image in a container, queries the data of
// installed rpm packages and returns a map of packages.
func rpmDataFromContainer(image pkgutil.Image) (map[string]util.PackageInfo, error) {
func rpmDataFromContainer(image v1.Image) (map[string]util.PackageInfo, error) {
packages := make(map[string]util.PackageInfo)

client, err := godocker.NewClientFromEnv()
Expand All @@ -175,7 +177,7 @@ func rpmDataFromContainer(image pkgutil.Image) (map[string]util.PackageInfo, err
return packages, err
}

imageName, err := loadImageToDaemon(image.Image)
imageName, err := loadImageToDaemon(image)

if err != nil {
return packages, fmt.Errorf("Error loading image: %s", err)
Expand Down Expand Up @@ -364,3 +366,115 @@ func unlock() error {
daemonMutex.Unlock()
return nil
}

type RPMLayerAnalyzer struct {
}

// Name returns the name of the analyzer.
func (a RPMLayerAnalyzer) Name() string {
return "RPMLayerAnalyzer"
}

// Diff compares the installed rpm packages of image1 and image2 for each layer
func (a RPMLayerAnalyzer) Diff(image1, image2 pkgutil.Image) (util.Result, error) {
diff, err := singleVersionLayerDiff(image1, image2, a)
return diff, err
}

// Analyze collects information of the installed rpm packages on each layer
func (a RPMLayerAnalyzer) Analyze(image pkgutil.Image) (util.Result, error) {
analysis, err := singleVersionLayerAnalysis(image, a)
return analysis, err
}

// getPackages returns an array of maps of installed rpm packages on each layer
func (a RPMLayerAnalyzer) getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error) {
path := image.FSPath
var packages []map[string]util.PackageInfo
if _, err := os.Stat(path); err != nil {
// invalid image directory path
return packages, err
}

// try to find the rpm binary in bin/ or usr/bin/
rpmBinary := filepath.Join(path, "bin/rpm")
if _, err := os.Stat(rpmBinary); err != nil {
rpmBinary = filepath.Join(path, "usr/bin/rpm")
if _, err = os.Stat(rpmBinary); err != nil {
logrus.Errorf("Could not detect RPM binary in unpacked image %s", image.Source)
return packages, nil
}
}

packages, err := rpmDataFromLayerFS(image)
if err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we care about the error here? This fallback pattern makes sense to me here, but I feel like we might want to be logging this error (I know we're not doing it in RPMAnalyzer.getPackages() either). At the very least we should add something to the log message saying something like Couldn't retrieve RPM data from extracted filesystem; running query in container.

Copy link
Contributor Author

@davidcassany davidcassany Jul 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, lets include some better logs

logrus.Info("Running RPM binary from image in a container")
return rpmDataFromLayeredContainers(image.Image)
}
return packages, err
}

// rpmDataFromLayerFS runs a local rpm binary, if any, to query the layer
// rpmdb and returns an array of maps of installed packages.
func rpmDataFromLayerFS(image pkgutil.Image) ([]map[string]util.PackageInfo, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic in this function is really similar to rpmDataFromImageFS. Could you pull out some of the shared code between them and reuse it in both functions? Apart from iterating over layers here I think they're almost identical.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was also thinking about it, I can try to move some part to another function an see how it looks like. I think only lines 435->444 are clear candidates to be moved to another function.

var packages []map[string]util.PackageInfo
// Check there is an executable rpm tool in host
if err := exec.Command("rpm", "--version").Run(); err != nil {
logrus.Warn("No RPM binary in host")
return packages, err
}
dbPath, err := rpmDBPath(image.FSPath)
if err != nil {
logrus.Warnf("Couldn't find RPM database: %s", err.Error())
return packages, err
}
for _, layer := range image.Layers {
layerPackages := make(map[string]util.PackageInfo)
//query only layers that include the rpm database
if _, err := os.Stat(filepath.Join(layer.FSPath, dbPath)); err == nil {
cmdArgs := append([]string{"--root", layer.FSPath, "--dbpath", dbPath}, rpmCmd[1:]...)
out, err := exec.Command(rpmCmd[0], cmdArgs...).Output()
if err != nil {
logrus.Warnf("RPM call failed: %s", err.Error())
return packages, err
}
layerPackages, err = parsePackageData(strings.Split(string(out), "\n"))
if err != nil {
return packages, err
}
}
packages = append(packages, layerPackages)
}

return packages, nil
}

// rpmDataFromLayeredContainers runs a tmp image in a container for each layer,
// queries the data of installed rpm packages and returns an array of maps of
// packages.
func rpmDataFromLayeredContainers(image v1.Image) ([]map[string]util.PackageInfo, error) {
var packages []map[string]util.PackageInfo
tmpImage, err := random.Image(0, 0)
if err != nil {
return packages, err
}
layers, err := image.Layers()
if err != nil {
return packages, err
}
// Append layers one by one to an empty image and query rpm
// database on each iteration
for _, layer := range layers {
tmpImage, err = mutate.AppendLayers(tmpImage, layer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're trying to retrieve a map of packages installed in each layer here, right? I'm wondering if we actually want to be appending each layer onto the previous layers before running rpmDataFromContainer(), and instead create a single-layer image for each layer. Won't we be getting package information for the combination of all layers before the one we're actually examining here?

e.g. if I have layer A with package foo==1.0.0, and layer B with package bar==1.0.0, my maps here will look like
A: {
foo: 1.0.0
},

B: {
foo: 1.0.0, <-- don't want this here right?
bar: 1.0.0,
}

To get around this you could just create a new random.Image for each layer, append it, and then make the rpmDataFromContainer() call. Does this make sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To get around this you could just create a new random.Image for each layer, append it, and then make the rpmDataFromContainer() call. Does this make sense?

This is the same situation we have in APT packages, the package database reports all the installed packages, thus in each layer we will get always the list of all packages included in current and previous layers. To work around this there is the method singleVersionLayerAnalysis

func singleVersionLayerAnalysis(image pkgutil.Image, analyzer SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerAnalyzeResult, error) {
pack, err := analyzer.getPackages(image)
if err != nil {
return &util.SingleVersionPackageLayerAnalyzeResult{}, err
}
var pkgDiffs []util.PackageDiff
// Each layer with modified packages includes a complete list of packages
// in its package database. Thus we diff the current layer with the
// previous one that contains a package database. Layers that do not
// include a package database are omitted.
preInd := -1
for i := range pack {
var pkgDiff util.PackageDiff
if preInd < 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(make(map[string]util.PackageInfo), pack[i])
preInd = i
} else if preInd >= 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(pack[preInd], pack[i])
preInd = i
}
pkgDiffs = append(pkgDiffs, pkgDiff)
}

which basically performs a diff between layers.

Also I am appending the layers one by one because in this case the rpm binary used to parse the database is the one included inside the image, thus at least the layer including this binary (most probably the base layer) should be also part of the container to run. In order to make things simpler without having to include some kind of logic to guess which layers should be appended and which aren't needed I opted to simply stack them one by one. Note that this is the fallback in case the host does not have the RPM tool.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh right, ok I think I follow that. Thanks for the explanation!

if err != nil {
return packages, err
}
layerPackages, err := rpmDataFromContainer(tmpImage)
if err != nil {
return packages, err
}
packages = append(packages, layerPackages)
}

return packages, nil
}