Skip to content

Commit c257e53

Browse files
committed
Isolate OS matchers to re-use by node scanning
1 parent 961e5ab commit c257e53

File tree

3 files changed

+184
-20
lines changed

3 files changed

+184
-20
lines changed

pkg/matcher/matcher.go

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ import (
44
"io"
55
"io/fs"
66
"os"
7+
"path"
78
"path/filepath"
89
"regexp"
910
"strings"
1011

12+
"github.com/stackrox/rox/pkg/set"
1113
"github.com/stackrox/scanner/pkg/whiteout"
1214
)
1315

@@ -20,25 +22,84 @@ type Matcher interface {
2022
Match(fullPath string, fileInfo os.FileInfo, contents io.ReaderAt) (matches bool, extract bool)
2123
}
2224

25+
// PrefixMatcher is a Matcher that uses file prefixes and can report the
// directories those prefixes refer to.
26+
type PrefixMatcher interface {
27+
Matcher
28+
29+
// GetCommonPrefixDirs lists all directories from all the prefixes used in this
30+
// matcher, and returns a list of common directories in all of them, up to one
31+
// level below the root dir, e.g. if the prefixes are {"a/b/f", "a/c/f", "b/c/"} the
32+
// common prefix list is {"a/", "b/c/"}. The returned directories will always be
33+
// terminated with /. If a name is not terminated by a slash it is considered a
34+
// file, and only its parent directory is taken into account. Example:
35+
//
36+
// Prefixes:
37+
// - var/lib/rpm/
38+
// - var/lib/dpkg/
39+
// - root/buildinfo/
40+
// - usr/bin
41+
// - usr/bin/bash
42+
// - etc/apt.sources
43+
//
44+
// Output:
45+
// - var/lib/
46+
// - root/buildinfo/
47+
// - usr/
48+
// - etc/
49+
GetCommonPrefixDirs() []string
50+
}
51+
2352
// allowlistMatcher matches paths against a fixed list of string prefixes.
type allowlistMatcher struct {
2453
// allowlist holds the prefixes compared against each path in Match.
allowlist []string
2554
}
2655

27-
// NewPrefixAllowlistMatcher returns a matcher that matches all filenames which have any
28-
// of the passed paths as a prefix.
29-
func NewPrefixAllowlistMatcher(allowlist ...string) Matcher {
56+
// NewPrefixAllowlistMatcher returns a prefix matcher that matches all filenames
57+
// which have any of the passed paths as a prefix. The allowlist slice is stored
// as passed, without being copied.
58+
func NewPrefixAllowlistMatcher(allowlist ...string) PrefixMatcher {
3059
return &allowlistMatcher{allowlist: allowlist}
3160
}
3261

33-
func (w *allowlistMatcher) Match(fullPath string, _ os.FileInfo, _ io.ReaderAt) (matches bool, extract bool) {
34-
for _, s := range w.allowlist {
62+
// Match returns (true, true) when fullPath starts with any entry of the
// allowlist, and (false, false) otherwise. The file info and contents
// arguments are not used.
func (m *allowlistMatcher) Match(fullPath string, _ os.FileInfo, _ io.ReaderAt) (matches bool, extract bool) {
63+
for _, s := range m.allowlist {
3564
// Plain string-prefix comparison; no path normalization is applied.
if strings.HasPrefix(fullPath, s) {
3665
return true, true
3766
}
3867
}
3968
return false, false
4069
}
4170

71+
// GetCommonPrefixDirs returns the result of findCommonDirPrefixes applied to
// the matcher's allowlist.
func (m *allowlistMatcher) GetCommonPrefixDirs() []string {
72+
return findCommonDirPrefixes(m.allowlist)
73+
}
74+
75+
// findCommonDirPrefixes goes over all prefixes, steps one level down from the
76+
// root directory, and returns exactly one common prefix per first level dir
77+
// referenced. It does so by building a trie-like structure of the directory
78+
// tree and following each first level dir down while it has a single child.
//
// For example, {"var/lib/rpm/", "var/lib/dpkg/"} yields {"var/lib/"}: "var/"
// has the single child "var/lib/", which in turn has two children.
79+
func findCommonDirPrefixes(prefixes []string) []string {
80+
// pre maps a directory ("" for the root) to the set of entries recorded
// directly under it.
pre := make(map[string]set.StringSet)
81+
for _, d := range prefixes {
82+
// Walk from the prefix up to the root, registering each entry under its
// parent directory.
for d != "" {
83+
// Trim the trailing slash so that Split yields the parent of a directory
// entry rather than the entry itself.
p, _ := path.Split(strings.TrimSuffix(d, "/"))
84+
// Fetch, update and store back the set of children recorded under p.
s := pre[p]
85+
s.Add(d)
86+
pre[p] = s
87+
d = p
88+
}
89+
}
90+
// Work on one step below root.
91+
firstLevelDirs := pre[""].AsSlice()
92+
// ret shares firstLevelDirs' backing array; each iteration below reads one
// element and appends exactly one.
ret := firstLevelDirs[:0]
93+
for _, d := range firstLevelDirs {
94+
// Descend while the current entry has exactly one child.
for len(pre[d]) == 1 {
95+
d = pre[d].GetArbitraryElem()
96+
}
97+
// Keep only the directory part, dropping a trailing file name if there is
// one. This d shadows the loop variable.
d, _ := path.Split(d)
98+
ret = append(ret, d)
99+
}
100+
return ret
101+
}
102+
42103
type whiteoutMatcher struct{}
43104

44105
// NewWhiteoutMatcher returns a matcher that matches all whiteout files

pkg/matcher/matcher_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,82 @@ func TestAndMatcher(t *testing.T) {
199199
assert.False(t, match)
200200
assert.False(t, extract)
201201
}
202+
203+
// Test_findCommonDirPrefixes runs findCommonDirPrefixes over a table of prefix
// lists. Results are compared with assert.ElementsMatch, so the order of the
// returned directories is not checked.
func Test_findCommonDirPrefixes(t *testing.T) {
204+
tests := []struct {
205+
name string
206+
prefixes []string
207+
want []string
208+
}{
209+
{
210+
name: "happy case",
211+
prefixes: []string{
212+
"bin/[",
213+
"bin/busybox",
214+
"etc/alpine-release",
215+
"etc/apt/sources.list",
216+
"etc/centos-release",
217+
"etc/lsb-release",
218+
"etc/oracle-release",
219+
"etc/os-release",
220+
"etc/os-release",
221+
"etc/redhat-release",
222+
"etc/system-release",
223+
"lib/apk/db/installed",
224+
"root/buildinfo/content_manifests",
225+
"usr/lib/os-release",
226+
"var/lib/dpkg/status",
227+
"var/lib/rpm/Packages",
228+
"var/lib/rpm/Packages",
229+
},
230+
want: []string{
231+
"bin/",
232+
"etc/",
233+
"lib/apk/db/",
234+
"root/buildinfo/",
235+
"usr/lib/",
236+
"var/lib/",
237+
},
238+
},
239+
{
240+
name: "prefixes with directories",
241+
prefixes: []string{
242+
"foo/bar/",
243+
"foo/bar/ok/",
244+
"foo/bar/nook/",
245+
"foo/bar/nook/",
246+
},
247+
want: []string{"foo/bar/"},
248+
},
249+
{
250+
name: "non-slash are considered files",
251+
prefixes: []string{
252+
"usr/bin",
253+
"usr/bin/",
254+
},
255+
want: []string{"usr/"},
256+
},
257+
{
258+
name: "example from doc comment",
259+
prefixes: []string{
260+
"var/lib/rpm/",
261+
"var/lib/dpkg/",
262+
"root/buildinfo/",
263+
"usr/bin",
264+
"usr/bin/bash",
265+
"etc/apt.sources",
266+
},
267+
want: []string{
268+
"var/lib/",
269+
"root/buildinfo/",
270+
"usr/",
271+
"etc/",
272+
},
273+
},
274+
}
275+
for _, tt := range tests {
276+
t.Run(tt.name, func(t *testing.T) {
277+
assert.ElementsMatch(t, tt.want, findCommonDirPrefixes(tt.prefixes))
278+
})
279+
}
280+
}

singletons/requiredfilenames/matcher.go

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,36 @@ import (
1111
)
1212

1313
var (
14+
osMatcher matcher.PrefixMatcher
15+
osMatcherOnce sync.Once
16+
17+
activeVulnMatcher matcher.Matcher
18+
activeVulnMatcherOnce sync.Once
19+
1420
instance matcher.Matcher
1521
once sync.Once
22+
1623
// dynamicLibRegexp matches all dynamic libraries.
1724
dynamicLibRegexp = regexp.MustCompile(`(^|/)(lib|ld-)[^/.-][^/]*\.so(\.[^/.]+)*$`)
1825
// libraryDirRegexp matches all files under directories where the dynamic libraries are commonly found.
1926
// This is to filter for symbolic links needed to resolve dynamic library paths.
2027
libraryDirRegexp = regexp.MustCompile(`^(usr/(local/)?)?lib(32|64)?(/.+|$)`)
2128
)
2229

23-
// SingletonMatcher returns the singleton matcher instance to use for extracting
24-
// files to be analyzed for operating system features.
25-
// Note: language-level analyzers implement a different interface, and do not require
26-
// extraction of files into a `FileMap`. Therefore, the respective files do not need
27-
// to be matched here.
28-
func SingletonMatcher() matcher.Matcher {
29-
once.Do(func() {
30+
// SingletonOSMatcher returns the singleton matcher instance for extracting files
31+
// for OS package analysis.
32+
func SingletonOSMatcher() matcher.PrefixMatcher {
33+
osMatcherOnce.Do(func() {
3034
allFileNames := append(featurefmt.RequiredFilenames(), featurens.RequiredFilenames()...)
31-
clairMatcher := matcher.NewPrefixAllowlistMatcher(allFileNames...)
32-
whiteoutMatcher := matcher.NewWhiteoutMatcher()
33-
34-
allMatchers := make([]matcher.Matcher, 0, 6)
35-
allMatchers = append(allMatchers, clairMatcher, whiteoutMatcher)
35+
osMatcher = matcher.NewPrefixAllowlistMatcher(allFileNames...)
36+
})
37+
return osMatcher
38+
}
3639

37-
// Active Vuln Mgmt related matchers.
40+
// SingletonActiveVulnMatcher returns the singleton matcher instance for
41+
// extracting files for active vulnerability analysis.
42+
func SingletonActiveVulnMatcher() matcher.Matcher {
43+
activeVulnMatcherOnce.Do(func() {
3844
dpkgFilenamesMatcher := matcher.NewRegexpMatcher(dpkg.FilenamesListRegexp, true)
3945
dynamicLibMatcher := matcher.NewRegexpMatcher(dynamicLibRegexp, false)
4046
libDirSymlinkMatcher := matcher.NewAndMatcher(matcher.NewRegexpMatcher(libraryDirRegexp, false), matcher.NewSymbolicLinkMatcher())
@@ -44,9 +50,27 @@ func SingletonMatcher() matcher.Matcher {
4450
// remaining executable files which went unmatched otherwise.
4551
// Therefore, this matcher MUST be the last matcher.
4652
executableMatcher := matcher.NewExecutableMatcher()
47-
allMatchers = append(allMatchers, dpkgFilenamesMatcher, dynamicLibMatcher, libDirSymlinkMatcher, executableMatcher)
53+
activeVulnMatcher = matcher.NewOrMatcher(
54+
dpkgFilenamesMatcher,
55+
dynamicLibMatcher,
56+
libDirSymlinkMatcher,
57+
executableMatcher,
58+
)
59+
})
60+
return activeVulnMatcher
61+
}
4862

49-
instance = matcher.NewOrMatcher(allMatchers...)
63+
// SingletonMatcher returns the singleton matcher instance to use for extracting
64+
// files for analyzing container images. It combines the whiteout matcher with
65+
// the OS and active vulnerability matchers. Note: language-level analyzers implement a different
66+
// interface, and do not require extraction of files. Therefore, the respective
67+
// files do not need to be matched here.
68+
func SingletonMatcher() matcher.Matcher {
69+
// The combined matcher is built on the first call only.
once.Do(func() {
70+
instance = matcher.NewOrMatcher(
71+
matcher.NewWhiteoutMatcher(),
72+
SingletonOSMatcher(),
73+
SingletonActiveVulnMatcher())
5074
})
5175
return instance
5276
}

0 commit comments

Comments
 (0)