Skip to content

Commit

Permalink
matcher: rework matcher DB to deduplicate data and reduce size
Browse files Browse the repository at this point in the history
We have many vulnerabilities that share their metadata (name and
description); this change saves that metadata once and references it
everywhere it's needed.

Signed-off-by: crozzy <joseph.crosland@gmail.com>
  • Loading branch information
crozzy committed Mar 27, 2024
1 parent 406b4f8 commit 714ecaf
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 8 deletions.
11 changes: 7 additions & 4 deletions datastore/query_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ func buildGetQuery(record *claircore.IndexRecord, opts *datastore.GetOpts) (stri
}

query := psql.Select(
"hash",
"name",
"description",
"vuln.hash",
"name.value",
"desc.value",
"issued",
"links",
"severity",
Expand All @@ -109,7 +109,10 @@ func buildGetQuery(record *claircore.IndexRecord, opts *datastore.GetOpts) (stri
"repo_uri",
"fixed_in_version",
"updater",
).From("vuln").Where(exps...)
).From("vuln").
Join(goqu.I("metadata").As("desc"), goqu.On(goqu.Ex{"vuln.description_id": goqu.I("desc.id")})).
Join(goqu.I("metadata").As("name"), goqu.On(goqu.Ex{"vuln.name_id": goqu.I("name.id")})).
Where(exps...)

sql, _, err := query.ToSQL()
if err != nil {
Expand Down
47 changes: 43 additions & 4 deletions datastore/sqlite_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,36 @@ func hashEnrichment(r *driver.EnrichmentRecord) (k string, d []byte) {
return "md5", h.Sum(nil)
}

// getMetadata returns the id of the metadata row that stores val for the
// given kind, inserting that row first when it does not exist yet.
//
// Rows are identified by (kind, hash_kind, hash), where hash is the MD5 digest
// of val, so identical values of the same kind resolve to the same id. All
// statements run on tx; nothing is committed here.
//
// It returns a wrapped error if either the lookup or the insert fails.
func getMetadata(ctx context.Context, tx *sql.Tx, kind string, val string) (int64, error) {
	const (
		hashKind = "md5"
		get      = `
SELECT id FROM metadata
WHERE kind = $1
AND hash_kind = $2
AND hash = $3`
		insert = `
INSERT INTO metadata (
kind, hash_kind, hash, value
) VALUES (
$1, $2, $3, $4
)
ON CONFLICT (kind, hash_kind, hash) DO NOTHING
RETURNING id;`
	)

	var metadataID int64
	s := md5.Sum([]byte(val))

	// The lookup has exactly three placeholders; the value itself is only
	// needed by the insert. Passing a fourth argument here makes database/sql
	// reject the call for drivers that report their placeholder count.
	err := tx.QueryRowContext(ctx, get, kind, hashKind, s[:]).Scan(&metadataID)
	switch {
	case err == sql.ErrNoRows:
		// Not stored yet: insert it and read back the generated id.
		if err := tx.QueryRowContext(ctx, insert, kind, hashKind, s[:], val).Scan(&metadataID); err != nil {
			return 0, fmt.Errorf("failed to insert %s metadata: %w", kind, err)
		}
	case err != nil:
		return 0, fmt.Errorf("failed to look up %s metadata: %w", kind, err)
	}
	return metadataID, nil
}

// UpdateVulnerabilities creates a new UpdateOperation, inserts the provided
// vulnerabilities, and ensures vulnerabilities from previous updates are
// not queried by clients.
Expand All @@ -178,11 +208,11 @@ func (ms *sqliteMatcherStore) UpdateVulnerabilities(ctx context.Context, updater
insert = `
INSERT INTO vuln (
hash_kind, hash,
name, updater, description, issued, links, severity, normalized_severity,
updater, issued, links, severity, normalized_severity,
package_name, package_version, package_module, package_arch, package_kind,
dist_id, dist_name, dist_version, dist_version_code_name, dist_version_id, dist_arch, dist_cpe, dist_pretty_name,
repo_name, repo_key, repo_uri,
fixed_in_version, arch_operation, version_kind, vulnerable_range
fixed_in_version, arch_operation, version_kind, vulnerable_range, description_id, name_id
) VALUES (
$1, $2,
$3, $4, $5, $6, $7, $8, $9,
Expand All @@ -202,6 +232,15 @@ func (ms *sqliteMatcherStore) UpdateVulnerabilities(ctx context.Context, updater
defer tx.Rollback()

for _, vuln := range vs {
// Get or save description
descID, err := getMetadata(ctx, tx, "description", vuln.Description)
if err != nil {
return uuid.Nil, fmt.Errorf("failed to get description: %w", err)
}
nameID, err := getMetadata(ctx, tx, "name", vuln.Name)
if err != nil {
return uuid.Nil, fmt.Errorf("failed to get name: %w", err)
}
if vuln.Package == nil || vuln.Package.Name == "" {
continue
}
Expand All @@ -220,11 +259,11 @@ func (ms *sqliteMatcherStore) UpdateVulnerabilities(ctx context.Context, updater

if _, err := tx.ExecContext(ctx, insert,
hashKind, hash,
vuln.Name, vuln.Updater, vuln.Description, vuln.Issued.Format(time.RFC3339), vuln.Links, vuln.Severity, vuln.NormalizedSeverity,
vuln.Updater, vuln.Issued.Format(time.RFC3339), vuln.Links, vuln.Severity, vuln.NormalizedSeverity,
pkg.Name, pkg.Version, pkg.Module, pkg.Arch, pkg.Kind,
dist.DID, dist.Name, dist.Version, dist.VersionCodeName, dist.VersionID, dist.Arch, &dist.CPE, dist.PrettyName,
repo.Name, repo.Key, repo.URI,
vuln.FixedInVersion, vuln.ArchOperation, vKind, strings.Join([]string{vrLower, vrUpper}, "__"),
vuln.FixedInVersion, vuln.ArchOperation, vKind, strings.Join([]string{vrLower, vrUpper}, "__"), descID, nameID,
); err != nil {
return uuid.Nil, fmt.Errorf("failed to insert vulnerability: %w", err)
}
Expand Down
56 changes: 56 additions & 0 deletions migrations/02-add-metadata.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- metadata holds text values (vulnerability names and descriptions) so that
-- each distinct value is stored once and referenced by id from vuln.
CREATE TABLE IF NOT EXISTS metadata (
id INTEGER PRIMARY KEY,
-- What the value represents; the matcher store writes 'name' and 'description'.
kind TEXT NOT NULL,
-- Algorithm used to produce hash; the matcher store writes 'md5'.
hash_kind TEXT NOT NULL,
-- Digest of value, used together with kind and hash_kind to find an existing row.
-- NOTE(review): the Go code binds the raw digest bytes here although the
-- column is declared TEXT -- confirm this is intended.
hash TEXT NOT NULL,
-- The full, de-duplicated text.
value TEXT NOT NULL,
-- One row per distinct (kind, hash_kind, hash); also the ON CONFLICT target
-- of the insert issued by the matcher store.
UNIQUE (kind, hash_kind, hash)
);

-- Recreate vuln with name_id/description_id references into metadata.
-- Dropping the table discards every existing vuln row.
-- NOTE(review): presumably the rows are repopulated by the next updater run -- confirm.
DROP TABLE vuln;

CREATE TABLE vuln (
id INTEGER PRIMARY KEY,
hash_kind TEXT NOT NULL,
hash TEXT NOT NULL,
updater TEXT,
-- References to the shared name and description text in metadata.
name_id INTEGER,
description_id INTEGER,
issued TEXT,
links TEXT,
severity TEXT,
normalized_severity TEXT,
package_name TEXT,
package_version TEXT,
package_module TEXT,
package_arch TEXT,
package_kind TEXT,
dist_id TEXT,
dist_name TEXT,
dist_version TEXT,
dist_version_code_name TEXT,
dist_version_id TEXT,
dist_arch TEXT,
dist_cpe TEXT,
dist_pretty_name TEXT,
repo_name TEXT,
repo_key TEXT,
repo_uri TEXT,
fixed_in_version TEXT,
arch_operation TEXT,
vulnerable_range TEXT,
version_kind TEXT,
-- A vulnerability is identified by its content hash.
UNIQUE (hash_kind, hash),
FOREIGN KEY (description_id) REFERENCES metadata(id),
FOREIGN KEY (name_id) REFERENCES metadata(id)
);

-- Composite index over the package, distribution and repository columns of vuln.
-- NOTE(review): presumably matches the filters built by the matcher's get query -- confirm.
CREATE INDEX vuln_lookup_idx on vuln (package_name, dist_id,
dist_name, dist_pretty_name,
dist_version, dist_version_id,
package_module, dist_version_code_name,
repo_name, dist_arch,
dist_cpe, repo_key,
repo_uri);
-- Index for selecting vuln rows by the updater that produced them.
CREATE INDEX vuln_lookup_updater ON vuln (updater);

4 changes: 4 additions & 0 deletions migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@ var MatcherMigrations = []migrate.Migration{
ID: 1,
Up: runFile("01-init.sql"),
},
{
ID: 2,
Up: runFile("02-add-metadata.sql"),
},
}

0 comments on commit 714ecaf

Please sign in to comment.