Skip to content

Commit bda3a70

Browse files
authored
Add vulnerability support for discovered dependencies #835 (#846)
Signed-off-by: Thomas Druez <tdruez@nexb.com>
1 parent a215650 commit bda3a70

18 files changed

+176
-85
lines changed

CHANGELOG.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ v32.6.0 (unreleased)
88
creation REST API.
99
https://github.com/nexB/scancode.io/issues/828
1010

11+
- Update the ``fetch_vulnerabilities`` pipe to make the API requests in batches of PURLs.
12+
https://github.com/nexB/scancode.io/issues/835
13+
14+
- Add vulnerability support for discovered dependencies.
15+
The dependency data is loaded using the ``find_vulnerabilities`` pipeline backed by
16+
a VulnerableCode database.
17+
https://github.com/nexB/scancode.io/issues/835
18+
1119
v32.5.0 (2023-08-02)
1220
--------------------
1321

scancodeio/context_processors.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
from scancode_config import __version__ as scancode_toolkit_version
2424

2525
from scancodeio import __version__ as scancodeio_version
26+
from scancodeio import settings
2627

2728

2829
def versions(request):
    """
    Return the template context entries exposing the ScanCode.io version, the
    ScanCode-toolkit version, and the configured VulnerableCode URL.
    """
    # The leading "v" of the version string is dropped before exposing it.
    scancodeio_release = scancodeio_version.lstrip("v")

    context = dict(
        SCANCODEIO_VERSION=scancodeio_release,
        SCANCODE_TOOLKIT_VERSION=scancode_toolkit_version,
        VULNERABLECODE_URL=settings.VULNERABLECODE_URL,
    )
    return context

scanpipe/api/serializers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ class Meta:
363363
"datafile_path",
364364
"datasource_id",
365365
"package_type",
366+
"affected_by_vulnerabilities",
366367
]
367368

368369

scanpipe/filters.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
563563
"is_optional",
564564
"is_resolved",
565565
"datasource_id",
566+
"is_vulnerable",
566567
]
567568

568569
search = django_filters.CharFilter(
@@ -589,6 +590,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
589590
is_runtime = StrictBooleanFilter()
590591
is_optional = StrictBooleanFilter()
591592
is_resolved = StrictBooleanFilter()
593+
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")
592594

593595
class Meta:
594596
model = DiscoveredDependency
@@ -607,6 +609,7 @@ class Meta:
607609
"is_optional",
608610
"is_resolved",
609611
"datasource_id",
612+
"is_vulnerable",
610613
]
611614

612615

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Generated by Django 4.2.3 on 2023-08-02 10:43
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the ``affected_by_vulnerabilities`` field on ``discovereddependency``."""

    # Must be applied after the 0039 migration of the ``scanpipe`` app.
    dependencies = [("scanpipe", "0039_discoveredpackage_compliance_alert_and_more")]

    operations = [
        migrations.AddField(
            model_name="discovereddependency",
            name="affected_by_vulnerabilities",
            # JSON column defaulting to an empty list.
            field=models.JSONField(default=list, blank=True),
        )
    ]

scanpipe/models.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,11 @@ def vulnerable_package_count(self):
10251025
"""Return the number of vulnerable packages related to this project."""
10261026
return self.discoveredpackages.vulnerable().count()
10271027

1028+
@cached_property
def vulnerable_dependency_count(self):
    """Return how many dependencies of this project are vulnerable."""
    vulnerable_dependencies = self.discovereddependencies.vulnerable()
    return vulnerable_dependencies.count()
1032+
10281033
@cached_property
10291034
def dependency_count(self):
10301035
"""Return the number of dependencies related to this project."""
@@ -2245,11 +2250,31 @@ def __str__(self):
22452250
return f"{self.from_resource.pk} > {self.to_resource.pk} using {self.map_type}"
22462251

22472252

2248-
class DiscoveredPackageQuerySet(PackageURLQuerySetMixin, ProjectRelatedQuerySet):
2253+
class VulnerabilityMixin(models.Model):
    """Abstract model mixin providing the vulnerability field and helpers."""

    # Vulnerability entries stored as JSON; an empty list by default.
    affected_by_vulnerabilities = models.JSONField(default=list, blank=True)

    class Meta:
        abstract = True

    @property
    def is_vulnerable(self):
        """Return True when vulnerability data is stored on this instance."""
        return True if self.affected_by_vulnerabilities else False
2265+
2266+
2267+
class VulnerabilityQuerySetMixin:
    """QuerySet mixin for models having an ``affected_by_vulnerabilities`` field."""

    def vulnerable(self):
        """Return the objects whose vulnerability data is not an empty value."""
        has_vulnerability_data = ~Q(affected_by_vulnerabilities__in=EMPTY_VALUES)
        return self.filter(has_vulnerability_data)
22512270

22522271

2272+
class DiscoveredPackageQuerySet(
    VulnerabilityQuerySetMixin,
    PackageURLQuerySetMixin,
    ProjectRelatedQuerySet,
):
    """QuerySet combining the vulnerability, PURL, and project related mixins."""
2276+
2277+
22532278
class AbstractPackage(models.Model):
22542279
"""These fields should be kept in line with `packagedcode.models.PackageData`."""
22552280

@@ -2446,20 +2471,6 @@ class Meta:
24462471
abstract = True
24472472

24482473

2449-
class VulnerabilityMixin(models.Model):
2450-
"""Add the vulnerability related fields and methods."""
2451-
2452-
affected_by_vulnerabilities = models.JSONField(blank=True, default=list)
2453-
2454-
@property
2455-
def is_vulnerable(self):
2456-
"""Returns True if this instance is affected by vulnerabilities."""
2457-
return bool(self.affected_by_vulnerabilities)
2458-
2459-
class Meta:
2460-
abstract = True
2461-
2462-
24632474
class DiscoveredPackage(
24642475
ProjectRelatedModel,
24652476
ExtraDataFieldMixin,
@@ -2746,7 +2757,9 @@ def as_cyclonedx(self):
27462757
)
27472758

27482759

2749-
class DiscoveredDependencyQuerySet(PackageURLQuerySetMixin, ProjectRelatedQuerySet):
2760+
class DiscoveredDependencyQuerySet(
2761+
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
2762+
):
27502763
def prefetch_for_serializer(self):
27512764
"""
27522765
Optimized prefetching for a QuerySet to be consumed by the
@@ -2767,6 +2780,7 @@ class DiscoveredDependency(
27672780
ProjectRelatedModel,
27682781
SaveProjectErrorMixin,
27692782
UpdateFromDataMixin,
2783+
VulnerabilityMixin,
27702784
PackageURLMixin,
27712785
):
27722786
"""

scanpipe/pipelines/find_vulnerabilities.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,17 @@
2626

2727
class FindVulnerabilities(Pipeline):
2828
"""
29-
Find vulnerabilities for discovered packages in the VulnerableCode database.
29+
Find vulnerabilities for packages and dependencies in the VulnerableCode database.
3030
31-
Vulnerability data is stored on each package instance.
31+
Vulnerability data is stored on each package and dependency instance.
3232
"""
3333

3434
@classmethod
def steps(cls):
    """Return the pipeline steps in their execution order."""
    pipeline_steps = (
        cls.check_vulnerablecode_service_availability,
        cls.lookup_packages_vulnerabilities,
        cls.lookup_dependencies_vulnerabilities,
    )
    return pipeline_steps
4041

4142
def check_vulnerablecode_service_availability(self):
@@ -46,7 +47,12 @@ def check_vulnerablecode_service_availability(self):
4647
if not vulnerablecode.is_available():
4748
raise Exception("VulnerableCode is not available.")
4849

49-
def lookup_vulnerabilities(self):
50+
def lookup_packages_vulnerabilities(self):
    """Check for vulnerabilities for each of the project's discovered packages."""
    vulnerablecode.fetch_vulnerabilities(
        self.project.discoveredpackages.all(),
        logger=self.log,
    )
54+
55+
def lookup_dependencies_vulnerabilities(self):
    """
    Check for vulnerabilities for each of the project's discovered dependencies.
    Only the dependencies flagged as resolved are looked up.
    """
    resolved_dependencies = self.project.discovereddependencies.filter(
        is_resolved=True
    )
    vulnerablecode.fetch_vulnerabilities(resolved_dependencies, logger=self.log)

scanpipe/pipes/vulnerablecode.py

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,22 @@ def is_available():
7272
return response.status_code == requests.codes.ok
7373

7474

75-
def get_purls(packages):
75+
def chunked(iterable, chunk_size):
    """
    Break an `iterable` into lists of at most `chunk_size` items.

    Any iterable is supported, including generators and other objects that
    cannot be sliced or measured with ``len()``.
    A ``ValueError`` is raised on iteration when `chunk_size` is lower than 1,
    instead of silently yielding nothing.

    >>> list(chunked([1, 2, 3, 4, 5], 2))
    [[1, 2], [3, 4], [5]]
    >>> list(chunked([1, 2, 3, 4, 5], 3))
    [[1, 2, 3], [4, 5]]
    >>> list(chunked(iter("abc"), 2))
    [['a', 'b'], ['c']]
    """
    from itertools import islice

    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # Consume a single shared iterator so each chunk resumes where the
    # previous one stopped; an empty chunk means the input is exhausted.
    iterator = iter(iterable)
    while chunk := list(islice(iterator, chunk_size)):
        yield chunk
87+
88+
89+
def get_purls(packages):
    """Collect the non-empty ``package_url`` values of the given `packages`."""
    purls = []
    for package in packages:
        package_url = package.package_url
        # Objects without a usable PURL are left out.
        if package_url:
            purls.append(package_url)
    return purls
8192

8293

@@ -168,6 +179,7 @@ def bulk_search_by_purl(
168179

169180
data = {
170181
"purls": purls,
182+
"vulnerabilities_only": True,
171183
}
172184

173185
logger.debug(f"VulnerableCode: url={url} purls_count={len(purls)}")
@@ -190,32 +202,33 @@ def bulk_search_by_cpes(
190202
return request_post(url, data, timeout)
191203

192204

193-
def get_unique_vulnerabilities(packages_data):
205+
def fetch_vulnerabilities(packages, chunk_size=1000, logger=logger.info):
    """
    Fetch and store vulnerabilities for each provided ``packages``.

    The PURLs are used for the lookups in batches of ``chunk_size`` per request.
    A PURL shared by several objects is requested only once.
    The objects whose PURL is reported with ``affected_by_vulnerabilities`` data
    are saved with a single ``bulk_update`` call, then a summary message is sent
    to the ``logger`` callable. Nothing is saved nor logged when no object is
    affected.
    """
    # ``dict.fromkeys`` drops the duplicated PURLs while keeping their order.
    unique_purls = list(dict.fromkeys(get_purls(packages)))

    vulnerabilities_by_purl = {}
    for purls_batch in chunked(unique_purls, chunk_size):
        # NOTE(review): the ``or []`` guards against a falsy (e.g. ``None``)
        # response for a failed request -- confirm ``bulk_search_by_purl``
        # return contract.
        response_data = bulk_search_by_purl(purls_batch) or []
        for vulnerability_data in response_data:
            vulnerabilities_by_purl[vulnerability_data["purl"]] = vulnerability_data

    unsaved_objects = []
    for package in packages:
        package_data = vulnerabilities_by_purl.get(package.package_url)
        if not package_data:
            continue
        if affected_by := package_data.get("affected_by_vulnerabilities", []):
            package.affected_by_vulnerabilities = affected_by
            unsaved_objects.append(package)

    if not unsaved_objects:
        return

    # The model class is taken from the first object to update.
    model_class = unsaved_objects[0].__class__
    model_class.objects.bulk_update(
        objs=unsaved_objects,
        fields=["affected_by_vulnerabilities"],
        batch_size=1000,
    )
    logger(
        f"{len(unsaved_objects)} {model_class._meta.verbose_name_plural} updated "
        f"with vulnerability data."
    )

scanpipe/templates/scanpipe/dependency_list.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323
<tr class="break-word">
2424
<td style="min-width: 300px;" title="{{ dependency.dependency_uid }}">
2525
<a href="{{ dependency.get_absolute_url }}">{{ dependency.purl }}</a>
26+
{% if dependency.is_vulnerable %}
27+
<a href="{{ dependency.get_absolute_url }}#vulnerabilities">
28+
<i class="fa-solid fa-bug fa-sm has-text-danger" title="Vulnerabilities"></i>
29+
</a>
30+
{% endif %}
2631
</td>
2732
<td>
2833
<a href="?type={{ dependency.type }}" class="is-black-link">{{ dependency.type }}</a>

scanpipe/templates/scanpipe/includes/project_summary_level.html

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@
2323
<div class="level-item has-text-centered">
2424
<div>
2525
<p class="heading">Dependencies</p>
26-
<p class="{{ title_class }}">
26+
<p class="{{ title_class }} is-flex is-align-items-center is-justify-content-center">
2727
{% if project.dependency_count %}
2828
<a href="{% url 'project_dependencies' project.slug %}">
2929
{{ project.dependency_count|intcomma }}
3030
</a>
31+
{% if project.vulnerable_dependency_count %}
32+
<a href="{% url 'project_dependencies' project.slug %}?is_vulnerable=yes" class="has-text-danger is-size-5 ml-2">
33+
{{ project.vulnerable_dependency_count|intcomma }}
34+
<i class="fa-solid fa-bug is-size-6"></i>
35+
</a>
36+
{% endif %}
3137
{% else %}
3238
<span>0</span>
3339
{% endif %}

0 commit comments

Comments
 (0)