Skip to content

Commit 5a4cdc4

Browse files
committed
Add support for advisory curation
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent be89117 commit 5a4cdc4

File tree

9 files changed

+305
-23
lines changed

9 files changed

+305
-23
lines changed

vulnerabilities/importer.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,31 @@ def from_dict(cls, advisory_data):
499499
if "fixed_version_range" in affected_packages[0]
500500
else AffectedPackage
501501
)
502+
if advisory_data.get("advisory_id") or advisory_data.get("severities") or affected_package_cls is AffectedPackageV2:
503+
transformed = {
504+
"advisory_id": advisory_data["advisory_id"],
505+
"aliases": advisory_data["aliases"],
506+
"summary": advisory_data["summary"],
507+
"affected_packages": [
508+
affected_package_cls.from_dict(pkg)
509+
for pkg in affected_packages
510+
if pkg is not None
511+
],
512+
"references_v2": [
513+
ReferenceV2.from_dict(ref) for ref in advisory_data["references"]
514+
],
515+
"severities": [
516+
VulnerabilitySeverity.from_dict(sev)
517+
for sev in advisory_data.get("severities", [])
518+
],
519+
"date_published": datetime.datetime.fromisoformat(date_published)
520+
if date_published
521+
else None,
522+
"weaknesses": advisory_data["weaknesses"],
523+
"url": advisory_data.get("url") or None,
524+
"original_advisory_text": advisory_data.get("original_advisory_text") or None,
525+
}
526+
return cls(**transformed)
502527
transformed = {
503528
"aliases": advisory_data["aliases"],
504529
"summary": advisory_data["summary"],

vulnerabilities/models.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,13 +2422,22 @@ def create_new_job(self, execute_now=False):
24222422

24232423

24242424
ISSUE_TYPE_CHOICES = [
2425-
("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"),
2426-
("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"),
2425+
(
2426+
"MISSING_AFFECTED_PACKAGE",
2427+
"Advisory is missing affected package",
2428+
),
2429+
(
2430+
"MISSING_FIXED_BY_PACKAGE",
2431+
"Advisory is missing fixed-by package",
2432+
),
24272433
(
24282434
"MISSING_AFFECTED_AND_FIXED_BY_PACKAGES",
24292435
"Advisory is missing both affected and fixed-by packages",
24302436
),
2431-
("MISSING_SUMMARY", "Advisory is missing summary"),
2437+
(
2438+
"MISSING_SUMMARY",
2439+
"Advisory is missing summary",
2440+
),
24322441
("CONFLICTING_FIXED_BY_PACKAGES", "Advisories have conflicting fixed-by packages"),
24332442
("CONFLICTING_AFFECTED_PACKAGES", "Advisories have conflicting affected packages"),
24342443
(
@@ -2502,22 +2511,22 @@ class AdvisoryToDoV2(models.Model):
25022511
# (see https://code.djangoproject.com/ticket/702), we use related_advisories_id
25032512
# to avoid creating duplicate issue for same set of advisories,
25042513
related_advisories_id = models.CharField(
2505-
max_length=40,
2506-
help_text="SHA1 digest of the unique_content_id field of the applicable advisories.",
2514+
max_length=64,
2515+
help_text="Computed unique content ID that identifies the related advisories.",
25072516
)
25082517

25092518
advisories = models.ManyToManyField(
25102519
"AdvisoryV2",
25112520
through="ToDoRelatedAdvisoryV2",
25122521
related_name="advisory_todos",
2513-
help_text="Advisory/ies where this TODO is applicable.",
2522+
help_text="Advisories for this TODO.",
25142523
)
25152524

25162525
issue_type = models.CharField(
25172526
max_length=50,
25182527
choices=ISSUE_TYPE_CHOICES,
25192528
db_index=True,
2520-
help_text="Select the issue that needs to be addressed from the available options.",
2529+
help_text="The issue type that needs to be addressed.",
25212530
)
25222531

25232532
issue_detail = models.TextField(
@@ -2530,12 +2539,6 @@ class AdvisoryToDoV2(models.Model):
25302539
help_text="Timestamp indicating when this TODO was created.",
25312540
)
25322541

2533-
is_resolved = models.BooleanField(
2534-
default=False,
2535-
db_index=True,
2536-
help_text="This TODO is resolved or not.",
2537-
)
2538-
25392542
resolved_at = models.DateTimeField(
25402543
null=True,
25412544
blank=True,
@@ -2547,10 +2550,24 @@ class AdvisoryToDoV2(models.Model):
25472550
help_text="Additional detail on how this TODO was resolved.",
25482551
)
25492552

2553+
curation_advisory = models.ForeignKey(
2554+
"AdvisoryV2",
2555+
null=True,
2556+
blank=True,
2557+
on_delete=models.SET_NULL,
2558+
related_name="curated_todos",
2559+
help_text="The advisory that was created/updated to resolve this TODO.",
2560+
)
2561+
2562+
status = models.CharField(
2563+
max_length=20,
2564+
default="open",
2565+
help_text="The current status of the TODO item.",
2566+
)
2567+
25502568
class Meta:
25512569
unique_together = ("related_advisories_id", "issue_type")
25522570

2553-
25542571
class AdvisorySeverity(models.Model):
25552572
url = models.URLField(
25562573
max_length=1024,

vulnerabilities/pipelines/compute_advisory_todo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from vulnerabilities.models import Alias
1919
from vulnerabilities.models import ToDoRelatedAdvisory
2020
from vulnerabilities.pipelines import VulnerableCodePipeline
21-
from vulnerabilities.pipes.advisory import advisories_checksum
21+
from vulnerabilities.pipes.advisory import compute_advisories_content_id
2222

2323

2424
class ComputeToDo(VulnerableCodePipeline):
@@ -52,7 +52,7 @@ def compute_individual_advisory_todo(self):
5252
progress_step=1,
5353
)
5454
for advisory in progress.iter(advisories.iterator(chunk_size=5000)):
55-
advisory_todo_id = advisories_checksum(advisories=advisory)
55+
advisory_todo_id = compute_advisories_content_id(advisories=advisory)
5656
check_missing_summary(
5757
advisory=advisory,
5858
todo_id=advisory_todo_id,
@@ -297,7 +297,7 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias(
297297
"Conflict matrix": matrix,
298298
}
299299

300-
todo_id = advisories_checksum(advisories)
300+
todo_id = compute_advisories_content_id(advisories)
301301
todo = AdvisoryToDo(
302302
related_advisories_id=todo_id,
303303
issue_type=issue_type,

vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from vulnerabilities.models import AdvisoryV2
1919
from vulnerabilities.models import ToDoRelatedAdvisoryV2
2020
from vulnerabilities.pipelines import VulnerableCodePipeline
21-
from vulnerabilities.pipes.advisory import advisories_checksum
21+
from vulnerabilities.pipes.advisory import compute_advisories_content_id
2222

2323

2424
class ComputeToDo(VulnerableCodePipeline):
@@ -54,7 +54,7 @@ def compute_individual_advisory_todo(self):
5454
progress_step=1,
5555
)
5656
for advisory in progress.iter(advisories.iterator(chunk_size=5000)):
57-
advisory_todo_id = advisories_checksum(advisories=advisory)
57+
advisory_todo_id = compute_advisories_content_id(advisories=advisory)
5858
check_missing_summary(
5959
advisory=advisory,
6060
todo_id=advisory_todo_id,
@@ -302,7 +302,7 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias(
302302
"Conflict matrix": matrix,
303303
}
304304

305-
todo_id = advisories_checksum(advisories)
305+
todo_id = compute_advisories_content_id(advisories)
306306
todo = AdvisoryToDoV2(
307307
related_advisories_id=todo_id,
308308
issue_type=issue_type,
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
from pathlib import Path
11+
12+
from fetchcode.vcs import fetch_via_vcs
13+
14+
from vulnerabilities.importer import AdvisoryData
15+
from vulnerabilities.models import AdvisoryToDoV2
16+
from vulnerabilities.models import AdvisoryV2
17+
from vulnerabilities.pipelines import VulnerableCodePipeline
18+
from vulnerabilities.pipes.advisory import insert_advisory_v2
19+
20+
21+
class CurateAdvisoriesPipeline(VulnerableCodePipeline):
    """
    Curate advisories
    """

    pipeline_id = "curate_advisories"
    license_expression = None

    # Sample curation file, one JSON document per ``*.json`` file in the
    # curation repository. Files are read with ``json.load`` so they must be
    # strict JSON: quoted keys and no trailing commas.
    #
    # The "advisory" mapping is passed to ``AdvisoryData.from_dict``, which reads
    # "severities" (not "severity") and requires "aliases", "summary",
    # "references" and "weaknesses" to be present.
    # NOTE(review): the keys of each "severities" entry must match what
    # ``VulnerabilitySeverity.from_dict`` expects -- confirm against that class.
    #
    # {
    #     "advisory": {
    #         "advisory_id": "CVE-2024-12345",
    #         "summary": "This is a curated summary for CVE-2024-12345",
    #         "url": "https://github.com/TG1999/CVE-2024-12345",
    #         "aliases": ["GHSA-1323-1213"],
    #         "references": [
    #             {
    #                 "url": "https://github.com/TG1999/CVE-2024-12345",
    #                 "reference_id": "CVE-2024-12345"
    #             }
    #         ],
    #         "severities": [
    #             {
    #                 "system": "CVSSv3",
    #                 "value": "9.8",
    #                 "vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"
    #             }
    #         ],
    #         "weaknesses": [],
    #         "affected_packages": [
    #             {
    #                 "package": {
    #                     "type": "pypi",
    #                     "namespace": null,
    #                     "name": "example-package",
    #                     "version": "1.0.0"
    #                 },
    #                 "affected_version_range": "<=1.0.0",
    #                 "fixed_version": "1.0.1"
    #             }
    #         ]
    #     },
    #     "related_advisories": ["nvd_importer_v2/CVE-2024-12345"],
    #     "todo_ids": [133],
    #     "source": "Tushar"
    # }

    @classmethod
    def steps(cls):
        return (
            cls.fetch_curation_repo,
            cls.apply_curations,
            cls.clean_downloads,
        )

    # The steps below are plain instance methods: ``steps()`` returns them
    # unbound and they store/read state (``vcs_response``) on the pipeline
    # instance. Decorating them with ``@classmethod`` would bind them to the
    # class and put that state on the class itself.

    def fetch_curation_repo(self):
        """
        Fetch the curation repository and keep the VCS response on
        ``self.vcs_response`` for the following steps.
        """
        # NOTE(review): ``repo_url`` is not defined in this class; it has to be
        # provided as an attribute before this step runs -- confirm where it is set.
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def apply_curations(self):
        """
        Apply every curation found in the fetched repository.

        For each ``*.json`` file under the checkout: build an ``AdvisoryData``
        from its "advisory" mapping, insert it with ``insert_advisory_v2``,
        link the advisories listed in "related_advisories" (looked up by
        ``avid``; unknown IDs are ignored) and record the inserted advisory
        on each TODO listed in "todo_ids".
        """
        curation_dir = Path(self.vcs_response.dest_dir)
        for curation_file in curation_dir.rglob("*.json"):
            # Close the file deterministically instead of leaking the handle.
            with open(curation_file, encoding="utf-8") as file:
                curation = json.load(file)

            advisory = AdvisoryData.from_dict(curation["advisory"])
            advisory_obj = insert_advisory_v2(
                advisory=advisory,
                source=curation.get("source"),
            )

            # Link related advisories
            for related_advisory_id in curation.get("related_advisories", []):
                related_advisory = AdvisoryV2.objects.filter(avid=related_advisory_id).first()
                if related_advisory:
                    advisory_obj.related_advisories.add(related_advisory)
                    advisory_obj.save()

            todo_ids = curation.get("todo_ids", [])
            for todo in AdvisoryToDoV2.objects.filter(id__in=todo_ids):
                # ``AdvisoryToDoV2.curation_advisory`` is a nullable foreign key
                # to the advisory created/updated to resolve the TODO; it is
                # assigned, not ``add()``-ed like a many-to-many relation.
                todo.curation_advisory = advisory_obj
                todo.save()

    def clean_downloads(self):
        """
        Remove the cloned curation repository, if one was fetched.
        """
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()

    def on_failure(self):
        """
        Remove the cloned repository when the pipeline fails part-way.
        """
        # NOTE(review): presumably invoked by the pipeline runner on failure,
        # as in the other pipelines of this project -- confirm.
        self.clean_downloads()

vulnerabilities/pipes/advisory.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,15 @@ def import_advisory(
330330
advisory.save()
331331

332332

333-
def advisories_checksum(advisories: Union[Advisory, List[Advisory]]) -> str:
333+
def compute_advisories_content_id(advisories: Union[Advisory, List[Advisory]]) -> str:
    """
    Return a SHA-256 hex digest computed from the ``unique_content_id`` of the
    given ``advisories``.

    ``advisories`` is either a single advisory or a list of advisories. The
    content IDs are sorted before hashing, so the order in which advisories
    are passed does not change the result.
    """
    if isinstance(advisories, (Advisory, AdvisoryV2)):
        advisories = [advisories]

    content_ids = [advisory.unique_content_id for advisory in advisories]
    content_ids.sort()

    # The digest only serves as an identifier, hence ``usedforsecurity=False``.
    digest = hashlib.sha256("".join(content_ids).encode(), usedforsecurity=False)
    return digest.hexdigest()

0 commit comments

Comments
 (0)