Skip to content

Commit 46953eb

Browse files
committed
Update sync_vulnerablecode pipeline to import vulnerablecode Git repositories with new schema format
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 99b96fc commit 46953eb

File tree

3 files changed

+140
-76
lines changed

3 files changed

+140
-76
lines changed

fedcode/pipelines/sync_vulnerablecode.py

Lines changed: 120 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
#
99

1010
import logging
11-
import os.path
1211
from itertools import zip_longest
12+
from pathlib import Path
1313

1414
import saneyaml
15+
from django.db import transaction
1516

17+
from aboutcode.hashid import get_core_purl
1618
from aboutcode.pipeline import LoopProgress
1719
from fedcode.activitypub import Activity
1820
from fedcode.activitypub import UpdateActivity
@@ -40,6 +42,10 @@ def get_git_repos(self):
4042
self.git_repos = Repository.objects.all()
4143

4244
def sync_vulnerablecode_repositories(self):
45+
"""
46+
Sync repositories
47+
For vulnerablecode-data there are three file types: vulnerabilities.yml, purls.yml, and per-vulnerability files such as VCID-1ues-ahar-buaa.yml
48+
"""
4349
repositories_count = self.git_repos.count()
4450
self.log(f"Syncing vulnerability from {repositories_count:,d} repositories")
4551

@@ -56,6 +62,7 @@ def sync_vulnerabilities(repository, logger):
5662
repo = repository.git_repo_obj
5763
latest_commit_hash = repo.head.commit.hexsha
5864
latest_commit = repo.commit(latest_commit_hash)
65+
5966
if repository.last_imported_commit:
6067
last_imported_commit = repo.commit(repository.last_imported_commit)
6168
diffs = last_imported_commit.diff(latest_commit)
@@ -73,7 +80,7 @@ def sync_vulnerabilities(repository, logger):
7380
logger(f"Syncing {diff_count:,d} vulnerability scan from {repository.url}")
7481
progress = LoopProgress(total_iterations=diff_count, logger=logger)
7582
for diff in progress.iter(diffs):
76-
if not diff.a_path.endswith(".yaml"):
83+
if not diff.a_path.endswith(".yml"):
7784
continue
7885

7986
if diff.a_path.startswith("."):
@@ -82,100 +89,139 @@ def sync_vulnerabilities(repository, logger):
8289
yaml_data_a_blob = saneyaml.load(diff.a_blob.data_stream.read()) if diff.a_blob else None
8390
yaml_data_b_blob = saneyaml.load(diff.b_blob.data_stream.read()) if diff.b_blob else None
8491

85-
if os.path.split(diff.a_path)[1].startswith("VCID") or os.path.split(diff.b_path)[
86-
1
87-
].startswith("VCID"):
88-
vul_handler(
89-
diff.change_type,
90-
repository,
91-
yaml_data_a_blob,
92-
yaml_data_b_blob,
93-
logger,
92+
a_name = Path(diff.a_path).name
93+
b_name = Path(diff.b_path).name
94+
95+
if a_name == "vulnerabilities.yml" or b_name == "vulnerabilities.yml":
96+
note_handler(
97+
diff.change_type, repository.admin, yaml_data_a_blob, yaml_data_b_blob, logger
9498
)
95-
continue
9699

97-
pkg_handler(
98-
diff.change_type,
99-
repository.admin,
100-
yaml_data_a_blob,
101-
yaml_data_b_blob,
102-
)
100+
if a_name == "purls.yml" or b_name == "purls.yml":
101+
pkg_handler(
102+
diff.change_type, repository.admin, yaml_data_a_blob, yaml_data_b_blob, logger
103+
)
104+
105+
if a_name.startswith("VCID") or b_name.startswith("VCID"):
106+
vul_handler(diff.change_type, repository, yaml_data_a_blob, yaml_data_b_blob, logger)
103107

104108
repository.last_imported_commit = latest_commit_hash
105109
repository.save()
106110
logger("The Importer run successfully")
107111

108112

109113
def vul_handler(change_type, repo_obj, yaml_data_a_blob, yaml_data_b_blob, logger):
114+
"""
115+
VCID-XXXX-XXXX-XXXX.yml
116+
"""
117+
vulnerability_a_id = yaml_data_a_blob.get("vulnerability_id") if yaml_data_a_blob else None
118+
vulnerability_b_id = yaml_data_b_blob.get("vulnerability_id") if yaml_data_b_blob else None
119+
110120
if change_type == "A": # A for added paths
111121
Vulnerability.objects.get_or_create(
112-
id=yaml_data_b_blob.get("vulnerability_id"),
122+
id=vulnerability_b_id,
113123
repo=repo_obj,
114124
)
115125
elif change_type in [
116126
"M",
117127
"R",
118128
]: # R for renamed paths , M for paths with modified data
119-
vul = Vulnerability.objects.get(
120-
id=yaml_data_a_blob.get("vulnerability_id"),
121-
repo=repo_obj,
122-
)
123-
vul.filename = yaml_data_b_blob.get("vulnerability_id")
124-
vul.save()
129+
with transaction.atomic():
130+
Vulnerability.objects.get(id=vulnerability_a_id, repo=repo_obj).delete()
131+
Vulnerability.objects.create(id=vulnerability_b_id, repo=repo_obj)
132+
125133
elif change_type == "D": # D for deleted paths
126-
vul = Vulnerability.objects.filter(
134+
Vulnerability.objects.get(
127135
id=yaml_data_b_blob.get("vulnerability_id"),
128136
repo=repo_obj,
129-
)
130-
vul.delete()
137+
).delete()
131138
else:
132139
logger(f"Invalid Vulnerability File", level=logging.ERROR)
133140

134141

135-
def pkg_handler(change_type, default_service, yaml_data_a_blob, yaml_data_b_blob):
142+
def pkg_handler(change_type, default_service, yaml_data_a_blob, yaml_data_b_blob, logger):
143+
"""
144+
purls.yml
145+
"""
146+
147+
if change_type == "A":
148+
for purl in yaml_data_b_blob:
149+
core_purl = get_core_purl(purl)
150+
pkg, _ = Package.objects.get_or_create(purl=core_purl, service=default_service)
151+
152+
# elif change_type == "M":
153+
# pkg = Package.objects.get(purl=package_a, service=default_service)
154+
# pkg.purl = package_b
155+
# pkg.save()
156+
#
157+
# for version_a, version_b in zip_longest(
158+
# yaml_data_a_blob, yaml_data_b_blob
159+
# ):
160+
# if version_b and not version_a:
161+
# utils.create_note(pkg, version_b)
162+
#
163+
# if version_a and not version_b:
164+
# utils.delete_note(pkg, version_a)
165+
#
166+
# if version_a and version_b:
167+
# note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(version_a))
168+
# if note.content == saneyaml.dump(version_b):
169+
# continue
170+
#
171+
# note.content = saneyaml.dump(version_b)
172+
# note.save()
173+
#
174+
# update_activity = UpdateActivity(actor=pkg.to_ap, object=note.to_ap)
175+
# Activity.federate(
176+
# targets=pkg.followers_inboxes,
177+
# body=update_activity.to_ap(),
178+
# key_id=pkg.key_id,
179+
# )
180+
#
181+
# elif change_type == "D":
182+
# pkg = Package.objects.get(purl=package_a, service=default_service)
183+
# for version in yaml_data_a_blob:
184+
# utils.delete_note(pkg, version)
185+
# pkg.delete()
186+
187+
188+
def note_handler(change_type, default_service, yaml_data_a_blob, yaml_data_b_blob, logger):
189+
"""
190+
vulnerabilities.yml
191+
"""
136192
if change_type == "A":
137-
package = yaml_data_b_blob.get("package")
138-
139-
pkg, _ = Package.objects.get_or_create(purl=package, service=default_service)
140-
141-
for version in yaml_data_b_blob.get("versions", []):
142-
utils.create_note(pkg, version)
143-
144-
elif change_type == "M":
145-
old_package = yaml_data_a_blob.get("package")
146-
new_package = yaml_data_b_blob.get("package")
147-
148-
pkg = Package.objects.get(purl=old_package, service=default_service)
149-
pkg.purl = new_package
150-
pkg.save()
151-
152-
for version_a, version_b in zip_longest(
153-
yaml_data_a_blob.get("versions", []), yaml_data_b_blob.get("versions", [])
154-
):
155-
if version_b and not version_a:
156-
utils.create_note(pkg, version_b)
157-
158-
if version_a and not version_b:
159-
utils.delete_note(pkg, version_a)
160-
161-
if version_a and version_b:
162-
note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(version_a))
163-
if note.content == saneyaml.dump(version_b):
164-
continue
165-
166-
note.content = saneyaml.dump(version_b)
167-
note.save()
168-
169-
update_activity = UpdateActivity(actor=pkg.to_ap, object=note.to_ap)
170-
Activity.federate(
171-
targets=pkg.followers_inboxes,
172-
body=update_activity.to_ap(),
173-
key_id=pkg.key_id,
174-
)
175-
176-
elif change_type == "D":
177-
package = yaml_data_a_blob.get("package")
178-
pkg = Package.objects.get(purl=package, service=default_service)
179-
for version in yaml_data_a_blob.get("versions", []):
180-
utils.delete_note(pkg, version)
181-
pkg.delete()
193+
for pkg_status in yaml_data_b_blob:
194+
purl = pkg_status.get("purl")
195+
if not purl:
196+
logger(f"Invalid Vulnerability File", level=logging.ERROR)
197+
return
198+
core_purl = get_core_purl(purl)
199+
pkg_b, _ = Package.objects.get_or_create(purl=core_purl, service=default_service)
200+
temp = saneyaml.dump(pkg_status)
201+
utils.create_note(pkg_b, temp)
202+
203+
# elif change_type == "M":
204+
# for pkg_status_a, pkg_status_b in zip_longest(
205+
# yaml_data_a_blob, yaml_data_b_blob
206+
# ):
207+
# if pkg_status_a and not pkg_status_b:
208+
# utils.create_note(pkg_a, pkg_status_b)
209+
#
210+
# if pkg_status_a and not pkg_status_b:
211+
# utils.delete_note(pkg_a, pkg_status_b)
212+
#
213+
# if pkg_status_a and pkg_status_b:
214+
# utils.update_note(pkg_a, saneyaml.dump(pkg_status_a), saneyaml.dump(pkg_status_b))
215+
#
216+
# elif change_type == "D":
217+
# for pkg_status in yaml_data_a_blob:
218+
# purl = pkg_status.get("purl")
219+
# if not purl:
220+
# logger(f"Invalid Vulnerability File", level=logging.ERROR)
221+
# return
222+
# core_purl = get_core_purl(purl)
223+
# pkg_a, _ = Package.objects.get_or_create(purl=core_purl, service=default_service)
224+
# temp = saneyaml.dump(pkg_status)
225+
# utils.delete_note(pkg_a, temp)
226+
else:
227+
logger(f"Invalid Vulnerability File", level=logging.ERROR)

fedcode/pipes/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from fedcode.activitypub import Activity
1616
from fedcode.activitypub import CreateActivity
1717
from fedcode.activitypub import DeleteActivity
18+
from fedcode.activitypub import UpdateActivity
1819
from fedcode.models import Note
1920

2021

@@ -31,6 +32,23 @@ def create_note(pkg, note_dict):
3132
)
3233

3334

35+
def update_note(pkg, old_note_dict, new_note_dict):
36+
if old_note_dict == new_note_dict:
37+
return
38+
39+
note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(old_note_dict))
40+
41+
note.content = saneyaml.dump(new_note_dict)
42+
note.save()
43+
44+
update_activity = UpdateActivity(actor=pkg.to_ap, object=note.to_ap)
45+
Activity.federate(
46+
targets=pkg.followers_inboxes,
47+
body=update_activity.to_ap(),
48+
key_id=pkg.key_id,
49+
)
50+
51+
3452
def delete_note(pkg, note_dict):
3553
note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(note_dict))
3654
note_ap = note.to_ap

fedcode/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def parse_webfinger(subject):
4242
return tuple(subject.split("@"))
4343

4444

45-
def generate_webfinger(username, domain=FEDERATEDCODE_DOMAIN):
46-
return username + "@" + domain
45+
def generate_webfinger(username: str, domain=FEDERATEDCODE_DOMAIN) -> str:
46+
return str(username) + "@" + domain
4747

4848

4949
def clone_git_repo(repo_path, repo_url):

0 commit comments

Comments
 (0)