@@ -72,11 +72,22 @@ def is_available():
72
72
return response .status_code == requests .codes .ok
73
73
74
74
75
- def get_purls ( packages ):
75
def chunked(iterable, chunk_size):
    """
    Break an `iterable` into chunks of at most `chunk_size` items.

    Sized, sliceable inputs (lists, tuples, strings...) are sliced, so each chunk
    has the same type as the input. Inputs without a length, such as generators
    and iterators, are consumed lazily and each chunk is yielded as a list.

    Raise a ValueError when `chunk_size` is lower than 1. As this is a generator
    function, the error is raised when the iteration starts.

    >>> list(chunked([1, 2, 3, 4, 5], 2))
    [[1, 2], [3, 4], [5]]
    >>> list(chunked([1, 2, 3, 4, 5], 3))
    [[1, 2, 3], [4, 5]]
    >>> list(chunked(iter([1, 2, 3]), 2))
    [[1, 2], [3]]
    """
    from itertools import islice

    # A negative size would make ``range()`` empty and silently drop every item,
    # and a size of zero can never make progress: reject both explicitly.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    try:
        total = len(iterable)
    except TypeError:
        # No length available: pull `chunk_size` items at a time until exhausted.
        iterator = iter(iterable)
        while chunk := list(islice(iterator, chunk_size)):
            yield chunk
        return

    for index in range(0, total, chunk_size):
        end = index + chunk_size
        yield iterable[index:end]
87
+
88
+
89
def get_purls(packages):
    """Return the list of non-empty PURLs of the given `packages`, in order."""
    purls = []
    for package in packages:
        purl = package.package_url
        # Packages without a PURL value are left out of the result.
        if purl:
            purls.append(purl)
    return purls
81
92
82
93
@@ -168,6 +179,7 @@ def bulk_search_by_purl(
168
179
169
180
data = {
170
181
"purls" : purls ,
182
+ "vulnerabilities_only" : True ,
171
183
}
172
184
173
185
logger .debug (f"VulnerableCode: url={ url } purls_count={ len (purls )} " )
@@ -190,32 +202,33 @@ def bulk_search_by_cpes(
190
202
return request_post (url , data , timeout )
191
203
192
204
193
- def get_unique_vulnerabilities ( packages_data ):
205
def fetch_vulnerabilities(packages, chunk_size=1000, logger=logger.info):
    """
    Fetch and store vulnerabilities for each provided ``packages``.
    The PURLs are used for the lookups in batch of ``chunk_size`` per request.

    ``packages`` can be any iterable, including a single-pass one, of objects
    exposing a ``package_url`` and an ``affected_by_vulnerabilities`` attribute.
    Only the packages for which vulnerabilities are returned are modified. They
    are saved with a single ``bulk_update`` issued on the class of the first
    modified object, then a summary message is sent to the ``logger`` callable.
    """
    # The packages are walked twice, once to collect the PURLs and once to assign
    # the results: materialize them so a generator is not exhausted by the first pass.
    packages = list(packages)

    vulnerabilities_by_purl = {}
    for purls_batch in chunked(get_purls(packages), chunk_size):
        # NOTE(review): ``bulk_search_by_purl`` is not visible here; the guard
        # assumes it may return ``None`` or an empty value when the request
        # fails, in which case the batch is skipped rather than crashing.
        response_data = bulk_search_by_purl(purls_batch) or []
        for vulnerability_data in response_data:
            vulnerabilities_by_purl[vulnerability_data["purl"]] = vulnerability_data

    unsaved_objects = []
    for package in packages:
        package_data = vulnerabilities_by_purl.get(package.package_url)
        if not package_data:
            continue
        if affected_by := package_data.get("affected_by_vulnerabilities"):
            package.affected_by_vulnerabilities = affected_by
            unsaved_objects.append(package)

    if not unsaved_objects:
        return

    # All the modified objects are saved through the manager of the first one.
    model_class = unsaved_objects[0].__class__
    model_class.objects.bulk_update(
        objs=unsaved_objects,
        fields=["affected_by_vulnerabilities"],
        batch_size=1000,
    )
    logger(
        f"{len(unsaved_objects)} {model_class._meta.verbose_name_plural} updated "
        f"with vulnerability data."
    )
0 commit comments