Skip to content

Commit 1b7d92f

Browse files
committed
Migrate nvd importer to use 2.0 API schema
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 8c001a1 commit 1b7d92f

File tree

6 files changed

+822
-995
lines changed

6 files changed

+822
-995
lines changed

vulnerabilities/pipelines/nvd_importer.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def advisories_count(self):
9494
return advisory_count
9595

9696
def collect_advisories(self) -> Iterable[AdvisoryData]:
97-
for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
97+
for _year, cve_data in fetch_cve_data_2_0(logger=self.log):
9898
yield from to_advisories(cve_data=cve_data)
9999

100100

@@ -107,15 +107,15 @@ def fetch(url, logger=None):
107107
return json.loads(data)
108108

109109

110-
def fetch_cve_data_1_1(starting_year=2002, logger=None):
110+
def fetch_cve_data_2_0(starting_year=2002, logger=None):
111111
"""
112112
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
113113
year since ``starting_year`` defaulting to 2002.
114114
"""
115115
current_year = date.today().year
116116
# NVD json feeds start from 2002.
117117
for year in range(starting_year, current_year + 1):
118-
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
118+
download_url = f"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
119119
yield year, fetch(url=download_url, logger=logger)
120120

121121

@@ -134,20 +134,22 @@ class CveItem:
134134
cve_item = attr.attrib(default=attr.Factory(dict), type=dict)
135135

136136
@classmethod
137-
def to_advisories(cls, cve_data, skip_hardware=True):
137+
def to_advisories(cls, vulnerabilities, skip_hardware=True):
138138
"""
139139
Yield AdvisoryData objects from ``cve_data`` data for the CVE JSON 2.0 feed.
140140
Skip hardware items when ``skip_hardware`` is True.
141141
"""
142-
for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware):
142+
for cve_item in CveItem.from_cve_data(
143+
vulnerabilities=vulnerabilities, skip_hardware=skip_hardware
144+
):
143145
yield cve_item.to_advisory()
144146

145147
@classmethod
146148
def from_cve_data(cls, cve_data, skip_hardware=True):
147149
"""
148150
Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
149151
"""
150-
for cve_item in cve_data.get("CVE_Items") or []:
152+
for cve_item in cve_data.get("vulnerabilities") or []:
151153
if not cve_item:
152154
continue
153155
if not isinstance(cve_item, dict):
@@ -159,20 +161,20 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
159161

160162
@property
161163
def cve_id(self):
162-
return self.cve_item["cve"]["CVE_data_meta"]["ID"]
164+
return self.cve_item["cve"]["id"]
163165

164166
@property
165167
def summary(self):
166168
"""
167169
Return a descriptive summary.
168170
"""
169-
# In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so
170-
# this usually returns cve_item['cve']['description']['description_data'][0]['value']
171+
# In 99% of cases len(cve_item['cve']['descriptions']) == 1, so
172+
# this usually returns cve_item['cve']['descriptions'][0]['value']
171173
# In the remaining 1% cases this returns the longest summary.
172-
# FIXME: we should retun the full description WITH the summry as the first line instead
174+
# FIXME: we should return the full description WITH the summary as the first line instead
173175
summaries = []
174-
for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
175-
if desc.get("value"):
176+
for desc in get_item(self.cve_item, "cve", "descriptions") or []:
177+
if desc.get("value") and desc.get("lang") == "en":
176178
summaries.append(desc["value"])
177179
return max(summaries, key=len) if summaries else None
178180

@@ -183,11 +185,12 @@ def cpes(self):
183185
"""
184186
# FIXME: we completely ignore the configurations here
185187
cpes = []
186-
for node in get_item(self.cve_item, "configurations", "nodes") or []:
187-
for cpe_data in node.get("cpe_match") or []:
188-
cpe23_uri = cpe_data.get("cpe23Uri")
189-
if cpe23_uri and cpe23_uri not in cpes:
190-
cpes.append(cpe23_uri)
188+
for nodes in get_item(self.cve_item, "cve", "configurations") or []:
189+
for node in nodes.get("nodes") or []:
190+
for cpe_data in node.get("cpeMatch") or []:
191+
cpe23_uri = cpe_data.get("criteria")
192+
if cpe23_uri and cpe23_uri not in cpes:
193+
cpes.append(cpe23_uri)
191194
return cpes
192195

193196
@property
@@ -243,7 +246,7 @@ def reference_urls(self):
243246
# FIXME: we should also collect additional data from the references such as tags and ids
244247

245248
urls = []
246-
for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
249+
for reference in get_item(self.cve_item, "cve", "references") or []:
247250
ref_url = reference.get("url")
248251
if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
249252
urls.append(ref_url)
@@ -294,9 +297,7 @@ def weaknesses(self):
294297
Return a list of CWE IDs like: [119, 189]
295298
"""
296299
weaknesses = []
297-
for weaknesses_item in (
298-
get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
299-
):
300+
for weaknesses_item in get_item(self.cve_item, "cve", "weaknesses") or []:
300301
weaknesses_description = weaknesses_item.get("description") or []
301302
for weaknesses_value in weaknesses_description:
302303
cwe_id = (
@@ -315,7 +316,7 @@ def to_advisory(self):
315316
aliases=[self.cve_id],
316317
summary=self.summary,
317318
references=self.references,
318-
date_published=dateparser.parse(self.cve_item.get("publishedDate")),
319+
date_published=dateparser.parse(self.cve_item["cve"].get("published")),
319320
weaknesses=self.weaknesses,
320321
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
321322
)

0 commit comments

Comments
 (0)