Skip to content

Commit 7904862

Browse files
ziadhany and TG1999
authored and committed
Update nvd test data
Update NVD Importer v2 to use the 2.0 API schema. Migrate NVD importer v1 to use the 2.0 API schema. Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent ca6a6bf commit 7904862

File tree

7 files changed

+1018
-715
lines changed

7 files changed

+1018
-715
lines changed

vulnerabilities/pipelines/nvd_importer.py

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
from datetime import date
14+
from datetime import timezone
1415
from traceback import format_exc as traceback_format_exc
1516
from typing import Iterable
1617

@@ -94,7 +95,7 @@ def advisories_count(self):
9495
return advisory_count
9596

9697
def collect_advisories(self) -> Iterable[AdvisoryData]:
97-
for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
98+
for _year, cve_data in fetch_cve_data_2_0(logger=self.log):
9899
yield from to_advisories(cve_data=cve_data)
99100

100101

@@ -107,15 +108,15 @@ def fetch(url, logger=None):
107108
return json.loads(data)
108109

109110

110-
def fetch_cve_data_1_1(starting_year=2002, logger=None):
111+
def fetch_cve_data_2_0(starting_year=2002, logger=None):
111112
"""
112113
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
113114
year since ``starting_year`` defaulting to 2002.
114115
"""
115116
current_year = date.today().year
116117
# NVD json feeds start from 2002.
117118
for year in range(starting_year, current_year + 1):
118-
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
119+
download_url = f"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
119120
yield year, fetch(url=download_url, logger=logger)
120121

121122

@@ -134,20 +135,22 @@ class CveItem:
134135
cve_item = attr.attrib(default=attr.Factory(dict), type=dict)
135136

136137
@classmethod
137-
def to_advisories(cls, cve_data, skip_hardware=True):
138+
def to_advisories(cls, vulnerabilities, skip_hardware=True):
138139
"""
139140
Yield AdvisoryData objects from ``vulnerabilities`` data for the CVE JSON 2.0 feed.
140141
Skip hardware
141142
"""
142-
for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware):
143+
for cve_item in CveItem.from_cve_data(
144+
cve_data=vulnerabilities, skip_hardware=skip_hardware
145+
):
143146
yield cve_item.to_advisory()
144147

145148
@classmethod
146149
def from_cve_data(cls, cve_data, skip_hardware=True):
147150
"""
148151
Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
149152
"""
150-
for cve_item in cve_data.get("CVE_Items") or []:
153+
for cve_item in cve_data.get("vulnerabilities") or []:
151154
if not cve_item:
152155
continue
153156
if not isinstance(cve_item, dict):
@@ -159,20 +162,20 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
159162

160163
@property
161164
def cve_id(self):
162-
return self.cve_item["cve"]["CVE_data_meta"]["ID"]
165+
return self.cve_item["cve"]["id"]
163166

164167
@property
165168
def summary(self):
166169
"""
167170
Return a descriptive summary.
168171
"""
169-
# In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so
170-
# this usually returns cve_item['cve']['description']['description_data'][0]['value']
172+
# In 99% of cases len(cve_item['cve']['descriptions']) == 1 , so
173+
# this usually returns cve_item['cve']['descriptions'][0]['value']
171174
# In the remaining 1% cases this returns the longest summary.
172-
# FIXME: we should retun the full description WITH the summry as the first line instead
175+
# FIXME: we should return the full description WITH the summary as the first line instead
173176
summaries = []
174-
for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
175-
if desc.get("value"):
177+
for desc in get_item(self.cve_item, "cve", "descriptions") or []:
178+
if desc.get("value") and desc.get("lang") == "en":
176179
summaries.append(desc["value"])
177180
return max(summaries, key=len) if summaries else None
178181

@@ -183,11 +186,12 @@ def cpes(self):
183186
"""
184187
# FIXME: we completely ignore the configurations here
185188
cpes = []
186-
for node in get_item(self.cve_item, "configurations", "nodes") or []:
187-
for cpe_data in node.get("cpe_match") or []:
188-
cpe23_uri = cpe_data.get("cpe23Uri")
189-
if cpe23_uri and cpe23_uri not in cpes:
190-
cpes.append(cpe23_uri)
189+
for nodes in get_item(self.cve_item, "cve", "configurations") or []:
190+
for node in nodes.get("nodes") or []:
191+
for cpe_data in node.get("cpeMatch") or []:
192+
cpe23_uri = cpe_data.get("criteria")
193+
if cpe23_uri and cpe23_uri not in cpes:
194+
cpes.append(cpe23_uri)
191195
return cpes
192196

193197
@property
@@ -243,7 +247,7 @@ def reference_urls(self):
243247
# FIXME: we should also collect additional data from the references such as tags and ids
244248

245249
urls = []
246-
for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
250+
for reference in get_item(self.cve_item, "cve", "references") or []:
247251
ref_url = reference.get("url")
248252
if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
249253
urls.append(ref_url)
@@ -294,9 +298,7 @@ def weaknesses(self):
294298
Return a list of CWE IDs like: [119, 189]
295299
"""
296300
weaknesses = []
297-
for weaknesses_item in (
298-
get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
299-
):
301+
for weaknesses_item in get_item(self.cve_item, "cve", "weaknesses") or []:
300302
weaknesses_description = weaknesses_item.get("description") or []
301303
for weaknesses_value in weaknesses_description:
302304
cwe_id = (
@@ -315,7 +317,9 @@ def to_advisory(self):
315317
aliases=[self.cve_id],
316318
summary=self.summary,
317319
references=self.references,
318-
date_published=dateparser.parse(self.cve_item.get("publishedDate")),
320+
date_published=dateparser.parse(self.cve_item["cve"].get("published")).replace(
321+
tzinfo=timezone.utc
322+
),
319323
weaknesses=self.weaknesses,
320324
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
321325
)

vulnerabilities/pipelines/v2_importers/nvd_importer.py

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
from datetime import date
14+
from datetime import timezone
1415
from traceback import format_exc as traceback_format_exc
1516
from typing import Iterable
1617

@@ -93,7 +94,7 @@ def advisories_count(self):
9394
return advisory_count
9495

9596
def collect_advisories(self) -> Iterable[AdvisoryData]:
96-
for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
97+
for _year, cve_data in fetch_cve_data_2_0(logger=self.log):
9798
yield from to_advisories(cve_data=cve_data)
9899

99100

@@ -111,15 +112,15 @@ def fetch(url, logger=None):
111112
return json.loads(data)
112113

113114

114-
def fetch_cve_data_1_1(starting_year=2002, logger=None):
115+
def fetch_cve_data_2_0(starting_year=2002, logger=None):
115116
"""
116117
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
117118
year since ``starting_year`` defaulting to 2002.
118119
"""
119120
current_year = date.today().year
120121
# NVD json feeds start from 2002.
121122
for year in range(starting_year, current_year + 1):
122-
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
123+
download_url = f"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
123124
yield year, fetch(url=download_url, logger=logger)
124125

125126

@@ -151,7 +152,7 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
151152
"""
152153
Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
153154
"""
154-
for cve_item in cve_data.get("CVE_Items") or []:
155+
for cve_item in cve_data.get("vulnerabilities") or []:
155156
if not cve_item:
156157
continue
157158
if not isinstance(cve_item, dict):
@@ -163,7 +164,7 @@ def from_cve_data(cls, cve_data, skip_hardware=True):
163164

164165
@property
165166
def cve_id(self):
166-
return self.cve_item["cve"]["CVE_data_meta"]["ID"]
167+
return self.cve_item["cve"]["id"]
167168

168169
@property
169170
def summary(self):
@@ -175,8 +176,8 @@ def summary(self):
175176
# In the remaining 1% cases this returns the longest summary.
176177
# FIXME: we should return the full description WITH the summary as the first line instead
177178
summaries = []
178-
for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
179-
if desc.get("value"):
179+
for desc in get_item(self.cve_item, "cve", "descriptions") or []:
180+
if desc.get("value") and desc.get("lang") == "en":
180181
summaries.append(desc["value"])
181182
return max(summaries, key=len) if summaries else None
182183

@@ -187,11 +188,12 @@ def cpes(self):
187188
"""
188189
# FIXME: we completely ignore the configurations here
189190
cpes = []
190-
for node in get_item(self.cve_item, "configurations", "nodes") or []:
191-
for cpe_data in node.get("cpe_match") or []:
192-
cpe23_uri = cpe_data.get("cpe23Uri")
193-
if cpe23_uri and cpe23_uri not in cpes:
194-
cpes.append(cpe23_uri)
191+
for nodes in get_item(self.cve_item, "cve", "configurations") or []:
192+
for node in nodes.get("nodes") or []:
193+
for cpe_data in node.get("cpeMatch") or []:
194+
cpe23_uri = cpe_data.get("criteria")
195+
if cpe23_uri and cpe23_uri not in cpes:
196+
cpes.append(cpe23_uri)
195197
return cpes
196198

197199
@property
@@ -250,7 +252,7 @@ def reference_urls(self):
250252
# FIXME: we should also collect additional data from the references such as tags and ids
251253

252254
urls = []
253-
for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
255+
for reference in get_item(self.cve_item, "cve", "references") or []:
254256
ref_url = reference.get("url")
255257
if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
256258
urls.append(ref_url)
@@ -300,9 +302,7 @@ def weaknesses(self):
300302
Return a list of CWE IDs like: [119, 189]
301303
"""
302304
weaknesses = []
303-
for weaknesses_item in (
304-
get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
305-
):
305+
for weaknesses_item in get_item(self.cve_item, "cve", "weaknesses") or []:
306306
weaknesses_description = weaknesses_item.get("description") or []
307307
for weaknesses_value in weaknesses_description:
308308
cwe_id = (
@@ -322,7 +322,9 @@ def to_advisory(self):
322322
aliases=[],
323323
summary=self.summary,
324324
references_v2=self.references,
325-
date_published=dateparser.parse(self.cve_item.get("publishedDate")),
325+
date_published=dateparser.parse(self.cve_item["cve"].get("published")).replace(
326+
tzinfo=timezone.utc
327+
),
326328
weaknesses=self.weaknesses,
327329
severities=self.severities,
328330
url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",

0 commit comments

Comments
 (0)