Skip to content

Commit e4246aa

Browse files
authored
Merge pull request #322 from nexB/improve-ubuntu
Improve Ubuntu OVAL importer
2 parents a187f90 + 09c6522 commit e4246aa

File tree

13 files changed

+282
-215
lines changed

13 files changed

+282
-215
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,5 @@ addopts =
5454
-rfEsxXw
5555
--strict
5656
--ignore setup.py
57+
--ignore vulnerabilities/lib_oval.py
5758
--doctest-modules

vulnerabilities/data_source.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
2222

23+
import pickle
2324
import dataclasses
2425
import logging
2526
import os
@@ -466,20 +467,19 @@ def _collect_pkgs(parsed_oval_data: Mapping) -> Set:
466467

467468
def _fetch(self) -> Tuple[Mapping, Iterable[ET.ElementTree]]:
468469
"""
469-
This method contains logic to fetch OVAL files and yield them into
470-
a tuple of file's metadata and it's ET.ElementTree.
470+
Return a two-tuple of ({mapping of Package URL data}, its ET.ElementTree)
471471
Subclasses must implement this method.
472472
473-
Note: Mapping MUST INCLUDE "type" key. Example values of Mapping
474-
{"type":"deb","qualifiers":{"distro":"buster"} }
473+
Note: Package URL data MUST INCLUDE a Package URL "type" key so
474+
implement _fetch() accordingly.
475+
For example::
475476
477+
{"type":"deb","qualifiers":{"distro":"buster"} }
476478
"""
479+
# TODO: enforce that we receive the proper data here
477480
raise NotImplementedError
478481

479482
def updated_advisories(self) -> List[Advisory]:
480-
"""
481-
Note: metadata MUST INCLUDE "type" key, implement _fetch accordingly.
482-
"""
483483
for metadata, oval_file in self._fetch():
484484
try:
485485
oval_data = self.get_data_from_xml_doc(oval_file, metadata)
@@ -506,34 +506,40 @@ def set_api(self, all_pkgs: Iterable[str]):
506506

507507
def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[Advisory]:
508508
"""
509-
The orchestration method of the OvalDataSource. This method breaks an OVAL xml
510-
ElementTree into a list of `Advisory`.
509+
The orchestration method of the OvalDataSource. This method breaks an
510+
OVAL xml ElementTree into a list of `Advisory`.
511+
512+
Note: pkg_metadata is a mapping of Package URL data that MUST INCLUDE
513+
"type" key.
511514
512-
Note: pkg_metadata MUST INCLUDE "type" key. Example value of pkg_metadata,
515+
Example value of pkg_metadata:
513516
{"type":"deb","qualifiers":{"distro":"buster"} }
514517
"""
518+
515519
all_adv = []
516520
oval_doc = OvalParser(self.translations, xml_doc)
517521
raw_data = oval_doc.get_data()
518522
all_pkgs = self._collect_pkgs(raw_data)
519523
self.set_api(all_pkgs)
520-
for definition_data in raw_data: # definition_data -> Advisory
521524

522-
# These fields are definition level, i.e common for all
523-
# elements connected/linked to an OvalDefinition
525+
# convert definition_data to Advisory objects
526+
for definition_data in raw_data:
527+
# These fields are definition level, i.e common for all elements
528+
# connected/linked to an OvalDefinition
524529
vuln_id = definition_data["vuln_id"]
525530
description = definition_data["description"]
526531
affected_purls = set()
527532
safe_purls = set()
528533
references = [Reference(url=url) for url in definition_data["reference_urls"]]
529-
530534
for test_data in definition_data["test_data"]:
531-
for package in test_data["package_list"]:
532-
pkg_name = package
533-
if package and len(pkg_name) >= 50:
535+
for package_name in test_data["package_list"]:
536+
if package_name and len(package_name) >= 50:
534537
continue
535-
aff_ver_range = test_data["version_ranges"]
536-
all_versions = self.pkg_manager_api.get(package)
538+
aff_ver_range = test_data["version_ranges"] or set()
539+
all_versions = self.pkg_manager_api.get(package_name)
540+
541+
# FIXME: what is this 50 DB limit? that's too small for versions
542+
# FIXME: we should not drop data this way
537543
# This filter is for filtering out long versions.
538544
# 50 is limit because that's what db permits atm.
539545
all_versions = set(filter(lambda x: len(x) < 50, all_versions))
@@ -543,22 +549,28 @@ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> Lis
543549
safe_versions = all_versions - affected_versions
544550

545551
for version in affected_versions:
546-
pkg_url = self.create_purl(
547-
pkg_name=pkg_name, pkg_version=version, pkg_data=pkg_metadata
552+
purl = self.create_purl(
553+
pkg_name=package_name,
554+
pkg_version=version,
555+
pkg_data=pkg_metadata,
548556
)
549-
affected_purls.add(pkg_url)
557+
affected_purls.add(purl)
550558

551559
for version in safe_versions:
552-
pkg_url = self.create_purl(
553-
pkg_name=pkg_name, pkg_version=version, pkg_data=pkg_metadata
560+
purl = self.create_purl(
561+
pkg_name=package_name,
562+
pkg_version=version,
563+
pkg_data=pkg_metadata,
554564
)
555-
safe_purls.add(pkg_url)
565+
safe_purls.add(purl)
556566

557567
all_adv.append(
558568
Advisory(
559569
summary=description,
560570
impacted_package_urls=affected_purls,
561571
resolved_package_urls=safe_purls,
562572
vulnerability_id=vuln_id,
563-
vuln_references=references))
573+
vuln_references=references,
574+
)
575+
)
564576
return all_adv

vulnerabilities/helpers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
import json
2424
import re
2525

26-
import yaml
2726
import requests
2827
import toml
28+
import yaml
2929

3030
# TODO add logging here
3131

@@ -50,6 +50,13 @@ def fetch_yaml(url):
5050
return yaml.safe_load(response.content)
5151

5252

53+
# FIXME: this is NOT how etags work.
54+
# We should instead send the proper HTTP header
55+
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
56+
# and integrate this finely in the processing as this typically needs to use
57+
# streaming=True requests, and proper handling of the HTTP return code
58+
# In all cases this ends up being a single request, not a HEAD followed
59+
# by another real request
5360
def create_etag(data_src, url, etag_key):
5461
"""
5562
Etags are like hashes of web responses. For a data source `data_src`,

vulnerabilities/importers/ubuntu.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import asyncio
2525
import bz2
2626
import dataclasses
27+
import logging
2728
from typing import Iterable
2829
from typing import List
2930
from typing import Mapping
@@ -38,6 +39,8 @@
3839
from vulnerabilities.package_managers import LaunchpadVersionAPI
3940
from vulnerabilities.helpers import create_etag
4041

42+
logger = logging.getLogger(__name__)
43+
4144

4245
@dataclasses.dataclass
4346
class UbuntuConfiguration(DataSourceConfiguration):
@@ -58,22 +61,25 @@ def __init__(self, *args, **kwargs):
5861
self.pkg_manager_api = LaunchpadVersionAPI()
5962

6063
def _fetch(self):
64+
base_url = "https://people.canonical.com/~ubuntu-security/oval"
6165
releases = self.config.releases
62-
for release in releases:
63-
file_url = f"https://people.canonical.com/~ubuntu-security/oval/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8
64-
if not create_etag(data_src=self, url=file_url, etag_key="ETag"):
66+
for i, release in enumerate(releases, 1):
67+
file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8
68+
logger.info(f"Fetching Ubuntu Oval: {file_url}")
69+
response = requests.get(file_url)
70+
if response.status_code != requests.codes.ok:
71+
logger.error(
72+
f"Failed to fetch Ubuntu Oval: HTTP {response.status_code} : {file_url}"
73+
)
6574
continue
66-
resp = requests.get(file_url)
67-
extracted = bz2.decompress(resp.content)
75+
76+
extracted = bz2.decompress(response.content)
6877
yield (
6978
{"type": "deb", "namespace": "ubuntu"},
7079
ET.ElementTree(ET.fromstring(extracted.decode("utf-8"))),
7180
)
72-
# In case every file is latest, _fetch won't yield anything(due to checking for new etags),
73-
# this would return None to added_advisories
74-
# which will cause error, hence this
75-
# function return an empty list
76-
return []
81+
82+
logger.info(f"Fetched {i} Ubuntu Oval releases from {base_url}")
7783

7884
def set_api(self, packages):
7985
asyncio.run(self.pkg_manager_api.load_api(packages))

vulnerabilities/lib_oval.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,32 @@
11
#!/usr/bin/env/ python3
2-
# Copyright© 2010 United States Government. All Rights Reserved.
2+
# Copyright (c) 2010 United States Government. All Rights Reserved.
3+
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice, this
11+
# list of conditions and the following disclaimer in the documentation and/or
12+
# other materials provided with the distribution.
13+
#
14+
# * Neither the name of the Center for Internet Security, Inc. (CIS) nor the names
15+
# of its contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER, CIS AND CONTRIBUTORS "AS
19+
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER, CIS OR CONTRIBUTORS BE
22+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28+
# POSSIBILITY OF SUCH DAMAGE.
329

4-
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5-
6-
# * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7-
# * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8-
# * Neither the name of the Center for Internet Security, Inc. (CIS) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9-
10-
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER, CIS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER, CIS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1130
"""Library to simplify working with the OVAL XML structure
1231
1332
@@ -39,29 +58,29 @@
3958
4059
1. Create an OvalDocument:
4160
42-
>> tree = ElementTree()
43-
>> tree.parse("OvalTest.xml")
44-
>> document = OvalDocument(tree)
61+
>>> tree = ElementTree()
62+
>>> tree.parse("OvalTest.xml")
63+
>>> document = OvalDocument(tree)
4564
4665
2. Find an oval element within the loaded document:
4766
48-
>> element = document.getElementByID("oval:org.mitre.oval:def:22382")
49-
>> if element is not None:
50-
>> ....
67+
>>> element = document.getElementByID("oval:org.mitre.oval:def:22382")
68+
>>> if element is not None:
69+
>>> ....
5170
5271
3. Read an XML file with a single OVAL Definition (error checking omitted for brevity):
5372
54-
>> tree = ElementTree()
55-
>> tree.parse('test-definition.xml')
56-
>> root = tree.getroot()
57-
>> definition = lib_oval.OvalDefinition(root)
73+
>>> tree = ElementTree()
74+
>>> tree.parse('test-definition.xml')
75+
>>> root = tree.getroot()
76+
>>> definition = lib_oval.OvalDefinition(root)
5877
5978
4. Change information in the definition from #3 and write the changes
6079
61-
>> meta = definition.getMetadata()
62-
>> repo = meta.getOvalRepositoryInformation()
63-
>> repo.setMinimumSchemaVersion("5.9")
64-
>> tree.write("outfilename.xml", UTF-8", True)
80+
>>> meta = definition.getMetadata()
81+
>>> repo = meta.getOvalRepositoryInformation()
82+
>>> repo.setMinimumSchemaVersion("5.9")
83+
>>> tree.write("outfilename.xml", "UTF-8", True)
6584
6685
6786

vulnerabilities/lib_oval.py.ABOUT

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
about_resource: lib_oval.py
2-
version: 6aaae0
3-
download_url: https://raw.githubusercontent.com/CISecurity/OVALRepo/6aaae00876ec716927e0ae5b9ccfa12f310427a0/scripts/lib_oval.py
4-
5-
name: OVALRepo - lib_oval
2+
version: e74da20d7c1de91c098c564ce65d6b93656eb86f
3+
download_url: https://raw.githubusercontent.com/CISecurity/OVALRepo/e74da20d7c1de91c098c564ce65d6b93656eb86f/scripts/lib_oval.py
4+
package_url: pkg:github/CISecurity/OVALRepo@e74da20d7c1de91c098c564ce65d6b93656eb86f#scripts/lib_oval.py
65
homepage_url: https://github.com/CISecurity/OVALRepo
76
owner: Center for Internet Security
87
author: Gunnar Engelbach <Gunnar.Engelbach@ThreatGuard.com>
98
notes: This is a single file extracted from OVALRepo that parses OVAL files.
109

11-
license: bsd-new
10+
copyright: Copyright (c) 2010 United States Government. All Rights Reserved.
11+
license_expression: bsd-new
1212
license_file: lib_oval.py.LICENSE
13-
14-
copyright: Copyright (c) 2010 United States Government. All Rights Reserved.
Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,26 @@
1-
Copyright© 2010 United States Government. All Rights Reserved.
1+
Copyright (c) 2010 United States Government. All Rights Reserved.
22

3-
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
3+
Redistribution and use in source and binary forms, with or without modification,
4+
are permitted provided that the following conditions are met:
45

5-
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
6-
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
7-
Neither the name of the Center for Internet Security, Inc. (CIS) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
6+
Redistributions of source code must retain the above copyright notice, this
7+
list of conditions and the following disclaimer.
88

9-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER, CIS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER, CIS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9+
Redistributions in binary form must reproduce the above copyright notice,
10+
this list of conditions and the following disclaimer in the documentation
11+
and/or other materials provided with the distribution.
12+
13+
Neither the name of the Center for Internet Security, Inc. (CIS) nor the
14+
names of its contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER, CIS AND CONTRIBUTORS "AS IS"
18+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER, CIS OR CONTRIBUTORS BE LIABLE
21+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
25+
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

0 commit comments

Comments
 (0)