Skip to content

Migrate debian-oval and ubuntu importer #740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 53 additions & 68 deletions vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@
from typing import Set
from typing import Tuple

import pytz
from dateutil import parser as dateparser
from fetchcode.vcs import fetch_via_vcs
from license_expression import Licensing
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange
from univers.versions import Version

from vulnerabilities import severity_systems
from vulnerabilities.oval_parser import OvalParser
from vulnerabilities.severity_systems import SCORING_SYSTEMS
from vulnerabilities.severity_systems import ScoringSystem
Expand Down Expand Up @@ -350,13 +354,13 @@ class OvalImporter(Importer):
"""

@staticmethod
def create_purl(pkg_name: str, pkg_version: str, pkg_data: Mapping) -> PackageURL:
def create_purl(pkg_name: str, pkg_data: Mapping) -> PackageURL:
"""
Helper method for building purls, so that subclasses can customize them without
reimplementing the get_data_from_xml_doc method.
Note: pkg_data must include the 'type' of the package.
"""
return PackageURL(name=pkg_name, version=pkg_version, **pkg_data)
return PackageURL(name=pkg_name, **pkg_data)

@staticmethod
def _collect_pkgs(parsed_oval_data: Mapping) -> Set:
Expand Down Expand Up @@ -390,28 +394,17 @@ def advisory_data(self) -> List[AdvisoryData]:
for metadata, oval_file in self._fetch():
try:
oval_data = self.get_data_from_xml_doc(oval_file, metadata)
yield oval_data
yield from oval_data
except Exception:
logger.error(
f"Failed to get updated_advisories: {oval_file!r} "
f"with {metadata!r}:\n" + traceback.format_exc()
)
continue

def set_api(self, all_pkgs: Iterable[str]):
"""
This method loads the self.pkg_manager_api with the specified packages.
It fetches and caches all the versions of these packages and exposes
them through self.pkg_manager_api.get(<package_name>). Example

>> self.set_api(['electron'])
Assume 'electron' has only versions 1.0.0 and 1.2.0
>> assert self.pkg_manager_api.get('electron') == {'1.0.0','1.2.0'}

"""
raise NotImplementedError

def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[AdvisoryData]:
def get_data_from_xml_doc(
self, xml_doc: ET.ElementTree, pkg_metadata={}
) -> Iterable[AdvisoryData]:
"""
The orchestration method of the OvalImporter. This method breaks an
OVAL XML ElementTree into `AdvisoryData` objects.
Expand All @@ -422,66 +415,58 @@ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> Lis
Example value of pkg_metadata:
{"type":"deb","qualifiers":{"distro":"buster"} }
"""

all_adv = []
oval_doc = OvalParser(self.translations, xml_doc)
raw_data = oval_doc.get_data()
all_pkgs = self._collect_pkgs(raw_data)
self.set_api(all_pkgs)
oval_parsed_data = OvalParser(self.translations, xml_doc)
raw_data = oval_parsed_data.get_data()
oval_doc = oval_parsed_data.oval_document
timestamp = oval_doc.getGenerator().getTimestamp()

# convert definition_data to AdvisoryData objects
for definition_data in raw_data:
# These fields are definition level, i.e common for all elements
# connected/linked to an OvalDefinition
vuln_id = definition_data["vuln_id"]
description = definition_data["description"]
references = [Reference(url=url) for url in definition_data["reference_urls"]]
severities = (
[
VulnerabilitySeverity(
system=severity_systems.GENERIC, value=definition_data.get("severity")
)
]
if definition_data.get("severity")
else []
)
references = [
Reference(url=url, severities=severities)
for url in definition_data["reference_urls"]
]
affected_packages = []
for test_data in definition_data["test_data"]:
for package_name in test_data["package_list"]:
if package_name and len(package_name) >= 50:
continue

affected_version_range = test_data["version_ranges"] or set()
version_class = version_class_by_package_type[pkg_metadata["type"]]
version_scheme = version_class.scheme

affected_version_range = VersionRange.from_scheme_version_spec_string(
version_scheme, affected_version_range
)
all_versions = self.pkg_manager_api.get(package_name).valid_versions

# FIXME: what is this 50 DB limit? that's too small for versions
# FIXME: we should not drop data this way
# This filter is for filtering out long versions.
# 50 is limit because that's what db permits atm.
all_versions = [version for version in all_versions if len(version) < 50]
if not all_versions:
continue

affected_purls = []
safe_purls = []
for version in all_versions:
purl = self.create_purl(
pkg_name=package_name,
pkg_version=version,
pkg_data=pkg_metadata,
affected_version_range = test_data["version_ranges"]
vrc = RANGE_CLASS_BY_SCHEMES[pkg_metadata["type"]]
if affected_version_range:
try:
affected_version_range = vrc.from_native(affected_version_range)
except Exception as e:
logger.error(
f"Failed to parse version range {affected_version_range!r} "
f"for package {package_name!r}:\n{e}"
)
continue
if package_name:
affected_packages.append(
AffectedPackage(
package=self.create_purl(package_name, pkg_metadata),
affected_version_range=affected_version_range,
)
)
if version_class(version) in affected_version_range:
affected_purls.append(purl)
else:
safe_purls.append(purl)

affected_packages.extend(
nearest_patched_package(affected_purls, safe_purls),
)

all_adv.append(
AdvisoryData(
summary=description,
affected_packages=affected_packages,
vulnerability_id=vuln_id,
references=references,
)
date_published = dateparser.parse(timestamp)
if not date_published.tzinfo:
date_published = date_published.replace(tzinfo=pytz.UTC)
yield AdvisoryData(
aliases=[vuln_id],
summary=description,
affected_packages=affected_packages,
references=sorted(references),
date_published=date_published,
)
return all_adv
4 changes: 4 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vulnerabilities.importers import alpine_linux
from vulnerabilities.importers import archlinux
from vulnerabilities.importers import debian
from vulnerabilities.importers import debian_oval
from vulnerabilities.importers import github
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import nginx
Expand All @@ -18,6 +19,7 @@
from vulnerabilities.importers import pypa
from vulnerabilities.importers import pysec
from vulnerabilities.importers import redhat
from vulnerabilities.importers import ubuntu

IMPORTERS_REGISTRY = [
nginx.NginxImporter,
Expand All @@ -31,6 +33,8 @@
gitlab.GitLabAPIImporter,
pypa.PyPaImporter,
archlinux.ArchlinuxImporter,
ubuntu.UbuntuImporter,
debian_oval.DebianOvalImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
2 changes: 1 addition & 1 deletion vulnerabilities/importers/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

class DebianImporter(Importer):

spdx_license_expression = "MIT"
spdx_license_expression = "LicenseRef-scancode-other-permissive"
license_url = "https://www.debian.org/license"
notice = """
From: Tushar Goel <tgoel@nexb.com>
Expand Down
51 changes: 39 additions & 12 deletions vulnerabilities/importers/debian_oval.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,65 @@
#


import asyncio
import xml.etree.ElementTree as ET

import requests

from vulnerabilities.importer import OvalImporter
from vulnerabilities.package_managers import DebianVersionAPI
from vulnerabilities.utils import create_etag


class DebianOvalImporter(OvalImporter):

spdx_license_expression = "LicenseRef-scancode-other-permissive"
license_url = "https://www.debian.org/license"
notice = """
From: Tushar Goel <tgoel@nexb.com>
Date: Thu, May 12, 2022 at 11:42 PM +00:00
Subject: Usage of Debian Security Data in VulnerableCode
To: <team@security.debian.org>
Hey,
We would like to integrate the debian security data in vulnerablecode
[1][2] which is a FOSS db of FOSS vulnerability data. We were not able
to know under which license the debian security data comes. We would
be grateful to have your acknowledgement over usage of the debian
security data in vulnerablecode and have some kind of licensing
declaration from your side.
[1] - https://github.com/nexB/vulnerablecode
[2] - https://github.com/nexB/vulnerablecode/pull/723
Regards,
From: Moritz Mühlenhoff <jmm@inutil.org>
Date: Wed, May 17, 2022, 19:12 PM +00:00
Subject: Re: Usage of Debian Security Data in VulnerableCode
To: Tushar Goel <tgoel@nexb.com>
Cc: <team@security.debian.org>
Am Thu, May 12, 2022 at 05:12:48PM +0530 schrieb Tushar Goel:
> Hey,
>
> We would like to integrate the debian security data in vulnerablecode
> [1][2] which is a FOSS db of FOSS vulnerability data. We were not able
> to know under which license the debian security data comes. We would
> be grateful to have your acknowledgement over usage of the debian
> security data in vulnerablecode and have some kind of licensing
> declaration from your side.
We don't have a specific license, but you have our endorsemen to
reuse the data by all means :-)
Cheers,
Moritz
"""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# we could avoid setting translations, and have it
# set by default in the OvalParser, but we don't yet know
# whether all OVAL providers use the same format
self.translations = {"less than": "<"}
self.pkg_manager_api = DebianVersionAPI()

def _fetch(self):
releases = self.config.releases
releases = ["wheezy", "stretch", "jessie", "buster", "bullseye"]
for release in releases:
file_url = f"https://www.debian.org/security/oval/oval-definitions-{release}.xml"
if not create_etag(data_src=self, url=file_url, etag_key="ETag"):
continue

resp = requests.get(file_url).content
yield (
{"type": "deb", "namespace": "debian", "qualifiers": {"distro": release}},
ET.ElementTree(ET.fromstring(resp.decode("utf-8"))),
)
return []

def set_api(self, packages):
asyncio.run(self.pkg_manager_api.load_api(packages))
15 changes: 5 additions & 10 deletions vulnerabilities/importers/ubuntu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,32 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import asyncio
import bz2
import logging
import xml.etree.ElementTree as ET

import requests

from vulnerabilities.importer import OvalImporter
from vulnerabilities.package_managers import LaunchpadVersionAPI

logger = logging.getLogger(__name__)


class UbuntuImporter(OvalImporter):
spdx_license_expression = "GPL"
license_url = "https://ubuntu.com/legal/terms"

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# we could avoid setting translations, and have it
# set by default in the OvalParser, but we don't yet know
# whether all OVAL providers use the same format
self.translations = {"less than": "<"}
self.pkg_manager_api = LaunchpadVersionAPI()

def _fetch(self):
base_url = "https://people.canonical.com/~ubuntu-security/oval"
releases = self.config.releases
for i, release in enumerate(releases, 1):
releases = ["bionic", "trusty", "focal", "eoan", "xenial"]
for release in releases:
file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8
logger.info(f"Fetching Ubuntu Oval: {file_url}")
response = requests.get(file_url)
Expand All @@ -47,8 +47,3 @@ def _fetch(self):
{"type": "deb", "namespace": "ubuntu"},
ET.ElementTree(ET.fromstring(extracted.decode("utf-8"))),
)

logger.info(f"Fetched {i} Ubuntu Oval releases from {base_url}")

def set_api(self, packages):
asyncio.run(self.pkg_manager_api.load_api(packages))
3 changes: 3 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@

from vulnerabilities import importers
from vulnerabilities.improvers import default
from vulnerabilities.improvers import oval

IMPROVERS_REGISTRY = [
default.DefaultImprover,
importers.nginx.NginxBasicImprover,
importers.github.GitHubBasicImprover,
importers.debian.DebianBasicImprover,
importers.gitlab.GitLabBasicImprover,
oval.DebianOvalBasicImprover,
oval.UbuntuOvalBasicImprover,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
Loading