Skip to content

Commit 6ec2e9e

Browse files
authored
Merge pull request #662 from TG1999/new_importer/gitlab
Add gitlab importer
2 parents 1717f51 + b7ae8dc commit 6ec2e9e

40 files changed: 1629 additions (+1629) and 159 deletions (-159).

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -107,10 +107,11 @@ toml==0.10.2
107107
tomli==2.0.1
108108
traitlets==5.1.1
109109
typing_extensions==4.1.1
110-
univers==30.5.1
110+
univers==30.7.0
111111
urllib3==1.26.9
112112
wcwidth==0.2.5
113113
websocket-client==0.59.0
114114
yarl==1.7.2
115115
zipp==3.8.0
116-
dateparser==1.1.1
116+
dateparser==1.1.1
117+
fetchcode==0.1.0

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@ install_requires =
8181
GitPython>=3.1.17
8282
aiohttp>=3.7.4.post0
8383
requests>=2.25.1
84+
fetchcode>=0.1.0
8485

8586
[options.extras_require]
8687
dev =

vulnerabilities/import_runner.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,9 @@ def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: st
6666
"""
6767
count = 0
6868
for data in advisory_datas:
69+
# https://nvd.nist.gov/vuln/detail/CVE-2013-4314
70+
# https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
71+
data.summary = data.summary.replace("\x00", "\uFFFD")
6972
obj, created = Advisory.objects.get_or_create(
7073
aliases=data.aliases,
7174
summary=data.summary,

vulnerabilities/importer.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,8 @@
4848
from vulnerabilities.severity_systems import ScoringSystem
4949
from vulnerabilities.utils import classproperty
5050
from vulnerabilities.utils import evolve_purl
51+
from vulnerabilities.utils import get_reference_id
52+
from vulnerabilities.utils import is_cve
5153
from vulnerabilities.utils import nearest_patched_package
5254

5355
logger = logging.getLogger(__name__)
@@ -105,6 +107,15 @@ def from_dict(cls, ref: dict):
105107
],
106108
)
107109

110+
@classmethod
111+
def from_url(cls, url):
112+
reference_id = get_reference_id(url)
113+
if "GHSA-" in reference_id.upper():
114+
return cls(reference_id=reference_id, url=url)
115+
if is_cve(reference_id):
116+
return cls(url=url, reference_id=reference_id.upper())
117+
return cls(url=url)
118+
108119

109120
class UnMergeablePackageError(Exception):
110121
"""

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
from vulnerabilities.importers import alpine_linux
2323
from vulnerabilities.importers import debian
2424
from vulnerabilities.importers import github
25+
from vulnerabilities.importers import gitlab
2526
from vulnerabilities.importers import nginx
2627
from vulnerabilities.importers import nvd
2728
from vulnerabilities.importers import openssl
@@ -37,6 +38,7 @@
3738
redhat.RedhatImporter,
3839
pysec.PyPIImporter,
3940
debian.DebianImporter,
41+
gitlab.GitLabAPIImporter,
4042
]
4143

4244
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}

vulnerabilities/importers/github.py

Lines changed: 8 additions & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,10 @@
2626
from typing import List
2727
from typing import Mapping
2828
from typing import Optional
29-
from typing import Tuple
3029

3130
from dateutil import parser as dateparser
3231
from django.db.models.query import QuerySet
3332
from packageurl import PackageURL
34-
from univers.version_range import VersionRange
3533
from univers.version_range import build_range_from_github_advisory_constraint
3634

3735
from vulnerabilities import severity_systems
@@ -45,17 +43,15 @@
4543
from vulnerabilities.improver import Improver
4644
from vulnerabilities.improver import Inference
4745
from vulnerabilities.models import Advisory
48-
from vulnerabilities.package_managers import ComposerVersionAPI
46+
from vulnerabilities.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE
4947
from vulnerabilities.package_managers import GoproxyVersionAPI
50-
from vulnerabilities.package_managers import MavenVersionAPI
51-
from vulnerabilities.package_managers import NugetVersionAPI
52-
from vulnerabilities.package_managers import PypiVersionAPI
53-
from vulnerabilities.package_managers import RubyVersionAPI
5448
from vulnerabilities.package_managers import VersionAPI
49+
from vulnerabilities.package_managers import get_api_package_name
5550
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
5651
from vulnerabilities.utils import get_affected_packages_by_patched_package
5752
from vulnerabilities.utils import get_item
5853
from vulnerabilities.utils import nearest_patched_package
54+
from vulnerabilities.utils import resolve_version_range
5955

6056
logger = logging.getLogger(__name__)
6157

@@ -129,7 +125,7 @@
129125
"COMPOSER": "composer",
130126
"PIP": "pypi",
131127
"RUBYGEMS": "gem",
132-
"GO": "golang",
128+
# "GO": "golang",
133129
}
134130

135131
GITHUB_ECOSYSTEM_BY_PACKAGE_TYPE = {
@@ -171,17 +167,6 @@
171167
}
172168
"""
173169

174-
VERSION_API_CLASSES = [
175-
MavenVersionAPI,
176-
NugetVersionAPI,
177-
ComposerVersionAPI,
178-
PypiVersionAPI,
179-
RubyVersionAPI,
180-
GoproxyVersionAPI,
181-
]
182-
183-
VERSION_API_CLASSES_BY_PACKAGE_TYPE = {cls.package_type: cls for cls in VERSION_API_CLASSES}
184-
185170

186171
class GitHubAPIImporter(Importer):
187172
spdx_license_expression = "CC-BY-4.0"
@@ -205,38 +190,6 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
205190
break
206191

207192

208-
def get_reference_id(url: str):
209-
"""
210-
Return the reference id from a URL
211-
For example:
212-
>>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j")
213-
'GHSA-c9hw-wf7x-jp9j'
214-
"""
215-
url_parts = url.split("/")
216-
last_url_part = url_parts[-1]
217-
return last_url_part
218-
219-
220-
def extract_references(reference_data: List[dict]) -> Iterable[Reference]:
221-
"""
222-
Yield `reference` by iterating over `reference_data`
223-
>>> list(extract_references([{'url': "https://github.com/advisories/GHSA-c9hw-wf7x-jp9j"}]))
224-
[Reference(url="https://github.com/advisories/GHSA-c9hw-wf7x-jp9j"), reference_id = "GHSA-c9hw-wf7x-jp9j" ]
225-
>>> list(extract_references([{'url': "https://github.com/advisories/c9hw-wf7x-jp9j"}]))
226-
[Reference(url="https://github.com/advisories/c9hw-wf7x-jp9j")]
227-
"""
228-
for ref in reference_data:
229-
url = ref["url"]
230-
if not isinstance(url, str):
231-
logger.error(f"extract_references: url is not of type `str`: {url}")
232-
continue
233-
if "GHSA-" in url.upper():
234-
reference = Reference(url=url, reference_id=get_reference_id(url))
235-
else:
236-
reference = Reference(url=url)
237-
yield reference
238-
239-
240193
def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]:
241194
"""
242195
Return a PackageURL by splitting the `github_name` using the `pkg_type` convention.
@@ -255,8 +208,7 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]:
255208

256209
if pkg_type == "composer":
257210
if "/" not in github_name:
258-
logger.error(f"get_purl: Invalid composer package name {github_name}")
259-
return
211+
return PackageURL(type=pkg_type, name=github_name)
260212
vendor, _, name = github_name.partition("/")
261213
return PackageURL(type=pkg_type, namespace=vendor, name=name)
262214

@@ -272,26 +224,6 @@ class InvalidVersionRange(Exception):
272224
"""
273225

274226

275-
def get_api_package_name(purl: PackageURL) -> str:
276-
"""
277-
Return the package name expected by the GitHub API given a PackageURL
278-
>>> get_api_package_name(PackageURL(type="maven", namespace="org.apache.commons", name="commons-lang3"))
279-
"org.apache.commons:commons-lang3"
280-
>>> get_api_package_name(PackageURL(type="composer", namespace="foo", name="bar"))
281-
"foo/bar"
282-
"""
283-
if purl.type == "maven":
284-
return f"{purl.namespace}:{purl.name}"
285-
286-
if purl.type == "composer":
287-
return f"{purl.namespace}/{purl.name}"
288-
289-
if purl.type in ("nuget", "pypi", "gem", "golang"):
290-
return purl.name
291-
292-
logger.error(f"get_api_package_name: Unknown PURL {purl!r}")
293-
294-
295227
def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:
296228
"""
297229
Yield `AdvisoryData` by taking `resp` and `ecosystem` as input
@@ -349,7 +281,8 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:
349281

350282
references = get_item(advisory, "references") or []
351283
if references:
352-
references: List[Reference] = list(extract_references(references))
284+
urls = (ref["url"] for ref in references)
285+
references = [Reference.from_url(u) for u in urls]
353286

354287
summary = get_item(advisory, "summary")
355288
identifiers = get_item(advisory, "identifiers") or []
@@ -451,6 +384,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
451384
aff_vers, unaff_vers = resolve_version_range(
452385
affected_version_range=affected_version_range,
453386
package_versions=valid_versions,
387+
ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
454388
)
455389
affected_purls = [
456390
PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version)
@@ -476,37 +410,3 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
476410
affected_purls=affected_packages,
477411
fixed_purl=fixed_package,
478412
)
479-
480-
481-
def resolve_version_range(
482-
affected_version_range: VersionRange,
483-
package_versions: List[str],
484-
ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
485-
) -> Tuple[List[str], List[str]]:
486-
"""
487-
Given an affected version range and a list of `package_versions`, resolve
488-
which versions are in this range and return a tuple of two lists of
489-
`affected_versions` and `unaffected_versions`.
490-
"""
491-
if not affected_version_range:
492-
logger.error(f"affected version range is {affected_version_range!r}")
493-
return [], []
494-
affected_versions = []
495-
unaffected_versions = []
496-
for package_version in package_versions or []:
497-
if package_version in ignorable_versions:
498-
continue
499-
# Remove whitespace
500-
package_version = package_version.replace(" ", "")
501-
# Remove leading 'v'
502-
package_version = package_version.lstrip("vV")
503-
try:
504-
version = affected_version_range.version_class(package_version)
505-
except Exception:
506-
logger.error(f"Could not parse version {package_version!r}")
507-
continue
508-
if version in affected_version_range:
509-
affected_versions.append(package_version)
510-
else:
511-
unaffected_versions.append(package_version)
512-
return affected_versions, unaffected_versions

0 commit comments

Comments
 (0)