Skip to content

Commit e4e1684

Browse files
authored
Merge branch 'main' into add-tuxcare-importer
2 parents 7d47d46 + 4a24324 commit e4e1684

File tree

3 files changed

+465
-0
lines changed

3 files changed

+465
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
4545
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4646
from vulnerabilities.pipelines.v2_importers import apache_kafka_importer as apache_kafka_importer_v2
47+
from vulnerabilities.pipelines.v2_importers import apache_tomcat_importer as apache_tomcat_v2
4748
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
4849
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
4950
from vulnerabilities.pipelines.v2_importers import (
@@ -105,6 +106,7 @@
105106
nginx_importer_v2.NginxImporterPipeline,
106107
mattermost_importer_v2.MattermostImporterPipeline,
107108
tuxcare_importer_v2.TuxCareImporterPipeline,
109+
apache_tomcat_v2.ApacheTomcatImporterPipeline,
108110
nvd_importer.NVDImporterPipeline,
109111
github_importer.GitHubAPIImporterPipeline,
110112
gitlab_importer.GitLabImporterPipeline,
Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import dataclasses
11+
import re
12+
import urllib
13+
from collections import defaultdict
14+
from collections import namedtuple
15+
from typing import Iterable
16+
17+
import requests
18+
from bs4 import BeautifulSoup
19+
from packageurl import PackageURL
20+
from univers.version_constraint import VersionConstraint
21+
from univers.version_range import ApacheVersionRange
22+
from univers.version_range import MavenVersionRange
23+
from univers.versions import MavenVersion
24+
from univers.versions import SemverVersion
25+
26+
from vulnerabilities.importer import AdvisoryData
27+
from vulnerabilities.importer import AffectedPackageV2
28+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
29+
30+
31+
class ApacheTomcatImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Apache Tomcat Importer Pipeline

    This pipeline imports security advisories from the Apache Tomcat project.
    It scrapes the security index page at ``base_url`` for the per-version
    security pages, parses each page and yields one ``AdvisoryData`` per CVE
    and per page.
    """

    pipeline_id = "apache_tomcat_importer_v2"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://www.apache.org/licenses/LICENSE-2.0"
    base_url = "https://tomcat.apache.org/security"

    # Seconds to wait for the remote server before giving up on a request, so
    # that an unresponsive server cannot hang the import forever.
    request_timeout = 30

    def fetch_advisory_links(self):
        """
        Yield the URLs of each Tomcat version security-related page.
        Each page link is in the form of `https://tomcat.apache.org/security-10.html`,
        for instance, for v10.

        Each URL is yielded once, without any ``#fragment``, even when the
        index page links to it several times.

        Raise a ``requests.RequestException`` if the index page cannot be
        fetched.
        """
        # A bare ``import urllib`` does not guarantee that the ``parse``
        # submodule is loaded, so import what is needed explicitly.
        from urllib.parse import urldefrag
        from urllib.parse import urljoin

        response = requests.get(self.base_url, timeout=self.request_timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, features="lxml")

        seen_urls = set()
        for tag in soup.find_all("a"):
            href = tag.get("href")
            if not href:
                continue

            # Drop any anchor so that ``page.html`` and ``page.html#x`` are
            # treated as the same page.
            link, _fragment = urldefrag(href)
            if "security-" not in link or not any(char.isdigit() for char in link):
                continue

            page_url = urljoin(self.base_url, link)
            if page_url in seen_urls:
                continue
            seen_urls.add(page_url)
            yield page_url

    @classmethod
    def steps(cls):
        return (cls.collect_and_store_advisories,)

    @classmethod
    def advisories_count(cls):
        return 0

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield an ``AdvisoryData`` for each CVE found on each Tomcat security page.

        This is best-effort: a page that cannot be fetched or parsed, or a CVE
        whose data cannot be converted, is logged and skipped so that the
        remaining pages and CVEs are still processed.
        """
        for page_url in self.fetch_advisory_links():
            try:
                response = requests.get(page_url, timeout=self.request_timeout)
                response.raise_for_status()
                tomcat_advisories = parse_tomcat_security(response.content)
            except Exception as e:
                self.log(f"Failed to fetch or parse {page_url}: {e!r}")
                continue

            self.log(f"Processing {len(tomcat_advisories)} advisories from {page_url}")

            # The same CVE can be listed under several "Fixed in" sections of
            # a page: merge these in a single advisory.
            grouped = defaultdict(list)
            for advisory in tomcat_advisories:
                grouped[advisory.cve].append(advisory)

            page_id = page_url.split("/")[-1].replace(".html", "")
            for cve, advisory_list in grouped.items():
                try:
                    advisory_data = self._build_advisory_data(
                        page_id=page_id,
                        page_url=page_url,
                        cve=cve,
                        advisory_list=advisory_list,
                    )
                except Exception as e:
                    self.log(f"Failed to process {cve} from {page_url}: {e!r}")
                    continue

                # Yield outside of the ``try`` so that only our own conversion
                # errors are handled here.
                yield advisory_data

    def _build_advisory_data(self, page_id, page_url, cve, advisory_list):
        """
        Return an ``AdvisoryData`` for ``cve`` built from ``advisory_list``, the
        list of parsed entries for this CVE found on the page at ``page_url``.
        Each entry contributes an "apache" and a "maven" affected package.
        """
        affected_packages = []
        for advisory in advisory_list:
            self.log(f"Processing advisory {advisory.cve}")
            apache_range = to_version_ranges_apache(
                advisory.affected_versions,
                advisory.fixed_in,
                self,
            )
            maven_range = to_version_ranges_maven(
                advisory.affected_versions,
                advisory.fixed_in,
            )

            affected_packages.append(
                AffectedPackageV2(
                    package=PackageURL(type="apache", name="tomcat"),
                    affected_version_range=apache_range,
                )
            )
            affected_packages.append(
                AffectedPackageV2(
                    package=PackageURL(
                        type="maven",
                        namespace="org.apache.tomcat",
                        name="tomcat",
                    ),
                    affected_version_range=maven_range,
                )
            )

        return AdvisoryData(
            advisory_id=f"{page_id}/{cve}",
            summary=advisory_list[0].summary,
            affected_packages=affected_packages,
            url=page_url,
        )
116+
117+
118+
# Match a hyphen-separated range such as "6.0.0-6.0.13" where BOTH sides start
# with a digit, so that a single qualified version such as "11.0.0-M1" is not
# mistaken for a range.
_HYPHEN_RANGE = re.compile(r"(\d[\w.]*)\s*-\s*(\d[\w.]*)")


def parse_affected_version_constraints(version_item):
    """
    Return a list of ``(comparator, version_string)`` tuples parsed from
    ``version_item``, the text that follows "Affects:" in a Tomcat advisory.
    Return an empty list if ``version_item`` is empty or cannot be parsed.

    For example::

    >>> parse_affected_version_constraints("9.0.0.M1 to 9.0.9")
    [('>=', '9.0.0.M1'), ('<=', '9.0.9')]
    >>> parse_affected_version_constraints("6.0.0-6.0.13")
    [('>=', '6.0.0'), ('<=', '6.0.13')]
    >>> parse_affected_version_constraints("< 7.0.5")
    [('<', '7.0.5')]
    >>> parse_affected_version_constraints("11.0.0-M1")
    [('=', '11.0.0-M1')]
    """
    if not version_item:
        return []

    tokens = version_item.split()
    if not tokens:
        return []
    text = " ".join(tokens)

    # Check "<=" before "<" since the latter is a prefix of the former.
    for comparator in ("<=", "<"):
        if text.startswith(comparator):
            remainder = text[len(comparator) :].split()
            return [(comparator, remainder[0])] if remainder else []

    # "X to Y": require "to" as a standalone word with a version on each side.
    if "to" in tokens:
        position = tokens.index("to")
        if 0 < position < len(tokens) - 1:
            return [(">=", tokens[position - 1]), ("<=", tokens[position + 1])]
        return []

    hyphen_range = _HYPHEN_RANGE.fullmatch(text)
    if hyphen_range:
        lower, upper = hyphen_range.groups()
        return [(">=", lower), ("<=", upper)]

    return [("=", tokens[0])]


def to_version_ranges_apache(version_item, fixed_version, self=None):
    """
    Return an ``ApacheVersionRange`` built from ``version_item``, the
    "Affects:" text of an advisory, and ``fixed_version``, the version that
    contains the fix. The fixed version is added as an inverted "=" constraint.

    ``self`` is an optional object with a ``log(message)`` method, such as the
    importer pipeline. A version that is not a valid ``SemverVersion`` is
    logged there, if provided, and its constraint is skipped.
    """
    constraints = []

    for comparator, version in parse_affected_version_constraints(version_item):
        try:
            constraints.append(
                VersionConstraint(
                    comparator=comparator,
                    version=SemverVersion(version),
                )
            )
        except Exception as e:
            if self:
                self.log(f"{version!r} is not a valid SemverVersion {e!r}")

    fixed_tokens = fixed_version.split() if fixed_version else []
    if fixed_tokens:
        fixed = fixed_tokens[0]
        # Guard the fixed version the same way as the affected versions: an
        # invalid one is logged and skipped and does not fail the advisory.
        try:
            constraints.append(
                VersionConstraint(
                    comparator="=",
                    version=SemverVersion(fixed),
                ).invert()
            )
        except Exception as e:
            if self:
                self.log(f"{fixed!r} is not a valid SemverVersion {e!r}")

    return ApacheVersionRange(constraints=constraints)


def to_version_ranges_maven(version_item, fixed_version):
    """
    Return a ``MavenVersionRange`` built from ``version_item``, the "Affects:"
    text of an advisory, and ``fixed_version``, the version that contains the
    fix. The fixed version is added as an inverted "=" constraint.

    Any exception raised when building a ``MavenVersion`` is propagated to the
    caller.
    """
    constraints = [
        VersionConstraint(comparator=comparator, version=MavenVersion(version))
        for comparator, version in parse_affected_version_constraints(version_item)
    ]

    fixed_tokens = fixed_version.split() if fixed_version else []
    if fixed_tokens:
        constraints.append(
            VersionConstraint(
                comparator="=",
                version=MavenVersion(fixed_tokens[0]),
            ).invert()
        )

    return MavenVersionRange(constraints=constraints)
253+
254+
255+
@dataclasses.dataclass(order=True)
class TomcatAdvisoryData:
    """
    One advisory entry parsed from a Tomcat security page.

    Instances compare and sort field by field, in declaration order.
    """

    # CVE identifier taken from the text of the CVE link, e.g. "CVE-2020-1938".
    cve: str
    # Text of the advisory heading.
    summary: str
    # Version from the enclosing "Fixed in Apache Tomcat ..." heading.
    fixed_in: str
    # Text following "Affects:". The parser may store None here when an entry
    # has no "Affects:" paragraph.
    affected_versions: str
261+
262+
263+
def parse_tomcat_security(html_content):
    """
    Return a list of ``TomcatAdvisoryData`` parsed from ``html_content``, the
    HTML of a Tomcat security page.

    Each ``<h3>`` "Fixed in Apache Tomcat X" heading is followed by a
    ``<div class="text">`` whose direct ``<p>`` children are scanned in order:
    a paragraph with both a ``<strong>`` and a CVE link starts a new entry,
    and the next paragraph starting with "Affects:" completes it. An entry
    without an "Affects:" paragraph is kept with ``affected_versions`` set to
    None.
    """
    soup = BeautifulSoup(html_content, "lxml")
    results = []

    # Compiled once and reused for every paragraph.
    cve_href = re.compile(r"CVE-")
    affects_prefix = "Affects:"

    for header in soup.find_all("h3", id=re.compile(r"Fixed_in_Apache_Tomcat")):
        m = re.search(r"Tomcat\s+([\d\.]+)", header.get_text())
        if not m:
            continue
        # A milestone heading such as "9.0.0.M10" is captured as "9.0.0.":
        # drop the dangling dot(s).
        # NOTE(review): the milestone qualifier itself is not captured.
        fixed_in = m.group(1).rstrip(".")
        if not fixed_in:
            continue

        container = header.find_next_sibling("div", class_="text")
        if not container:
            continue

        # Entry started by a CVE paragraph and still waiting for "Affects:".
        pending = None

        for p in container.find_all("p", recursive=False):
            strong = p.find("strong")
            cve_link = p.find("a", href=cve_href)

            if strong and cve_link:
                # A new entry starts before the previous one got its
                # "Affects:" line: keep the previous one as-is.
                if pending is not None:
                    results.append(pending)

                pending = TomcatAdvisoryData(
                    cve=cve_link.get_text(strip=True),
                    summary=strong.get_text(" ", strip=True),
                    affected_versions=None,
                    fixed_in=fixed_in,
                )
                continue

            if pending is None:
                continue

            text = p.get_text(" ", strip=True)
            if text.startswith(affects_prefix):
                # Strip only the leading marker, not later occurrences.
                pending.affected_versions = text[len(affects_prefix) :].strip()
                results.append(pending)
                pending = None

        if pending is not None:
            results.append(pending)

    return results

0 commit comments

Comments
 (0)