Skip to content

Commit 165791c

Browse files
committed
Add OpenSSF Malicious Packages importer pipeline (#2019)
Implement V2 importer for the OpenSSF malicious-packages repository to collect advisories for malicious packages (typosquatting, dependency confusion, etc.) across npm, PyPI, Cargo, RubyGems, Maven, NuGet, and Go. - Add openssf_malicious_importer.py pipeline using OSV format parser - Add comprehensive unit tests - Register importer in IMPORTERS_REGISTRY Reference: https://github.com/ossf/malicious-packages Signed-off-by: Mrityunjay Raj <mr.raj.earth@gmail.com>
1 parent a8aa6da commit 165791c

File tree

3 files changed

+336
-0
lines changed

3 files changed

+336
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@
5656
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
5757
from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
5858
from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
59+
from vulnerabilities.pipelines.v2_importers import (
60+
openssf_malicious_importer as openssf_malicious_importer_v2,
61+
)
5962
from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
6063
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
6164
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
@@ -89,6 +92,7 @@
8992
ruby_importer_v2.RubyImporterPipeline,
9093
epss_importer_v2.EPSSImporterPipeline,
9194
mattermost_importer_v2.MattermostImporterPipeline,
95+
openssf_malicious_importer_v2.OpenSSFMaliciousImporterPipeline,
9296
nvd_importer.NVDImporterPipeline,
9397
github_importer.GitHubAPIImporterPipeline,
9498
gitlab_importer.GitLabImporterPipeline,
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import logging
12+
from pathlib import Path
13+
from typing import Iterable
14+
15+
from fetchcode.vcs import fetch_via_vcs
16+
17+
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
19+
from vulnerabilities.utils import get_advisory_url
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
class OpenSSFMaliciousImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
25+
"""
26+
OpenSSF Malicious Packages Importer Pipeline
27+
28+
Collect advisories for malicious packages from the OpenSSF malicious-packages
29+
repository. This includes typosquatting, dependency confusion, and other
30+
malicious packages discovered in npm, PyPI, RubyGems, and other ecosystems.
31+
32+
See: https://github.com/ossf/malicious-packages
33+
"""
34+
35+
pipeline_id = "openssf_malicious_importer"
36+
spdx_license_expression = "Apache-2.0"
37+
license_url = "https://github.com/ossf/malicious-packages/blob/main/LICENSE"
38+
repo_url = "git+https://github.com/ossf/malicious-packages/"
39+
40+
@classmethod
41+
def steps(cls):
42+
return (
43+
cls.clone,
44+
cls.collect_and_store_advisories,
45+
cls.clean_downloads,
46+
)
47+
48+
def clone(self):
49+
self.log(f"Cloning `{self.repo_url}`")
50+
self.vcs_response = fetch_via_vcs(self.repo_url)
51+
52+
def advisories_count(self):
53+
advisory_dir = Path(self.vcs_response.dest_dir) / "osv" / "malicious"
54+
return sum(1 for _ in advisory_dir.rglob("*.json"))
55+
56+
def collect_advisories(self) -> Iterable[AdvisoryData]:
57+
from vulnerabilities.importers.osv import parse_advisory_data_v2
58+
59+
# Ecosystems supported by both OpenSSF malicious-packages and VulnerableCode
60+
# Mapping: OSV ecosystem name -> purl type
61+
supported_ecosystems = [
62+
"pypi", # Python packages
63+
"npm", # JavaScript/Node.js packages
64+
"cargo", # Rust packages (crates.io)
65+
"gem", # Ruby packages (rubygems)
66+
"maven", # Java packages
67+
"nuget", # .NET packages
68+
"golang", # Go packages
69+
]
70+
71+
base_path = Path(self.vcs_response.dest_dir)
72+
advisory_dir = base_path / "osv" / "malicious"
73+
74+
for file in advisory_dir.rglob("*.json"):
75+
try:
76+
with open(file) as f:
77+
raw_data = json.load(f)
78+
except json.JSONDecodeError as e:
79+
logger.error(f"Failed to parse JSON from {file}: {e}")
80+
continue
81+
82+
advisory_url = get_advisory_url(
83+
file=file,
84+
base_path=base_path,
85+
url="https://github.com/ossf/malicious-packages/blob/main/",
86+
)
87+
advisory_text = file.read_text()
88+
89+
advisory = parse_advisory_data_v2(
90+
raw_data=raw_data,
91+
supported_ecosystems=supported_ecosystems,
92+
advisory_url=advisory_url,
93+
advisory_text=advisory_text,
94+
)
95+
96+
if advisory:
97+
yield advisory
98+
99+
def clean_downloads(self):
100+
if self.vcs_response:
101+
self.log("Removing cloned repository")
102+
self.vcs_response.delete()
103+
104+
def on_failure(self):
105+
self.clean_downloads()
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
from pathlib import Path
12+
13+
import pytest
14+
15+
from vulnerabilities.importer import AdvisoryData
16+
from vulnerabilities.pipelines.v2_importers.openssf_malicious_importer import (
17+
OpenSSFMaliciousImporterPipeline,
18+
)
19+
20+
21+
@pytest.fixture
def sample_malicious_advisory(tmp_path: Path):
    """
    Write one PyPI malicious-package advisory (OSV format) under ``tmp_path``.

    Return a tuple of (repository root, advisory file text, advisory dict).
    """
    package_name = "malicious-test-package"
    affected_versions = ["0.0.1", "0.0.2"]

    origin = {
        "id": "TEST-2025-01234",
        "import_time": "2025-03-31T07:07:04.129197674Z",
        "modified_time": "2025-03-28T13:05:11Z",
        "sha256": "abc123def456",
        "source": "test-source",
        "versions": list(affected_versions),
    }
    payload = {
        "modified": "2025-03-28T13:05:11Z",
        "published": "2025-03-28T13:05:11Z",
        "schema_version": "1.5.0",
        "id": "MAL-2025-1234",
        "summary": "Malicious code in malicious-test-package (PyPI)",
        "details": "This package contains malicious code that exfiltrates data.",
        "affected": [
            {
                "package": {
                    "ecosystem": "PyPI",
                    "name": package_name,
                    "purl": "pkg:pypi/malicious-test-package",
                },
                "versions": list(affected_versions),
            }
        ],
        "credits": [
            {
                "name": "Security Researcher",
                "type": "FINDER",
                "contact": ["https://example.com"],
            }
        ],
        "database_specific": {"malicious-packages-origins": [origin]},
    }

    # Mirror the repository layout: osv/malicious/<ecosystem>/<package>/<id>.json
    package_dir = tmp_path.joinpath("osv", "malicious", "pypi", package_name)
    package_dir.mkdir(parents=True)

    target = package_dir / "MAL-2025-1234.json"
    target.write_text(json.dumps(payload, indent=2))

    return tmp_path, target.read_text(), payload
69+
70+
71+
@pytest.fixture
def sample_npm_malicious_advisory(tmp_path: Path):
    """
    Write one npm malicious-package advisory (OSV format) under ``tmp_path``.

    Return a tuple of (repository root, advisory file text, advisory dict).
    """
    package_name = "typosquat-package"

    payload = {
        "modified": "2025-01-15T10:00:00Z",
        "published": "2025-01-15T10:00:00Z",
        "schema_version": "1.5.0",
        "id": "MAL-2025-5678",
        "summary": "Malicious code in typosquat-package (npm)",
        "details": "Typosquatting attack targeting popular package.",
        "affected": [
            {
                "package": {"ecosystem": "npm", "name": package_name},
                "versions": ["1.0.0"],
            }
        ],
    }

    # Mirror the repository layout: osv/malicious/<ecosystem>/<package>/<id>.json
    package_dir = tmp_path.joinpath("osv", "malicious", "npm", package_name)
    package_dir.mkdir(parents=True)

    target = package_dir / "MAL-2025-5678.json"
    target.write_text(json.dumps(payload, indent=2))

    return tmp_path, target.read_text(), payload
99+
100+
101+
class DummyVCSResponse:
    """Stand-in for a VCS response: exposes ``dest_dir`` and a no-op ``delete``."""

    def __init__(self, dest_dir):
        # Directory the pipeline under test treats as the cloned repository.
        self.dest_dir = dest_dir

    def delete(self):
        """Do nothing; the temporary directory is owned by the test."""
        return None
109+
110+
111+
def test_collect_advisories_from_openssf_malicious(sample_malicious_advisory):
    """A single PyPI advisory file is collected as one AdvisoryData."""
    repo_root, _advisory_text, _advisory_json = sample_malicious_advisory

    pipeline = OpenSSFMaliciousImporterPipeline()
    pipeline.vcs_response = DummyVCSResponse(str(repo_root))

    collected = list(pipeline.collect_advisories())
    assert len(collected) == 1

    result = collected[0]
    assert isinstance(result, AdvisoryData)
    assert result.advisory_id == "MAL-2025-1234"
    assert "Malicious code" in result.summary
    assert result.original_advisory_text.strip().startswith("{")
    assert result.affected_packages

    first_package = result.affected_packages[0].package
    assert first_package.type == "pypi"
    assert first_package.name == "malicious-test-package"
129+
130+
131+
def test_collect_npm_advisories(sample_npm_malicious_advisory):
    """A single npm advisory file is collected with the npm package details."""
    repo_root, _advisory_text, _advisory_json = sample_npm_malicious_advisory

    pipeline = OpenSSFMaliciousImporterPipeline()
    pipeline.vcs_response = DummyVCSResponse(str(repo_root))

    collected = list(pipeline.collect_advisories())
    assert len(collected) == 1

    result = collected[0]
    assert result.advisory_id == "MAL-2025-5678"

    first_package = result.affected_packages[0].package
    assert first_package.type == "npm"
    assert first_package.name == "typosquat-package"
145+
146+
147+
def test_advisories_count(sample_malicious_advisory):
    """``advisories_count`` reports one advisory for a single-file repository."""
    repo_root = sample_malicious_advisory[0]

    pipeline = OpenSSFMaliciousImporterPipeline()
    pipeline.vcs_response = DummyVCSResponse(str(repo_root))

    assert pipeline.advisories_count() == 1
156+
157+
158+
def test_multiple_advisories(tmp_path: Path):
    """Advisories from several ecosystems are all collected and counted."""
    malicious_root = tmp_path / "osv" / "malicious"

    # (directory, package name, advisory id, summary, OSV ecosystem, version)
    specs = (
        ("pypi", "bad-pkg", "MAL-2025-0001", "Bad PyPI package", "PyPI", "1.0"),
        ("npm", "bad-js", "MAL-2025-0002", "Bad npm package", "npm", "2.0"),
    )
    for folder, package_name, advisory_id, summary, ecosystem, version in specs:
        package_dir = malicious_root / folder / package_name
        package_dir.mkdir(parents=True)
        record = {
            "id": advisory_id,
            "summary": summary,
            "affected": [
                {
                    "package": {"ecosystem": ecosystem, "name": package_name},
                    "versions": [version],
                }
            ],
        }
        (package_dir / f"{advisory_id}.json").write_text(json.dumps(record))

    pipeline = OpenSSFMaliciousImporterPipeline()
    pipeline.vcs_response = DummyVCSResponse(str(tmp_path))

    collected = list(pipeline.collect_advisories())
    assert len(collected) == 2
    assert pipeline.advisories_count() == 2

    collected_ids = {item.advisory_id for item in collected}
    assert collected_ids == {"MAL-2025-0001", "MAL-2025-0002"}
195+
196+
197+
def test_pipeline_metadata():
    """The pipeline class exposes the expected id, license and repository URL."""
    pipeline_cls = OpenSSFMaliciousImporterPipeline

    assert pipeline_cls.pipeline_id == "openssf_malicious_importer"
    assert pipeline_cls.spdx_license_expression == "Apache-2.0"
    assert "ossf/malicious-packages" in pipeline_cls.repo_url
202+
203+
204+
def test_unsupported_ecosystem_skipped(tmp_path: Path):
    """An advisory for an unsupported ecosystem yields no affected packages."""
    record = {
        "id": "MAL-2025-9999",
        "summary": "Package in unsupported ecosystem",
        "affected": [
            {
                "package": {"ecosystem": "UnsupportedEcosystem", "name": "pkg"},
                "versions": ["1.0"],
            }
        ],
    }

    package_dir = tmp_path.joinpath("osv", "malicious", "unsupported", "pkg")
    package_dir.mkdir(parents=True)
    (package_dir / "MAL-2025-9999.json").write_text(json.dumps(record))

    pipeline = OpenSSFMaliciousImporterPipeline()
    pipeline.vcs_response = DummyVCSResponse(str(tmp_path))

    collected = list(pipeline.collect_advisories())

    # The advisory itself is still produced; only its package list is empty
    # because the ecosystem is not supported.
    assert len(collected) == 1
    assert collected[0].affected_packages == []

0 commit comments

Comments
 (0)