From 22b4226078b041a16bf05163347a66ab4dbcf3a5 Mon Sep 17 00:00:00 2001 From: Eric Brown Date: Fri, 8 Mar 2024 07:19:40 -0800 Subject: [PATCH] Add a SARIF output formatter (#1113) This commit adds a formatter that outputs JSON in a specific SARIF format according to the spec at [1]. This code is largely leveraged from an existing implementation found here [2]. SARIF format is very useful for integration into ecosystems such as GitHub Actions. [1] https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html [2] https://github.com/microsoft/bandit-sarif-formatter Closes #646 Signed-off-by: Eric Brown --- bandit/__init__.py | 1 + bandit/formatters/sarif.py | 372 ++++++++++++++++++++++++++++ doc/source/formatters/sarif.rst | 5 + doc/source/man/bandit.rst | 2 +- doc/source/start.rst | 7 + setup.cfg | 4 + tests/unit/formatters/test_sarif.py | 139 +++++++++++ tox.ini | 1 + 8 files changed, 530 insertions(+), 1 deletion(-) create mode 100644 bandit/formatters/sarif.py create mode 100644 doc/source/formatters/sarif.rst create mode 100644 tests/unit/formatters/test_sarif.py diff --git a/bandit/__init__.py b/bandit/__init__.py index 75f863db2..7c7bf00a8 100644 --- a/bandit/__init__.py +++ b/bandit/__init__.py @@ -16,4 +16,5 @@ from bandit.core.issue import * # noqa from bandit.core.test_properties import * # noqa +__author__ = metadata.metadata("bandit")["Author"] __version__ = metadata.version("bandit") diff --git a/bandit/formatters/sarif.py b/bandit/formatters/sarif.py new file mode 100644 index 000000000..ce2f03b7b --- /dev/null +++ b/bandit/formatters/sarif.py @@ -0,0 +1,372 @@ +# Copyright (c) Microsoft. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Note: this code is mostly incorporated from +# https://github.com/microsoft/bandit-sarif-formatter +# +r""" +=============== +SARIF formatter +=============== + +This formatter outputs the issues in SARIF formatted JSON. + +:Example: + +.. 
code-block:: javascript + + { + "runs": [ + { + "tool": { + "driver": { + "name": "Bandit", + "organization": "PyCQA", + "rules": [ + { + "id": "B101", + "name": "assert_used", + "properties": { + "tags": [ + "security", + "external/cwe/cwe-703" + ], + "precision": "high" + }, + "helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html" + } + ], + "version": "1.7.8", + "semanticVersion": "1.7.8" + } + }, + "invocations": [ + { + "executionSuccessful": true, + "endTimeUtc": "2024-03-05T03:28:48Z" + } + ], + "properties": { + "metrics": { + "_totals": { + "loc": 1, + "nosec": 0, + "skipped_tests": 0, + "SEVERITY.UNDEFINED": 0, + "CONFIDENCE.UNDEFINED": 0, + "SEVERITY.LOW": 1, + "CONFIDENCE.LOW": 0, + "SEVERITY.MEDIUM": 0, + "CONFIDENCE.MEDIUM": 0, + "SEVERITY.HIGH": 0, + "CONFIDENCE.HIGH": 1 + }, + "./examples/assert.py": { + "loc": 1, + "nosec": 0, + "skipped_tests": 0, + "SEVERITY.UNDEFINED": 0, + "SEVERITY.LOW": 1, + "SEVERITY.MEDIUM": 0, + "SEVERITY.HIGH": 0, + "CONFIDENCE.UNDEFINED": 0, + "CONFIDENCE.LOW": 0, + "CONFIDENCE.MEDIUM": 0, + "CONFIDENCE.HIGH": 1 + } + } + }, + "results": [ + { + "message": { + "text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code." + }, + "level": "note", + "locations": [ + { + "physicalLocation": { + "region": { + "snippet": { + "text": "assert True\n" + }, + "endColumn": 11, + "endLine": 1, + "startColumn": 0, + "startLine": 1 + }, + "artifactLocation": { + "uri": "examples/assert.py" + }, + "contextRegion": { + "snippet": { + "text": "assert True\n" + }, + "endLine": 1, + "startLine": 1 + } + } + } + ], + "properties": { + "issue_confidence": "HIGH", + "issue_severity": "LOW" + }, + "ruleId": "B101", + "ruleIndex": 0 + } + ] + } + ], + "version": "2.1.0", + "$schema": "https://json.schemastore.org/sarif-2.1.0.json" + } + +.. 
versionadded:: 1.7.8 + +""" # noqa: E501 +import logging +import pathlib +import sys +import urllib.parse as urlparse +from datetime import datetime + +import sarif_om as om +from jschema_to_python.to_json import to_json + +import bandit +from bandit.core import docs_utils + +LOG = logging.getLogger(__name__) +SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json" +SCHEMA_VER = "2.1.0" +TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + + +def report(manager, fileobj, sev_level, conf_level, lines=-1): + """Prints issues in SARIF format + + :param manager: the bandit manager object + :param fileobj: The output file object, which may be sys.stdout + :param sev_level: Filtering severity level + :param conf_level: Filtering confidence level + :param lines: Number of lines to report, -1 for all + """ + + log = om.SarifLog( + schema_uri=SCHEMA_URI, + version=SCHEMA_VER, + runs=[ + om.Run( + tool=om.Tool( + driver=om.ToolComponent( + name="Bandit", + organization=bandit.__author__, + semantic_version=bandit.__version__, + version=bandit.__version__, + ) + ), + invocations=[ + om.Invocation( + end_time_utc=datetime.utcnow().strftime(TS_FORMAT), + execution_successful=True, + ) + ], + properties={"metrics": manager.metrics.data}, + ) + ], + ) + + run = log.runs[0] + invocation = run.invocations[0] + + skips = manager.get_skipped() + add_skipped_file_notifications(skips, invocation) + + issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level) + + add_results(issues, run) + + serializedLog = to_json(log) + + with fileobj: + fileobj.write(serializedLog) + + if fileobj.name != sys.stdout.name: + LOG.info("SARIF output written to file: %s", fileobj.name) + + +def add_skipped_file_notifications(skips, invocation): + if skips is None or len(skips) == 0: + return + + if invocation.tool_configuration_notifications is None: + invocation.tool_configuration_notifications = [] + + for skip in skips: + (file_name, reason) = skip + + notification = om.Notification( + 
level="error", + message=om.Message(text=reason), + locations=[ + om.Location( + physical_location=om.PhysicalLocation( + artifact_location=om.ArtifactLocation( + uri=to_uri(file_name) + ) + ) + ) + ], + ) + + invocation.tool_configuration_notifications.append(notification) + + +def add_results(issues, run): + if run.results is None: + run.results = [] + + rules = {} + rule_indices = {} + for issue in issues: + result = create_result(issue, rules, rule_indices) + run.results.append(result) + + if len(rules) > 0: + run.tool.driver.rules = list(rules.values()) + + +def create_result(issue, rules, rule_indices): + issue_dict = issue.as_dict() + + rule, rule_index = create_or_find_rule(issue_dict, rules, rule_indices) + + physical_location = om.PhysicalLocation( + artifact_location=om.ArtifactLocation( + uri=to_uri(issue_dict["filename"]) + ) + ) + + add_region_and_context_region( + physical_location, + issue_dict["line_range"], + issue_dict["col_offset"], + issue_dict["end_col_offset"], + issue_dict["code"], + ) + + return om.Result( + rule_id=rule.id, + rule_index=rule_index, + message=om.Message(text=issue_dict["issue_text"]), + level=level_from_severity(issue_dict["issue_severity"]), + locations=[om.Location(physical_location=physical_location)], + properties={ + "issue_confidence": issue_dict["issue_confidence"], + "issue_severity": issue_dict["issue_severity"], + }, + ) + + +def level_from_severity(severity): + if severity == "HIGH": + return "error" + elif severity == "MEDIUM": + return "warning" + elif severity == "LOW": + return "note" + else: + return "warning" + + +def add_region_and_context_region( + physical_location, line_range, col_offset, end_col_offset, code +): + if code: + first_line_number, snippet_lines = parse_code(code) + snippet_line = snippet_lines[line_range[0] - first_line_number] + snippet = om.ArtifactContent(text=snippet_line) + else: + snippet = None + + physical_location.region = om.Region( + start_line=line_range[0], + 
end_line=line_range[1] if len(line_range) > 1 else line_range[0], + start_column=col_offset + 1, + end_column=end_col_offset + 1, + snippet=snippet, + ) + + if code: + physical_location.context_region = om.Region( + start_line=first_line_number, + end_line=first_line_number + len(snippet_lines) - 1, + snippet=om.ArtifactContent(text="".join(snippet_lines)), + ) + + +def parse_code(code): + code_lines = code.split("\n") + + # The last line from the split has nothing in it; it's an artifact of the + # last "real" line ending in a newline. Unless, of course, it doesn't: + last_line = code_lines[len(code_lines) - 1] + + last_real_line_ends_in_newline = False + if len(last_line) == 0: + code_lines.pop() + last_real_line_ends_in_newline = True + + snippet_lines = [] + first_line_number = 0 + first = True + for code_line in code_lines: + number_and_snippet_line = code_line.split(" ", 1) + if first: + first_line_number = int(number_and_snippet_line[0]) + first = False + + snippet_line = number_and_snippet_line[1] + "\n" + snippet_lines.append(snippet_line) + + if not last_real_line_ends_in_newline: + last_line = snippet_lines[len(snippet_lines) - 1] + snippet_lines[len(snippet_lines) - 1] = last_line[: len(last_line) - 1] + + return first_line_number, snippet_lines + + +def create_or_find_rule(issue_dict, rules, rule_indices): + rule_id = issue_dict["test_id"] + if rule_id in rules: + return rules[rule_id], rule_indices[rule_id] + + rule = om.ReportingDescriptor( + id=rule_id, + name=issue_dict["test_name"], + help_uri=docs_utils.get_url(rule_id), + properties={ + "tags": [ + "security", + f"external/cwe/cwe-{issue_dict['issue_cwe'].get('id')}", + ], + "precision": issue_dict["issue_confidence"].lower(), + }, + ) + + index = len(rules) + rules[rule_id] = rule + rule_indices[rule_id] = index + return rule, index + + +def to_uri(file_path): + pure_path = pathlib.PurePath(file_path) + if pure_path.is_absolute(): + return pure_path.as_uri() + else: + # Replace backslashes with 
slashes. + posix_path = pure_path.as_posix() + # %-encode special characters. + return urlparse.quote(posix_path) diff --git a/doc/source/formatters/sarif.rst b/doc/source/formatters/sarif.rst new file mode 100644 index 000000000..58b9633a7 --- /dev/null +++ b/doc/source/formatters/sarif.rst @@ -0,0 +1,5 @@ +----- +sarif +----- + +.. automodule:: bandit.formatters.sarif diff --git a/doc/source/man/bandit.rst b/doc/source/man/bandit.rst index 46125e613..eef10d271 100644 --- a/doc/source/man/bandit.rst +++ b/doc/source/man/bandit.rst @@ -44,7 +44,7 @@ OPTIONS (-l for LOW, -ll for MEDIUM, -lll for HIGH) -i, --confidence report only issues of a given confidence level or higher (-i for LOW, -ii for MEDIUM, -iii for HIGH) - -f {csv,custom,html,json,screen,txt,xml,yaml}, --format {csv,custom,html,json,screen,txt,xml,yaml} + -f {csv,custom,html,json,sarif,screen,txt,xml,yaml}, --format {csv,custom,html,json,sarif,screen,txt,xml,yaml} specify output format --msg-template MSG_TEMPLATE specify output message template (only usable with diff --git a/doc/source/start.rst b/doc/source/start.rst index 069ec7108..cd8f3dadf 100644 --- a/doc/source/start.rst +++ b/doc/source/start.rst @@ -38,6 +38,13 @@ extras: pip install bandit[baseline] +If you want to include SARIF output formatter support, install it with the +`sarif` extras: + +.. code-block:: console + + pip install bandit[sarif] + Run Bandit: .. 
code-block:: console diff --git a/setup.cfg b/setup.cfg index 54d4096a2..2dbee597c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,9 @@ toml = tomli>=1.1.0; python_version < "3.11" baseline = GitPython>=3.1.30 +sarif = + sarif-om>=1.0.4 + jschema-to-python>=1.2.3 [entry_points] console_scripts = @@ -52,6 +55,7 @@ bandit.formatters = txt = bandit.formatters.text:report xml = bandit.formatters.xml:report html = bandit.formatters.html:report + sarif = bandit.formatters.sarif:report screen = bandit.formatters.screen:report yaml = bandit.formatters.yaml:report custom = bandit.formatters.custom:report diff --git a/tests/unit/formatters/test_sarif.py b/tests/unit/formatters/test_sarif.py new file mode 100644 index 000000000..a5306fa81 --- /dev/null +++ b/tests/unit/formatters/test_sarif.py @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: Apache-2.0 +import collections +import json +import tempfile +from unittest import mock + +import testtools + +import bandit +from bandit.core import config +from bandit.core import constants +from bandit.core import issue +from bandit.core import manager +from bandit.core import metrics +from bandit.formatters import sarif + + +class SarifFormatterTests(testtools.TestCase): + def setUp(self): + super().setUp() + conf = config.BanditConfig() + self.manager = manager.BanditManager(conf, "file") + (tmp_fd, self.tmp_fname) = tempfile.mkstemp() + self.context = { + "filename": self.tmp_fname, + "lineno": 4, + "linerange": [4], + "code": ( + "import socket\n\n" + "s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n" + "s.bind(('0.0.0.0', 31137))" + ), + } + self.check_name = "hardcoded_bind_all_interfaces" + self.issue = issue.Issue( + severity=bandit.MEDIUM, + cwe=issue.Cwe.MULTIPLE_BINDS, + confidence=bandit.MEDIUM, + text="Possible binding to all interfaces.", + test_id="B104", + ) + + self.candidates = [ + issue.Issue( + issue.Cwe.MULTIPLE_BINDS, + bandit.LOW, + bandit.LOW, + "Candidate A", + lineno=1, + ), + issue.Issue( + 
bandit.HIGH, + issue.Cwe.MULTIPLE_BINDS, + bandit.HIGH, + "Candiate B", + lineno=2, + ), + ] + + self.manager.out_file = self.tmp_fname + + self.issue.fname = self.context["filename"] + self.issue.lineno = self.context["lineno"] + self.issue.linerange = self.context["linerange"] + self.issue.code = self.context["code"] + self.issue.test = self.check_name + + self.manager.results.append(self.issue) + self.manager.metrics = metrics.Metrics() + + # mock up the metrics + for key in ["_totals", "binding.py"]: + self.manager.metrics.data[key] = {"loc": 4, "nosec": 2} + for criteria, default in constants.CRITERIA: + for rank in constants.RANKING: + self.manager.metrics.data[key][f"{criteria}.{rank}"] = 0 + + @mock.patch("bandit.core.manager.BanditManager.get_issue_list") + def test_report(self, get_issue_list): + self.manager.files_list = ["binding.py"] + self.manager.scores = [ + { + "SEVERITY": [0] * len(constants.RANKING), + "CONFIDENCE": [0] * len(constants.RANKING), + } + ] + + get_issue_list.return_value = collections.OrderedDict( + [(self.issue, self.candidates)] + ) + + with open(self.tmp_fname, "w") as tmp_file: + sarif.report( + self.manager, + tmp_file, + self.issue.severity, + self.issue.confidence, + ) + + with open(self.tmp_fname) as f: + data = json.loads(f.read()) + run = data["runs"][0] + self.assertEqual(sarif.SCHEMA_URI, data["$schema"]) + self.assertEqual(sarif.SCHEMA_VER, data["version"]) + driver = run["tool"]["driver"] + self.assertEqual("Bandit", driver["name"]) + self.assertEqual(bandit.__author__, driver["organization"]) + self.assertEqual(bandit.__version__, driver["semanticVersion"]) + self.assertEqual("B104", driver["rules"][0]["id"]) + self.assertEqual(self.check_name, driver["rules"][0]["name"]) + self.assertIn("security", driver["rules"][0]["properties"]["tags"]) + self.assertIn( + "external/cwe/cwe-605", + driver["rules"][0]["properties"]["tags"], + ) + self.assertEqual( + "medium", driver["rules"][0]["properties"]["precision"] + ) + 
invocation = run["invocations"][0] + self.assertTrue(invocation["executionSuccessful"]) + self.assertIsNotNone(invocation["endTimeUtc"]) + result = run["results"][0] + # If the level is "warning" like in this case, SARIF will remove + # from output, as "warning" is the default value. + self.assertIsNone(result.get("level")) + self.assertEqual(self.issue.text, result["message"]["text"]) + physicalLocation = result["locations"][0]["physicalLocation"] + self.assertEqual( + self.context["linerange"][0], + physicalLocation["region"]["startLine"], + ) + self.assertEqual( + self.context["linerange"][0], + physicalLocation["region"]["endLine"], + ) + self.assertIn( + self.tmp_fname, + physicalLocation["artifactLocation"]["uri"], + ) diff --git a/tox.ini b/tox.ini index 27b3d75e7..13e3458de 100644 --- a/tox.ini +++ b/tox.ini @@ -14,6 +14,7 @@ extras = yaml toml baseline + sarif commands = find bandit -type f -name "*.pyc" -delete stestr run {posargs}