From cb99815c3e6a9e22b97dcc8ad689abf568b9b3e0 Mon Sep 17 00:00:00 2001 From: Chris Hipple Date: Wed, 13 Dec 2023 00:33:19 -0500 Subject: [PATCH] Feature: Add SARIF output support (#9078) ## Summary Adds support for sarif v2.1.0 output to cli, usable via the output-format parameter. `ruff . --output-format=sarif` Includes a few changes I wasn't sure of, namely: * Adds a few derives for Clone & Copy, which I think could be removed with a little extra work as well. ## Test Plan I built and ran this against several large open source projects and verified that the output sarif was valid, using [Microsoft's SARIF validator tool](https://sarifweb.azurewebsites.net/Validation) I've also attached an output of the sarif generated by this version of ruff on the main branch of django at commit: b287af5dc9 [django_main_b287af5dc9_sarif.json](https://github.com/astral-sh/ruff/files/13626222/django_main_b287af5dc9_sarif.json) Note: this needs to be regenerated with the latest changes and confirmed. ## Open Points [ ] Convert to just using all Rules all the time [ ] Fix the issue with getting the file URI when compiling for WebAssembly --- Cargo.lock | 1 + crates/ruff_cli/src/printer.rs | 5 +- crates/ruff_linter/Cargo.toml | 1 + crates/ruff_linter/src/message/mod.rs | 2 + crates/ruff_linter/src/message/sarif.rs | 212 +++++++++++++++++++++++ crates/ruff_linter/src/settings/types.rs | 1 + docs/configuration.md | 2 +- ruff.schema.json | 3 +- 8 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 crates/ruff_linter/src/message/sarif.rs diff --git a/Cargo.lock b/Cargo.lock index 9b71a65a67481..8b2225369f672 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2259,6 +2259,7 @@ dependencies = [ "typed-arena", "unicode-width", "unicode_names2", + "url", "wsl", ] diff --git a/crates/ruff_cli/src/printer.rs b/crates/ruff_cli/src/printer.rs index 228c124d68569..5bfdece63beaa 100644 --- a/crates/ruff_cli/src/printer.rs +++ b/crates/ruff_cli/src/printer.rs @@ -13,7 +13,7 @@ use 
ruff_linter::fs::relativize_path; use ruff_linter::logging::LogLevel; use ruff_linter::message::{ AzureEmitter, Emitter, EmitterContext, GithubEmitter, GitlabEmitter, GroupedEmitter, - JsonEmitter, JsonLinesEmitter, JunitEmitter, PylintEmitter, TextEmitter, + JsonEmitter, JsonLinesEmitter, JunitEmitter, PylintEmitter, SarifEmitter, TextEmitter, }; use ruff_linter::notify_user; use ruff_linter::registry::{AsRule, Rule}; @@ -291,6 +291,9 @@ impl Printer { SerializationFormat::Azure => { AzureEmitter.emit(writer, &diagnostics.messages, &context)?; } + SerializationFormat::Sarif => { + SarifEmitter.emit(writer, &diagnostics.messages, &context)?; + } } writer.flush()?; diff --git a/crates/ruff_linter/Cargo.toml b/crates/ruff_linter/Cargo.toml index fcbc1121a0912..60206ec2f42fc 100644 --- a/crates/ruff_linter/Cargo.toml +++ b/crates/ruff_linter/Cargo.toml @@ -71,6 +71,7 @@ toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { workspace = true } unicode_names2 = { workspace = true } +url = { version = "2.2.2" } wsl = { version = "0.1.0" } [dev-dependencies] diff --git a/crates/ruff_linter/src/message/mod.rs b/crates/ruff_linter/src/message/mod.rs index 69f7241b04099..2f44de44eda71 100644 --- a/crates/ruff_linter/src/message/mod.rs +++ b/crates/ruff_linter/src/message/mod.rs @@ -17,6 +17,7 @@ use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix}; use ruff_notebook::NotebookIndex; use ruff_source_file::{SourceFile, SourceLocation}; use ruff_text_size::{Ranged, TextRange, TextSize}; +pub use sarif::SarifEmitter; pub use text::TextEmitter; mod azure; @@ -28,6 +29,7 @@ mod json; mod json_lines; mod junit; mod pylint; +mod sarif; mod text; #[derive(Debug, PartialEq, Eq)] diff --git a/crates/ruff_linter/src/message/sarif.rs b/crates/ruff_linter/src/message/sarif.rs new file mode 100644 index 0000000000000..3517c0eee335a --- /dev/null +++ b/crates/ruff_linter/src/message/sarif.rs @@ -0,0 +1,212 @@ +use std::io::Write; + +use anyhow::Result; +use 
serde::{Serialize, Serializer}; +use serde_json::json; + +use ruff_source_file::OneIndexed; + +use crate::codes::Rule; +use crate::fs::normalize_path; +use crate::message::{Emitter, EmitterContext, Message}; +use crate::registry::{AsRule, Linter, RuleNamespace}; +use crate::VERSION; + +use strum::IntoEnumIterator; + +pub struct SarifEmitter; + +impl Emitter for SarifEmitter { + fn emit( + &mut self, + writer: &mut dyn Write, + messages: &[Message], + _context: &EmitterContext, + ) -> Result<()> { + let results = messages + .iter() + .map(SarifResult::from_message) + .collect::>>()?; + + let output = json!({ + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [{ + "tool": { + "driver": { + "name": "ruff", + "informationUri": "https://github.com/astral-sh/ruff", + "rules": Rule::iter().map(SarifRule::from).collect::>(), + "version": VERSION.to_string(), + } + }, + "results": results, + }], + }); + serde_json::to_writer_pretty(writer, &output)?; + Ok(()) + } +} + +#[derive(Debug, Clone)] +struct SarifRule<'a> { + name: &'a str, + code: String, + linter: &'a str, + summary: &'a str, + explanation: Option<&'a str>, + url: Option, +} + +impl From for SarifRule<'_> { + fn from(rule: Rule) -> Self { + let code = rule.noqa_code().to_string(); + let (linter, _) = Linter::parse_code(&code).unwrap(); + Self { + name: rule.into(), + code, + linter: linter.name(), + summary: rule.message_formats()[0], + explanation: rule.explanation(), + url: rule.url(), + } + } +} + +impl Serialize for SarifRule<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + json!({ + "id": self.code, + "shortDescription": { + "text": self.summary, + }, + "fullDescription": { + "text": self.explanation, + }, + "help": { + "text": self.summary, + }, + "helpUri": self.url, + "properties": { + "id": self.code, + "kind": self.linter, + "name": self.name, + "problem.severity": "error".to_string(), + }, + }) + .serialize(serializer) + } +} 
+ +#[derive(Debug)] +struct SarifResult { + rule: Rule, + level: String, + message: String, + uri: String, + start_line: OneIndexed, + start_column: OneIndexed, + end_line: OneIndexed, + end_column: OneIndexed, +} + +impl SarifResult { + #[cfg(not(target_arch = "wasm32"))] + fn from_message(message: &Message) -> Result { + let start_location = message.compute_start_location(); + let end_location = message.compute_end_location(); + let path = normalize_path(message.filename()); + Ok(Self { + rule: message.kind.rule(), + level: "error".to_string(), + message: message.kind.name.clone(), + uri: url::Url::from_file_path(&path) + .map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))? + .to_string(), + start_line: start_location.row, + start_column: start_location.column, + end_line: end_location.row, + end_column: end_location.column, + }) + } + + #[cfg(target_arch = "wasm32")] + #[allow(clippy::unnecessary_wraps)] + fn from_message(message: &Message) -> Result { + let start_location = message.compute_start_location(); + let end_location = message.compute_end_location(); + let path = normalize_path(message.filename()); + Ok(Self { + rule: message.kind.rule(), + level: "error".to_string(), + message: message.kind.name.clone(), + uri: path.display().to_string(), + start_line: start_location.row, + start_column: start_location.column, + end_line: end_location.row, + end_column: end_location.column, + }) + } +} + +impl Serialize for SarifResult { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + json!({ + "level": self.level, + "message": { + "text": self.message, + }, + "locations": [{ + "physicalLocation": { + "artifactLocation": { + "uri": self.uri, + }, + "region": { + "startLine": self.start_line, + "startColumn": self.start_column, + "endLine": self.end_line, + "endColumn": self.end_column, + } + } + }], + "ruleId": self.rule.noqa_code().to_string(), + }) + .serialize(serializer) + } +} + +#[cfg(test)] +mod 
tests { + + use crate::message::tests::{capture_emitter_output, create_messages}; + use crate::message::SarifEmitter; + + fn get_output() -> String { + let mut emitter = SarifEmitter {}; + capture_emitter_output(&mut emitter, &create_messages()) + } + + #[test] + fn valid_json() { + let content = get_output(); + serde_json::from_str::(&content).unwrap(); + } + + #[test] + fn test_results() { + let content = get_output(); + let sarif = serde_json::from_str::(content.as_str()).unwrap(); + let rules = sarif["runs"][0]["tool"]["driver"]["rules"] + .as_array() + .unwrap(); + let results = sarif["runs"][0]["results"].as_array().unwrap(); + assert_eq!(results.len(), 3); + assert!(rules.len() > 3); + } +} diff --git a/crates/ruff_linter/src/settings/types.rs b/crates/ruff_linter/src/settings/types.rs index 70ff7a9190f3f..10bfb5189ebc2 100644 --- a/crates/ruff_linter/src/settings/types.rs +++ b/crates/ruff_linter/src/settings/types.rs @@ -423,6 +423,7 @@ pub enum SerializationFormat { Gitlab, Pylint, Azure, + Sarif, } impl Default for SerializationFormat { diff --git a/docs/configuration.md b/docs/configuration.md index 08a33ffad2b34..32d6b2eb130fc 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -481,7 +481,7 @@ Options: --ignore-noqa Ignore any `# noqa` comments --output-format - Output serialization format for violations [env: RUFF_OUTPUT_FORMAT=] [possible values: text, json, json-lines, junit, grouped, github, gitlab, pylint, azure] + Output serialization format for violations [env: RUFF_OUTPUT_FORMAT=] [possible values: text, json, json-lines, junit, grouped, github, gitlab, pylint, azure, sarif] -o, --output-file Specify file to write the linter output to (default: stdout) --target-version diff --git a/ruff.schema.json b/ruff.schema.json index 8f67f9a12d787..d7ff44db9d7b0 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3663,7 +3663,8 @@ "github", "gitlab", "pylint", - "azure" + "azure", + "sarif" ] }, "Strictness": {