Skip to content

Remove our custom diff parsing and prefer Github compare json API #2006

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 26 additions & 134 deletions src/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,16 +308,7 @@ pub struct PullRequestDetails {
/// handlers want to see PR changes, and getting the diff can be
/// expensive.
#[serde(skip)]
files_changed: tokio::sync::OnceCell<Vec<FileDiff>>,
}

/// Representation of a diff to a single file.
#[derive(Debug)]
pub struct FileDiff {
/// The full path of the file.
pub path: String,
/// The diff for the file.
pub diff: String,
compare: tokio::sync::OnceCell<GithubCompare>,
}

/// The return from GitHub compare API
Expand All @@ -329,13 +320,23 @@ pub struct GithubCompare {
///
/// See <https://git-scm.com/docs/git-merge-base> for more details
pub merge_base_commit: GithubCommit,
// FIXME: Also retrieve and use the files list (see our diff function)
/// List of file differences
pub files: Vec<FileDiff>,
}

/// Representation of a diff to a single file.
#[derive(Debug, serde::Deserialize)]
pub struct FileDiff {
/// The fullname path of the file.
pub filename: String,
/// The patch/diff for the file.
pub patch: String,
}

impl PullRequestDetails {
pub fn new() -> PullRequestDetails {
PullRequestDetails {
files_changed: tokio::sync::OnceCell::new(),
compare: tokio::sync::OnceCell::new(),
}
}
}
Expand Down Expand Up @@ -977,6 +978,13 @@ impl Issue {
///
/// Returns `None` if the issue is not a PR.
pub async fn diff(&self, client: &GithubClient) -> anyhow::Result<Option<&[FileDiff]>> {
Ok(self.compare(client).await?.map(|c| c.files.as_ref()))
}

/// Returns the comparison of this event.
///
/// Returns `None` if the issue is not a PR.
pub async fn compare(&self, client: &GithubClient) -> anyhow::Result<Option<&GithubCompare>> {
let Some(pr) = &self.pull_request else {
return Ok(None);
};
Expand All @@ -986,41 +994,17 @@ impl Issue {
return Ok(None);
};

let diff = pr
.files_changed
let compare = pr
.compare
.get_or_try_init::<anyhow::Error, _, _>(|| async move {
let url = format!(
let req = client.get(&format!(
"{}/compare/{before}...{after}",
self.repository().url(client)
);
let mut req = client.get(&url);
req = req.header("Accept", "application/vnd.github.v3.diff");
let (diff, _) = client
.send_req(req)
.await
.with_context(|| format!("failed to fetch diff comparison for {url}"))?;
let body = String::from_utf8_lossy(&diff);
Ok(parse_diff(&body))
));
Ok(client.json(req).await?)
})
.await?;
Ok(Some(diff))
}

/// Returns the comparison of this event.
///
/// Returns `None` if the issue is not a PR.
pub async fn compare(&self, client: &GithubClient) -> anyhow::Result<Option<GithubCompare>> {
let (before, after) = if let (Some(base), Some(head)) = (&self.base, &self.head) {
(&base.sha, &head.sha)
} else {
return Ok(None);
};

let req = client.get(&format!(
"{}/compare/{before}...{after}",
self.repository().url(client)
));
Ok(Some(client.json(req).await?))
Ok(Some(compare))
}

/// Returns the commits from this pull request (no commits are returned if this `Issue` is not
Expand Down Expand Up @@ -1264,31 +1248,6 @@ pub struct CommitBase {
pub repo: Option<Repository>,
}

pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
// This does not properly handle filenames with spaces.
let re = regex::Regex::new("(?m)^diff --git .* b/(.*)").unwrap();
let mut files: Vec<_> = re
.captures_iter(diff)
.map(|cap| {
let start = cap.get(0).unwrap().start();
let path = cap.get(1).unwrap().as_str().to_string();
(start, path)
})
.collect();
// Break the list up into (start, end) pairs starting from the "diff --git" line.
files.push((diff.len(), String::new()));
files
.windows(2)
.map(|w| {
let (start, end) = (&w[0], &w[1]);
FileDiff {
path: start.1.clone(),
diff: diff[start.0..end.0].to_string(),
}
})
.collect()
}

#[derive(Debug, serde::Deserialize)]
pub struct IssueSearchResult {
pub total_count: u64,
Expand Down Expand Up @@ -3221,71 +3180,4 @@ mod tests {
};
assert_eq!(x.to_string(), "Unknown labels: A-bootstrap, xxx");
}

#[test]
fn extract_one_file() {
let input = r##"\
diff --git a/triagebot.toml b/triagebot.toml
index fb9cee43b2d..b484c25ea51 100644
--- a/triagebot.toml
+++ b/triagebot.toml
@@ -114,6 +114,15 @@ trigger_files = [
"src/tools/rustdoc-themes",
]
+[autolabel."T-compiler"]
+trigger_files = [
+ # Source code
+ "compiler",
+
+ # Tests
+ "src/test/ui",
+]
+
[notify-zulip."I-prioritize"]
zulip_stream = 245100 # #t-compiler/wg-prioritization/alerts
topic = "#{number} {title}"
"##;
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
assert_eq!(files, vec!["triagebot.toml".to_string()]);
}

#[test]
fn extract_several_files() {
let input = r##"\
diff --git a/library/stdarch b/library/stdarch
index b70ae88ef2a..cfba59fccd9 160000
--- a/library/stdarch
+++ b/library/stdarch
@@ -1 +1 @@
-Subproject commit b70ae88ef2a6c83acad0a1e83d5bd78f9655fd05
+Subproject commit cfba59fccd90b3b52a614120834320f764ab08d1
diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs
index 1fe4aa9023e..f0330f1e424 100644
--- a/src/librustdoc/clean/types.rs
+++ b/src/librustdoc/clean/types.rs
@@ -2322,3 +2322,4 @@ impl SubstParam {
if let Self::Lifetime(lt) = self { Some(lt) } else { None }
}
}
+
diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs
index c58310947d2..3b0854d4a9b 100644
--- a/src/librustdoc/core.rs
+++ b/src/librustdoc/core.rs
@@ -591,3 +591,4 @@ fn from(idx: u32) -> Self {
ImplTraitParam::ParamIndex(idx)
}
}
+
"##;
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
assert_eq!(
files,
vec![
"library/stdarch".to_string(),
"src/librustdoc/clean/types.rs".to_string(),
"src/librustdoc/core.rs".to_string(),
]
)
}
}
4 changes: 2 additions & 2 deletions src/handlers/assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ fn find_reviewers_from_diff(
.with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
.build()?;
if ignore
.matched_path_or_any_parents(&file_diff.path, false)
.matched_path_or_any_parents(&file_diff.filename, false)
.is_ignore()
{
let owner_len = owner_pattern.split('/').count();
Expand All @@ -442,7 +442,7 @@ fn find_reviewers_from_diff(
}

// Count the modified lines.
for line in file_diff.diff.lines() {
for line in file_diff.patch.lines() {
if (!line.starts_with("+++") && line.starts_with('+'))
|| (!line.starts_with("---") && line.starts_with('-'))
{
Expand Down
49 changes: 23 additions & 26 deletions src/handlers/assign/tests/tests_from_diff.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
//! Tests for `find_reviewers_from_diff`

use super::super::*;
use crate::github::parse_diff;
use std::fmt::Write;

fn test_from_diff(diff: &str, config: toml::Table, expected: &[&str]) {
let files = parse_diff(diff);
fn test_from_diff(diff: &Vec<FileDiff>, config: toml::Table, expected: &[&str]) {
let aconfig: AssignConfig = config.try_into().unwrap();
assert_eq!(
find_reviewers_from_diff(&aconfig, &files).unwrap(),
find_reviewers_from_diff(&aconfig, &*diff).unwrap(),
expected.iter().map(|x| x.to_string()).collect::<Vec<_>>()
);
}
Expand All @@ -18,27 +16,24 @@ fn test_from_diff(diff: &str, config: toml::Table, expected: &[&str]) {
/// `paths` should be a slice of `(path, added, removed)` tuples where `added`
/// is the number of lines added, and `removed` is the number of lines
/// removed.
fn make_fake_diff(paths: &[(&str, u32, u32)]) -> String {
fn make_fake_diff(paths: &[(&str, u32, u32)]) -> Vec<FileDiff> {
// This isn't a properly structured diff, but it has approximately enough
// information for what is needed for testing.
paths
.iter()
.map(|(path, added, removed)| {
let mut diff = format!(
"diff --git a/{path} b/{path}\n\
index 1677422122e..1108c1f4d4c 100644\n\
--- a/{path}\n\
+++ b/{path}\n\
@@ -0,0 +1 @@\n"
);
let mut diff = "@@ -0,0 +1 @@ ".to_string();
for n in 0..*added {
writeln!(diff, "+Added line {n}").unwrap();
}
for n in 0..*removed {
writeln!(diff, "-Removed line {n}").unwrap();
}
diff.push('\n');
diff
FileDiff {
filename: path.to_string(),
patch: diff,
}
})
.collect()
}
Expand All @@ -61,16 +56,15 @@ fn from_diff_submodule() {
[owners]
"/src" = ["user1", "user2"]
);
let diff = "\
diff --git a/src/jemalloc b/src/jemalloc\n\
index 2dba541..b001609 160000\n\
--- a/src/jemalloc\n\
+++ b/src/jemalloc\n\
@@ -1 +1 @@\n\
-Subproject commit 2dba541881fb8e35246d653bbe2e7c7088777a4a\n\
+Subproject commit b001609960ca33047e5cbc5a231c1e24b6041d4b\n\
";
test_from_diff(diff, config, &["user1", "user2"]);
let diff = vec![FileDiff {
filename: "src/jemalloc".to_string(),
patch: "@@ -1 +1 @@\n\
-Subproject commit 2dba541881fb8e35246d653bbe2e7c7088777a4a\n\
+Subproject commit b001609960ca33047e5cbc5a231c1e24b6041d4b\n\
"
.to_string(),
}];
test_from_diff(&diff, config, &["user1", "user2"]);
}

#[test]
Expand Down Expand Up @@ -131,9 +125,12 @@ fn empty_file_still_counts() {
"/compiler" = ["compiler"]
"/compiler/rustc_parse" = ["parser"]
);
let diff = "diff --git a/compiler/rustc_parse/src/foo.rs b/compiler/rustc_parse/src/foo.rs\n\
new file mode 100644\n\
index 0000000..e69de29\n";
let diff = vec![FileDiff {
filename: "compiler/rustc_parse/src/foo.rs".to_string(),
patch: "new file mode 100644\n\
index 0000000..e69de29\n"
.to_string(),
}];
test_from_diff(&diff, config, &["parser"]);
}

Expand Down
10 changes: 5 additions & 5 deletions src/handlers/autolabel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ pub(super) async fn parse_input(
// This a PR with modified files.

// Add the matching labels for the modified files paths
if cfg
.trigger_files
.iter()
.any(|f| files.iter().any(|file_diff| file_diff.path.starts_with(f)))
{
if cfg.trigger_files.iter().any(|f| {
files
.iter()
.any(|file_diff| file_diff.filename.starts_with(f))
}) {
autolabels.push(Label {
name: label.to_owned(),
});
Expand Down
7 changes: 1 addition & 6 deletions src/handlers/check_commits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,14 @@ pub(super) async fn handle(ctx: &Context, event: &Event, config: &Config) -> any
return Ok(());
}

let Some(diff) = event.issue.diff(&ctx.github).await? else {
bail!(
"expected issue {} to be a PR, but the diff could not be determined",
event.issue.number
)
};
let Some(compare) = event.issue.compare(&ctx.github).await? else {
bail!(
"expected issue {} to be a PR, but the compare could not be determined",
event.issue.number
)
};
let commits = event.issue.commits(&ctx.github).await?;
let diff = &compare.files;

let mut warnings = Vec::new();
let mut labels = Vec::new();
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/check_commits/modified_submodule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const SUBMODULE_WARNING_MSG: &str = "Some commits in this PR modify **submodules
/// Returns a message if the PR modifies a git submodule.
pub(super) fn modifies_submodule(diff: &[FileDiff]) -> Option<String> {
let re = regex::Regex::new(r"\+Subproject\scommit\s").unwrap();
if diff.iter().any(|fd| re.is_match(&fd.diff)) {
if diff.iter().any(|fd| re.is_match(&fd.patch)) {
Some(SUBMODULE_WARNING_MSG.to_string())
} else {
None
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/mentions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ pub(super) async fn parse_input(
})
.unwrap_or_default()
{
let file_paths: Vec<_> = files.iter().map(|fd| Path::new(&fd.path)).collect();
let file_paths: Vec<_> = files.iter().map(|fd| Path::new(&fd.filename)).collect();
let to_mention: Vec<_> = config
.paths
.iter()
Expand Down
4 changes: 2 additions & 2 deletions src/handlers/milestone_prs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ pub(super) async fn handle(ctx: &Context, event: &Event) -> anyhow::Result<()> {

let files = e.issue.diff(&ctx.github).await?;
if let Some(files) = files {
if let Some(cargo) = files.iter().find(|fd| fd.path == "src/tools/cargo") {
if let Some(cargo) = files.iter().find(|fd| fd.filename == "src/tools/cargo") {
// The webhook timeout of 10 seconds can be too short, so process in
// the background.
let diff = cargo.diff.clone();
let diff = cargo.patch.clone();
tokio::task::spawn(async move {
let gh = GithubClient::new_from_env();
if let Err(e) = milestone_cargo(&gh, &version, &diff).await {
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/validate_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub(super) async fn parse_input(
return Ok(None);
}
};
if !diff.iter().any(|diff| diff.path == CONFIG_FILE_NAME) {
if !diff.iter().any(|diff| diff.filename == CONFIG_FILE_NAME) {
return Ok(None);
}

Expand Down