From fca2b7f865ac2d33e9abad622b22ba634e2939aa Mon Sep 17 00:00:00 2001 From: David Riazati <9407960+driazati@users.noreply.github.com> Date: Thu, 3 Mar 2022 08:26:37 -0800 Subject: [PATCH] [ci] Add workflow to cc teams (#10322) As discussed in https://discuss.tvm.apache.org/t/rfc-remove-codeowners/12095/2?u=driazati, this adds a mechanism to auto-tag people based on PR/issue titles and labels. This should improve visibility across the project and make it easy for interested people to subscribe to various topics. Details on usage will be posted in the relevant issue: #10317 Co-authored-by: driazati --- .github/workflows/tag_teams.yml | 47 +++++ tests/python/unittest/test_ci.py | 236 +++++++++++++++++++++++++ tests/scripts/git_utils.py | 29 +++- tests/scripts/github_tag_teams.py | 275 ++++++++++++++++++++++++++++++ 4 files changed, 584 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/tag_teams.yml create mode 100755 tests/scripts/github_tag_teams.py diff --git a/.github/workflows/tag_teams.yml b/.github/workflows/tag_teams.yml new file mode 100644 index 0000000000000..9711f718b1e16 --- /dev/null +++ b/.github/workflows/tag_teams.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# GH actions. 
+# We use it to cover windows and mac builds +# Jenkins is still the primary CI + +name: Teams + +on: + # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target + pull_request_target: + types: [opened, reopened, edited, ready_for_review, labeled] + issues: + types: [opened, edited, reopened, labeled] + +concurrency: + group: Teams-${{ github.event.pull_request.number }}-${{ github.event.issue.number }} + cancel-in-progress: true + +jobs: + tag-teams: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Tag people from relevant teams + env: + PR: ${{ toJson(github.event.pull_request) }} + ISSUE: ${{ toJson(github.event.issue) }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + python tests/scripts/github_tag_teams.py || echo failed diff --git a/tests/python/unittest/test_ci.py b/tests/python/unittest/test_ci.py index 6ca46bc60cd57..90c9262c34956 100644 --- a/tests/python/unittest/test_ci.py +++ b/tests/python/unittest/test_ci.py @@ -19,6 +19,7 @@ import subprocess import sys import json +import textwrap import tempfile import pytest @@ -406,5 +407,240 @@ def all_time_keys(time): ) +def assert_in(needle: str, haystack: str): + if needle not in haystack: + raise AssertionError(f"item not found:\n{needle}\nin:\n{haystack}") + + +def test_github_tag_teams(tmpdir_factory): + tag_script = REPO_ROOT / "tests" / "scripts" / "github_tag_teams.py" + + def run(type, data, check): + git = TempGit(tmpdir_factory.mktemp("tmp_git_dir")) + git.run("init") + git.run("checkout", "-b", "main") + git.run("remote", "add", "origin", "https://github.com/apache/tvm.git") + + issue_body = """ + some text + [temporary] opt-in: @person5 + + - something: @person1 @person2 + - something else @person1 @person2 + - something else2: @person1 @person2 + - something-else @person1 @person2 + """ + comment1 = """ + another thing: @person3 + another-thing @person3 + """ + comment2 = """ + something @person4 + """ + 
teams = { + "data": { + "repository": { + "issue": { + "body": issue_body, + "comments": {"nodes": [{"body": comment1}, {"body": comment2}]}, + } + } + } + } + env = { + type: json.dumps(data), + } + proc = subprocess.run( + [ + str(tag_script), + "--dry-run", + "--team-issue-json", + json.dumps(teams), + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + cwd=git.cwd, + env=env, + ) + if proc.returncode != 0: + raise RuntimeError(f"Process failed:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}") + + assert_in(check, proc.stdout) + + run( + "ISSUE", + { + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "abc"}], + "body": textwrap.dedent( + """ + hello + """.strip() + ), + }, + "No one to cc, exiting", + ) + + run( + "ISSUE", + { + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "abc"}], + "body": textwrap.dedent( + """ + hello + + cc @test + """.strip() + ), + }, + "No one to cc, exiting", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="would have updated issues/1234 with {'body': '\\nhello\\n\\nsomething\\n\\ncc @person1 @person2 @person4'}", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person6", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="Author person6 is not opted in, quitting", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="ISSUE", + data={ + "title": 
"[something] A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="would have updated issues/1234 with {'body': '\\nhello\\n\\nsomething\\n\\ncc @person1 @person2 @person4'}", + ) + + run( + type="ISSUE", + data={ + "title": "[something] A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="PR", + data={ + "title": "[something] A title", + "number": 1234, + "draft": False, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="PR", + data={ + "title": "[something] A title", + "number": 1234, + "draft": True, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Terminating since 1234 is a draft", + ) + + if __name__ == "__main__": sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/scripts/git_utils.py b/tests/scripts/git_utils.py index 0885907130013..8e8cbfb1e2616 100644 --- a/tests/scripts/git_utils.py +++ b/tests/scripts/git_utils.py @@ -20,7 +20,7 @@ import subprocess import re from urllib import request -from typing import Dict, Tuple, Any +from typing import Dict, Tuple, Any, Optional, List class GitHubRepo: @@ -35,8 +35,16 @@ def headers(self): "Authorization": f"Bearer {self.token}", } - def graphql(self, query: str) -> Dict[str, Any]: - return self._post("https://api.github.com/graphql", {"query": query}) + def graphql(self, query: str, variables: Optional[Dict[str, str]] = None) -> Dict[str, 
Any]: + if variables is None: + variables = {} + response = self._post( + "https://api.github.com/graphql", {"query": query, "variables": variables} + ) + if "data" not in response: + msg = f"Error fetching data with query:\n{query}\n\nvariables:\n{variables}\n\nerror:\n{json.dumps(response, indent=2)}" + raise RuntimeError(msg) + return response def _post(self, full_url: str, body: Dict[str, Any]) -> Dict[str, Any]: print("Requesting POST to", full_url, "with", body) @@ -95,3 +103,18 @@ def git(command, **kwargs): if proc.returncode != 0: raise RuntimeError(f"Command failed {command}:\nstdout:\n{proc.stdout}") return proc.stdout.strip() + + +def find_ccs(body: str) -> List[str]: + matches = re.findall(r"(cc( @[-A-Za-z0-9]+)+)", body, flags=re.MULTILINE) + matches = [full for full, last in matches] + + reviewers = [] + for match in matches: + if match.startswith("cc "): + match = match.replace("cc ", "") + users = [x.strip() for x in match.split("@")] + reviewers += users + + reviewers = set(x for x in reviewers if x != "") + return list(reviewers) diff --git a/tests/scripts/github_tag_teams.py b/tests/scripts/github_tag_teams.py new file mode 100755 index 0000000000000..a461f562d7845 --- /dev/null +++ b/tests/scripts/github_tag_teams.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
+# See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Automatically cc interested people on PRs and issues.
+
+The mapping from tag to GitHub users is parsed from a "teams" issue (bullets
+in the issue body plus every line of its comments that mentions someone).
+The PR or issue to update is read as JSON from the ``PR`` or ``ISSUE``
+environment variable. Tags are collected from ``[bracketed]`` parts of the
+title and from label names, and the matching users are written into a
+trailing ``cc @user ...`` line of the body.
+"""
+
+import os
+import sys
+import json
+import argparse
+import re
+from urllib import error
+from typing import Dict, Any, List, Optional, Tuple
+
+
+from git_utils import git, GitHubRepo, parse_remote, find_ccs
+
+
+def parse_line(line: str) -> Tuple[str, List[str]]:
+    """
+    Split one line of the teams issue into a tag name and its users.
+
+    Leading spaces and dashes (bullet markers) are dropped first. Every word
+    before the first ``@mention`` makes up the tag name, with any trailing
+    ``:`` removed. Every later word that starts with ``@`` is a user.
+
+    Returns a ``(tag, users)`` tuple; ``users`` is sorted and has the ``@``
+    prefix stripped. A line without any mention yields an empty user list.
+    """
+    pieces = line.lstrip(" -").split()
+
+    # Index of the first tagged person; everything before it is the tag name
+    first_user = next(
+        (i for i, piece in enumerate(pieces) if piece.startswith("@")),
+        len(pieces),
+    )
+    tag = " ".join(pieces[:first_user]).rstrip(":")
+
+    # Words after the tag that are not mentions (e.g. stray text) are ignored
+    users = [piece.lstrip("@") for piece in pieces[first_user:] if piece.startswith("@")]
+
+    return (tag, sorted(users))
+
+
+def fetch_issue(github: GitHubRepo, issue_number: int) -> Dict[str, Any]:
+    """
+    Fetch the body and the first 100 comment bodies of an issue via GraphQL.
+
+    The raw GraphQL response is returned unchanged; the issue is found under
+    ``["data"]["repository"]["issue"]``.
+    """
+    query = """query($owner: String!, $name: String!, $number: Int!){
+    repository(owner: $owner, name: $name) {
+      issue(number: $number) {
+        body
+        comments(first:100) {
+          nodes {
+            body
+          }
+        }
+      }
+    }
+  }"""
+    return github.graphql(
+        query,
+        variables={
+            "owner": github.user,
+            "name": github.repo,
+            "number": issue_number,
+        },
+    )
+
+
+def find_rollout_users(r: Dict[str, Any]) -> List[str]:
+    """
+    Return the users listed on the ``[temporary] opt-in: `` line of the issue body.
+
+    Only the first such line is used. An empty list is returned when the
+    body has no opt-in line.
+    """
+    prefix = "[temporary] opt-in: "
+    body = r["data"]["repository"]["issue"]["body"]
+    for line in body.split("\n"):
+        line = line.strip()
+        if line.startswith(prefix):
+            # find_ccs() looks for 'cc @user ...', so present the users that way
+            return find_ccs("cc " + line[len(prefix) :])
+
+    return []
+
+
+def parse_teams(r: Dict[str, Any], issue_number: int) -> Dict[str, List[str]]:
+    """
+    Parse the tagged people out of a fetched issue's body and comments.
+
+    ``r`` is a response shaped like the one returned by ``fetch_issue``;
+    ``issue_number`` is only used in the error message.
+
+    Returns a dict from lowercased tag name to a sorted list of unique users.
+    Raises ``RuntimeError`` if the response holds no issue or no issue body.
+    """
+    issue = r["data"]["repository"]["issue"]
+
+    if issue is None or issue.get("body") is None:
+        raise RuntimeError(f"Could not find issue #{issue_number}\n\n{json.dumps(r, indent=2)}")
+
+    result: Dict[str, List[str]] = {}
+
+    def add_tag(tag: str, users: List[str]) -> None:
+        # Tags are looked up in lowercase, so merge on the lowercased name;
+        # otherwise 'Foo' and 'foo' would overwrite each other at the end
+        result.setdefault(tag.lower(), []).extend(users)
+
+    # Parse the issue body (only bullets are looked at)
+    for line in issue["body"].split("\n"):
+        line = line.strip()
+        if line.startswith("- ") and "@" in line:
+            add_tag(*parse_line(line))
+
+    # Parse comment bodies (every line that mentions someone counts)
+    for comment in issue["comments"]["nodes"]:
+        for line in comment["body"].split("\n"):
+            if "@" in line:
+                add_tag(*parse_line(line))
+
+    # De-duplicate users listed twice for the same tag; sort for stable output
+    return {tag: sorted(set(users)) for tag, users in result.items()}
+
+
+def tags_from_title(title: str) -> List[str]:
+    """Return the stripped contents of every ``[...]`` group in ``title``."""
+    return [tag.strip() for tag in re.findall(r"\[(.*?)\]", title)]
+
+
+def tags_from_labels(labels: List[Dict[str, Any]]) -> List[str]:
+    """Return the ``name`` of every label dict in ``labels``."""
+    return [label["name"] for label in labels]
+
+
+def add_ccs_to_body(body: Optional[str], to_cc: List[str]) -> Optional[str]:
+    """
+    Add ``to_cc`` to the trailing ``cc @user ...`` line of ``body``.
+
+    If the last non-blank lines of ``body`` start with ``cc @``, the topmost
+    line of that trailing run is rewritten to also include ``to_cc``.
+    Otherwise a blank line and a new cc line are appended. A ``None`` body
+    (GitHub sends ``null`` for an empty description) is treated as empty.
+
+    Returns the new body, or ``None`` when everyone in ``to_cc`` is already
+    on the existing cc line and no update is needed.
+    """
+    lines = (body or "").split("\n")
+
+    # Walk up from the bottom over blank lines and cc lines only
+    cc_line_idx = None
+    for i, line in enumerate(reversed(lines)):
+        if line.strip() == "":
+            continue
+        if line.startswith("cc @"):
+            cc_line_idx = len(lines) - i - 1
+        else:
+            break
+
+    def gen_cc_line(users) -> str:
+        return "cc " + " ".join(f"@{user}" for user in sorted(users))
+
+    if cc_line_idx is None:
+        print("Did not find existing cc line")
+        lines.append("")
+        lines.append(gen_cc_line(to_cc))
+    else:
+        # Edit cc line in place
+        line = lines[cc_line_idx]
+        print(f"Found existing cc line at {cc_line_idx}: {line}")
+        existing_ccs = find_ccs(line)
+        print(f"Found cc's: {existing_ccs}")
+
+        if set(to_cc).issubset(set(existing_ccs)):
+            # Don't do anything if there is no update needed
+            return None
+
+        lines[cc_line_idx] = gen_cc_line(set(existing_ccs) | set(to_cc))
+
+    return "\n".join(lines)
+
+
+def main() -> None:
+    """
+    Command line entry point: work out who to cc and update the PR or issue.
+
+    Exits with status 0 without changing anything when the PR is a draft,
+    the author is not opted in, nobody matches the tags, or everyone is
+    already cc'ed. Raises ``RuntimeError`` unless exactly one of ``$PR`` and
+    ``$ISSUE`` is set.
+    """
+    description = "Automatically tag people based on PR / issue labels"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
+    parser.add_argument("--team-issue", default="10317", help="issue number to look at for ccs")
+    parser.add_argument(
+        "--team-issue-json", help="(testing only) issue JSON to parse rather than fetch from GitHub"
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        default=False,
+        help="run but don't send any request to GitHub",
+    )
+    args = parser.parse_args()
+
+    remote = git(["config", "--get", f"remote.{args.remote}.url"])
+    user, repo = parse_remote(remote)
+
+    # The client is only created when a request is really going to be sent,
+    # so --dry-run together with --team-issue-json needs no token at all
+    github = None
+    if args.team_issue_json:
+        issue_data = json.loads(args.team_issue_json)
+    else:
+        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
+        issue_data = fetch_issue(github, issue_number=int(args.team_issue))
+
+    # Fetch the list of teams
+    teams = parse_teams(issue_data, issue_number=int(args.team_issue))
+    # When rolling out this tool it is limited to certain users, so find that list
+    rollout_users = find_rollout_users(issue_data)
+    print(f"[slow rollout] Limiting to opted-in users: {rollout_users}")
+
+    print(f"Found these teams in issue #{args.team_issue}\n{json.dumps(teams, indent=2)}")
+
+    # Extract the payload from GitHub Actions
+    issue = json.loads(os.getenv("ISSUE", "null"))
+    pr = json.loads(os.getenv("PR", "null"))
+    if (issue is None) == (pr is None):
+        raise RuntimeError("Exactly one of $PR or $ISSUE must be set in the environment")
+
+    if pr is not None and pr["draft"]:
+        print(f"Terminating since {pr['number']} is a draft")
+        sys.exit(0)
+
+    # PRs/issues have the same structure for the fields needed here
+    item = issue if issue is not None else pr
+    # GitHub reports an empty description as null
+    body = item["body"] or ""
+    author = item["user"]["login"]
+    tags = tags_from_title(item["title"]) + tags_from_labels(item["labels"])
+
+    tags = [t.lower() for t in tags]
+    print(f"Found tags: {tags}")
+
+    if author not in rollout_users:
+        print(f"Author {author} is not opted in, quitting")
+        sys.exit(0)
+
+    # Collect everyone on a matching team, never cc'ing the author themselves
+    to_cc = sorted({u for t in tags for u in teams.get(t, []) if u != author})
+    print("Users to cc based on labels", to_cc)
+
+    # Create the new PR/issue body
+    if not to_cc:
+        print("No one to cc, exiting")
+        sys.exit(0)
+
+    new_body = add_ccs_to_body(body, to_cc)
+    if new_body is None:
+        print("Everyone to cc is already cc'ed, no update needed")
+        sys.exit(0)
+
+    print(f"Changing body from:\n----\n{body}\n----\nto:\n----\n{new_body}\n----")
+
+    # Set the PR/issue body on GitHub
+    data = {"body": new_body}
+    if issue is not None:
+        url = f"issues/{issue['number']}"
+    elif pr is not None:
+        url = f"pulls/{pr['number']}"
+    else:
+        raise RuntimeError("Unreachable, please report a bug with a link to the failed workflow")
+
+    if args.dry_run:
+        print(f"Dry run, would have updated {url} with {data}")
+        return
+
+    if github is None:
+        # The teams came from --team-issue-json, so no client exists yet
+        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
+    github.post(url, data=data)
+
+
+if __name__ == "__main__":
+    main()