Skip to content

Commit 97f5611

Browse files
cathtengtrillville
authored andcommitted
ref(open-pr-comments): use parser classes instead of standalone regexes (#63294)
1 parent ac6ce24 commit 97f5611

File tree

4 files changed

+75
-47
lines changed

4 files changed

+75
-47
lines changed

src/sentry/tasks/integrations/github/open_pr_comment.py

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import itertools
44
import logging
5-
import re
65
from dataclasses import dataclass
76
from datetime import datetime, timedelta
87
from typing import Any, Dict, List, Set, Tuple
@@ -37,6 +36,7 @@
3736
from sentry.snuba.dataset import Dataset
3837
from sentry.snuba.referrer import Referrer
3938
from sentry.tasks.base import instrumented_task
39+
from sentry.tasks.integrations.github.patch_parsers import PATCH_PARSERS
4040
from sentry.tasks.integrations.github.pr_comment import (
4141
ISSUE_LOCKED_ERROR_MESSAGE,
4242
RATE_LIMITED_MESSAGE,
@@ -189,7 +189,7 @@ def safe_for_comment(
189189
for file in pr_files:
190190
filename = file["filename"]
191191
# don't count the file if it was added or is not a Python file
192-
if file["status"] == "added" or not filename.endswith(".py"):
192+
if file["status"] == "added" or filename.split(".")[-1] not in PATCH_PARSERS:
193193
continue
194194

195195
changed_file_count += 1
@@ -224,27 +224,6 @@ def get_pr_files(pr_files: List[Dict[str, str]]) -> List[PullRequestFile]:
224224
return pullrequest_files
225225

226226

227-
# currently Python only
228-
def get_file_functions(patch: str) -> Set[str]:
229-
r"""
230-
Function header regex pattern
231-
^ - Asserts the start of a line.
232-
@@.*@@ - Matches a string that starts with two "@" characters, followed by any characters
233-
(except newline), and ends with two "@" characters.
234-
\s+ - Matches one or more whitespace characters (spaces, tabs, etc.).
235-
def - Matches the literal characters "def".
236-
\\s+ - Matches one or more whitespace characters.
237-
(?P<fnc>.*) - This is a named capturing group that captures any characters (except newline)
238-
and assigns them to the named group "fnc".
239-
\( - Matches an opening parenthesis "(".
240-
.* - Matches any characters (except newline).
241-
$ - Asserts the end of a line.
242-
"""
243-
244-
python_function_regex = r"^@@.*@@\s+def\s+(?P<fnc>.*)\(.*$"
245-
return set(re.findall(python_function_regex, patch, flags=re.M))
246-
247-
248227
def get_projects_and_filenames_from_source_file(
249228
org_id: int,
250229
pr_filename: str,
@@ -448,7 +427,11 @@ def open_pr_comment_workflow(pr_id: int) -> None:
448427
if not len(projects) or not len(sentry_filenames):
449428
continue
450429

451-
function_names = get_file_functions(file.patch)
430+
language_parser = PATCH_PARSERS.get(file.filename.split(".")[-1], None)
431+
if not language_parser:
432+
continue
433+
434+
function_names = language_parser.extract_functions_from_patch(file.patch)
452435

453436
if not len(function_names):
454437
continue
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import re
2+
from abc import abstractmethod
3+
from typing import Set
4+
5+
6+
class LanguageParser:
7+
@staticmethod
8+
@abstractmethod
9+
def extract_functions_from_patch(patch: str) -> Set[str]:
10+
pass
11+
12+
13+
class PythonParser(LanguageParser):
14+
@staticmethod
15+
def extract_functions_from_patch(patch: str) -> Set[str]:
16+
r"""
17+
Function header regex pattern
18+
^ - Asserts the start of a line.
19+
@@.*@@ - Matches a string that starts with two "@" characters, followed by any characters
20+
(except newline), and ends with two "@" characters.
21+
\s+ - Matches one or more whitespace characters (spaces, tabs, etc.).
22+
def - Matches the literal characters "def".
23+
\\s+ - Matches one or more whitespace characters.
24+
(?P<fnc>.*) - This is a named capturing group that captures any characters (except newline)
25+
and assigns them to the named group "fnc".
26+
\( - Matches an opening parenthesis "(".
27+
.* - Matches any characters (except newline).
28+
$ - Asserts the end of a line.
29+
"""
30+
python_function_regex = r"^@@.*@@\s+def\s+(?P<fnc>.*)\(.*$"
31+
return set(re.findall(python_function_regex, patch, flags=re.M))
32+
33+
34+
PATCH_PARSERS = {"py": PythonParser}

tests/sentry/tasks/integrations/github/test_open_pr_comment.py

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
PullRequestFile,
1414
format_issue_table,
1515
format_open_pr_comment,
16-
get_file_functions,
1716
get_issue_table_contents,
1817
get_pr_files,
1918
get_projects_and_filenames_from_source_file,
@@ -308,24 +307,6 @@ def test_get_projects_and_filenames_from_source_file(self):
308307
assert project_list == set(projects)
309308
assert sentry_filenames == set(correct_filenames)
310309

311-
def test_get_file_functions(self):
312-
# from https://github.com/getsentry/sentry/pull/61981
313-
patch = """@@ -36,6 +36,7 @@\n from sentry.templatetags.sentry_helpers import small_count\n from sentry.types.referrer_ids import GITHUB_OPEN_PR_BOT_REFERRER\n from sentry.utils import metrics\n+from sentry.utils.json import JSONData\n from sentry.utils.snuba import raw_snql_query\n \n logger = logging.getLogger(__name__)\n@@ -134,10 +135,10 @@ def get_issue_table_contents(issue_list: List[Dict[str, int]]) -> List[PullReque\n # TODO(cathy): Change the client typing to allow for multiple SCM Integrations\n def safe_for_comment(\n gh_client: GitHubAppsClient, repository: Repository, pull_request: PullRequest\n-) -> bool:\n+) -> Tuple[bool, JSONData]:\n logger.info("github.open_pr_comment.check_safe_for_comment")\n try:\n- pullrequest_resp = gh_client.get_pullrequest(\n+ pr_files = gh_client.get_pullrequest_files(\n repo=repository.name, pull_number=pull_request.key\n )\n except ApiError as e:\n@@ -158,34 +159,47 @@ def safe_for_comment(\n tags={"type": GithubAPIErrorType.UNKNOWN.value, "code": e.code},\n )\n logger.exception("github.open_pr_comment.unknown_api_error", extra={"error": str(e)})\n- return False\n+ return False, []\n \n safe_to_comment = True\n- if pullrequest_resp["state"] != "open":\n- metrics.incr(\n- OPEN_PR_METRICS_BASE.format(key="rejected_comment"), tags={"reason": "incorrect_state"}\n- )\n- safe_to_comment = False\n- if pullrequest_resp["changed_files"] > OPEN_PR_MAX_FILES_CHANGED:\n+\n+ changed_file_count = 0\n+ changed_lines_count = 0\n+\n+ for file in pr_files:\n+ filename = file["filename"]\n+ # don't count the file if it was added or is not a Python file\n+ if file["status"] == "added" or not filename.endswith(".py"):\n+ continue\n+\n+ changed_file_count += 1\n+ changed_lines_count += file["changes"]\n+\n+ if changed_file_count > OPEN_PR_MAX_FILES_CHANGED:\n metrics.incr(\n OPEN_PR_METRICS_BASE.format(key="rejected_comment"), tags={"reason": "too_many_files"}\n )\n safe_to_comment = False\n- if pullrequest_resp["additions"] + pullrequest_resp["deletions"] > OPEN_PR_MAX_LINES_CHANGED:\n+ if changed_lines_count > OPEN_PR_MAX_LINES_CHANGED:\n metrics.incr(\n OPEN_PR_METRICS_BASE.format(key="rejected_comment"), tags={"reason": "too_many_lines"}\n )\n safe_to_comment = False\n- return safe_to_comment\n \n+ if not safe_to_comment:\n+ pr_files = []\n+\n+ return safe_to_comment, pr_files\n \n-def get_pr_filenames(\n- gh_client: GitHubAppsClient, repository: Repository, pull_request: PullRequest\n-) -> List[str]:\n- pr_files = gh_client.get_pullrequest_files(repo=repository.name, pull_number=pull_request.key)\n \n+def get_pr_filenames(pr_files: JSONData) -> List[str]:\n # new files will not have sentry issues associated with them\n- pr_filenames: List[str] = [file["filename"] for file in pr_files if file["status"] != "added"]\n+ # only fetch Python files\n+ pr_filenames: List[str] = [\n+ file["filename"]\n+ for file in pr_files\n+ if file["status"] != "added" and file["filename"].endswith(".py")\n+ ]\n \n logger.info("github.open_pr_comment.pr_filenames", extra={"count": len(pr_filenames)})\n return pr_filenames\n@@ -316,15 +330,22 @@ def open_pr_comment_workflow(pr_id: int) -> None:\n client = installation.get_client()\n \n # CREATING THE COMMENT\n- if not safe_for_comment(gh_client=client, repository=repo, pull_request=pull_request):\n+ logger.info("github.open_pr_comment.check_safe_for_comment")\n+\n+ # fetch the files in the PR and determine if it is safe to comment\n+ safe_to_comment, pr_files = safe_for_comment(\n+ gh_client=client, repository=repo, pull_request=pull_request\n+ )\n+\n+ if not safe_to_comment:\n logger.info("github.open_pr_comment.not_safe_for_comment")\n metrics.incr(\n OPEN_PR_METRICS_BASE.format(key="error"),\n tags={"type": "unsafe_for_comment"},\n )\n return\n \n- pr_filenames = get_pr_filenames(gh_client=client, repository=repo, pull_request=pull_request)\n+ pr_filenames = get_pr_filenames(pr_files)\n \n issue_table_contents = {}\n top_issues_per_file = []"""
314-
assert get_file_functions(patch) == {
315-
"get_issue_table_contents",
316-
"safe_for_comment",
317-
"open_pr_comment_workflow",
318-
}
319-
320-
def test_get_file_functions_in_class(self):
321-
# from https://github.com/getsentry/sentry/pull/59152
322-
patch = '@@ -274,6 +274,14 @@ def patch(self, request: Request, organization, member):\n \n result = serializer.validated_data\n \n+ if getattr(member.flags, "partnership:restricted"):\n+ return Response(\n+ {\n+ "detail": "This member is managed by an active partnership and cannot be modified until the end of the partnership."\n+ },\n+ status=403,\n+ )\n+\n for operation in result["operations"]:\n # we only support setting active to False which deletes the orgmember\n if self._should_delete_member(operation):\n@@ -310,6 +318,14 @@ def delete(self, request: Request, organization, member) -> Response:\n """\n Delete an organization member with a SCIM User DELETE Request.\n """\n+ if getattr(member.flags, "partnership:restricted"):\n+ return Response(\n+ {\n+ "detail": "This member is managed by an active partnership and cannot be modified until the end of the partnership."\n+ },\n+ status=403,\n+ )\n+\n self._delete_member(request, organization, member)\n metrics.incr("sentry.scim.member.delete", tags={"organization": organization})\n return Response(status=204)\n@@ -348,6 +364,14 @@ def put(self, request: Request, organization, member):\n )\n return Response(context, status=200)\n \n+ if getattr(member.flags, "partnership:restricted"):\n+ return Response(\n+ {\n+ "detail": "This member is managed by an active partnership and cannot be modified until the end of the partnership."\n+ },\n+ status=403,\n+ )\n+\n if request.data.get("sentryOrgRole"):\n # Don\'t update if the org role is the same\n if ('
323-
assert get_file_functions(patch) == {
324-
"patch",
325-
"delete",
326-
"put",
327-
}
328-
329310

330311
@region_silo_test
331312
class TestGetCommentIssues(CreateEventTestCase):
@@ -656,7 +637,9 @@ def setUp(self):
656637
@patch(
657638
"sentry.tasks.integrations.github.open_pr_comment.get_projects_and_filenames_from_source_file"
658639
)
659-
@patch("sentry.tasks.integrations.github.open_pr_comment.get_file_functions")
640+
@patch(
641+
"sentry.tasks.integrations.github.patch_parsers.PythonParser.extract_functions_from_patch"
642+
)
660643
@patch("sentry.tasks.integrations.github.open_pr_comment.get_top_5_issues_by_count_for_file")
661644
@patch(
662645
"sentry.tasks.integrations.github.open_pr_comment.safe_for_comment",
@@ -707,7 +690,9 @@ def test_comment_workflow(
707690
@patch(
708691
"sentry.tasks.integrations.github.open_pr_comment.get_projects_and_filenames_from_source_file"
709692
)
710-
@patch("sentry.tasks.integrations.github.open_pr_comment.get_file_functions")
693+
@patch(
694+
"sentry.tasks.integrations.github.patch_parsers.PythonParser.extract_functions_from_patch"
695+
)
711696
@patch("sentry.tasks.integrations.github.open_pr_comment.get_top_5_issues_by_count_for_file")
712697
@patch(
713698
"sentry.tasks.integrations.github.open_pr_comment.safe_for_comment",
@@ -765,7 +750,9 @@ def test_comment_workflow_comment_exists(
765750
@patch(
766751
"sentry.tasks.integrations.github.open_pr_comment.get_projects_and_filenames_from_source_file"
767752
)
768-
@patch("sentry.tasks.integrations.github.open_pr_comment.get_file_functions")
753+
@patch(
754+
"sentry.tasks.integrations.github.patch_parsers.PythonParser.extract_functions_from_patch"
755+
)
769756
@patch("sentry.tasks.integrations.github.open_pr_comment.safe_for_comment")
770757
@patch("sentry.tasks.integrations.github.open_pr_comment.metrics")
771758
@responses.activate
@@ -822,7 +809,9 @@ def test_comment_workflow_early_return(
822809
@patch(
823810
"sentry.tasks.integrations.github.open_pr_comment.get_projects_and_filenames_from_source_file"
824811
)
825-
@patch("sentry.tasks.integrations.github.open_pr_comment.get_file_functions")
812+
@patch(
813+
"sentry.tasks.integrations.github.patch_parsers.PythonParser.extract_functions_from_patch"
814+
)
826815
@patch("sentry.tasks.integrations.github.open_pr_comment.get_top_5_issues_by_count_for_file")
827816
@patch(
828817
"sentry.tasks.integrations.github.open_pr_comment.safe_for_comment",

0 commit comments

Comments
 (0)