Skip to content

fix(seer-issues-patch) More parsing of functions for Python #86558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sentry/api/endpoints/seer_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from sentry.hybridcloud.rpc.service import RpcAuthenticationSetupException, RpcResolutionException
from sentry.hybridcloud.rpc.sig import SerializableFunctionValueException
from sentry.models.organization import Organization
from sentry.seer.fetch_issues_given_patches import get_issues_related_to_file_patches
from sentry.seer.fetch_issues.fetch_issues_given_patches import get_issues_related_to_file_patches
from sentry.silo.base import SiloMode
from sentry.utils.env import in_test_environment

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@
from sentry import eventstore
from sentry.api.serializers import EventSerializer, serialize
from sentry.api.serializers.models.event import EventSerializerResponse
from sentry.integrations.github.tasks.language_parsers import (
PATCH_PARSERS,
LanguageParser,
SimpleLanguageParser,
)
from sentry.integrations.github.tasks.open_pr_comment import (
MAX_RECENT_ISSUES,
OPEN_PR_MAX_FILES_CHANGED,
Expand All @@ -26,6 +21,7 @@
from sentry.models.group import Group, GroupStatus
from sentry.models.project import Project
from sentry.models.repository import Repository
from sentry.seer.fetch_issues.more_parsing import patch_parsers_more
from sentry.snuba.dataset import Dataset
from sentry.snuba.referrer import Referrer
from sentry.utils.snuba import raw_snql_query
Expand Down Expand Up @@ -53,15 +49,7 @@
class PrFile(TypedDict):
filename: str
patch: str
status: Literal[
"added",
"removed",
"modified",
"renamed",
"copied",
"changed",
"unchanged",
]
status: Literal["added", "removed", "modified", "renamed", "copied", "changed", "unchanged"]
changes: int


Expand All @@ -71,7 +59,7 @@ def safe_for_fetching_issues(pr_files: list[PrFile]) -> list[PrFile]:
filtered_pr_files = []
for file in pr_files:
file_extension = file["filename"].split(".")[-1]
if file["status"] != "modified" or file_extension not in PATCH_PARSERS:
if file["status"] != "modified" or file_extension not in patch_parsers_more:
continue

changed_file_count += 1
Expand Down Expand Up @@ -102,14 +90,12 @@ def _get_issues_for_file(
if not projects:
return []

patch_parsers: dict[str, LanguageParser | SimpleLanguageParser] = PATCH_PARSERS

# Gets the appropriate parser for formatting the snuba query given the file extension.
# The extension is never replaced in reverse codemapping.
file_extension = sentry_filenames[0].split(".")[-1]
if file_extension not in patch_parsers:
if file_extension not in patch_parsers_more:
return []
language_parser = patch_parsers[file_extension]
language_parser = patch_parsers_more[file_extension]

# Fetch an initial, candidate set of groups.
group_ids: list[int] = list(
Expand Down Expand Up @@ -376,7 +362,6 @@ def get_issues_related_to_file_patches(
]

filename_to_issues = {}
patch_parsers: dict[str, LanguageParser | SimpleLanguageParser] = PATCH_PARSERS

for file in pullrequest_files:
logger_extra = {"file": file.filename, "run_id": run_id}
Expand All @@ -390,16 +375,18 @@ def get_issues_related_to_file_patches(
continue

file_extension = file.filename.split(".")[-1]
if file_extension not in patch_parsers:
if file_extension not in patch_parsers_more:
logger.warning("No language parser", extra=logger_extra)
continue
language_parser = patch_parsers[file_extension]

language_parser = patch_parsers_more[file_extension]
function_names = language_parser.extract_functions_from_patch(file.patch)
if not function_names:
logger.warning("No function names", extra=logger_extra)
continue

logger.info("Function names", extra=logger_extra | {"function_names": function_names})

issues = get_issues_with_event_details_for_file(
list(projects),
list(sentry_filenames),
Expand Down
44 changes: 44 additions & 0 deletions src/sentry/seer/fetch_issues/more_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re

from sentry.integrations.github.tasks.language_parsers import (
PATCH_PARSERS,
LanguageParser,
PythonParser,
SimpleLanguageParser,
)


class PythonParserMore(PythonParser):
@classmethod
def extract_functions_from_rest_of_patch(cls, patch: str) -> set[str]:
r"""
Extract function names from a patch, excluding those in hunk headers.

Regex:
^ - Start of line
[\- ] - Match a '-', or space character (removed or context lines)
(?:\s*) - Match any whitespace characters (indentation) without capturing
def\s+ - Match the 'def' keyword followed by whitespace
([a-zA-Z_][a-zA-Z0-9_]*) - Capture the function name, which must start with a letter or
underscore followed by letters, numbers, or underscores
"""
pattern = r"^[\- ](?:\s*)def\s+([a-zA-Z_][a-zA-Z0-9_]*)"
matches = re.finditer(pattern, patch, re.MULTILINE)
return {match.group(1) for match in matches if match.group(1)}

@classmethod
def extract_functions_from_patch(cls, patch: str) -> set[str]:
function_names_from_hunk_headers = super().extract_functions_from_patch(patch)
function_names_from_rest_of_patch = cls.extract_functions_from_rest_of_patch(patch)
return function_names_from_hunk_headers | function_names_from_rest_of_patch


patch_parsers_more: dict[str, SimpleLanguageParser | LanguageParser] = PATCH_PARSERS | {
"py": PythonParserMore,
}
"""
Parsers that extract function names from the entire patch (hunk headers, diff, context), excluding
added functions.

May have false positives.
"""
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sentry.api.serializers import EventSerializer, serialize
from sentry.models.group import Group
from sentry.models.repository import Repository
from sentry.seer.fetch_issues_given_patches import (
from sentry.seer.fetch_issues.fetch_issues_given_patches import (
NUM_DAYS_AGO,
STACKFRAME_COUNT,
PrFile,
Expand Down Expand Up @@ -247,12 +247,14 @@ def test__get_projects_and_filenames_from_source_file(self):
assert projects == {self.project}
assert filenames == {"some/path/foo.py", "path/foo.py", "foo.py"}

@patch("sentry.seer.fetch_issues_given_patches.safe_for_fetching_issues")
@patch("sentry.seer.fetch_issues_given_patches._get_projects_and_filenames_from_source_file")
@patch("sentry.seer.fetch_issues.fetch_issues_given_patches.safe_for_fetching_issues")
@patch(
"sentry.integrations.github.tasks.language_parsers.PythonParser.extract_functions_from_patch"
"sentry.seer.fetch_issues.fetch_issues_given_patches._get_projects_and_filenames_from_source_file"
)
@patch("sentry.seer.fetch_issues.more_parsing.PythonParserMore.extract_functions_from_patch")
@patch(
"sentry.seer.fetch_issues.fetch_issues_given_patches.get_issues_with_event_details_for_file"
)
@patch("sentry.seer.fetch_issues_given_patches.get_issues_with_event_details_for_file")
def test_get_issues_related_to_file_patches(
self,
mock_get_issues_with_event_details_for_file: Mock,
Expand All @@ -263,7 +265,7 @@ def test_get_issues_related_to_file_patches(
mock_get_issues_with_event_details_for_file.side_effect = (
lambda *args, **kwargs: self.issues_with_event_details
)
mock_extract_functions_from_patch.return_value = ["world", "planet"]
mock_extract_functions_from_patch.return_value = {"world", "planet"}
mock_get_projects_and_filenames_from_source_file.return_value = ({self.project}, {"foo.py"})
mock_safe_for_fetching_issues.side_effect = lambda x: x

Expand All @@ -289,12 +291,14 @@ def test_get_issues_related_to_file_patches(
)
assert filename_to_issues == filename_to_issues_expected

@patch("sentry.seer.fetch_issues_given_patches.safe_for_fetching_issues")
@patch("sentry.seer.fetch_issues_given_patches._get_projects_and_filenames_from_source_file")
@patch("sentry.seer.fetch_issues.fetch_issues_given_patches.safe_for_fetching_issues")
@patch(
"sentry.seer.fetch_issues.fetch_issues_given_patches._get_projects_and_filenames_from_source_file"
)
@patch("sentry.seer.fetch_issues.more_parsing.PythonParserMore.extract_functions_from_patch")
@patch(
"sentry.integrations.github.tasks.language_parsers.PythonParser.extract_functions_from_patch"
"sentry.seer.fetch_issues.fetch_issues_given_patches.get_issues_with_event_details_for_file"
)
@patch("sentry.seer.fetch_issues_given_patches.get_issues_with_event_details_for_file")
def test_get_issues_related_to_file_patches_no_function_names(
self,
mock_get_issues_with_event_details_for_file: Mock,
Expand All @@ -305,7 +309,7 @@ def test_get_issues_related_to_file_patches_no_function_names(
mock_get_issues_with_event_details_for_file.side_effect = (
lambda *args, **kwargs: self.issues_with_event_details
)
mock_extract_functions_from_patch.return_value = []
mock_extract_functions_from_patch.return_value = set()
mock_get_projects_and_filenames_from_source_file.return_value = ({self.project}, {"foo.py"})
mock_safe_for_fetching_issues.side_effect = lambda x: x

Expand All @@ -325,12 +329,14 @@ def test_get_issues_related_to_file_patches_no_function_names(
)
assert filename_to_issues == filename_to_issues_expected

@patch("sentry.seer.fetch_issues_given_patches.safe_for_fetching_issues")
@patch("sentry.seer.fetch_issues_given_patches._get_projects_and_filenames_from_source_file")
@patch("sentry.seer.fetch_issues.fetch_issues_given_patches.safe_for_fetching_issues")
@patch(
"sentry.seer.fetch_issues.fetch_issues_given_patches._get_projects_and_filenames_from_source_file"
)
@patch("sentry.seer.fetch_issues.more_parsing.PythonParserMore.extract_functions_from_patch")
@patch(
"sentry.integrations.github.tasks.language_parsers.PythonParser.extract_functions_from_patch"
"sentry.seer.fetch_issues.fetch_issues_given_patches.get_issues_with_event_details_for_file"
)
@patch("sentry.seer.fetch_issues_given_patches.get_issues_with_event_details_for_file")
def test_get_issues_related_to_file_patches_no_issues_found(
self,
mock_get_issues_with_event_details_for_file: Mock,
Expand All @@ -339,7 +345,7 @@ def test_get_issues_related_to_file_patches_no_issues_found(
mock_safe_for_fetching_issues: Mock,
):
mock_get_issues_with_event_details_for_file.side_effect = lambda *args, **kwargs: []
mock_extract_functions_from_patch.return_value = ["world", "planet"]
mock_extract_functions_from_patch.return_value = {"world", "planet"}
mock_get_projects_and_filenames_from_source_file.return_value = ({self.project}, {"foo.py"})
mock_safe_for_fetching_issues.side_effect = lambda x: x

Expand Down
Loading
Loading