Skip to content

Commit 7af61b4

Browse files
vrigal and Bastien Abadie authored
Move Issue non hashed attributes to IssueLink (#2451)
* Move Issue in_patch & new_for_revision to IssueLink
* Deduplicate issues by hash
* Single query migration 0012
* Drop deduplication migration
* Update bulk issue creation
* Update load_in_patch command
* Move publishable to IssueLink
* Serialize the missing data on read endpoints
* Fix load_issues
* Patch other endpoints
* Update tests
* Move positions too
* Fix tests
* Fix load_issues

---------

Co-authored-by: Bastien Abadie <bastien@nextcairn.com>
1 parent 01349ff commit 7af61b4

File tree

13 files changed

+377
-135
lines changed

13 files changed

+377
-135
lines changed

backend/code_review_backend/issues/admin.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ class IssueAdmin(admin.ModelAdmin):
3535
list_display = (
3636
"id",
3737
"path",
38-
"line",
3938
"level",
4039
"analyzer",
4140
"analyzer_check",

backend/code_review_backend/issues/api.py

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
LEVEL_ERROR,
2121
Diff,
2222
Issue,
23+
IssueLink,
2324
Repository,
2425
Revision,
2526
)
@@ -121,19 +122,31 @@ def get_queryset(self):
121122
# Because of the perf. hit filter issues that are not older than today - 3 months.
122123
.filter(created__gte=date.today() - timedelta(days=90))
123124
.prefetch_related(
124-
"issues",
125+
Prefetch(
126+
"issue_links",
127+
queryset=IssueLink.objects.select_related("issue"),
128+
),
125129
"revision",
126130
"revision__base_repository",
127131
"revision__head_repository",
128132
"repository",
129133
)
130-
.annotate(nb_issues=Count("issues"))
131-
.annotate(nb_errors=Count("issues", filter=Q(issues__level="error")))
132-
.annotate(nb_warnings=Count("issues", filter=Q(issues__level="warning")))
134+
.annotate(nb_issues=Count("issue_links"))
135+
.annotate(
136+
nb_errors=Count(
137+
"issue_links", filter=Q(issue_links__issue__level=LEVEL_ERROR)
138+
)
139+
)
140+
.annotate(
141+
nb_warnings=Count(
142+
"issue_links", filter=Q(issue_links__issue__level="warning")
143+
)
144+
)
133145
.annotate(
134146
nb_issues_publishable=Count(
135-
"issues",
136-
filter=Q(issues__in_patch=True) | Q(issues__level=LEVEL_ERROR),
147+
"issue_links",
148+
filter=Q(issue_links__in_patch=True)
149+
| Q(issue_links__issue__level=LEVEL_ERROR),
137150
)
138151
)
139152
.order_by("-id")
@@ -190,9 +203,25 @@ def get_queryset(self):
190203
if not self.kwargs.get("diff_id"):
191204
return Issue.objects.none()
192205
diff = get_object_or_404(Diff, id=self.kwargs["diff_id"])
193-
# No multiple revision should be linked to a single diff
194-
# but we use the distinct clause to match the DB state.
195-
return Issue.objects.filter(diffs=diff).distinct()
206+
return (
207+
Issue.objects.filter(issue_links__diff=diff)
208+
.annotate(publishable=Q(issue_links__in_patch=True) & Q(level=LEVEL_ERROR))
209+
.values(
210+
"id",
211+
"hash",
212+
"analyzer",
213+
"analyzer_check",
214+
"path",
215+
"issue_links__line",
216+
"issue_links__nb_lines",
217+
"issue_links__char",
218+
"level",
219+
"message",
220+
"publishable",
221+
"issue_links__in_patch",
222+
"issue_links__new_for_revision",
223+
)
224+
)
196225

197226

198227
class IssueBulkCreate(generics.CreateAPIView):
@@ -212,7 +241,7 @@ def get_serializer_context(self):
212241
return context
213242

214243

215-
class IssueCheckDetails(CachedView, generics.ListAPIView):
244+
class IssueCheckDetails(generics.ListAPIView):
216245
"""
217246
List all the issues found by a specific analyzer check in a repository
218247
"""
@@ -223,24 +252,27 @@ def get_queryset(self):
223252
repo = self.kwargs["repository"]
224253

225254
queryset = (
226-
Issue.objects.filter(revisions__head_repository__slug=repo)
255+
Issue.objects.filter(issue_links__revision__head_repository__slug=repo)
227256
.filter(analyzer=self.kwargs["analyzer"])
228257
.filter(analyzer_check=self.kwargs["check"])
258+
.annotate(publishable=Q(issue_links__in_patch=True) & Q(level=LEVEL_ERROR))
229259
.prefetch_related(
230-
"diffs__repository",
260+
"issue_links__diff__repository",
231261
Prefetch(
232-
"diffs__revision",
262+
"issue_links__diff__revision",
233263
queryset=Revision.objects.select_related(
234264
"base_repository", "head_repository"
235265
),
236266
),
237267
)
268+
# List of diffs for each link of this issue
269+
.prefetch_related("diffs")
238270
.order_by("-created")
239271
)
240272

241273
# Display only publishable issues by default
242274
publishable = self.request.query_params.get("publishable", "true").lower()
243-
_filter = Q(in_patch=True) | Q(level=LEVEL_ERROR)
275+
_filter = Q(issue_links__in_patch=True) | Q(level=LEVEL_ERROR)
244276
if publishable == "true":
245277
queryset = queryset.filter(_filter)
246278
elif publishable == "false":
@@ -271,14 +303,22 @@ class IssueCheckStats(CachedView, generics.ListAPIView):
271303
def get_queryset(self):
272304
queryset = (
273305
Issue.objects.values(
274-
"revisions__head_repository__slug", "analyzer", "analyzer_check"
306+
"issue_links__revision__head_repository__slug",
307+
"analyzer",
308+
"analyzer_check",
275309
)
276310
# We want to count distinct issues because they can be referenced on multiple diffs
277311
.annotate(total=Count("id", distinct=True))
278312
.annotate(
279-
publishable=Count("id", filter=Q(in_patch=True) | Q(level=LEVEL_ERROR))
313+
publishable=Count(
314+
"id", filter=Q(issue_links__in_patch=True) | Q(level=LEVEL_ERROR)
315+
)
316+
)
317+
.distinct(
318+
"issue_links__revision__head_repository__slug",
319+
"analyzer",
320+
"analyzer_check",
280321
)
281-
.distinct("revisions__head_repository__slug", "analyzer", "analyzer_check")
282322
)
283323

284324
# Filter issues by date
@@ -292,10 +332,21 @@ def get_queryset(self):
292332
# Because of the perf. hit filter, issues that are not older than today - 3 months.
293333
since = date.today() - timedelta(days=90)
294334

295-
queryset = queryset.filter(revisions__created__gte=since).distinct()
335+
queryset = queryset.filter(issue_links__revision__created__gte=since).distinct()
296336

297337
return queryset.order_by(
298-
"-total", "revisions__head_repository__slug", "analyzer", "analyzer_check"
338+
"-total",
339+
"issue_links__revision__head_repository__slug",
340+
"analyzer",
341+
"analyzer_check",
342+
).values(
343+
"issue_links__revision__head_repository__slug",
344+
"analyzer",
345+
"analyzer_check",
346+
"total",
347+
"publishable",
348+
"issue_links__in_patch",
349+
"issue_links__new_for_revision",
299350
)
300351

301352

backend/code_review_backend/issues/management/commands/load_in_patch.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from parsepatch.patch import Patch
1212

1313
from code_review_backend.app.settings import BACKEND_USER_AGENT
14-
from code_review_backend.issues.models import Diff, Issue
14+
from code_review_backend.issues.models import Diff, IssueLink
1515

1616
logging.basicConfig(level=logging.INFO)
1717

@@ -47,35 +47,41 @@ def load_hgmo_patch(diff):
4747
return lines
4848

4949

50-
def detect_in_patch(issue, lines):
50+
def detect_in_patch(issue_link, lines):
5151
"""From the code-review bot revisions.py contains() method"""
52-
modified_lines = lines.get(issue.path)
52+
modified_lines = lines.get(issue_link.issue.path)
5353

5454
if modified_lines is None:
5555
# File not in patch
56-
issue.in_patch = False
56+
issue_link.in_patch = False
5757

58-
elif issue.line is None:
58+
elif issue_link.issue.line is None:
5959
# Empty line means full file
60-
issue.in_patch = True
60+
issue_link.in_patch = True
6161

6262
else:
6363
# Detect if this issue is in the patch
64-
chunk_lines = set(range(issue.line, issue.line + issue.nb_lines))
65-
issue.in_patch = not chunk_lines.isdisjoint(modified_lines)
66-
return issue
64+
chunk_lines = set(
65+
range(
66+
issue_link.issue.line, issue_link.issue.line + issue_link.issue.nb_lines
67+
)
68+
)
69+
issue_link.in_patch = not chunk_lines.isdisjoint(modified_lines)
70+
return issue_link
6771

6872

6973
def process_diff(diff: Diff):
7074
"""This function needs to be on the top level in order to be usable by the pool"""
7175
try:
7276
lines = load_hgmo_patch(diff)
7377

74-
issues = [detect_in_patch(issue, lines) for issue in diff.issues.all()]
78+
issue_links = [
79+
detect_in_patch(issue_link, lines) for issue_link in diff.issue_links.all()
80+
]
7581
logging.info(
76-
f"Found {len([i for i in issues if i.in_patch])} issues in patch for {diff.id}"
82+
f"Found {len([i for i in issue_links if i.in_patch])} issue link in patch for {diff.id}"
7783
)
78-
Issue.objects.bulk_update(issues, ["in_patch"])
84+
IssueLink.objects.bulk_update(issue_links, ["in_patch"])
7985
except Exception as e:
8086
logging.info(f"Failure on diff {diff.id}: {e}")
8187

@@ -94,7 +100,9 @@ def add_arguments(self, parser):
94100
def handle(self, *args, **options):
95101
# Only apply on diffs with issues that are not already processed
96102
diffs = (
97-
Diff.objects.filter(issues__in_patch__isnull=True).order_by("id").distinct()
103+
Diff.objects.filter(issue_links__in_patch__isnull=True)
104+
.order_by("id")
105+
.distinct()
98106
)
99107
logger.debug(f"Will process {diffs.count()} diffs")
100108

backend/code_review_backend/issues/management/commands/load_issues.py

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -72,30 +72,36 @@ def save_issues(self, diff, issues):
7272

7373
# Build all issues for that diff, in a single DB call
7474
created_issues = Issue.objects.bulk_create(
75-
Issue(
76-
path=i["path"],
77-
line=i["line"],
78-
nb_lines=i.get("nb_lines", 1),
79-
char=i.get("char"),
80-
level=i.get("level", "warning"),
81-
analyzer_check=i.get("kind") or i.get("check"),
82-
message=i.get("message"),
83-
analyzer=i["analyzer"],
84-
hash=i["hash"],
85-
new_for_revision=detect_new_for_revision(
86-
diff, path=i["path"], hash=i["hash"]
87-
),
88-
)
89-
for i in issues
90-
if i["hash"]
75+
[
76+
Issue(
77+
path=i["path"],
78+
level=i.get("level", "warning"),
79+
analyzer_check=i.get("kind") or i.get("check"),
80+
message=i.get("message"),
81+
analyzer=i["analyzer"],
82+
hash=i["hash"],
83+
)
84+
for i in issues
85+
if i["hash"]
86+
],
87+
ignore_conflicts=True,
9188
)
9289
IssueLink.objects.bulk_create(
93-
IssueLink(
94-
issue=i,
95-
diff=diff,
96-
revision_id=diff.revision_id,
97-
)
98-
for i in created_issues
90+
[
91+
IssueLink(
92+
issue=issue_db,
93+
diff=diff,
94+
revision_id=diff.revision_id,
95+
new_for_revision=detect_new_for_revision(
96+
diff, path=issue_db.path, hash=issue_db.hash
97+
),
98+
line=issue_src["line"],
99+
nb_lines=issue_src.get("nb_lines", 1),
100+
char=issue_src.get("char"),
101+
)
102+
for issue_db, issue_src in zip(created_issues, issues)
103+
],
104+
ignore_conflicts=True,
99105
)
100106
return created_issues
101107

0 commit comments

Comments (0)