Skip to content

Commit ab86a6e

Browse files
authored
Only use the bulk endpoint for issues creation (#2445)
* Remove non bulk issue publication from the bot * Simplify bulk size configuration * Update tests * Cleanup revision.diff_issues_url * Disallow creation of a single issue in the backend
1 parent 978802d commit ab86a6e

File tree

9 files changed

+124
-239
lines changed

9 files changed

+124
-239
lines changed

backend/code_review_backend/issues/api.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from datetime import date, datetime, timedelta
77

88
from django.conf import settings
9-
from django.db import transaction
109
from django.db.models import Count, Prefetch, Q
1110
from django.db.models.functions import TruncDate
1211
from django.shortcuts import get_object_or_404
@@ -17,12 +16,10 @@
1716
from rest_framework.exceptions import APIException, ValidationError
1817
from rest_framework.response import Response
1918

20-
from code_review_backend.issues.compare import detect_new_for_revision
2119
from code_review_backend.issues.models import (
2220
LEVEL_ERROR,
2321
Diff,
2422
Issue,
25-
IssueLink,
2623
Repository,
2724
Revision,
2825
)
@@ -174,7 +171,18 @@ def get_queryset(self):
174171
return diffs
175172

176173

177-
class IssueViewSet(viewsets.ModelViewSet):
174+
class IssueViewSet(
175+
mixins.RetrieveModelMixin,
176+
mixins.UpdateModelMixin,
177+
mixins.DestroyModelMixin,
178+
mixins.ListModelMixin,
179+
viewsets.GenericViewSet,
180+
):
181+
"""
182+
Viewset for Issue management.
183+
Creation can only be performed in bulk via the revision.
184+
"""
185+
178186
serializer_class = IssueSerializer
179187

180188
def get_queryset(self):
@@ -186,22 +194,6 @@ def get_queryset(self):
186194
# but we use the distinct clause to match the DB state.
187195
return Issue.objects.filter(diffs=diff).distinct()
188196

189-
@transaction.atomic
190-
def perform_create(self, serializer):
191-
# Attach diff to issue created
192-
# and detect if the issue is new for the revision
193-
diff = get_object_or_404(Diff, id=self.kwargs["diff_id"])
194-
issue = serializer.save(
195-
new_for_revision=detect_new_for_revision(
196-
diff,
197-
path=serializer.validated_data["path"],
198-
hash=serializer.validated_data["hash"],
199-
),
200-
)
201-
IssueLink.objects.create(
202-
issue=issue, diff_id=diff.id, revision_id=diff.revision_id
203-
)
204-
205197

206198
class IssueBulkCreate(generics.CreateAPIView):
207199
"""

backend/code_review_backend/issues/tests/test_api.py

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_create_diff(self):
135135
self.assertEqual(diff.mercurial_hash, "coffee12345")
136136
self.assertEqual(diff.revision, self.revision)
137137

138-
def test_create_issue(self):
138+
def test_create_issue_disabled(self):
139139
"""
140140
Check we can create a issue through the API
141141
"""
@@ -156,44 +156,12 @@ def test_create_issue(self):
156156
self.assertEqual(Issue.objects.count(), 0)
157157
self.client.force_authenticate(user=self.user)
158158
response = self.client.post("/v1/diff/1234/issues/", data, format="json")
159-
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
159+
self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED)
160160

161161
# Do not check the content of issue created as it's a random UUID
162-
issue_data = response.json()
163-
self.assertTrue("id" in issue_data)
164-
del issue_data["id"]
165-
self.assertDictEqual(
166-
issue_data,
167-
{
168-
"analyzer": "remote-flake8",
169-
"char": None,
170-
"check": None,
171-
"hash": "somemd5hash",
172-
"level": "error",
173-
"line": 1,
174-
"message": None,
175-
"nb_lines": None,
176-
"new_for_revision": True,
177-
"path": "path/to/file.py",
178-
"in_patch": True,
179-
"publishable": True,
180-
},
181-
)
182-
183-
# Check a revision has been created
184-
self.assertEqual(Diff.objects.count(), 1)
185-
issue = Issue.objects.first()
186-
self.assertEqual(issue.path, "path/to/file.py")
187-
self.assertEqual(issue.line, 1)
188-
self.assertListEqual(
189-
list(issue.diffs.values_list("id", flat=True)), [self.diff.id]
162+
self.assertEqual(
163+
response.content, b'{"detail":"Method \\"POST\\" not allowed."}'
190164
)
191-
self.assertTrue(issue.new_for_revision)
192-
193-
# The diff now counts an issue
194-
response = self.client.get("/v1/diff/1234/")
195-
self.assertEqual(response.status_code, status.HTTP_200_OK)
196-
self.assertEqual(response.json()["nb_issues"], 1)
197165

198166
def test_create_issue_bulk_methods(self):
199167
self.client.force_authenticate(user=self.user)

backend/code_review_backend/issues/tests/test_compare.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import random
77

88
from django.contrib.auth.models import User
9-
from rest_framework import status
109
from rest_framework.test import APITestCase
1110

1211
from code_review_backend.issues.compare import detect_new_for_revision
@@ -87,30 +86,3 @@ def test_detect_new_for_revision(self):
8786
# But adding an issue with a different hash on second diff will be set as new
8887
issue = self.build_issue(2, 12345)
8988
self.assertTrue(detect_new_for_revision(second_diff, issue.path, issue.hash))
90-
91-
def test_api_creation(self):
92-
"""
93-
Check the detection is automatically applied through the API
94-
"""
95-
# No issues on second diff at first
96-
self.assertFalse(Issue.objects.filter(diffs=2).exists())
97-
98-
# Adding an issue with same hash on second diff will be set as existing
99-
data = {
100-
"line": 10,
101-
"analyzer": "analyzer-x",
102-
"check": "check-y",
103-
"level": "error",
104-
"path": "path/to/file",
105-
"hash": self.build_hash(1),
106-
}
107-
self.client.force_authenticate(user=self.user)
108-
response = self.client.post("/v1/diff/2/issues/", data, format="json")
109-
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
110-
self.assertFalse(response.json()["new_for_revision"])
111-
112-
# But adding an issue with a different hash on second diff will be set as new
113-
data["hash"] = self.build_hash(456)
114-
response = self.client.post("/v1/diff/2/issues/", data, format="json")
115-
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
116-
self.assertTrue(response.json()["new_for_revision"])

bot/code_review_bot/backend.py

Lines changed: 44 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ def enabled(self):
4040
def publish_revision(self, revision):
4141
"""
4242
Create a Revision on the backend.
43-
In case revision.diff_id exists, also create revision's diff and set
44-
revision.diff_issues_url to the URL for issues creation.
43+
In case revision.diff_id exists, also create revision's diff.
4544
"""
4645
if not self.enabled:
4746
logger.warn("Skipping revision publication on backend")
@@ -106,79 +105,31 @@ def publish_revision(self, revision):
106105
if backend_diff is None:
107106
return backend_revision
108107

109-
# Store the issues url on the revision
110-
revision.diff_issues_url = backend_diff["issues_url"]
111-
112108
return backend_revision
113109

114-
def publish_issues(self, issues, revision, bulk=0):
110+
def publish_issues(self, issues, revision):
115111
"""
116-
Publish all issues on the backend.
117-
If set to an integer, `bulk` parameter allow to use the bulk creation endpoint of the backend.
112+
Publish all issues on the backend in bulk.
118113
"""
119114
if not self.enabled:
120115
logger.warn("Skipping issues publication on backend")
121116
return
122117

123118
published = 0
124-
if bulk > 0:
125-
assert revision.issues_url is not None, (
126-
"Missing issues_url on the revision. "
127-
"This attribute is required when publishing issues in bulk."
128-
)
129-
chunks = (issues[i : i + bulk] for i in range(0, len(issues), bulk))
130-
for issues_chunk in chunks:
131-
# Store valid data as couples of (<issue>, <json_data>)
132-
valid_data = []
133-
# Build issues' payload for that given chunk
134-
for issue in issues_chunk:
135-
if (
136-
isinstance(issue, MozLintIssue)
137-
and issue.linter == "rust"
138-
and issue.path == "."
139-
):
140-
# Silently ignore issues with path "." from rustfmt, as they cannot be published
141-
# https://github.com/mozilla/code-review/issues/1577
142-
continue
143-
if issue.hash is None:
144-
logger.warning(
145-
"Missing issue hash, cannot publish on backend",
146-
issue=str(issue),
147-
)
148-
continue
149-
valid_data.append((issue, issue.as_dict()))
150-
151-
if not valid_data:
152-
# May happen when a series of issues are missing a hash
153-
logger.warning(
154-
"No issue is valid over an entire chunk",
155-
head_repository=revision.head_repository,
156-
head_changeset=revision.head_changeset,
157-
)
158-
continue
159-
160-
response = self.create(
161-
revision.issues_url,
162-
{"issues": [json_data for _, json_data in valid_data]},
163-
)
164-
if response is None:
165-
# Backend rejected the payload, nothing more to do.
166-
continue
167-
created = response.get("issues")
168-
169-
assert created and len(created) == len(valid_data)
170-
for (issue, _), return_value in zip(valid_data, created):
171-
# Set the returned value on each issue
172-
issue.on_backend = return_value
173-
174-
published += len(valid_data)
175-
else:
176-
assert revision.diff_issues_url is not None, (
177-
"Missing diff_issues_url on the revision. "
178-
"This attribute is required when publishing a single issue on a revision's diff."
179-
)
180-
181-
for issue in issues:
119+
assert (
120+
revision.issues_url is not None
121+
), "Missing issues_url on the revision to publish issues in bulk."
122+
123+
logger.info(f"Publishing issues in bulk of {settings.bulk_issue_chunks} items.")
124+
chunks = (
125+
issues[i : i + settings.bulk_issue_chunks]
126+
for i in range(0, len(issues), settings.bulk_issue_chunks)
127+
)
128+
for issues_chunk in chunks:
129+
# Store valid data as couples of (<issue>, <json_data>)
130+
valid_data = []
131+
# Build issues' payload for that given chunk
132+
for issue in issues_chunk:
182133
if (
183134
isinstance(issue, MozLintIssue)
184135
and issue.linter == "rust"
@@ -187,21 +138,38 @@ def publish_issues(self, issues, revision, bulk=0):
187138
# Silently ignore issues with path "." from rustfmt, as they cannot be published
188139
# https://github.com/mozilla/code-review/issues/1577
189140
continue
190-
try:
191-
assert issue.hash is not None
192-
except Exception:
193-
# An exception may occur computing issues hash (e.g. network error)
141+
if issue.hash is None:
194142
logger.warning(
195143
"Missing issue hash, cannot publish on backend",
196144
issue=str(issue),
197145
)
198146
continue
199-
payload = issue.as_dict()
200-
issue.on_backend = self.create(revision.diff_issues_url, payload)
201-
if issue.on_backend is not None:
202-
published += 1
203-
else:
204-
logger.warn("Failed backend publication", issue=str(issue))
147+
valid_data.append((issue, issue.as_dict()))
148+
149+
if not valid_data:
150+
# May happen when a series of issues are missing a hash
151+
logger.warning(
152+
"No issue is valid over an entire chunk",
153+
head_repository=revision.head_repository,
154+
head_changeset=revision.head_changeset,
155+
)
156+
continue
157+
158+
response = self.create(
159+
revision.issues_url,
160+
{"issues": [json_data for _, json_data in valid_data]},
161+
)
162+
if response is None:
163+
# Backend rejected the payload, nothing more to do.
164+
continue
165+
created = response.get("issues")
166+
167+
assert created and len(created) == len(valid_data)
168+
for (issue, _), return_value in zip(valid_data, created):
169+
# Set the returned value on each issue
170+
issue.on_backend = return_value
171+
172+
published += len(valid_data)
205173

206174
total = len(issues)
207175
if published < total:

bot/code_review_bot/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ def __init__(self):
5050
self.repositories = []
5151
self.decision_env_prefixes = []
5252

53+
# Max number of issues published to the backend at a time during the ingestion of a revision
54+
self.bulk_issue_chunks = 100
55+
5356
# Cache to store file-by-file from HGMO Rest API
5457
self.hgmo_cache = tempfile.mkdtemp(suffix="hgmo")
5558

@@ -87,6 +90,9 @@ def setup(self, app_channel, allowed_paths, repositories, mercurial_cache=None):
8790
if not os.path.isdir(self.taskcluster.results_dir):
8891
os.makedirs(self.taskcluster.results_dir)
8992

93+
if "BULK_ISSUE_CHUNKS" in os.environ:
94+
self.bulk_issue_chunks = int(os.environ["BULK_ISSUE_CHUNKS"])
95+
9096
# Save allowed paths
9197
assert isinstance(allowed_paths, list)
9298
assert all(map(lambda p: isinstance(p, str), allowed_paths))

bot/code_review_bot/revisions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,6 @@ def __init__(
116116
# the phabricator repository payload for later identification
117117
self.phabricator_repository = phabricator_repository
118118

119-
# backend's returned URL to list or create issues on the revision's diff
120-
self.diff_issues_url = None
121119
# backend's returned URL to list or create issues linked to the revision in bulk (diff is optional)
122120
self.issues_url = None
123121

bot/code_review_bot/workflow.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@
3030
TASKCLUSTER_NAMESPACE = "project.relman.{channel}.code-review.{name}"
3131
TASKCLUSTER_INDEX_TTL = 7 # in days
3232

33-
# Max number of issues published to the backend at a time during the ingestion of a revision
34-
BULK_ISSUE_CHUNKS = 100
35-
3633

3734
class Workflow:
3835
"""
@@ -212,11 +209,7 @@ def _build_tasks(tasks):
212209
self.clone_repository(revision)
213210

214211
# Publish issues in the backend
215-
self.backend_api.publish_issues(
216-
issues,
217-
revision,
218-
bulk=BULK_ISSUE_CHUNKS,
219-
)
212+
self.backend_api.publish_issues(issues, revision)
220213

221214
def clone_repository(self, revision):
222215
"""

bot/tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def mock_config(mock_repositories):
5757
del os.environ["TASK_ID"]
5858
os.environ["TRY_TASK_ID"] = "remoteTryTask"
5959
os.environ["TRY_TASK_GROUP_ID"] = "remoteTryGroup"
60+
os.environ["BULK_ISSUE_CHUNKS"] = "10"
6061
settings.setup("test", ["dom/*", "tests/*.py", "test/*.c"], mock_repositories)
6162
return settings
6263

0 commit comments

Comments
 (0)