Skip to content

Commit 4a70883

Browse files
committed
fixup! ✨(backend) add duplicate action to the document API endpoint
1 parent ca6538d commit 4a70883

13 files changed

+548
-93
lines changed

src/backend/core/api/viewsets.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from core import authentication, enums, models
3131
from core.services.ai_services import AIService
3232
from core.services.collaboration_services import CollaborationService
33-
from core.utils import extract_attachments
33+
from core.utils import extract_attachments, filter_descendants
3434

3535
from . import permissions, serializers, utils
3636
from .filters import DocumentFilter, ListDocumentFilter
@@ -891,7 +891,8 @@ def tree(self, request, pk, *args, **kwargs):
891891
)
892892
return drf.response.Response(
893893
utils.nest_tree(serializer.data, self.queryset.model.steplen)
894-
894+
)
895+
895896
@drf.decorators.action(
896897
detail=True,
897898
methods=["post"],
@@ -907,9 +908,11 @@ def duplicate(self, request, *args, **kwargs):
907908
Optionally duplicates accesses if `with_accesses` is set to true
908909
in the payload.
909910
"""
910-
serializer = serializers.DocumentDuplicationSerializer(data=request.GET)
911+
serializer = serializers.DocumentDuplicationSerializer(
912+
data=request.data, partial=True
913+
)
911914
serializer.is_valid(raise_exception=True)
912-
with_accesses = serializer.validated_data["with_accesses"]
915+
with_accesses = serializer.validated_data.get("with_accesses", False)
913916

914917
# Get document while checking permissions
915918
document = self.get_object()
@@ -1114,7 +1117,7 @@ def attachment_upload(self, request, *args, **kwargs):
11141117

11151118
# Generate a generic yet unique filename to store the image in object storage
11161119
file_id = uuid.uuid4()
1117-
extension = serializer.validated_data["expected_extension"]
1120+
ext = serializer.validated_data["expected_extension"]
11181121

11191122
# Prepare metadata for storage
11201123
extra_args = {
@@ -1126,7 +1129,7 @@ def attachment_upload(self, request, *args, **kwargs):
11261129
extra_args["Metadata"]["is_unsafe"] = "true"
11271130
file_unsafe = "-unsafe"
11281131

1129-
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{file_unsafe}.{extension:s}"
1132+
key = f"{document.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}{file_unsafe}.{ext:s}"
11301133

11311134
file_name = serializer.validated_data["file_name"]
11321135
if (
@@ -1221,12 +1224,27 @@ def media_auth(self, request, *args, **kwargs):
12211224
user = request.user
12221225
key = f"{url_params['pk']:s}/{url_params['attachment']:s}"
12231226

1224-
if (
1225-
not self.queryset.readable(user)
1226-
.filter(attachments__contains=[key])
1227-
.exists()
1228-
):
1229-
logger.debug("User '%s' lacks permission for image", user)
1227+
# Look for a document to which the user has access and that includes this attachment
1228+
# We must look into all descendants of any document to which the user has access per se
1229+
readable_per_se_paths = (
1230+
self.queryset.readable_per_se(user)
1231+
.order_by("path")
1232+
.values_list("path", flat=True)
1233+
)
1234+
1235+
attachments_documents = (
1236+
self.queryset.filter(attachments__contains=[key])
1237+
.only("path")
1238+
.order_by("path")
1239+
)
1240+
readable_attachments_paths = filter_descendants(
1241+
[doc.path for doc in attachments_documents],
1242+
readable_per_se_paths,
1243+
skip_sorting=True,
1244+
)
1245+
1246+
if not readable_attachments_paths:
1247+
logger.debug("User '%s' lacks permission for attachment", user)
12301248
raise drf.exceptions.PermissionDenied()
12311249

12321250
# Generate S3 authorization headers using the extracted URL parameters

src/backend/core/enums.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
UUID_REGEX = (
1313
r"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"
1414
)
15-
FILE_EXT_REGEX = r"\.[a-zA-Z]{3,4}"
15+
FILE_EXT_REGEX = r"\.[a-zA-Z0-9]{1,10}"
1616
MEDIA_STORAGE_URL_PATTERN = re.compile(
1717
f"{settings.MEDIA_URL:s}(?P<pk>{UUID_REGEX:s})/"
18-
f"(?P<key>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}(?:-unsafe)?{FILE_EXT_REGEX:s})$"
18+
f"(?P<attachment>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}(?:-unsafe)?{FILE_EXT_REGEX:s})$"
1919
)
2020
MEDIA_STORAGE_URL_EXTRACT = re.compile(
2121
f"{settings.MEDIA_URL:s}({UUID_REGEX}/{ATTACHMENTS_FOLDER}/{UUID_REGEX}{FILE_EXT_REGEX})"

src/backend/core/migrations/0018_remove_is_public_add_field_attachments_and_duplicated_from.py

Lines changed: 0 additions & 55 deletions
This file was deleted.
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# Generated by Django 5.1.4 on 2025-01-18 11:53
2+
import re
3+
4+
import django.contrib.postgres.fields
5+
import django.db.models.deletion
6+
from django.core.files.storage import default_storage
7+
from django.db import migrations, models
8+
9+
from botocore.exceptions import ClientError
10+
11+
import core.models
12+
from core.utils import extract_attachments
13+
14+
15+
def populate_attachments_on_all_documents(apps, schema_editor):
16+
"""Populate "attachments" field on all existing documents in the database."""
17+
Document = apps.get_model("core", "Document")
18+
19+
for document in Document.objects.all():
20+
try:
21+
response = default_storage.connection.meta.client.get_object(
22+
Bucket=default_storage.bucket_name, Key=f"{document.pk!s}/file"
23+
)
24+
except (FileNotFoundError, ClientError):
25+
pass
26+
else:
27+
content = response["Body"].read().decode("utf-8")
28+
document.attachments = extract_attachments(content)
29+
document.save(update_fields=["attachments"])
30+
31+
32+
class Migration(migrations.Migration):
33+
dependencies = [
34+
("core", "0019_alter_user_language_default_to_null"),
35+
]
36+
37+
operations = [
38+
# v2.0.0 was released so we can now remove BC field "is_public"
39+
migrations.RemoveField(
40+
model_name="document",
41+
name="is_public",
42+
),
43+
migrations.AlterModelManagers(
44+
name="user",
45+
managers=[
46+
("objects", core.models.UserManager()),
47+
],
48+
),
49+
migrations.AddField(
50+
model_name="document",
51+
name="attachments",
52+
field=django.contrib.postgres.fields.ArrayField(
53+
base_field=models.CharField(max_length=255),
54+
blank=True,
55+
default=list,
56+
editable=False,
57+
null=True,
58+
size=None,
59+
),
60+
),
61+
migrations.AddField(
62+
model_name="document",
63+
name="duplicated_from",
64+
field=models.ForeignKey(
65+
blank=True,
66+
editable=False,
67+
null=True,
68+
on_delete=django.db.models.deletion.SET_NULL,
69+
related_name="duplicates",
70+
to="core.document",
71+
),
72+
),
73+
migrations.RunPython(
74+
populate_attachments_on_all_documents,
75+
reverse_code=migrations.RunPython.noop,
76+
),
77+
]

src/backend/core/models.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,12 @@ class DocumentQuerySet(MP_NodeQuerySet):
428428

429429
def readable_per_se(self, user):
430430
"""
431-
Filters the queryset to return documents that the given user has
432-
permission to read.
431+
Filters the queryset to return documents on which the given user has
432+
direct access, team access or link access. This will not return all the
433+
documents that a user can read because it can be obtained via an ancestor.
433434
:param user: The user for whom readable documents are to be fetched.
434-
:return: A queryset of documents readable by the user.
435+
:return: A queryset of documents for which the user has direct access,
436+
team access or link access.
435437
"""
436438
if user.is_authenticated:
437439
return self.filter(
@@ -460,7 +462,9 @@ def readable_per_se(self, user):
460462
"""
461463
Filters documents based on user permissions using the custom queryset.
462464
:param user: The user for whom readable documents are to be fetched.
463-
:return: A queryset of documents readable by the user.
465+
:return: A queryset of documents for which the user has direct access,
466+
team access or link access. This will not return all the documents
467+
that a user can read because it can be obtained via an ancestor.
464468
"""
465469
return self.get_queryset().readable_per_se(user)
466470

src/backend/core/tests/documents/test_api_documents_attachment_upload.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -313,16 +313,16 @@ def test_api_documents_attachment_upload_fix_extension(
313313
match = pattern.search(file_path)
314314
file_id = match.group(1)
315315

316-
assert "-unsafe" in file_id
317-
# Validate that file_id is a valid UUID
318-
file_id = file_id.replace("-unsafe", "")
319-
uuid.UUID(file_id)
320-
321316
document.refresh_from_db()
322317
assert document.attachments == [
323318
f"{document.id!s}/attachments/{file_id!s}.{extension:s}"
324319
]
325320

321+
assert "-unsafe" in file_id
322+
# Validate that file_id is a valid UUID
323+
file_id = file_id.replace("-unsafe", "")
324+
uuid.UUID(file_id)
325+
326326
# Now, check the metadata of the uploaded file
327327
key = file_path.replace("/media", "")
328328
file_head = default_storage.connection.meta.client.head_object(
@@ -373,14 +373,14 @@ def test_api_documents_attachment_upload_unsafe():
373373
match = pattern.search(file_path)
374374
file_id = match.group(1)
375375

376+
document.refresh_from_db()
377+
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.exe"]
378+
376379
assert "-unsafe" in file_id
377380
# Validate that file_id is a valid UUID
378381
file_id = file_id.replace("-unsafe", "")
379382
uuid.UUID(file_id)
380383

381-
document.refresh_from_db()
382-
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.exe"]
383-
384384
# Now, check the metadata of the uploaded file
385385
key = file_path.replace("/media", "")
386386
file_head = default_storage.connection.meta.client.head_object(

src/backend/core/tests/documents/test_api_documents_duplicate.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,9 @@ def test_api_documents_duplicate_with_accesses():
182182

183183
# Duplicate the document via the API endpoint requesting to duplicate accesses
184184
response = client.post(
185-
f"/api/v1.0/documents/{document.id!s}/duplicate/?with_accesses=true"
185+
f"/api/v1.0/documents/{document.id!s}/duplicate/",
186+
{"with_accesses": True},
187+
format="json",
186188
)
187189

188190
assert response.status_code == 201

src/backend/core/tests/documents/test_api_documents_media_auth.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@ def test_api_documents_media_auth_anonymous_public():
8181

8282
def test_api_documents_media_auth_extensions():
8383
"""Files with extensions of any format should work."""
84-
document = factories.DocumentFactory(link_reach="public")
85-
8684
extensions = [
8785
"c",
8886
"go",
@@ -91,10 +89,15 @@ def test_api_documents_media_auth_extensions():
9189
"woff2",
9290
"appimage",
9391
]
92+
document_id = uuid4()
93+
keys = []
9494
for ext in extensions:
95-
filename = f"{uuid.uuid4()!s}.{ext:s}"
96-
key = f"{document.pk!s}/attachments/{filename:s}"
95+
filename = f"{uuid4()!s}.{ext:s}"
96+
keys.append(f"{document_id!s}/attachments/{filename:s}")
97+
98+
factories.DocumentFactory(link_reach="public", attachments=keys)
9799

100+
for key in keys:
98101
original_url = f"http://localhost/media/{key:s}"
99102
response = APIClient().get(
100103
"/api/v1.0/documents/media-auth/", HTTP_X_ORIGINAL_URL=original_url
@@ -127,7 +130,7 @@ def test_api_documents_media_auth_anonymous_attachments():
127130
"""
128131
Declaring a media key as original attachment on a document to which
129132
a user has access should give them access to the attachment file
130-
regarless of their access rights on the original document.
133+
regardless of their access rights on the original document.
131134
"""
132135
document_id = uuid4()
133136
filename = f"{uuid4()!s}.jpg"
@@ -150,7 +153,8 @@ def test_api_documents_media_auth_anonymous_attachments():
150153

151154
# Let's now add a document to which the anonymous user has access and
152155
# pointing to the attachment
153-
factories.DocumentFactory(link_reach="public", attachments=[key])
156+
parent = factories.DocumentFactory(link_reach="public")
157+
factories.DocumentFactory(parent=parent, link_reach="restricted", attachments=[key])
154158

155159
response = APIClient().get(
156160
"/api/v1.0/documents/media-auth/", HTTP_X_ORIGINAL_URL=media_url

0 commit comments

Comments
 (0)