Skip to content

Commit ca6538d

Browse files
committed
✨(backend) add duplicate action to the document API endpoint
We took this opportunity to refactor the way access to media attachments is controlled. Each time a media file is uploaded to a document, its key is now added to a list stored on the document instance. This list is passed along when a document is duplicated, which lets us grant readers of the new document access to the attachments even if they never had, or have since lost, access to the original document. We also offer an option to reproduce on the duplicate the same access rights that were in place on the original document. This can be requested by passing the "with_accesses=true" option in the query string. The tricky point is that we need to extract attachment keys from the existing documents and set them on the new "attachments" field that is now used to track access rights on media files.
1 parent b76fb7f commit ca6538d

15 files changed

+586
-55
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ and this project adheres to
128128

129129
## Added
130130

131+
- ✨(backend) add duplicate action to the document API endpoint
131132
- ⚗️(backend) add util to extract text from base64 yjs document
132133
- ✨(backend) add soft delete and restore API endpoints to documents #516
133134
- ✨(backend) allow organizing documents in a tree structure #516

src/backend/core/admin.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ class DocumentAdmin(TreeAdmin):
151151
"path",
152152
"depth",
153153
"numchild",
154+
"duplicated_from",
155+
"attachments",
154156
)
155157
},
156158
),
@@ -166,8 +168,10 @@ class DocumentAdmin(TreeAdmin):
166168
"updated_at",
167169
)
168170
readonly_fields = (
171+
"attachments",
169172
"creator",
170173
"depth",
174+
"duplicated_from",
171175
"id",
172176
"numchild",
173177
"path",

src/backend/core/api/serializers.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,27 @@ class Meta:
381381
]
382382

383383

384+
class DocumentDuplicationSerializer(serializers.Serializer):
    """
    Validate the options accepted when duplicating a document.

    The only option is `with_accesses`, a boolean defaulting to false, which
    states whether access permissions should be kept on the duplicate.
    This serializer is used for validation only and never persists anything.
    """

    with_accesses = serializers.BooleanField(default=False)

    def create(self, validated_data):
        """Always raise: creating objects is not supported by this serializer."""
        raise NotImplementedError("This serializer does not support creation.")

    def update(self, instance, validated_data):
        """Always raise: updating objects is not supported by this serializer."""
        raise NotImplementedError("This serializer does not support updating.")
403+
404+
384405
# Suppress the warning about not implementing `create` and `update` methods
385406
# since we don't use a model and only rely on the serializer for validation
386407
# pylint: disable=abstract-method

src/backend/core/api/viewsets.py

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from django.db.models.expressions import RawSQL
1717
from django.db.models.functions import Left, Length
1818
from django.http import Http404, StreamingHttpResponse
19+
from django.utils.text import capfirst
20+
from django.utils.translation import gettext_lazy as _
1921

2022
import requests
2123
import rest_framework as drf
@@ -28,26 +30,13 @@
2830
from core import authentication, enums, models
2931
from core.services.ai_services import AIService
3032
from core.services.collaboration_services import CollaborationService
33+
from core.utils import extract_attachments
3134

3235
from . import permissions, serializers, utils
3336
from .filters import DocumentFilter, ListDocumentFilter
3437

3538
logger = logging.getLogger(__name__)
3639

37-
<<<<<<< HEAD
38-
ATTACHMENTS_FOLDER = "attachments"
39-
UUID_REGEX = (
40-
r"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"
41-
)
42-
FILE_EXT_REGEX = r"\.[a-zA-Z0-9]{1,10}"
43-
MEDIA_STORAGE_URL_PATTERN = re.compile(
44-
f"{settings.MEDIA_URL:s}(?P<pk>{UUID_REGEX:s})/"
45-
f"(?P<key>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}(?:-unsafe)?{FILE_EXT_REGEX:s})$"
46-
)
47-
COLLABORATION_WS_URL_PATTERN = re.compile(rf"(?:^|&)room=(?P<pk>{UUID_REGEX})(?:&|$)")
48-
=======
49-
>>>>>>> 8076486a (✅(backend) add missing test on media-auth and collaboration-auth)
50-
5140
# pylint: disable=too-many-ancestors
5241

5342

@@ -902,6 +891,78 @@ def tree(self, request, pk, *args, **kwargs):
902891
)
903892
return drf.response.Response(
904893
utils.nest_tree(serializer.data, self.queryset.model.steplen)
894+
895+
@drf.decorators.action(
    detail=True,
    methods=["post"],
    permission_classes=[permissions.IsAuthenticated, permissions.AccessPermission],
    url_path="duplicate",
)
@transaction.atomic
def duplicate(self, request, *args, **kwargs):
    """
    Duplicate a document as its right sibling and return the id of the copy.

    The attachment keys that are both referenced in the content and registered on
    the original document are stored on the duplicate, to allow cross-access to
    the attached files.

    The link settings and the accesses of the original document are reproduced on
    the duplicate only if `with_accesses=true` is passed in the query string. The
    requesting user is always added as owner of the duplicate.

    Returns a 201 response whose body is `{"id": "<id of the duplicate>"}`.
    Raises a validation error if `with_accesses` is not a valid boolean.
    """
    serializer = serializers.DocumentDuplicationSerializer(data=request.GET)
    serializer.is_valid(raise_exception=True)
    with_accesses = serializer.validated_data["with_accesses"]

    # Get document while checking permissions
    document = self.get_object()

    # Read the content once and reuse it for both the extraction and the copy
    base64_yjs_content = document.content

    # Link settings are only carried over when accesses are duplicated
    link_kwargs = (
        {"link_reach": document.link_reach, "link_role": document.link_role}
        if with_accesses
        else {}
    )

    # "attachments" is a nullable field: treat a missing list as empty instead of
    # crashing on `set(None)`. Only keys both referenced in the content and already
    # registered on the original document are passed to the duplicate.
    registered_attachments = set(document.attachments or [])
    extracted_attachments = set(extract_attachments(base64_yjs_content))
    attachments = sorted(extracted_attachments & registered_attachments)

    duplicated_document = document.add_sibling(
        "right",
        title=capfirst(_("copy of {title}").format(title=document.title)),
        content=base64_yjs_content,
        attachments=attachments,
        duplicated_from=document,
        creator=request.user,
        **link_kwargs,
    )

    # Always add the logged-in user as OWNER
    accesses_to_create = [
        models.DocumentAccess(
            document=duplicated_document,
            user=request.user,
            role=models.RoleChoices.OWNER,
        )
    ]

    # If accesses should be duplicated, add other users' accesses as per original
    # document. The requesting user is excluded because the owner access was
    # already added above.
    if with_accesses:
        original_accesses = models.DocumentAccess.objects.filter(
            document=document
        ).exclude(user=request.user)

        accesses_to_create.extend(
            models.DocumentAccess(
                document=duplicated_document,
                user_id=access.user_id,
                team=access.team,
                role=access.role,
            )
            for access in original_accesses
        )

    # Bulk create all the duplicated accesses
    models.DocumentAccess.objects.bulk_create(accesses_to_create)

    # Use the same `drf.response` / `drf.status` accessors as the other actions
    return drf.response.Response(
        {"id": str(duplicated_document.id)},
        status=drf.status.HTTP_201_CREATED,
    )
906967

907968
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
@@ -1085,6 +1146,10 @@ def attachment_upload(self, request, *args, **kwargs):
10851146
file, default_storage.bucket_name, key, ExtraArgs=extra_args
10861147
)
10871148

1149+
# Make the attachment readable by document readers
1150+
document.attachments.append(key)
1151+
document.save()
1152+
10881153
return drf.response.Response(
10891154
{"file": f"{settings.MEDIA_URL:s}{key:s}"},
10901155
status=drf.status.HTTP_201_CREATED,
@@ -1152,20 +1217,20 @@ def media_auth(self, request, *args, **kwargs):
11521217
url_params = self._auth_get_url_params(
11531218
enums.MEDIA_STORAGE_URL_PATTERN, parsed_url.path
11541219
)
1155-
document = self._auth_get_document(url_params["pk"])
11561220

1157-
if not document.get_abilities(request.user).get(self.action, False):
1158-
logger.debug(
1159-
"User '%s' lacks permission for document '%s'",
1160-
request.user,
1161-
document.pk,
1162-
)
1221+
user = request.user
1222+
key = f"{url_params['pk']:s}/{url_params['attachment']:s}"
1223+
1224+
if (
1225+
not self.queryset.readable(user)
1226+
.filter(attachments__contains=[key])
1227+
.exists()
1228+
):
1229+
logger.debug("User '%s' lacks permission for image", user)
11631230
raise drf.exceptions.PermissionDenied()
11641231

11651232
# Generate S3 authorization headers using the extracted URL parameters
1166-
request = utils.generate_s3_authorization_headers(
1167-
f"{url_params['pk']:s}/{url_params['key']:s}"
1168-
)
1233+
request = utils.generate_s3_authorization_headers(key)
11691234

11701235
return drf.response.Response("authorized", headers=request.headers, status=200)
11711236

src/backend/core/enums.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
FILE_EXT_REGEX = r"\.[a-zA-Z]{3,4}"
1616
MEDIA_STORAGE_URL_PATTERN = re.compile(
1717
f"{settings.MEDIA_URL:s}(?P<pk>{UUID_REGEX:s})/"
18-
f"(?P<key>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}{FILE_EXT_REGEX:s})$"
18+
f"(?P<key>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}(?:-unsafe)?{FILE_EXT_REGEX:s})$"
19+
)
20+
# Capture "<uuid>/attachments/<uuid>[-unsafe].<ext>" keys located after the media URL
# prefix. The optional "-unsafe" suffix mirrors MEDIA_STORAGE_URL_PATTERN so that
# keys accepted by that pattern are also matched here.
MEDIA_STORAGE_URL_EXTRACT = re.compile(
    f"{settings.MEDIA_URL:s}"
    f"({UUID_REGEX}/{ATTACHMENTS_FOLDER}/{UUID_REGEX}(?:-unsafe)?{FILE_EXT_REGEX})"
)
2023
COLLABORATION_WS_URL_PATTERN = re.compile(rf"(?:^|&)room=(?P<pk>{UUID_REGEX})(?:&|$)")
2124

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Generated by Django 5.1.4 on 2025-01-18 11:53
2+
import re
3+
4+
import core.models
5+
import django.contrib.postgres.fields
6+
import django.db.models.deletion
7+
from django.db import migrations, models
8+
9+
from core.utils import extract_attachments
10+
11+
12+
def populate_attachments_on_all_documents(apps, schema_editor):
    """
    Populate "attachments" field on all existing documents in the database.

    For each document, the attachment keys are extracted from its content and
    saved on the "attachments" field; only that field is written.
    """
    Document = apps.get_model("core", "Document")

    # NOTE(review): `apps.get_model` returns a historical model, which does not
    # carry custom properties or methods declared on the real model class.
    # Confirm that `content` is actually available here - TODO verify.
    #
    # Stream the rows instead of loading every document in memory at once.
    for document in Document.objects.iterator():
        document.attachments = extract_attachments(document.content)
        document.save(update_fields=["attachments"])
19+
20+
21+
class Migration(migrations.Migration):
    """
    Add the "attachments" and "duplicated_from" fields on documents, remove the
    backward-compatibility field "is_public", and populate "attachments" on all
    existing documents.
    """

    dependencies = [
        ('core', '0017_add_fields_for_soft_delete'),
    ]

    operations = [
        # v2.0.0 was released so we can now remove BC field "is_public"
        migrations.RemoveField(
            model_name='document',
            name='is_public',
        ),
        migrations.AlterModelManagers(
            name='user',
            managers=[
                ('objects', core.models.UserManager()),
            ],
        ),
        migrations.AddField(
            model_name='document',
            name='attachments',
            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, editable=False, null=True, size=None),
        ),
        migrations.AddField(
            model_name='document',
            name='duplicated_from',
            field=models.ForeignKey(blank=True, editable=False, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='duplicates', to='core.document'),
        ),
        # NOTE(review): "choices" is a string here rather than a sequence of pairs;
        # left untouched as generated - confirm this is intended.
        migrations.AlterField(
            model_name='user',
            name='language',
            field=models.CharField(choices="(('en-us', 'English'), ('fr-fr', 'French'), ('de-de', 'German'))", default='en-us', help_text='The language in which the user wants to see the interface.', max_length=10, verbose_name='language'),
        ),
        # Nothing to undo on rollback: the "attachments" column is dropped when the
        # AddField above is reversed. Declaring a no-op reverse keeps the migration
        # reversible.
        migrations.RunPython(
            populate_attachments_on_all_documents,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

src/backend/core/models.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from django.conf import settings
1414
from django.contrib.auth import models as auth_models
1515
from django.contrib.auth.base_user import AbstractBaseUser
16+
from django.contrib.postgres.fields import ArrayField
1617
from django.contrib.sites.models import Site
1718
from django.core import mail, validators
1819
from django.core.cache import cache
@@ -486,6 +487,21 @@ class Document(MP_Node, BaseModel):
486487
)
487488
deleted_at = models.DateTimeField(null=True, blank=True)
488489
ancestors_deleted_at = models.DateTimeField(null=True, blank=True)
490+
duplicated_from = models.ForeignKey(
491+
"self",
492+
on_delete=models.SET_NULL,
493+
related_name="duplicates",
494+
editable=False,
495+
blank=True,
496+
null=True,
497+
)
498+
attachments = ArrayField(
499+
models.CharField(max_length=255),
500+
default=list,
501+
editable=False,
502+
blank=True,
503+
null=True,
504+
)
489505

490506
_content = None
491507

@@ -800,6 +816,7 @@ def get_abilities(self, user, ancestors_links=None):
800816
"cors_proxy": can_get,
801817
"descendants": can_get,
802818
"destroy": is_owner,
819+
"duplicate": can_get,
803820
"favorite": can_get and user.is_authenticated,
804821
"link_configuration": is_owner_or_admin,
805822
"invite_owner": is_owner,

0 commit comments

Comments
 (0)