Skip to content

Commit 045fa09

Browse files
committed
✨(backend) extract attachment keys from updated content for access
We can't prevent document editors from copy/pasting content from one document to another. The problem is that copying content will also copy the URLs pointing to attachments, but if we don't do anything, readers of the document into which the content is pasted may not be allowed to access the attachment files from the original document. Using the work from the previous commit, we can grant access to the readers of the target document by extracting the attachment keys from the content and adding them to the target document's "attachments" field. Before doing this, we check that the current user can indeed access the attachment files extracted from the content and that they are allowed to edit the current document.
1 parent 4a70883 commit 045fa09

File tree

4 files changed

+75
-2
lines changed

4 files changed

+75
-2
lines changed

src/backend/core/api/serializers.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import magic
1111
from rest_framework import exceptions, serializers
1212

13-
from core import enums, models
13+
from core import enums, models, utils
1414
from core.services.ai_services import AI_ACTIONS
1515
from core.services.converter_services import (
1616
ConversionError,
@@ -268,6 +268,36 @@ def validate_id(self, value):
268268

269269
return value
270270

271+
def save(self, **kwargs):
272+
"""
273+
Process the content field to extract attachment keys and update the document's
274+
"attachments" field for access control.
275+
"""
276+
content = self.validated_data.get("content", "")
277+
extracted_attachments = set(utils.extract_attachments(content))
278+
279+
existing_attachments = (
280+
set(self.instance.attachments or []) if self.instance else set()
281+
)
282+
new_attachments = extracted_attachments - existing_attachments
283+
284+
if new_attachments:
285+
user = self.context["request"].user
286+
readable_documents = models.Document.objects.readable(user).filter(
287+
Q(attachments__overlap=list(new_attachments))
288+
)
289+
290+
readable_attachments = set()
291+
for document in readable_documents:
292+
readable_attachments.update(set(document.attachments) & new_attachments)
293+
294+
# Update attachments with readable keys
295+
self.validated_data["attachments"] = list(
296+
existing_attachments | readable_attachments
297+
)
298+
299+
return super().save(**kwargs)
300+
271301

272302
class ServerCreateDocumentSerializer(serializers.Serializer):
273303
"""

src/backend/core/tests/documents/test_api_documents_media_auth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Test file uploads API endpoint for users in impress's core app.
2+
Test media-auth authorization API endpoint in docs core app.
33
"""
44

55
from io import BytesIO

src/backend/core/tests/test_utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
"""Test util base64_yjs_to_text."""
22

3+
import base64
4+
import uuid
5+
6+
import y_py
7+
38
from core import utils
49

510
# This base64 string is an example of what is saved in the database.
@@ -35,3 +40,38 @@ def test_utils_base64_yjs_to_xml():
3540
or '<heading "textAlignment"="left" "level"="1">Hello</heading>' in content
3641
)
3742
assert '<bulletListItem "textAlignment"="left">world</bulletListItem>' in content
43+
44+
45+
def test_utils_extract_attachments():
46+
"""
47+
All attachment keys in the document content should be extracted.
48+
"""
49+
document_id = uuid.uuid4()
50+
image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
51+
image_url1 = f"http://localhost/media/{image_key1:s}"
52+
53+
image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
54+
image_url2 = f"http://localhost/{image_key2:s}"
55+
56+
image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
57+
image_url3 = f"http://localhost/media/{image_key3:s}"
58+
59+
ydoc = y_py.YDoc() # pylint: disable=no-member
60+
with ydoc.begin_transaction() as txn:
61+
xml_fragment = ydoc.get_xml_element("document-store")
62+
63+
xml_image = xml_fragment.push_xml_element(txn, "image")
64+
xml_image.set_attribute(txn, "src", image_url1)
65+
66+
xml_image = xml_fragment.push_xml_element(txn, "image")
67+
xml_image.set_attribute(txn, "src", image_url2)
68+
69+
xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph")
70+
xml_text = xml_paragraph.push_xml_text(txn)
71+
xml_text.push(txn, image_url3)
72+
73+
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
74+
base64_string = base64.b64encode(update).decode("utf-8")
75+
76+
# image_key2 is missing the "/media/" part and shouldn't get extracted
77+
assert utils.extract_attachments(base64_string) == [image_key1, image_key3]

src/backend/core/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,8 @@ def base64_yjs_to_text(base64_string):
6969

7070
def extract_attachments(content):
7171
"""Helper method to extract media paths from a document's content."""
72+
if not content:
73+
return []
74+
7275
xml_content = base64_yjs_to_xml(content)
7376
return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content)

0 commit comments

Comments
 (0)