From f65403975a5a07a1ade5ed5ddd37f8d194c792f0 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Wed, 25 Sep 2024 10:32:25 -0400 Subject: [PATCH] fix: don't wrap HTML data with newlines when serializing for LC (#35532) When serializing to OLX, the Learning Core runtime wraps HTML content in CDATA to avoid having to escape every individual `<`, `>`, and `&`. The runtime also puts newlines around the content within the CDATA, So, given HTML content `...`, we get ``. The problem is that every time you serialize an HTML block to OLX, it adds another pair of newlines. These newlines aren't visible to the end users, but they do make it so that importing and exporting content never reached a stable, aka "canonical" form. It also makes unit testing difficult, because the value of `html_block.data` becomes a moving target. We do not believe these newlines are necessary, so we have removed them from the `CDATA` block, and added a unit test to ensure that HTML blocks having a canonical serialization. Closes: https://github.com/openedx/edx-platform/issues/35525 --- .../content_libraries/tests/test_runtime.py | 82 +++++++++++++++++++ .../content_staging/tests/test_clipboard.py | 2 +- .../lib/xblock_serializer/block_serializer.py | 2 +- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content_libraries/tests/test_runtime.py b/openedx/core/djangoapps/content_libraries/tests/test_runtime.py index 89b8cdefd86b..f79808a7ec9a 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_runtime.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_runtime.py @@ -3,6 +3,7 @@ """ import json from gettext import GNUTranslations +from django.test import TestCase from completion.test_utils import CompletionWaffleTestMixin from django.db import connections, transaction @@ -24,6 +25,7 @@ from openedx.core.djangoapps.dark_lang.models import DarkLangConfig from openedx.core.djangoapps.xblock import api as xblock_api from openedx.core.djangolib.testing.utils import skip_unless_lms, skip_unless_cms +from openedx.core.lib.xblock_serializer import api as serializer_api from common.djangoapps.student.tests.factories import UserFactory @@ -59,6 +61,86 @@ def setUp(self): ) +@skip_unless_cms +class ContentLibraryOlxTests(ContentLibraryContentTestMixin, TestCase): + """ + Basic test of the Learning-Core-based XBlock serialization-deserialization, using XBlocks in a content library. + """ + + def test_html_round_trip(self): + """ + Test that if we deserialize and serialize an HTMLBlock repeatedly, two things hold true: + + 1. Even if the OLX changes format, the inner content does not change format. + 2. The OLX settles into a stable state after 1 round trip. + + (We are particularly testing HTML, but it would be good to confirm that these principles hold true for + XBlocks in general.) + """ + usage_key = library_api.create_library_block(self.library.key, "html", "roundtrip").usage_key + + # The block's actual HTML has some extraneous spaces and newlines, as well as comment. + # We expect this to be preserved through the round-trips. + block_content = '''\ +
+
+

There is a space on either side of this sentence.

+

\tThere is a tab on either side of this sentence.\t

+

🙃There is an emoji on either side of this sentence.🙂

+

There is nothing on either side of this sentence.

+
+

\t ]]>

+ +
''' + + # The OLX containing the HTML also has some extraneous stuff, which do *not* expect to survive the round-trip. + olx_1 = f'''\ + + ''' + + # Here is what we expect the OLX to settle down to. Notable changes: + # * url_name is added. + # * some_fake_field is gone. + # * The OLX comment is gone. + # * A trailing newline is added at the end of the export. + # DEVS: If you are purposefully tweaking the formatting of the xblock serializer, then it's fine to + # update the value of this variable, as long as: + # 1. the {block_content} remains unchanged, and + # 2. the canonical_olx remains stable through the 2nd round trip. + canonical_olx = ( + f'\n' + ) + + # Save the block to LC, and re-load it. + library_api.set_library_block_olx(usage_key, olx_1) + library_api.publish_changes(self.library.key) + block_saved_1 = xblock_api.load_block(usage_key, self.staff_user) + + # Content should be preserved... + assert block_saved_1.data == block_content + + # ...but the serialized OLX will have changed to match the 'canonical' OLX. + olx_2 = serializer_api.serialize_xblock_to_olx(block_saved_1).olx_str + assert olx_2 == canonical_olx + + # Now, save that OLX back to LC, and re-load it again. + library_api.set_library_block_olx(usage_key, olx_2) + library_api.publish_changes(self.library.key) + block_saved_2 = xblock_api.load_block(usage_key, self.staff_user) + + # Again, content should be preserved... + assert block_saved_2.data == block_saved_1.data == block_content + + # ...and this time, the OLX should have settled too. + olx_3 = serializer_api.serialize_xblock_to_olx(block_saved_2).olx_str + assert olx_3 == olx_2 == canonical_olx + + class ContentLibraryRuntimeTests(ContentLibraryContentTestMixin): """ Basic tests of the Learning-Core-based XBlock runtime using XBlocks in a diff --git a/openedx/core/djangoapps/content_staging/tests/test_clipboard.py b/openedx/core/djangoapps/content_staging/tests/test_clipboard.py index 00c4466b7d48..551f94e90e1a 100644 --- a/openedx/core/djangoapps/content_staging/tests/test_clipboard.py +++ b/openedx/core/djangoapps/content_staging/tests/test_clipboard.py @@ -159,7 +159,7 @@ def test_copy_html(self): Sample ]]> - """).lstrip() + """).replace("\n", "") + "\n" # No newlines, expect one trailing newline. # Now if we GET the clipboard again, the GET response should exactly equal the last POST response: assert client.get(CLIPBOARD_ENDPOINT).json() == response_data diff --git a/openedx/core/lib/xblock_serializer/block_serializer.py b/openedx/core/lib/xblock_serializer/block_serializer.py index 966380f25061..f12bf5336af5 100644 --- a/openedx/core/lib/xblock_serializer/block_serializer.py +++ b/openedx/core/lib/xblock_serializer/block_serializer.py @@ -133,7 +133,7 @@ def _serialize_html_block(self, block) -> etree.Element: # Escape any CDATA special chars escaped_block_data = block.data.replace("]]>", "]]>") - olx_node.text = etree.CDATA("\n" + escaped_block_data + "\n") + olx_node.text = etree.CDATA(escaped_block_data) return olx_node