diff --git a/cms/envs/test.py b/cms/envs/test.py index 38b7c7817149..49db50608858 100644 --- a/cms/envs/test.py +++ b/cms/envs/test.py @@ -333,3 +333,13 @@ "SECRET": "***", "URL": "***", } + +############## openedx-learning (Learning Core) config ############## +OPENEDX_LEARNING = { + 'MEDIA': { + 'BACKEND': 'django.core.files.storage.InMemoryStorage', + 'OPTIONS': { + 'location': MEDIA_ROOT + "_private" + } + } +} diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index f30d5f047747..4d09eb4549b7 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -56,6 +56,7 @@ import base64 import hashlib import logging +import mimetypes import attr import requests @@ -68,6 +69,7 @@ from django.db.models import Q, QuerySet from django.utils.translation import gettext as _ from edx_rest_api_client.client import OAuthAPIClient +from django.urls import reverse from lxml import etree from opaque_keys.edx.keys import BlockTypeKey, UsageKey, UsageKeyV2 from opaque_keys.edx.locator import ( @@ -96,7 +98,11 @@ from xblock.core import XBlock from xblock.exceptions import XBlockNotFoundError -from openedx.core.djangoapps.xblock.api import get_component_from_usage_key, xblock_type_display_name +from openedx.core.djangoapps.xblock.api import ( + get_component_from_usage_key, + get_xblock_app_config, + xblock_type_display_name, +) from openedx.core.lib.xblock_serializer.api import serialize_modulestore_block_for_learning_core from xmodule.modulestore.django import modulestore @@ -1018,18 +1024,48 @@ def get_library_block_static_asset_files(usage_key) -> list[LibraryXBlockStaticF Returns a list of LibraryXBlockStaticFile objects, sorted by path. - TODO: This is not yet implemented for Learning Core backed libraries. TODO: Should this be in the general XBlock API rather than the libraries API? """ - return [] + component = get_component_from_usage_key(usage_key) + component_version = component.versioning.draft + + # If there is no Draft version, then this was soft-deleted + if component_version is None: + return [] + + # cvc = the ComponentVersionContent through table + cvc_set = ( + component_version + .componentversioncontent_set + .filter(content__has_file=True) + .order_by('key') + .select_related('content') + ) + + site_root_url = get_xblock_app_config().get_site_root_url() + + return [ + LibraryXBlockStaticFile( + path=cvc.key, + size=cvc.content.size, + url=site_root_url + reverse( + 'content_libraries:library-assets', + kwargs={ + 'component_version_uuid': component_version.uuid, + 'asset_path': cvc.key, + } + ), + ) + for cvc in cvc_set + ] -def add_library_block_static_asset_file(usage_key, file_name, file_content) -> LibraryXBlockStaticFile: +def add_library_block_static_asset_file(usage_key, file_path, file_content, user=None) -> LibraryXBlockStaticFile: """ Upload a static asset file into the library, to be associated with the specified XBlock. Will silently overwrite an existing file of the same name. - file_name should be a name like "doc.pdf". It may optionally contain slashes + file_path should be a name like "doc.pdf". It may optionally contain slashes like 'en/doc.pdf' file_content should be a binary string. @@ -1041,10 +1077,67 @@ def add_library_block_static_asset_file(usage_key, file_name, file_content) -> L video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1") add_library_block_static_asset_file(video_block, "subtitles-en.srt", subtitles.encode('utf-8')) """ - raise NotImplementedError("Static assets not yet implemented for Learning Core") + # File path validations copied over from v1 library logic. This can't really + # hurt us inside our system because we never use these paths in an actual + # file system–they're just string keys that point to hash-named data files + # in a common library (learning package) level directory. But it might + # become a security issue during import/export serialization. + if file_path != file_path.strip().strip('/'): + raise InvalidNameError("file_path cannot start/end with / or whitespace.") + if '//' in file_path or '..' in file_path: + raise InvalidNameError("Invalid sequence (// or ..) in file_path.") + + component = get_component_from_usage_key(usage_key) + + media_type_str, _encoding = mimetypes.guess_type(file_path) + # We use "application/octet-stream" as a generic fallback media type, per + # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046 + # TODO: This probably makes sense to push down to openedx-learning? + media_type_str = media_type_str or "application/octet-stream" + + now = datetime.now(tz=timezone.utc) + + with transaction.atomic(): + media_type = authoring_api.get_or_create_media_type(media_type_str) + content = authoring_api.get_or_create_file_content( + component.publishable_entity.learning_package.id, + media_type.id, + data=file_content, + created=now, + ) + component_version = authoring_api.create_next_component_version( + component.pk, + content_to_replace={file_path: content.id}, + created=now, + created_by=user.id if user else None, + ) + transaction.on_commit( + lambda: LIBRARY_BLOCK_UPDATED.send_event( + library_block=LibraryBlockData( + library_key=usage_key.context_key, + usage_key=usage_key, + ) + ) + ) + # Now figure out the URL for the newly created asset... + site_root_url = get_xblock_app_config().get_site_root_url() + local_path = reverse( + 'content_libraries:library-assets', + kwargs={ + 'component_version_uuid': component_version.uuid, + 'asset_path': file_path, + } + ) + + return LibraryXBlockStaticFile( + path=file_path, + url=site_root_url + local_path, + size=content.size, + ) -def delete_library_block_static_asset_file(usage_key, file_name): + +def delete_library_block_static_asset_file(usage_key, file_path, user=None): """ Delete a static asset file from the library. @@ -1054,7 +1147,24 @@ def delete_library_block_static_asset_file(usage_key, file_name): video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1") delete_library_block_static_asset_file(video_block, "subtitles-en.srt") """ - raise NotImplementedError("Static assets not yet implemented for Learning Core") + component = get_component_from_usage_key(usage_key) + now = datetime.now(tz=timezone.utc) + + with transaction.atomic(): + component_version = authoring_api.create_next_component_version( + component.pk, + content_to_replace={file_path: None}, + created=now, + created_by=user.id if user else None, + ) + transaction.on_commit( + lambda: LIBRARY_BLOCK_UPDATED.send_event( + library_block=LibraryBlockData( + library_key=usage_key.context_key, + usage_key=usage_key, + ) + ) + ) def get_allowed_block_types(library_key): # pylint: disable=unused-argument diff --git a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py index 56e258f8a1f1..03b32e08ad36 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py @@ -661,13 +661,13 @@ def test_library_permissions(self): # pylint: disable=too-many-statements self._get_library_block_olx(block3_key, expect_response=403) self._get_library_block_fields(block3_key, expect_response=403) self._get_library_block_assets(block3_key, expect_response=403) - self._get_library_block_asset(block3_key, file_name="whatever.png", expect_response=403) + self._get_library_block_asset(block3_key, file_name="static/whatever.png", expect_response=403) # Nor can they preview the block: self._render_block_view(block3_key, view_name="student_view", expect_response=403) # But if we grant allow_public_read, then they can: with self.as_user(admin): self._update_library(lib_id, allow_public_read=True) - # self._set_library_block_asset(block3_key, "whatever.png", b"data") + self._set_library_block_asset(block3_key, "static/whatever.png", b"data") with self.as_user(random_user): self._get_library_block_olx(block3_key) self._render_block_view(block3_key, view_name="student_view") @@ -680,7 +680,7 @@ def test_library_permissions(self): # pylint: disable=too-many-statements with self.as_user(user): self._set_library_block_olx(block3_key, "", expect_response=403) self._set_library_block_fields(block3_key, {"data": "", "metadata": {}}, expect_response=403) - # self._set_library_block_asset(block3_key, "test.txt", b"data", expect_response=403) + self._set_library_block_asset(block3_key, "static/test.txt", b"data", expect_response=403) self._delete_library_block(block3_key, expect_response=403) self._commit_library_changes(lib_id, expect_response=403) self._revert_library_changes(lib_id, expect_response=403) @@ -690,9 +690,9 @@ def test_library_permissions(self): # pylint: disable=too-many-statements olx = self._get_library_block_olx(block3_key) self._set_library_block_olx(block3_key, olx) self._set_library_block_fields(block3_key, {"data": olx, "metadata": {}}) - # self._get_library_block_assets(block3_key) - # self._set_library_block_asset(block3_key, "test.txt", b"data") - # self._get_library_block_asset(block3_key, file_name="test.txt") + self._get_library_block_assets(block3_key) + self._set_library_block_asset(block3_key, "static/test.txt", b"data") + self._get_library_block_asset(block3_key, file_name="static/test.txt") self._delete_library_block(block3_key) self._commit_library_changes(lib_id) self._revert_library_changes(lib_id) # This is a no-op after the commit, but should still have 200 response @@ -915,7 +915,6 @@ def test_library_block_olx_update_event(self): event_receiver.call_args.kwargs ) - @skip("We still need to re-implement static asset handling.") def test_library_block_add_asset_update_event(self): """ Check that LIBRARY_BLOCK_CREATED event is sent when a static asset is @@ -934,7 +933,7 @@ def test_library_block_add_asset_update_event(self): block = self._add_block_to_library(lib_id, "unit", "u1") block_id = block["id"] - self._set_library_block_asset(block_id, "test.txt", b"data") + self._set_library_block_asset(block_id, "static/test.txt", b"data") usage_key = LibraryUsageLocatorV2( lib_key=library_key, @@ -955,7 +954,6 @@ def test_library_block_add_asset_update_event(self): event_receiver.call_args.kwargs ) - @skip("We still need to re-implement static asset handling.") def test_library_block_del_asset_update_event(self): """ Check that LIBRARY_BLOCK_CREATED event is sent when a static asset is @@ -974,9 +972,9 @@ def test_library_block_del_asset_update_event(self): block = self._add_block_to_library(lib_id, "unit", "u1") block_id = block["id"] - self._set_library_block_asset(block_id, "test.txt", b"data") + self._set_library_block_asset(block_id, "static/test.txt", b"data") - self._delete_library_block_asset(block_id, 'text.txt') + self._delete_library_block_asset(block_id, 'static/text.txt') usage_key = LibraryUsageLocatorV2( lib_key=library_key, diff --git a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py index 92ff4c1767d0..a5f69f94b174 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py @@ -1,11 +1,16 @@ """ Tests for static asset files in Learning-Core-based Content Libraries """ -from unittest import skip +from uuid import UUID +from opaque_keys.edx.keys import UsageKey + +from common.djangoapps.student.tests.factories import UserFactory from openedx.core.djangoapps.content_libraries.tests.base import ( ContentLibrariesRestApiTest, ) +from openedx.core.djangoapps.xblock.api import get_component_from_usage_key +from openedx.core.djangolib.testing.utils import skip_unless_cms # Binary data representing an SVG image file SVG_DATA = """ @@ -23,15 +28,10 @@ """ -@skip("Assets are being reimplemented in Learning Core. Disable until that's ready.") +@skip_unless_cms class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest): """ Tests for static asset files in Learning-Core-based Content Libraries - - WARNING: every test should have a unique library slug, because even though - the django/mysql database gets reset for each test case, the lookup between - library slug and bundle UUID does not because it's assumed to be immutable - and cached forever. """ def test_asset_filenames(self): @@ -79,7 +79,7 @@ def test_video_transcripts(self): /> """) # Upload the transcript file - self._set_library_block_asset(block_id, "3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA) + self._set_library_block_asset(block_id, "static/3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA) transcript_handler_url = self._get_block_handler_url(block_id, "transcript") @@ -108,3 +108,79 @@ def check_download(): self._commit_library_changes(library["id"]) check_sjson() check_download() + + +@skip_unless_cms +class ContentLibrariesComponentVersionAssetTest(ContentLibrariesRestApiTest): + """ + Tests for the view that actually delivers the Library asset in Studio. + """ + + def setUp(self): + super().setUp() + + library = self._create_library(slug="asset-lib2", title="Static Assets Test Library") + block = self._add_block_to_library(library["id"], "html", "html1") + self._set_library_block_asset(block["id"], "static/test.svg", SVG_DATA) + usage_key = UsageKey.from_string(block["id"]) + self.component = get_component_from_usage_key(usage_key) + self.draft_component_version = self.component.versioning.draft + + def test_good_responses(self): + get_response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert get_response.status_code == 200 + content = b''.join(chunk for chunk in get_response.streaming_content) + assert content == SVG_DATA + + good_head_response = self.client.head( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert good_head_response.headers == get_response.headers + + def test_missing(self): + """Test asset requests that should 404.""" + # Non-existent version... + wrong_version_uuid = UUID('11111111-1111-1111-1111-111111111111') + response = self.client.get( + f"/library_assets/{wrong_version_uuid}/static/test.svg" + ) + assert response.status_code == 404 + + # Non-existent file... + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/missing.svg" + ) + assert response.status_code == 404 + + # File-like ComponenVersionContent entry that isn't an actually + # downloadable file... + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/block.xml" + ) + assert response.status_code == 404 + + def test_anonymous_user(self): + """Anonymous users shouldn't get access to library assets.""" + self.client.logout() + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert response.status_code == 403 + + def test_unauthorized_user(self): + """User who is not a Content Library staff should not have access.""" + self.client.logout() + student = UserFactory.create( + username="student", + email="student@example.com", + password="student-pass", + is_staff=False, + is_superuser=False, + ) + self.client.login(username="student", password="student-pass") + get_response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert get_response.status_code == 403 diff --git a/openedx/core/djangoapps/content_libraries/urls.py b/openedx/core/djangoapps/content_libraries/urls.py index e77c1b34d277..b9dc05fabc84 100644 --- a/openedx/core/djangoapps/content_libraries/urls.py +++ b/openedx/core/djangoapps/content_libraries/urls.py @@ -75,4 +75,9 @@ path('pub/jwks/', views.LtiToolJwksView.as_view(), name='lti-pub-jwks'), ])), ])), + path( + 'library_assets//', + views.component_version_asset, + name='library-assets', + ), ] diff --git a/openedx/core/djangoapps/content_libraries/views.py b/openedx/core/djangoapps/content_libraries/views.py index 6e50559f38f6..a20ba06b23d7 100644 --- a/openedx/core/djangoapps/content_libraries/views.py +++ b/openedx/core/djangoapps/content_libraries/views.py @@ -71,14 +71,16 @@ from django.conf import settings from django.contrib.auth import authenticate, get_user_model, login from django.contrib.auth.models import Group +from django.core.exceptions import ObjectDoesNotExist from django.db.transaction import atomic, non_atomic_requests -from django.http import Http404, HttpResponseBadRequest, JsonResponse +from django.http import Http404, HttpResponse, HttpResponseBadRequest, JsonResponse, StreamingHttpResponse from django.shortcuts import get_object_or_404 from django.urls import reverse from django.utils.decorators import method_decorator from django.utils.translation import gettext as _ from django.views.decorators.clickjacking import xframe_options_exempt from django.views.decorators.csrf import csrf_exempt +from django.views.decorators.http import require_safe from django.views.generic.base import TemplateResponseMixin, View from pylti1p3.contrib.django import DjangoCacheDataStorage, DjangoDbToolConf, DjangoMessageLaunch, DjangoOIDCLogin from pylti1p3.exception import LtiException, OIDCException @@ -86,6 +88,7 @@ import edx_api_doc_tools as apidocs from opaque_keys import InvalidKeyError from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 +from openedx_learning.api import authoring from organizations.api import ensure_organization from organizations.exceptions import InvalidOrganizationException from organizations.models import Organization @@ -792,7 +795,7 @@ def put(self, request, usage_key_str, file_path): raise ValidationError("File too big") file_content = file_wrapper.read() try: - result = api.add_library_block_static_asset_file(usage_key, file_path, file_content) + result = api.add_library_block_static_asset_file(usage_key, file_path, file_content, request.user) except ValueError: raise ValidationError("Invalid file path") # lint-amnesty, pylint: disable=raise-missing-from return Response(LibraryXBlockStaticFileSerializer(result).data) @@ -807,7 +810,7 @@ def delete(self, request, usage_key_str, file_path): usage_key.lib_key, request.user, permissions.CAN_EDIT_THIS_CONTENT_LIBRARY, ) try: - api.delete_library_block_static_asset_file(usage_key, file_path) + api.delete_library_block_static_asset_file(usage_key, file_path, request.user) except ValueError: raise ValidationError("Invalid file path") # lint-amnesty, pylint: disable=raise-missing-from return Response(status=status.HTTP_204_NO_CONTENT) @@ -1143,3 +1146,74 @@ def get(self, request): Return the JWKS. """ return JsonResponse(self.lti_tool_config.get_jwks(), safe=False) + + +@require_safe +def component_version_asset(request, component_version_uuid, asset_path): + """ + Serves static assets associated with particular Component versions. + + Important notes: + * This is meant for Studio/authoring use ONLY. It requires read access to + the content library. + * It uses the UUID because that's easier to parse than the key field (which + could be part of an OpaqueKey, but could also be almost anything else). + * This is not very performant, and we still want to use the X-Accel-Redirect + method for serving LMS traffic in the longer term (and probably Studio + eventually). + """ + try: + component_version = authoring.get_component_version_by_uuid( + component_version_uuid + ) + except ObjectDoesNotExist as exc: + raise Http404() from exc + + # Permissions check... + learning_package = component_version.component.learning_package + library_key = LibraryLocatorV2.from_string(learning_package.key) + api.require_permission_for_library_key( + library_key, request.user, permissions.CAN_VIEW_THIS_CONTENT_LIBRARY, + ) + + # We already have logic for getting the correct content and generating the + # proper headers in Learning Core, but the response generated here is an + # X-Accel-Redirect and lacks the actual content. We eventually want to use + # this response in conjunction with a media reverse proxy (Caddy or Nginx), + # but in the short term we're just going to remove the redirect and stream + # the content directly. + redirect_response = authoring.get_redirect_response_for_component_asset( + component_version_uuid, + asset_path, + public=False, + learner_downloadable_only=False, + ) + + # If there was any error, we return that response because it will have the + # correct headers set and won't have any X-Accel-Redirect header set. + if redirect_response.status_code != 200: + return redirect_response + + # If we got here, we know that the asset exists and it's okay to download. + cv_content = component_version.componentversioncontent_set.get(key=asset_path) + content = cv_content.content + + # Delete the re-direct part of the response headers. We'll copy the rest. + headers = redirect_response.headers + headers.pop('X-Accel-Redirect') + + # We need to set the content size header manually because this is a + # streaming response. It's not included in the redirect headers because it's + # not needed there (the reverse-proxy would have direct access to the file). + headers['Content-Length'] = content.size + + if request.method == "HEAD": + return HttpResponse(headers=headers) + + # Otherwise it's going to be a GET response. We don't support response + # offsets or anything fancy, because we don't expect to run this view at + # scale. + return StreamingHttpResponse( + content.read_file().chunks(), + headers=redirect_response.headers, + ) diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 132b8cff1e14..18ea6152aa80 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -8,17 +8,20 @@ import html import logging import os +import pathlib import re from functools import wraps import requests import simplejson as json from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist from lxml import etree from opaque_keys.edx.keys import UsageKeyV2 from pysrt import SubRipFile, SubRipItem, SubRipTime from pysrt.srtexc import Error +from openedx.core.djangoapps.xblock.api import get_component_from_usage_key from xmodule.contentstore.content import StaticContent from xmodule.contentstore.django import contentstore from xmodule.exceptions import NotFoundError @@ -1041,6 +1044,8 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran """ Get video transcript from Learning Core. + Limitation: This is only going to grab from the Draft version. + HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`: Blockstore expects video transcripts to be placed into the 'static/' @@ -1072,9 +1077,71 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran Returns: tuple containing content, filename, mimetype """ - # TODO: Update to use Learning Core data models once static assets support - # has been added. - raise NotFoundError("No transcript - transcripts not supported yet by learning core components.") + usage_key = video_block.scope_ids.usage_id + + # Validate that the format is something we even support... + if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT): + raise NotFoundError(f'Invalid transcript format `{output_format}`') + + # See if the requested language exists. + transcripts = transcripts_info['transcripts'] + if language not in transcripts: + raise NotFoundError( + f"Video {usage_key} does not have a transcript file defined for the " + f"'{language}' language in its OLX." + ) + + # Grab the underlying Component. There's no version parameter to this call, + # so we're just going to grab the file associated with the latest draft + # version for now. + component = get_component_from_usage_key(usage_key) + component_version = component.versioning.draft + if not component_version: + raise NotFoundError( + f"No transcript for {usage_key} because Component {component.uuid} " + "was soft-deleted." + ) + + file_path = pathlib.Path(f"static/{transcripts[language]}") + if file_path.suffix != '.srt': + # We want to standardize on .srt + raise NotFoundError( + "Video XBlocks in Content Libraries only support storing .srt " + f"transcript files, but we tried to look up {file_path} for {usage_key}" + ) + + # TODO: There should be a Learning Core API call for this: + try: + content = ( + component_version + .componentversioncontent_set + .filter(content__has_file=True) + .select_related('content') + .get(key=file_path) + .content + ) + data = content.read_file().read() + except ObjectDoesNotExist as exc: + raise NotFoundError( + f"No file {file_path} found for {usage_key} " + f"(ComponentVersion {component_version.uuid})" + ) from exc + + # Now convert the transcript data to the requested format: + output_filename = f'{file_path.stem}.{output_format}' + output_transcript = Transcript.convert( + data.decode('utf-8'), + input_format=Transcript.SRT, + output_format=output_format, + ) + if not output_transcript.strip(): + raise NotFoundError( + f"Transcript file {file_path} found for {usage_key} " + f"(ComponentVersion {component_version.uuid}), but it has no " + "content or is malformed." + ) + + return output_transcript, output_filename, Transcript.mime_types[output_format] def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):