Skip to content

Commit

Permalink
fix: get some VideoBlock support for Learning Core assets
Browse files Browse the repository at this point in the history
  • Loading branch information
ormsbee committed Oct 11, 2024
1 parent 40abbe5 commit 7f07ee3
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 5 deletions.
8 changes: 6 additions & 2 deletions openedx/core/djangoapps/content_libraries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,11 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user
video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1")
add_library_block_static_asset_file(video_block, "subtitles-en.srt", subtitles.encode('utf-8'))
"""
# File path validations copied over from v1 library logic...
# File path validations copied over from v1 library logic. This can't really
# hurt us inside our system because we never use these paths in an actual
# file system—they're just string keys that point to hash-named data files
# in a common library (learning package) level directory. But it might
# become a security issue during import/export serialization.
if file_path != file_path.strip().strip('/'):
raise InvalidNameError("file_path cannot start/end with / or whitespace.")
if '//' in file_path or '..' in file_path:
Expand All @@ -1069,10 +1073,10 @@ def add_library_block_static_asset_file(usage_key, file_path, file_content, user
component = get_component_from_usage_key(usage_key)

media_type_str, _encoding = mimetypes.guess_type(file_path)
media_type = authoring_api.get_or_create_media_type(media_type_str)
now = datetime.now(tz=timezone.utc)

with transaction.atomic():
media_type = authoring_api.get_or_create_media_type(media_type_str)
content = authoring_api.get_or_create_file_content(
component.publishable_entity.learning_package.id,
media_type.id,
Expand Down
61 changes: 58 additions & 3 deletions xmodule/video_block/transcripts_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import html
import logging
import os
import pathlib
import re
from functools import wraps

Expand All @@ -16,9 +17,11 @@
from django.conf import settings
from lxml import etree
from opaque_keys.edx.keys import UsageKeyV2
from openedx_learning.api import authoring
from pysrt import SubRipFile, SubRipItem, SubRipTime
from pysrt.srtexc import Error

from openedx.core.djangoapps.xblock.api import get_component_from_usage_key
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from xmodule.exceptions import NotFoundError
Expand Down Expand Up @@ -1041,6 +1044,8 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran
"""
Get video transcript from Learning Core.
Limitation: This is only going to grab from the Draft version.
HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`:
Blockstore expects video transcripts to be placed into the 'static/'
Expand Down Expand Up @@ -1072,9 +1077,59 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran
Returns:
tuple containing content, filename, mimetype
"""
# TODO: Update to use Learning Core data models once static assets support
# has been added.
raise NotFoundError("No transcript - transcripts not supported yet by learning core components.")
# Body of the transcript lookup: validate the request, locate the draft
# version of the video's Component, read the stored .srt file and convert it
# to the requested output format.
usage_key = video_block.scope_ids.usage_id

# Validate that the format is something we even support...
if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
raise NotFoundError(f'Invalid transcript format `{output_format}`')

# See if the requested language exists.
# `transcripts` is indexed by language below and its value is used as a file
# name -- presumably a dict of language code -> transcript file name; verify
# against the caller that builds `transcripts_info`.
transcripts = transcripts_info['transcripts']
if language not in transcripts:
raise NotFoundError(
f"Video {usage_key} does not have a transcript file defined for the "
f"'{language}' language in its OLX."
)

# Grab the underlying Component. There's no version parameter to this call,
# so we're just going to grab the file associated with the latest draft
# version for now.
component = get_component_from_usage_key(usage_key)
component_version = component.versioning.draft
# A falsy draft version is reported as a soft-deleted component.
if not component_version:
raise NotFoundError(
f"No transcript for {usage_key}: Component {component.uuid} was soft-deleted."
)

# Transcript files are looked up under the "static/" prefix of the component
# version's contents.
file_path = pathlib.Path(f"static/{transcripts[language]}")
if file_path.suffix != '.srt':
# We want to standardize on .srt
raise NotFoundError("Video XBlocks in Content Libraries only support .srt transcript files.")

# TODO: There should be a Learning Core API call for this:
# NOTE(review): the print() below looks like leftover debug output -- it
# iterates every content row of the component version and writes to stdout on
# each call. Consider removing it or routing it through the `logging` module
# this file already imports.
print(
[(cvc.key, cvc.content.has_file) for cvc in component_version.componentversioncontent_set.all()]
)
# NOTE(review): `file_path` is a pathlib.Path, not a str, when passed as the
# `key` lookup value -- confirm the ORM coerces it as intended (or pass
# str(file_path)).
# NOTE(review): if no row matches, `.get()` presumably raises the model's
# DoesNotExist rather than NotFoundError, unlike every other failure path in
# this body -- confirm callers handle that.
content = (
component_version
.componentversioncontent_set
.filter(content__has_file=True)
.select_related('content')
.get(key=file_path)
)
# NOTE(review): `content` here is the row fetched from
# `componentversioncontent_set`; the debug print above reaches `has_file` via
# `cvc.content`, so confirm `read_file()` is available on this row and not
# only on its related `.content` object.
data = content.read_file().read()

# Now convert the transcript data to the requested format:
# The output file keeps the stored file's stem and uses the requested format
# as its extension; the stored bytes are decoded as UTF-8 and treated as SRT.
output_filename = f'{file_path.stem}.{output_format}'
output_transcript = Transcript.convert(
data.decode('utf-8'),
input_format=Transcript.SRT,
output_format=output_format,
)
# A conversion result that is empty or whitespace-only is treated as "no
# transcript".
if not output_transcript.strip():
raise NotFoundError('No transcript content')

# Returns (converted transcript text, output file name, mime type for the
# requested format).
return output_transcript, output_filename, Transcript.mime_types[output_format]


def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):
Expand Down

0 comments on commit 7f07ee3

Please sign in to comment.