Skip to content

Commit

Permalink
refactor: Fix content server methods and tests (finish method lift)
Browse files Browse the repository at this point in the history
Some lingering cleanup from the transition from a middleware to a view.
See #34702 for context.

- Remove IMPL and self/StaticContentServer references
- Add newlines to satisfy code style linter
- Fix test references
- Update module docstring
  • Loading branch information
timmc-edx committed Sep 30, 2024
1 parent 8b162b4 commit 660c30b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 44 deletions.
20 changes: 10 additions & 10 deletions openedx/core/djangoapps/contentserver/test/test_contentserver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Tests for StaticContentServer
Tests for content server.
"""


Expand Down Expand Up @@ -27,7 +27,7 @@
from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, SharedModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_course_from_xml

from ..views import HTTP_DATE_FORMAT, StaticContentServer, parse_range_header
from .. import views

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -356,8 +356,8 @@ def test_cache_headers_without_ttl_locked(self, mock_get_cache_ttl):
assert 'private, no-cache, no-store' == resp['Cache-Control']

def test_get_expiration_value(self):
start_dt = datetime.datetime.strptime("Thu, 01 Dec 1983 20:00:00 GMT", HTTP_DATE_FORMAT)
near_expire_dt = StaticContentServer.get_expiration_value(start_dt, 55)
start_dt = datetime.datetime.strptime("Thu, 01 Dec 1983 20:00:00 GMT", views.HTTP_DATE_FORMAT)
near_expire_dt = views.get_expiration_value(start_dt, 55)
assert 'Thu, 01 Dec 1983 20:00:55 GMT' == near_expire_dt

@patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents')
Expand All @@ -371,7 +371,7 @@ def test_cache_is_cdn_with_normal_request(self, mock_get_cdn_user_agents):
request_factory = RequestFactory()
browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Chrome 1234')

is_from_cdn = StaticContentServer.is_cdn_request(browser_request)
is_from_cdn = views.is_cdn_request(browser_request)
assert is_from_cdn is False

@patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents')
Expand All @@ -385,7 +385,7 @@ def test_cache_is_cdn_with_cdn_request(self, mock_get_cdn_user_agents):
request_factory = RequestFactory()
browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront')

is_from_cdn = StaticContentServer.is_cdn_request(browser_request)
is_from_cdn = views.is_cdn_request(browser_request)
assert is_from_cdn is True

@patch('openedx.core.djangoapps.contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents')
Expand All @@ -400,7 +400,7 @@ def test_cache_is_cdn_with_cdn_request_multiple_user_agents(self, mock_get_cdn_u
request_factory = RequestFactory()
browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront')

is_from_cdn = StaticContentServer.is_cdn_request(browser_request)
is_from_cdn = views.is_cdn_request(browser_request)
assert is_from_cdn is True


Expand All @@ -415,7 +415,7 @@ def setUp(self):
self.content_length = 10000

def test_bytes_unit(self):
unit, __ = parse_range_header('bytes=100-', self.content_length)
unit, __ = views.parse_range_header('bytes=100-', self.content_length)
assert unit == 'bytes'

@ddt.data(
Expand All @@ -428,7 +428,7 @@ def test_bytes_unit(self):
)
@ddt.unpack
def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges):
__, ranges = parse_range_header(header_value, self.content_length)
__, ranges = views.parse_range_header(header_value, self.content_length)
assert len(ranges) == excepted_ranges_length
assert ranges == expected_ranges

Expand All @@ -446,5 +446,5 @@ def test_valid_syntax(self, header_value, excepted_ranges_length, expected_range
@ddt.unpack
def test_invalid_syntax(self, header_value, exception_class, exception_message_regex):
self.assertRaisesRegex(
exception_class, exception_message_regex, parse_range_header, header_value, self.content_length
exception_class, exception_message_regex, views.parse_range_header, header_value, self.content_length
)
59 changes: 25 additions & 34 deletions openedx/core/djangoapps/contentserver/views.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,9 @@
"""
Views for serving course assets.
Historically, this was implemented as a *middleware* (StaticContentServer) that
intercepted requests with paths matching certain patterns, rather than using
urlpatterns and a view. There wasn't any good reason for this, as far as I can
tell. It causes some problems for telemetry: When the code-owner middleware asks
Django what view handled the request, it does so by looking at the result of the
`resolve` utility, but these URLs get a Resolver404 (because there's no
registered urlpattern).
We've turned it into a proper view, with a few warts remaining:
- The view implementation is all bundled into a StaticContentServer class that
doesn't appear to have any state. The methods could likely just be extracted
as top-level functions.
- All three urlpatterns are registered to the same view, which then has to
re-parse the URL to determine which pattern is in effect. We should probably
have 3 views as entry points.
For historical reasons, this is just one view that matches to three different URL patterns, and then has to
re-parse the URL to determine which pattern is in effect. We should probably
have 3 views as entry points.
"""
import datetime
import logging
Expand Down Expand Up @@ -51,7 +38,7 @@ def course_assets_view(request):
"""
Serve course assets to end users. Colloquially referred to as "contentserver."
"""
return IMPL.process_request(request)
return process_request(request)


log = logging.getLogger(__name__)
Expand All @@ -62,7 +49,7 @@ def course_assets_view(request):
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"


def is_asset_request(self, request):
def is_asset_request(request):
"""Determines whether the given request is an asset request"""
# Don't change this without updating urls.py! See docstring of views.py.
return (
Expand All @@ -73,12 +60,13 @@ def is_asset_request(self, request):
StaticContent.is_versioned_asset_path(request.path)
)


# pylint: disable=too-many-statements
def process_request(self, request):
def process_request(request):
"""Process the given request"""
asset_path = request.path

if self.is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks
if is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks
# Make sure we can convert this request into a location.
if AssetLocator.CANONICAL_NAMESPACE in asset_path:
asset_path = asset_path.replace('block/', 'block@', 1)
Expand All @@ -98,7 +86,7 @@ def process_request(self, request):
# if we're able to load it.
actual_digest = None
try:
content = self.load_asset_from_location(loc)
content = load_asset_from_location(loc)
actual_digest = getattr(content, "content_digest", None)
except (ItemNotFoundError, NotFoundError):
return HttpResponseNotFound()
Expand All @@ -121,15 +109,15 @@ def process_request(self, request):
set_custom_attribute('contentserver.path', loc.path)

# Figure out if this is a CDN using us as the origin.
is_from_cdn = StaticContentServer.is_cdn_request(request)
is_from_cdn = is_cdn_request(request)
set_custom_attribute('contentserver.from_cdn', is_from_cdn)

# Check if this content is locked or not.
locked = self.is_content_locked(content)
locked = is_content_locked(content)
set_custom_attribute('contentserver.locked', locked)

# Check that user has access to the content.
if not self.is_user_authorized(request, content, loc):
if not is_user_authorized(request, content, loc):
return HttpResponseForbidden('Unauthorized')

# Figure out if the client sent us a conditional request, and let them know
Expand Down Expand Up @@ -209,11 +197,12 @@ def process_request(self, request):
# middleware we have in place, there's no easy way to use the built-in Django
# utilities and properly sanitize and modify a response to ensure that it is as
# cacheable as possible, which is why we do it ourselves.
self.set_caching_headers(content, response)
set_caching_headers(content, response)

return response

def set_caching_headers(self, content, response):

def set_caching_headers(content, response):
"""
Sets caching headers based on whether or not the asset is locked.
"""
Expand All @@ -229,7 +218,7 @@ def set_caching_headers(self, content, response):
if cache_ttl > 0 and not is_locked:
set_custom_attribute('contentserver.cacheable', True)

response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl)
response['Expires'] = get_expiration_value(datetime.datetime.utcnow(), cache_ttl)
response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl)
elif is_locked:
set_custom_attribute('contentserver.cacheable', False)
Expand All @@ -243,6 +232,7 @@ def set_caching_headers(self, content, response):
# caches a version of the response without CORS headers, in turn breaking XHR requests.
force_header_for_response(response, 'Vary', 'Origin')


@staticmethod
def is_cdn_request(request):
"""
Expand All @@ -259,23 +249,26 @@ def is_cdn_request(request):

return False


@staticmethod
def get_expiration_value(now, cache_ttl):
"""Generates an RFC1123 datetime string based on a future offset."""
expire_dt = now + datetime.timedelta(seconds=cache_ttl)
return expire_dt.strftime(HTTP_DATE_FORMAT)

def is_content_locked(self, content):

def is_content_locked(content):
"""
Determines whether or not the given content is locked.
"""
return bool(getattr(content, "locked", False))

def is_user_authorized(self, request, content, location):

def is_user_authorized(request, content, location):
"""
Determines whether or not the user for this request is authorized to view the given asset.
"""
if not self.is_content_locked(content):
if not is_content_locked(content):
return True

if not hasattr(request, "user") or not request.user.is_authenticated:
Expand All @@ -290,7 +283,8 @@ def is_user_authorized(self, request, content, location):

return True

def load_asset_from_location(self, location):

def load_asset_from_location(location):
"""
Loads an asset based on its location, either retrieving it from a cache
or loading it directly from the contentstore.
Expand All @@ -315,9 +309,6 @@ def load_asset_from_location(self, location):
return content


IMPL = StaticContentServer()


def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Expand Down

0 comments on commit 660c30b

Please sign in to comment.