From cf50a22d76f1ab3294e85c0a753a7c369663b293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Guti=C3=A9rrez?= Date: Sat, 6 Nov 2021 12:50:43 -0500 Subject: [PATCH] [gcloud] Add gzip support (#980) Closes #654. Closes #818. --- .gitignore | 3 + docs/backends/gcloud.rst | 10 +++- setup.cfg | 3 +- storages/backends/azure_storage.py | 5 +- storages/backends/gcloud.py | 31 +++++++--- storages/backends/s3boto3.py | 16 ++--- storages/compress.py | 49 +++++++++++++++ storages/utils.py | 35 ----------- tests/test_gcloud.py | 95 ++++++++++++++++++++++++++++++ tests/test_s3boto3.py | 13 +--- tests/utils.py | 13 ++++ 11 files changed, 203 insertions(+), 70 deletions(-) create mode 100644 storages/compress.py create mode 100644 tests/utils.py diff --git a/.gitignore b/.gitignore index 84ea11c76..24d900226 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,11 @@ __pycache__ .cache .idea/ +.vscode/ .pytest_cache/ venv/ dist/ docs/_build + +.DS_Store diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 6651d1d2e..9a87ef36a 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -83,6 +83,14 @@ Your Google Storage bucket name, as a string. Required. Your Google Cloud project ID. If unset, falls back to the default inferred from the environment. +``GS_IS_GZIPPED`` (optional: default is ``False``) + +Whether or not to enable gzipping of content types specified by ``GZIP_CONTENT_TYPES`` + +``GZIP_CONTENT_TYPES`` (optional: default is ``text/css``, ``text/javascript``, ``application/javascript``, ``application/x-javascript``, ``image/svg+xml``) + +When ``GS_IS_GZIPPED`` is set to ``True`` the content types which will be gzipped + .. _gs-creds: ``GS_CREDENTIALS`` (optional) @@ -121,7 +129,7 @@ a signed (expiring) url. .. note:: When using this setting, make sure you have ``fine-grained`` access control enabled on your bucket, as opposed to ``Uniform`` access control, or else, file uploads will return with HTTP 400. If you - already have a bucket with ``Uniform`` access control set to public read, please keep + already have a bucket with ``Uniform`` access control set to public read, please keep ``GS_DEFAULT_ACL`` to ``None`` and set ``GS_QUERYSTRING_AUTH`` to ``False``. ``GS_QUERYSTRING_AUTH`` (optional, default is True) diff --git a/setup.cfg b/setup.cfg index c66a12b40..fffe83912 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,8 @@ sftp = [flake8] exclude = .tox, - docs + docs, + venv max-line-length = 119 [isort] diff --git a/storages/backends/azure_storage.py b/storages/backends/azure_storage.py index f8e242a48..ffdc0bdf7 100644 --- a/storages/backends/azure_storage.py +++ b/storages/backends/azure_storage.py @@ -11,11 +11,10 @@ from django.core.files.base import File from django.utils import timezone from django.utils.deconstruct import deconstructible -from django.utils.encoding import force_bytes from storages.base import BaseStorage from storages.utils import ( - clean_name, get_available_overwrite_name, safe_join, setting, + clean_name, get_available_overwrite_name, safe_join, setting, to_bytes, ) @@ -67,7 +66,7 @@ def write(self, content): 'a' not in self._mode): raise AttributeError("File was not opened in write mode.") self._is_dirty = True - return super().write(force_bytes(content)) + return super().write(to_bytes(content)) def close(self): if self._file is None: diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index e22262e61..15aa2e5d4 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -7,12 +7,12 @@ from django.core.files.base import File from django.utils import timezone from django.utils.deconstruct import deconstructible -from django.utils.encoding import force_bytes from storages.base import BaseStorage +from storages.compress import CompressedFileMixin, CompressStorageMixin from storages.utils import ( check_location, clean_name, get_available_overwrite_name, safe_join, - setting, + setting, to_bytes, ) try: @@ -24,10 +24,11 @@ "See https://github.com/GoogleCloudPlatform/gcloud-python") +CONTENT_ENCODING = 'content_encoding' CONTENT_TYPE = 'content_type' -class GoogleCloudFile(File): +class GoogleCloudFile(CompressedFileMixin, File): def __init__(self, name, mode, storage): self.name = name self.mime_type = mimetypes.guess_type(name)[0] @@ -56,6 +57,8 @@ def _get_file(self): self._is_dirty = False self.blob.download_to_file(self._file) self._file.seek(0) + if self._storage.gzip and self.blob.content_encoding == 'gzip': + self._file = self._decompress_file(mode=self._mode, file=self._file) return self._file def _set_file(self, value): @@ -76,7 +79,7 @@ def write(self, content): if 'w' not in self._mode: raise AttributeError("File was not opened in write mode.") self._is_dirty = True - return super().write(force_bytes(content)) + return super().write(to_bytes(content)) def close(self): if self._file is not None: @@ -90,7 +93,7 @@ def close(self): @deconstructible -class GoogleCloudStorage(BaseStorage): +class GoogleCloudStorage(CompressStorageMixin, BaseStorage): def __init__(self, **settings): super().__init__(**settings) @@ -109,6 +112,14 @@ def get_default_settings(self): "default_acl": setting('GS_DEFAULT_ACL'), "querystring_auth": setting('GS_QUERYSTRING_AUTH', True), "expiration": setting('GS_EXPIRATION', timedelta(seconds=86400)), + "gzip": setting('GS_IS_GZIPPED', False), + "gzip_content_types": setting('GZIP_CONTENT_TYPES', ( + 'text/css', + 'text/javascript', + 'application/javascript', + 'application/x-javascript', + 'image/svg+xml', + )), "file_overwrite": setting('GS_FILE_OVERWRITE', True), "cache_control": setting('GS_CACHE_CONTROL'), "object_parameters": setting('GS_OBJECT_PARAMETERS', {}), @@ -164,14 +175,18 @@ def _save(self, name, content): upload_params = {} blob_params = self.get_object_parameters(name) upload_params['predefined_acl'] = blob_params.pop('acl', self.default_acl) + upload_params[CONTENT_TYPE] = blob_params.pop(CONTENT_TYPE, file_object.mime_type) - if CONTENT_TYPE not in blob_params: - upload_params[CONTENT_TYPE] = file_object.mime_type + if (self.gzip and + upload_params[CONTENT_TYPE] in self.gzip_content_types and + CONTENT_ENCODING not in blob_params): + content = self._compress_content(content) + blob_params[CONTENT_ENCODING] = 'gzip' for prop, val in blob_params.items(): setattr(file_object.blob, prop, val) - file_object.blob.upload_from_file(content, rewind=True, size=content.size, **upload_params) + file_object.blob.upload_from_file(content, rewind=True, size=getattr(content, 'size', None), **upload_params) return cleaned_name def get_object_parameters(self, name): diff --git a/storages/backends/s3boto3.py b/storages/backends/s3boto3.py index dc402e344..ac0e26708 100644 --- a/storages/backends/s3boto3.py +++ b/storages/backends/s3boto3.py @@ -4,7 +4,6 @@ import tempfile import threading from datetime import datetime, timedelta -from gzip import GzipFile from tempfile import SpooledTemporaryFile from urllib.parse import parse_qsl, urlencode, urlsplit @@ -16,9 +15,10 @@ from django.utils.timezone import is_naive, make_naive from storages.base import BaseStorage +from storages.compress import CompressedFileMixin, CompressStorageMixin from storages.utils import ( - GzipCompressionWrapper, check_location, get_available_overwrite_name, - lookup_env, safe_join, setting, to_bytes, + check_location, get_available_overwrite_name, lookup_env, safe_join, + setting, to_bytes, ) try: @@ -79,7 +79,7 @@ def _cloud_front_signer_from_pem(key_id, pem): @deconstructible -class S3Boto3StorageFile(File): +class S3Boto3StorageFile(CompressedFileMixin, File): """ The default file object used by the S3Boto3Storage backend. @@ -136,7 +136,7 @@ def _get_file(self): self.obj.download_fileobj(self._file) self._file.seek(0) if self._storage.gzip and self.obj.content_encoding == 'gzip': - self._file = GzipFile(mode=self._mode, fileobj=self._file, mtime=0.0) + self._file = self._decompress_file(mode=self._mode, file=self._file) return self._file def _set_file(self, value): @@ -231,7 +231,7 @@ def close(self): @deconstructible -class S3Boto3Storage(BaseStorage): +class S3Boto3Storage(CompressStorageMixin, BaseStorage): """ Amazon Simple Storage Service using Boto3 @@ -428,10 +428,6 @@ def _normalize_name(self, name): except ValueError: raise SuspiciousOperation("Attempted access to '%s' denied." % name) - def _compress_content(self, content): - """Gzip a given string content.""" - return GzipCompressionWrapper(content) - def _open(self, name, mode='rb'): name = self._normalize_name(self._clean_name(name)) try: diff --git a/storages/compress.py b/storages/compress.py new file mode 100644 index 000000000..963c23a67 --- /dev/null +++ b/storages/compress.py @@ -0,0 +1,49 @@ +import io +import zlib +from gzip import GzipFile +from typing import Optional + +from storages.utils import to_bytes + + +class GzipCompressionWrapper(io.RawIOBase): + """Wrapper for compressing file contents on the fly.""" + + def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION): + super().__init__() + self.raw = raw + self.compress = zlib.compressobj(level=level, wbits=31) + self.leftover = bytearray() + + @staticmethod + def readable(): + return True + + def readinto(self, buf: bytearray) -> Optional[int]: + size = len(buf) + while len(self.leftover) < size: + chunk = to_bytes(self.raw.read(size)) + if not chunk: + if self.compress: + self.leftover += self.compress.flush(zlib.Z_FINISH) + self.compress = None + break + self.leftover += self.compress.compress(chunk) + if len(self.leftover) == 0: + return 0 + output = self.leftover[:size] + size = len(output) + buf[:size] = output + self.leftover = self.leftover[size:] + return size + + +class CompressStorageMixin(): + def _compress_content(self, content): + """Gzip a given string content.""" + return GzipCompressionWrapper(content) + + +class CompressedFileMixin(): + def _decompress_file(self, mode, file, mtime=0.0): + return GzipFile(mode=mode, fileobj=file, mtime=mtime) diff --git a/storages/utils.py b/storages/utils.py index 37e7a8fba..dbd81e096 100644 --- a/storages/utils.py +++ b/storages/utils.py @@ -1,8 +1,5 @@ -import io import os import posixpath -import zlib -from typing import Optional from django.conf import settings from django.core.exceptions import ( @@ -129,35 +126,3 @@ def get_available_overwrite_name(name, max_length): 'allows sufficient "max_length".' % name ) return os.path.join(dir_name, "{}{}".format(file_root, file_ext)) - - -class GzipCompressionWrapper(io.RawIOBase): - """Wrapper for compressing file contents on the fly.""" - - def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION): - super().__init__() - self.raw = raw - self.compress = zlib.compressobj(level=level, wbits=31) - self.leftover = bytearray() - - @staticmethod - def readable(): - return True - - def readinto(self, buf: bytearray) -> Optional[int]: - size = len(buf) - while len(self.leftover) < size: - chunk = to_bytes(self.raw.read(size)) - if not chunk: - if self.compress: - self.leftover += self.compress.flush(zlib.Z_FINISH) - self.compress = None - break - self.leftover += self.compress.compress(chunk) - if len(self.leftover) == 0: - return 0 - output = self.leftover[:size] - size = len(output) - buf[:size] = output - self.leftover = self.leftover[size:] - return size diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index 9fd4cce78..17d912c8d 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -1,3 +1,4 @@ +import gzip import mimetypes from datetime import datetime, timedelta from unittest import mock @@ -10,6 +11,7 @@ from google.cloud.storage.blob import Blob from storages.backends import gcloud +from tests.utils import NonSeekableContentFile class GCloudTestCase(TestCase): @@ -402,6 +404,99 @@ def test_cache_control(self): blob = bucket.get_blob(filename) self.assertEqual(blob.cache_control, cache_control) + def test_storage_save_gzipped(self): + """ + Test saving a gzipped file + """ + name = 'test_storage_save.gz' + content = ContentFile("I am gzip'd") + self.storage.save(name, content) + obj = self.storage._bucket.get_blob() + obj.upload_from_file.assert_called_with( + mock.ANY, + rewind=True, + size=11, + predefined_acl=None, + content_type=None + ) + + def test_storage_save_gzipped_non_seekable(self): + """ + Test saving a gzipped file + """ + name = 'test_storage_save.gz' + content = NonSeekableContentFile("I am gzip'd") + self.storage.save(name, content) + obj = self.storage._bucket.get_blob() + obj.upload_from_file.assert_called_with( + mock.ANY, + rewind=True, + size=11, + predefined_acl=None, + content_type=None + ) + + def test_storage_save_gzip(self): + """ + Test saving a file with gzip enabled. + """ + self.storage.gzip = True + name = 'test_storage_save.css' + content = ContentFile("I should be gzip'd") + self.storage.save(name, content) + self.storage._client.bucket.assert_called_with(self.bucket_name) + obj = self.storage._bucket.get_blob() + self.assertEqual(obj.content_encoding, 'gzip') + obj.upload_from_file.assert_called_with( + mock.ANY, + rewind=True, + size=None, + predefined_acl=None, + content_type='text/css', + ) + args, kwargs = obj.upload_from_file.call_args + content = args[0] + zfile = gzip.GzipFile(mode='rb', fileobj=content) + self.assertEqual(zfile.read(), b"I should be gzip'd") + + def test_storage_save_gzip_twice(self): + """ + Test saving the same file content twice with gzip enabled. + """ + # Given + self.storage.gzip = True + name = 'test_storage_save.css' + content = ContentFile("I should be gzip'd") + + # When + self.storage.save(name, content) + self.storage.save('test_storage_save_2.css', content) + + # Then + self.storage._client.bucket.assert_called_with(self.bucket_name) + obj = self.storage._bucket.get_blob() + self.assertEqual(obj.content_encoding, 'gzip') + obj.upload_from_file.assert_called_with( + mock.ANY, + rewind=True, + size=None, + predefined_acl=None, + content_type='text/css', + ) + args, kwargs = obj.upload_from_file.call_args + content = args[0] + zfile = gzip.GzipFile(mode='rb', fileobj=content) + self.assertEqual(zfile.read(), b"I should be gzip'd") + + def test_compress_content_len(self): + """ + Test that file returned by _compress_content() is readable. + """ + self.storage.gzip = True + content = ContentFile("I should be gzip'd") + content = self.storage._compress_content(content) + self.assertTrue(len(content.read()) > 0) + def test_location_leading_slash(self): msg = ( "GoogleCloudStorage.location cannot begin with a leading slash. " diff --git a/tests/test_s3boto3.py b/tests/test_s3boto3.py index b389ef2c7..7aa7252a2 100644 --- a/tests/test_s3boto3.py +++ b/tests/test_s3boto3.py @@ -14,6 +14,7 @@ from django.utils.timezone import is_aware, utc from storages.backends import s3boto3 +from tests.utils import NonSeekableContentFile class S3ManifestStaticStorageTestStorage(s3boto3.S3ManifestStaticStorage): @@ -21,18 +22,6 @@ def read_manifest(self): return None -class NonSeekableContentFile(ContentFile): - - def open(self, mode=None): - return self - - def seekable(self): - return False - - def seek(self, pos, whence=0): - raise AttributeError() - - class S3Boto3StorageTests(TestCase): def setUp(self): self.storage = s3boto3.S3Boto3Storage() diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 000000000..5d67b298e --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,13 @@ +from django.core.files.base import ContentFile + + +class NonSeekableContentFile(ContentFile): + + def open(self, mode=None): + return self + + def seekable(self): + return False + + def seek(self, pos, whence=0): + raise AttributeError()