Skip to content

Commit

Permalink
[gcloud] Add gzip support (#980)
Browse files Browse the repository at this point in the history
Closes #654. Closes #818.
  • Loading branch information
jkevingutierrez committed Nov 6, 2021
1 parent 52a85a3 commit cf50a22
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 70 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ __pycache__
.cache

.idea/
.vscode/
.pytest_cache/
venv/

dist/
docs/_build

.DS_Store
10 changes: 9 additions & 1 deletion docs/backends/gcloud.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ Your Google Storage bucket name, as a string. Required.
Your Google Cloud project ID. If unset, falls back to the default
inferred from the environment.

``GS_IS_GZIPPED`` (optional: default is ``False``)

Whether or not to enable gzipping of the content types specified by ``GZIP_CONTENT_TYPES``.

``GZIP_CONTENT_TYPES`` (optional: default is ``text/css``, ``text/javascript``, ``application/javascript``, ``application/x-javascript``, ``image/svg+xml``)

The content types which will be gzipped when ``GS_IS_GZIPPED`` is set to ``True``.

.. _gs-creds:

``GS_CREDENTIALS`` (optional)
Expand Down Expand Up @@ -121,7 +129,7 @@ a signed (expiring) url.
.. note::
When using this setting, make sure you have ``fine-grained`` access control enabled on your bucket,
as opposed to ``Uniform`` access control, or else, file uploads will return with HTTP 400. If you
already have a bucket with ``Uniform`` access control set to public read, please keep
already have a bucket with ``Uniform`` access control set to public read, please keep
``GS_DEFAULT_ACL`` to ``None`` and set ``GS_QUERYSTRING_AUTH`` to ``False``.

``GS_QUERYSTRING_AUTH`` (optional, default is True)
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ sftp =
[flake8]
exclude =
.tox,
docs
docs,
venv
max-line-length = 119

[isort]
Expand Down
5 changes: 2 additions & 3 deletions storages/backends/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@
from django.core.files.base import File
from django.utils import timezone
from django.utils.deconstruct import deconstructible
from django.utils.encoding import force_bytes

from storages.base import BaseStorage
from storages.utils import (
clean_name, get_available_overwrite_name, safe_join, setting,
clean_name, get_available_overwrite_name, safe_join, setting, to_bytes,
)


Expand Down Expand Up @@ -67,7 +66,7 @@ def write(self, content):
'a' not in self._mode):
raise AttributeError("File was not opened in write mode.")
self._is_dirty = True
return super().write(force_bytes(content))
return super().write(to_bytes(content))

def close(self):
if self._file is None:
Expand Down
31 changes: 23 additions & 8 deletions storages/backends/gcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from django.core.files.base import File
from django.utils import timezone
from django.utils.deconstruct import deconstructible
from django.utils.encoding import force_bytes

from storages.base import BaseStorage
from storages.compress import CompressedFileMixin, CompressStorageMixin
from storages.utils import (
check_location, clean_name, get_available_overwrite_name, safe_join,
setting,
setting, to_bytes,
)

try:
Expand All @@ -24,10 +24,11 @@
"See https://github.com/GoogleCloudPlatform/gcloud-python")


CONTENT_ENCODING = 'content_encoding'
CONTENT_TYPE = 'content_type'


class GoogleCloudFile(File):
class GoogleCloudFile(CompressedFileMixin, File):
def __init__(self, name, mode, storage):
self.name = name
self.mime_type = mimetypes.guess_type(name)[0]
Expand Down Expand Up @@ -56,6 +57,8 @@ def _get_file(self):
self._is_dirty = False
self.blob.download_to_file(self._file)
self._file.seek(0)
if self._storage.gzip and self.blob.content_encoding == 'gzip':
self._file = self._decompress_file(mode=self._mode, file=self._file)
return self._file

def _set_file(self, value):
Expand All @@ -76,7 +79,7 @@ def write(self, content):
if 'w' not in self._mode:
raise AttributeError("File was not opened in write mode.")
self._is_dirty = True
return super().write(force_bytes(content))
return super().write(to_bytes(content))

def close(self):
if self._file is not None:
Expand All @@ -90,7 +93,7 @@ def close(self):


@deconstructible
class GoogleCloudStorage(BaseStorage):
class GoogleCloudStorage(CompressStorageMixin, BaseStorage):
def __init__(self, **settings):
super().__init__(**settings)

Expand All @@ -109,6 +112,14 @@ def get_default_settings(self):
"default_acl": setting('GS_DEFAULT_ACL'),
"querystring_auth": setting('GS_QUERYSTRING_AUTH', True),
"expiration": setting('GS_EXPIRATION', timedelta(seconds=86400)),
"gzip": setting('GS_IS_GZIPPED', False),
"gzip_content_types": setting('GZIP_CONTENT_TYPES', (
'text/css',
'text/javascript',
'application/javascript',
'application/x-javascript',
'image/svg+xml',
)),
"file_overwrite": setting('GS_FILE_OVERWRITE', True),
"cache_control": setting('GS_CACHE_CONTROL'),
"object_parameters": setting('GS_OBJECT_PARAMETERS', {}),
Expand Down Expand Up @@ -164,14 +175,18 @@ def _save(self, name, content):
upload_params = {}
blob_params = self.get_object_parameters(name)
upload_params['predefined_acl'] = blob_params.pop('acl', self.default_acl)
upload_params[CONTENT_TYPE] = blob_params.pop(CONTENT_TYPE, file_object.mime_type)

if CONTENT_TYPE not in blob_params:
upload_params[CONTENT_TYPE] = file_object.mime_type
if (self.gzip and
upload_params[CONTENT_TYPE] in self.gzip_content_types and
CONTENT_ENCODING not in blob_params):
content = self._compress_content(content)
blob_params[CONTENT_ENCODING] = 'gzip'

for prop, val in blob_params.items():
setattr(file_object.blob, prop, val)

file_object.blob.upload_from_file(content, rewind=True, size=content.size, **upload_params)
file_object.blob.upload_from_file(content, rewind=True, size=getattr(content, 'size', None), **upload_params)
return cleaned_name

def get_object_parameters(self, name):
Expand Down
16 changes: 6 additions & 10 deletions storages/backends/s3boto3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import tempfile
import threading
from datetime import datetime, timedelta
from gzip import GzipFile
from tempfile import SpooledTemporaryFile
from urllib.parse import parse_qsl, urlencode, urlsplit

Expand All @@ -16,9 +15,10 @@
from django.utils.timezone import is_naive, make_naive

from storages.base import BaseStorage
from storages.compress import CompressedFileMixin, CompressStorageMixin
from storages.utils import (
GzipCompressionWrapper, check_location, get_available_overwrite_name,
lookup_env, safe_join, setting, to_bytes,
check_location, get_available_overwrite_name, lookup_env, safe_join,
setting, to_bytes,
)

try:
Expand Down Expand Up @@ -79,7 +79,7 @@ def _cloud_front_signer_from_pem(key_id, pem):


@deconstructible
class S3Boto3StorageFile(File):
class S3Boto3StorageFile(CompressedFileMixin, File):
"""
The default file object used by the S3Boto3Storage backend.
Expand Down Expand Up @@ -136,7 +136,7 @@ def _get_file(self):
self.obj.download_fileobj(self._file)
self._file.seek(0)
if self._storage.gzip and self.obj.content_encoding == 'gzip':
self._file = GzipFile(mode=self._mode, fileobj=self._file, mtime=0.0)
self._file = self._decompress_file(mode=self._mode, file=self._file)
return self._file

def _set_file(self, value):
Expand Down Expand Up @@ -231,7 +231,7 @@ def close(self):


@deconstructible
class S3Boto3Storage(BaseStorage):
class S3Boto3Storage(CompressStorageMixin, BaseStorage):
"""
Amazon Simple Storage Service using Boto3
Expand Down Expand Up @@ -428,10 +428,6 @@ def _normalize_name(self, name):
except ValueError:
raise SuspiciousOperation("Attempted access to '%s' denied." % name)

def _compress_content(self, content):
"""Gzip a given string content."""
return GzipCompressionWrapper(content)

def _open(self, name, mode='rb'):
name = self._normalize_name(self._clean_name(name))
try:
Expand Down
49 changes: 49 additions & 0 deletions storages/compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import io
import zlib
from gzip import GzipFile
from typing import Optional

from storages.utils import to_bytes


class GzipCompressionWrapper(io.RawIOBase):
    """Read-only raw stream that gzip-compresses ``raw`` lazily as it is read.

    Nothing is compressed up front: each ``readinto`` call pulls just enough
    data from the wrapped object to satisfy the caller's buffer.
    """

    def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION):
        """Wrap ``raw`` (any object with ``read(size)``) at compression ``level``."""
        super().__init__()
        self.raw = raw
        # wbits=31 asks zlib for a gzip container rather than a bare zlib stream.
        self.compress = zlib.compressobj(level=level, wbits=31)
        # Compressed bytes already produced but not yet handed to a reader.
        self.leftover = bytearray()

    @staticmethod
    def readable():
        """Report that this stream supports reading."""
        return True

    def readinto(self, buf: bytearray) -> Optional[int]:
        """Fill ``buf`` with compressed bytes and return how many were written.

        Returns 0 once the wrapped source is exhausted and every pending
        compressed byte has been delivered.
        """
        wanted = len(buf)
        while len(self.leftover) < wanted:
            data = to_bytes(self.raw.read(wanted))
            if data:
                self.leftover += self.compress.compress(data)
                continue
            # Source is drained: finish the gzip stream exactly once, then
            # drop the compressor so later calls skip the flush.
            if self.compress:
                self.leftover += self.compress.flush(zlib.Z_FINISH)
                self.compress = None
            break
        if not self.leftover:
            return 0
        produced = self.leftover[:wanted]
        count = len(produced)
        buf[:count] = produced
        self.leftover = self.leftover[count:]
        return count


class CompressStorageMixin:
    """Storage mixin that supplies on-the-fly gzip compression of content."""

    def _compress_content(self, content):
        """Return ``content`` wrapped in a ``GzipCompressionWrapper``."""
        wrapped = GzipCompressionWrapper(content)
        return wrapped


class CompressedFileMixin:
    """File mixin that opens an underlying file object through gzip."""

    def _decompress_file(self, mode, file, mtime=0.0):
        """Return a ``GzipFile`` over ``file`` opened with ``mode`` and ``mtime``."""
        gzip_options = {"mode": mode, "fileobj": file, "mtime": mtime}
        return GzipFile(**gzip_options)
35 changes: 0 additions & 35 deletions storages/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import io
import os
import posixpath
import zlib
from typing import Optional

from django.conf import settings
from django.core.exceptions import (
Expand Down Expand Up @@ -129,35 +126,3 @@ def get_available_overwrite_name(name, max_length):
'allows sufficient "max_length".' % name
)
return os.path.join(dir_name, "{}{}".format(file_root, file_ext))


class GzipCompressionWrapper(io.RawIOBase):
    """Raw stream adapter that gzips the wrapped object's data while it is read."""

    def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION):
        """Store ``raw`` and create the compressor at the given ``level``."""
        super().__init__()
        self.raw = raw
        # wbits=31 makes zlib emit a gzip container instead of a raw zlib stream.
        self.compress = zlib.compressobj(level=level, wbits=31)
        # Holds compressed output that did not fit into the previous read.
        self.leftover = bytearray()

    @staticmethod
    def readable():
        """Always true: the wrapper only supports reading."""
        return True

    def readinto(self, buf: bytearray) -> Optional[int]:
        """Copy up to ``len(buf)`` compressed bytes into ``buf``.

        Returns the number of bytes copied, or 0 when no compressed data
        remains.
        """
        capacity = len(buf)
        while len(self.leftover) < capacity:
            piece = to_bytes(self.raw.read(capacity))
            if piece:
                self.leftover += self.compress.compress(piece)
            else:
                # End of input: emit the final gzip data once and retire the
                # compressor so a repeated call does not flush again.
                if self.compress:
                    self.leftover += self.compress.flush(zlib.Z_FINISH)
                    self.compress = None
                break
        if not self.leftover:
            return 0
        head = self.leftover[:capacity]
        written = len(head)
        buf[:written] = head
        self.leftover = self.leftover[written:]
        return written
Loading

0 comments on commit cf50a22

Please sign in to comment.