Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gzip support Google Cloud Storage #980

Merged
merged 7 commits into from
Nov 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ __pycache__
.cache

.idea/
.vscode/
.pytest_cache/
venv/

dist/
docs/_build

.DS_Store
10 changes: 9 additions & 1 deletion docs/backends/gcloud.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ Your Google Storage bucket name, as a string. Required.
Your Google Cloud project ID. If unset, falls back to the default
inferred from the environment.

``GS_IS_GZIPPED`` (optional: default is ``False``)

Whether to enable gzip compression for the content types listed in ``GZIP_CONTENT_TYPES``.

``GZIP_CONTENT_TYPES`` (optional: default is ``text/css``, ``text/javascript``, ``application/javascript``, ``application/x-javascript``, ``image/svg+xml``)

The list of content types that will be gzipped when ``GS_IS_GZIPPED`` is set to ``True``.

.. _gs-creds:

``GS_CREDENTIALS`` (optional)
Expand Down Expand Up @@ -121,7 +129,7 @@ a signed (expiring) url.
.. note::
When using this setting, make sure you have ``fine-grained`` access control enabled on your bucket,
as opposed to ``Uniform`` access control, or else, file uploads will return with HTTP 400. If you
already have a bucket with ``Uniform`` access control set to public read, please keep
already have a bucket with ``Uniform`` access control set to public read, please keep
``GS_DEFAULT_ACL`` to ``None`` and set ``GS_QUERYSTRING_AUTH`` to ``False``.

``GS_QUERYSTRING_AUTH`` (optional, default is True)
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ sftp =
[flake8]
exclude =
.tox,
docs
docs,
venv
max-line-length = 119

[isort]
Expand Down
5 changes: 2 additions & 3 deletions storages/backends/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@
from django.core.files.base import File
from django.utils import timezone
from django.utils.deconstruct import deconstructible
from django.utils.encoding import force_bytes

from storages.base import BaseStorage
from storages.utils import (
clean_name, get_available_overwrite_name, safe_join, setting,
clean_name, get_available_overwrite_name, safe_join, setting, to_bytes,
)


Expand Down Expand Up @@ -67,7 +66,7 @@ def write(self, content):
'a' not in self._mode):
raise AttributeError("File was not opened in write mode.")
self._is_dirty = True
return super().write(force_bytes(content))
return super().write(to_bytes(content))

def close(self):
if self._file is None:
Expand Down
31 changes: 23 additions & 8 deletions storages/backends/gcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from django.core.files.base import File
from django.utils import timezone
from django.utils.deconstruct import deconstructible
from django.utils.encoding import force_bytes

from storages.base import BaseStorage
from storages.compress import CompressedFileMixin, CompressStorageMixin
from storages.utils import (
check_location, clean_name, get_available_overwrite_name, safe_join,
setting,
setting, to_bytes,
)

try:
Expand All @@ -24,10 +24,11 @@
"See https://github.com/GoogleCloudPlatform/gcloud-python")


CONTENT_ENCODING = 'content_encoding'
CONTENT_TYPE = 'content_type'


class GoogleCloudFile(File):
class GoogleCloudFile(CompressedFileMixin, File):
def __init__(self, name, mode, storage):
self.name = name
self.mime_type = mimetypes.guess_type(name)[0]
Expand Down Expand Up @@ -56,6 +57,8 @@ def _get_file(self):
self._is_dirty = False
self.blob.download_to_file(self._file)
self._file.seek(0)
if self._storage.gzip and self.blob.content_encoding == 'gzip':
self._file = self._decompress_file(mode=self._mode, file=self._file)
return self._file

def _set_file(self, value):
Expand All @@ -76,7 +79,7 @@ def write(self, content):
if 'w' not in self._mode:
raise AttributeError("File was not opened in write mode.")
self._is_dirty = True
return super().write(force_bytes(content))
return super().write(to_bytes(content))
jkevingutierrez marked this conversation as resolved.
Show resolved Hide resolved

def close(self):
if self._file is not None:
Expand All @@ -90,7 +93,7 @@ def close(self):


@deconstructible
class GoogleCloudStorage(BaseStorage):
class GoogleCloudStorage(CompressStorageMixin, BaseStorage):
def __init__(self, **settings):
super().__init__(**settings)

Expand All @@ -109,6 +112,14 @@ def get_default_settings(self):
"default_acl": setting('GS_DEFAULT_ACL'),
"querystring_auth": setting('GS_QUERYSTRING_AUTH', True),
"expiration": setting('GS_EXPIRATION', timedelta(seconds=86400)),
"gzip": setting('GS_IS_GZIPPED', False),
"gzip_content_types": setting('GZIP_CONTENT_TYPES', (
'text/css',
'text/javascript',
'application/javascript',
'application/x-javascript',
'image/svg+xml',
jkevingutierrez marked this conversation as resolved.
Show resolved Hide resolved
)),
"file_overwrite": setting('GS_FILE_OVERWRITE', True),
"cache_control": setting('GS_CACHE_CONTROL'),
"object_parameters": setting('GS_OBJECT_PARAMETERS', {}),
Expand Down Expand Up @@ -164,14 +175,18 @@ def _save(self, name, content):
upload_params = {}
blob_params = self.get_object_parameters(name)
upload_params['predefined_acl'] = blob_params.pop('acl', self.default_acl)
upload_params[CONTENT_TYPE] = blob_params.pop(CONTENT_TYPE, file_object.mime_type)

if CONTENT_TYPE not in blob_params:
upload_params[CONTENT_TYPE] = file_object.mime_type
if (self.gzip and
upload_params[CONTENT_TYPE] in self.gzip_content_types and
CONTENT_ENCODING not in blob_params):
content = self._compress_content(content)
blob_params[CONTENT_ENCODING] = 'gzip'

for prop, val in blob_params.items():
setattr(file_object.blob, prop, val)

file_object.blob.upload_from_file(content, rewind=True, size=content.size, **upload_params)
file_object.blob.upload_from_file(content, rewind=True, size=getattr(content, 'size', None), **upload_params)
jschneier marked this conversation as resolved.
Show resolved Hide resolved
return cleaned_name

def get_object_parameters(self, name):
Expand Down
16 changes: 6 additions & 10 deletions storages/backends/s3boto3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import tempfile
import threading
from datetime import datetime, timedelta
from gzip import GzipFile
from tempfile import SpooledTemporaryFile
from urllib.parse import parse_qsl, urlencode, urlsplit

Expand All @@ -16,9 +15,10 @@
from django.utils.timezone import is_naive, make_naive

from storages.base import BaseStorage
from storages.compress import CompressedFileMixin, CompressStorageMixin
from storages.utils import (
GzipCompressionWrapper, check_location, get_available_overwrite_name,
lookup_env, safe_join, setting, to_bytes,
check_location, get_available_overwrite_name, lookup_env, safe_join,
setting, to_bytes,
)

try:
Expand Down Expand Up @@ -79,7 +79,7 @@ def _cloud_front_signer_from_pem(key_id, pem):


@deconstructible
class S3Boto3StorageFile(File):
class S3Boto3StorageFile(CompressedFileMixin, File):
"""
The default file object used by the S3Boto3Storage backend.

Expand Down Expand Up @@ -136,7 +136,7 @@ def _get_file(self):
self.obj.download_fileobj(self._file)
self._file.seek(0)
if self._storage.gzip and self.obj.content_encoding == 'gzip':
self._file = GzipFile(mode=self._mode, fileobj=self._file, mtime=0.0)
self._file = self._decompress_file(mode=self._mode, file=self._file)
return self._file

def _set_file(self, value):
Expand Down Expand Up @@ -231,7 +231,7 @@ def close(self):


@deconstructible
class S3Boto3Storage(BaseStorage):
class S3Boto3Storage(CompressStorageMixin, BaseStorage):
"""
Amazon Simple Storage Service using Boto3

Expand Down Expand Up @@ -428,10 +428,6 @@ def _normalize_name(self, name):
except ValueError:
raise SuspiciousOperation("Attempted access to '%s' denied." % name)

def _compress_content(self, content):
"""Gzip a given string content."""
return GzipCompressionWrapper(content)

def _open(self, name, mode='rb'):
name = self._normalize_name(self._clean_name(name))
try:
Expand Down
49 changes: 49 additions & 0 deletions storages/compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import io
import zlib
from gzip import GzipFile
from typing import Optional

from storages.utils import to_bytes


class GzipCompressionWrapper(io.RawIOBase):
    """Read-only stream adapter that gzips another file-like object on the fly.

    Bytes read from this wrapper are the gzip-encoded form of whatever
    ``raw`` yields, produced incrementally so the full payload never has
    to be buffered in memory at once.
    """

    def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION):
        super().__init__()
        self.raw = raw
        # wbits=31 selects a gzip (RFC 1952) container around the deflate data.
        self.compress = zlib.compressobj(level=level, wbits=31)
        # Compressed bytes already produced but not yet handed to a reader.
        self.leftover = bytearray()

    @staticmethod
    def readable():
        return True

    def readinto(self, buf: bytearray) -> Optional[int]:
        """Fill ``buf`` with compressed bytes; return the count (0 at EOF)."""
        want = len(buf)
        # Pull source chunks until we can satisfy the request or the
        # source is exhausted.
        while len(self.leftover) < want:
            chunk = to_bytes(self.raw.read(want))
            if chunk:
                self.leftover += self.compress.compress(chunk)
                continue
            # Source exhausted: emit the gzip trailer exactly once,
            # then drop the compressor so later calls go straight to EOF.
            if self.compress is not None:
                self.leftover += self.compress.flush(zlib.Z_FINISH)
                self.compress = None
            break
        if not self.leftover:
            return 0
        emitted = self.leftover[:want]
        count = len(emitted)
        buf[:count] = emitted
        self.leftover = self.leftover[count:]
        return count


class CompressStorageMixin:
    """Mixin giving storage backends on-the-fly gzip compression of uploads."""

    def _compress_content(self, content):
        """Return *content* wrapped so that reads yield its gzipped bytes."""
        wrapped = GzipCompressionWrapper(content)
        return wrapped


class CompressedFileMixin:
    """Mixin giving storage file objects transparent gzip decompression."""

    def _decompress_file(self, mode, file, mtime=0.0):
        """Wrap *file* in a ``GzipFile`` so reads yield the decoded stream."""
        wrapper = GzipFile(fileobj=file, mode=mode, mtime=mtime)
        return wrapper
35 changes: 0 additions & 35 deletions storages/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import io
import os
import posixpath
import zlib
from typing import Optional

from django.conf import settings
from django.core.exceptions import (
Expand Down Expand Up @@ -129,35 +126,3 @@ def get_available_overwrite_name(name, max_length):
'allows sufficient "max_length".' % name
)
return os.path.join(dir_name, "{}{}".format(file_root, file_ext))


class GzipCompressionWrapper(io.RawIOBase):
"""Wrapper for compressing file contents on the fly."""

def __init__(self, raw, level=zlib.Z_BEST_COMPRESSION):
super().__init__()
self.raw = raw
self.compress = zlib.compressobj(level=level, wbits=31)
self.leftover = bytearray()

@staticmethod
def readable():
return True

def readinto(self, buf: bytearray) -> Optional[int]:
size = len(buf)
while len(self.leftover) < size:
chunk = to_bytes(self.raw.read(size))
if not chunk:
if self.compress:
self.leftover += self.compress.flush(zlib.Z_FINISH)
self.compress = None
break
self.leftover += self.compress.compress(chunk)
if len(self.leftover) == 0:
return 0
output = self.leftover[:size]
size = len(output)
buf[:size] = output
self.leftover = self.leftover[size:]
return size
Loading