Skip to content

Commit

Permalink
If an encoding is not specified, ReadBytesWrapper uses the file's enc…
Browse files Browse the repository at this point in the history
…oding attribute, and then falls back to utf-8.
  • Loading branch information
LincolnPuzey committed Aug 18, 2020
1 parent 8c9b9f7 commit b24e9a9
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 7 deletions.
14 changes: 12 additions & 2 deletions storages/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,19 @@ class ReadBytesWrapper:
"""
A wrapper for a file-like object that makes read() always return bytes.
"""
def __init__(self, file, encoding="utf-8"):
def __init__(self, file, encoding=None):
"""
:param file: The file-like object to wrap.
:param encoding: Specify the encoding to use when file.read() returns strings.
If not provided, defaults to file.encoding, or if that's not available,
to utf-8.
"""
self._file = file
self._encoding = encoding
self._encoding = (
encoding
or getattr(file, "encoding", None)
or "utf-8"
)
self._needs_encoding = None

def read(self, *args, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions tests/test_files/windows-1252-encoded.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
™€‰
38 changes: 33 additions & 5 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
from io import BytesIO, StringIO, SEEK_CUR
from os.path import dirname, join

from django.conf import settings
from django.core.exceptions import SuspiciousFileOperation
Expand Down Expand Up @@ -175,9 +176,36 @@ def test_with_string_file(self):
self.assertEqual(2, file_wrapped.seek(0, SEEK_CUR))
self.assertEqual(b"yz", file_wrapped.read())

def test_with_string_file_custom_encoding(self):
file = StringIO("\u2122\u20AC\u2030")
file_wrapped = utils.ReadBytesWrapper(file, encoding="windows-1252")
# I chose the characters ™€‰ for the following tests because they produce different
# bytes when encoding with utf-8 vs windows-1252 vs utf-16

# test read() returns custom encoding
self.assertEqual(b"\x99\x80\x89", file_wrapped.read())
def test_with_string_file_specified_encoding(self):
    """An explicitly passed encoding is the one used to encode str reads."""
    text = "\u2122\u20AC\u2030"
    wrapper = utils.ReadBytesWrapper(StringIO(text), encoding="utf-16")

    # read() must return the text encoded with the requested codec.
    expected = text.encode("utf-16")
    self.assertEqual(wrapper.read(), expected)

def test_with_string_file_detect_encoding(self):
    """With no encoding passed, the wrapped file's own encoding is used."""
    expected_text = "\u2122\u20AC\u2030"
    fixture_path = join(
        dirname(__file__), "test_files", "windows-1252-encoded.txt"
    )

    with open(file=fixture_path, mode="r", encoding="windows-1252") as text_file:
        # Sanity check: the fixture decodes to the expected characters.
        self.assertEqual(text_file.read(), expected_text)
        # Rewind so the wrapper reads from the beginning of the file.
        text_file.seek(0)

        wrapper = utils.ReadBytesWrapper(text_file)

        # read() should encode with the encoding taken from the file object.
        self.assertEqual(wrapper.read(), expected_text.encode("windows-1252"))

def test_with_string_file_fallback_encoding(self):
    """With no encoding passed for a StringIO source, utf-8 is expected."""
    text = "\u2122\u20AC\u2030"
    wrapper = utils.ReadBytesWrapper(StringIO(text))

    # read() should return the text encoded with the utf-8 fallback.
    expected = text.encode("utf-8")
    self.assertEqual(wrapper.read(), expected)

0 comments on commit b24e9a9

Please sign in to comment.