Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support normalization of line endings #156

Merged
merged 3 commits into the base branch from the contributor's branch
Sep 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions securesystemslib/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
# Import securesystemslib logger to log warning messages.
logger = logging.getLogger('securesystemslib.hash')

DEFAULT_CHUNK_SIZE = 4096
DEFAULT_HASH_ALGORITHM = 'sha256'
DEFAULT_HASH_LIBRARY = 'hashlib'
SUPPORTED_LIBRARIES = ['hashlib']
Expand Down Expand Up @@ -125,7 +126,7 @@ def digest(algorithm=DEFAULT_HASH_ALGORITHM, hash_library=DEFAULT_HASH_LIBRARY):


def digest_fileobject(file_object, algorithm=DEFAULT_HASH_ALGORITHM,
hash_library=DEFAULT_HASH_LIBRARY):
hash_library=DEFAULT_HASH_LIBRARY, normalize_line_endings=False):
"""
<Purpose>
Generate a digest object given a file object. The new digest object
Expand All @@ -143,6 +144,14 @@ def digest_fileobject(file_object, algorithm=DEFAULT_HASH_ALGORITHM,
hash_library:
The library providing the hash algorithms (e.g., 'hashlib').

normalize_line_endings: (default False)
Whether or not to normalize line endings for cross-platform support.
Note that this results in ambiguous hashes (e.g. 'abc\n' and 'abc\r\n'
will produce the same hash), so be careful to only apply this to text
files (not binary), when that equivalence is desirable and cannot result
in easily-maliciously-corrupted files producing the same hash as a valid
file.

<Exceptions>
securesystemslib.exceptions.FormatError, if the arguments are
improperly formatted.
Expand Down Expand Up @@ -179,11 +188,26 @@ def digest_fileobject(file_object, algorithm=DEFAULT_HASH_ALGORITHM,
# Update the hash with the data read from each chunk and return after
# the entire file is processed.
while True:
chunksize = 4096
data = file_object.read(chunksize)
data = file_object.read(DEFAULT_CHUNK_SIZE)
if not data:
break

if normalize_line_endings:
while data[-1:] == b'\r':
c = file_object.read(1)
if not c:
break

data += c

data = (
data
# First Windows
.replace(b'\r\n', b'\n')
# Then Mac
.replace(b'\r', b'\n')
)

if not isinstance(data, six.binary_type):
digest_object.update(data.encode('utf-8'))

Expand All @@ -197,7 +221,7 @@ def digest_fileobject(file_object, algorithm=DEFAULT_HASH_ALGORITHM,


def digest_filename(filename, algorithm=DEFAULT_HASH_ALGORITHM,
hash_library=DEFAULT_HASH_LIBRARY):
hash_library=DEFAULT_HASH_LIBRARY, normalize_line_endings=False):
"""
<Purpose>
Generate a digest object, update its hash using a file object
Expand All @@ -213,6 +237,9 @@ def digest_filename(filename, algorithm=DEFAULT_HASH_ALGORITHM,
hash_library:
The library providing the hash algorithms (e.g., 'hashlib').

normalize_line_endings:
Whether or not to normalize line endings for cross-platform support.

<Exceptions>
securesystemslib.exceptions.FormatError, if the arguments are
improperly formatted.
Expand Down Expand Up @@ -243,6 +270,7 @@ def digest_filename(filename, algorithm=DEFAULT_HASH_ALGORITHM,
# digest_fileobject() raises:
# securesystemslib.exceptions.UnsupportedAlgorithmError
# securesystemslib.exceptions.UnsupportedLibraryError
digest_object = digest_fileobject(file_object, algorithm, hash_library)
digest_object = digest_fileobject(
file_object, algorithm, hash_library, normalize_line_endings)

return digest_object
22 changes: 22 additions & 0 deletions tests/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,28 @@ def _do_update_filename(self, library):
os.remove(filename)


def test_update_filename_normalize(self):
  # Exercise digest_filename() with normalize_line_endings=True under every
  # supported hash library (per SUPPORTED_LIBRARIES, currently 'hashlib').
  self._run_with_all_hash_libraries(self._do_update_filename_normalize)


def _do_update_filename_normalize(self, library):
  # Build input containing all three line-ending styles (CRLF, LF, bare CR),
  # repeated enough times to span multiple read chunks.
  raw = b'ab\r\nd\nf\r' * 4096
  # The ground truth: CRLF collapsed first, then any remaining lone CR.
  expected = raw.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
  handle, path = tempfile.mkstemp()
  try:
    os.write(handle, raw)
    os.close(handle)
    for algorithm in ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'):
      # Digest of the pre-normalized bytes serves as the reference value.
      reference = securesystemslib.hash.digest(algorithm, library)
      reference.update(expected)
      # digest_filename() with normalization enabled must match it exactly.
      computed = securesystemslib.hash.digest_filename(
          path, algorithm, library, normalize_line_endings=True)
      self.assertEqual(reference.digest(), computed.digest())

  finally:
    os.remove(path)


def test_update_file_obj(self):
self._run_with_all_hash_libraries(self._do_update_file_obj)

Expand Down