diff --git a/.gitignore b/.gitignore index 9cf775b..93135d8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ build dist ImageHash.egg-info/ .eggs +.DS_Store \ No newline at end of file diff --git a/README.rst b/README.rst index b12bc6e..1690b32 100644 --- a/README.rst +++ b/README.rst @@ -10,6 +10,12 @@ A image hashing library written in Python. ImageHash supports: |Travis|_ |Coveralls|_ +Rationale +--------- +Why can’t we use md5, sha-1, etc.? + +Unfortunately, we cannot use cryptographic hashing algorithms in our implementation. Due to the nature of cryptographic hashing algorithms, very tiny changes in the input file will result in a substantially different hash. In the case of image fingerprinting, we actually want our similar inputs to have similar output hashes as well. + Requirements ------------- Based on PIL/Pillow Image, numpy and scipy.fftpack (for pHash) diff --git a/imagehash/__init__.py b/imagehash/__init__.py index 30b816f..d074dec 100644 --- a/imagehash/__init__.py +++ b/imagehash/__init__.py @@ -121,6 +121,8 @@ def average_hash(image, hash_size=8): @image must be a PIL instance. """ + if hash_size <= 0: + raise ValueError("Hash size must be positive") # reduce size and complexity, then covert to grayscale image = image.convert("L").resize((hash_size, hash_size), Image.ANTIALIAS) @@ -143,6 +145,9 @@ def phash(image, hash_size=8, highfreq_factor=4): @image must be a PIL instance. """ + if hash_size <= 0: + raise ValueError("Hash size must be positive") + import scipy.fftpack img_size = hash_size * highfreq_factor image = image.convert("L").resize((img_size, img_size), Image.ANTIALIAS) @@ -184,6 +189,9 @@ def dhash(image, hash_size=8): @image must be a PIL instance. 
""" # resize(w, h), but numpy.array((h, w)) + if hash_size <= 0: + raise ValueError("Hash size must be positive") + image = image.convert("L").resize((hash_size + 1, hash_size), Image.ANTIALIAS) pixels = numpy.array(image.getdata(), dtype=numpy.float).reshape((hash_size, hash_size + 1)) # compute differences between columns diff --git a/imagehash/tests/__init__.py b/imagehash/tests/__init__.py index cf84ef1..13ab8f0 100644 --- a/imagehash/tests/__init__.py +++ b/imagehash/tests/__init__.py @@ -58,3 +58,7 @@ def check_hash_stored(self, func, image): '- stringified hash {}'.format(distance, image_hash, other_hash)) self.assertEqual(distance, 0, emsg) + + def check_hash_size(self, func, image, size): + with self.assertRaises(ValueError): + func(image, size) diff --git a/imagehash/tests/test_average_hash.py b/imagehash/tests/test_average_hash.py index 12ff9a9..bf26323 100644 --- a/imagehash/tests/test_average_hash.py +++ b/imagehash/tests/test_average_hash.py @@ -21,6 +21,10 @@ def test_average_hash_length(self): def test_average_hash_stored(self): self.check_hash_stored(self.func, self.image) + def test_average_hash_size(self): + self.check_hash_size(self.func, self.image, -1) + + if __name__ == '__main__': unittest.main() diff --git a/imagehash/tests/test_dhash.py b/imagehash/tests/test_dhash.py index 32a4367..60179db 100644 --- a/imagehash/tests/test_dhash.py +++ b/imagehash/tests/test_dhash.py @@ -21,6 +21,8 @@ def test_dhash_length(self): def test_dhash_stored(self): self.check_hash_stored(self.func, self.image) + def test_dhash_size(self): + self.check_hash_size(self.func, self.image, -1) if __name__ == '__main__': unittest.main() diff --git a/imagehash/tests/test_phash.py b/imagehash/tests/test_phash.py index c785fbb..9061753 100644 --- a/imagehash/tests/test_phash.py +++ b/imagehash/tests/test_phash.py @@ -20,7 +20,9 @@ def test_phash_length(self): def test_phash_stored(self): self.check_hash_stored(self.func, self.image) - + + def test_phash_size(self): + 
self.check_hash_size(self.func, self.image, -1) if __name__ == '__main__': unittest.main() diff --git a/imagehash/tests/test_whash.py b/imagehash/tests/test_whash.py index 144f4c8..3134ef9 100644 --- a/imagehash/tests/test_whash.py +++ b/imagehash/tests/test_whash.py @@ -57,6 +57,5 @@ def test_image_scale_not_2power(self): with six.assertRaisesRegex(self, AssertionError, emsg): imagehash.whash(self.image, image_scale=image_scale+1) - if __name__ == '__main__': unittest.main()