-
Notifications
You must be signed in to change notification settings - Fork 0
/
hash.py
90 lines (77 loc) · 3.33 KB
/
hash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/python
# coding=utf-8
##############################################################################
__author__ = "Jason A. Arter"
__date__ = "2021-04-01"
__copyright__ = ""
__credits__ = ["Jason A. Arter"]
__license__ = "GPL"
__version__ = "0.1"
__maintainer__ = "Jason A. Arter"
__email__ = "jason.arter@gmail.com"
__status__ = "Prototype"
__updated__ = "2021-04-01"
__python_version__ = "3.11"
##############################################################################
import imagehash
import argparse
import os
import numpy as np
from pathlib import Path
from PIL import Image
def find_duplicates(self):
    """Report JPEG files under ``self.dirname`` whose average hashes collide.

    Walks ``self.dirname`` recursively, computes ``imagehash.average_hash``
    with ``self.hash_size`` for every JPEG file found, and flags any file
    whose hash equals that of a file seen earlier.  Findings are printed;
    no file is modified or deleted.

    Returns:
        list[Path]: the files flagged as duplicates, in the order found.

    Raises:
        FileNotFoundError: ``self.dirname`` does not exist.
        NotADirectoryError: ``self.dirname`` is not a directory.
    """
    source_path = Path(self.dirname)
    if not source_path.exists():
        raise FileNotFoundError(f"Source path does not exist: {source_path}")
    if not source_path.is_dir():
        raise NotADirectoryError(f"Source path is not a directory: {source_path}")

    # Compare suffixes case-insensitively so '.JPEG', '.Jpg', ... are not
    # silently ignored, and drop directories that merely look like files.
    # Sorting makes "which copy is the original" reproducible between runs.
    jpeg_suffixes = {'.jpg', '.jpeg'}
    filenames = sorted(
        x for x in source_path.glob('**/*.*')
        if x.suffix.lower() in jpeg_suffixes and x.is_file()
    )

    hashes = {}  # hash -> first file seen with that hash
    duplicates = []
    print("Finding duplicates now...\n")
    for image_file in filenames:
        try:
            with Image.open(image_file) as img:
                temp_hash = imagehash.average_hash(img, self.hash_size)
        except OSError as err:
            # A corrupt or unreadable file should not abort the whole scan.
            print(f"Skipping unreadable image {image_file}: {err}")
            continue
        if temp_hash in hashes:
            print(f"Duplicate {image_file} \nfound for Image {hashes[temp_hash]}!\n")
            duplicates.append(image_file)
        else:
            hashes[temp_hash] = image_file

    # NOTE: duplicates are only reported here; deleting them is left to the
    # caller, which can act on the returned list.
    if duplicates:
        print(f"Number of duplicates: {len(duplicates)}")
        print("Thank you for Using Duplicate Remover")
    else:
        print("No Duplicates Found :(")
    return duplicates
def test_hash(location: str, hash_size: int = 8, similarity: int = 80) -> None:
    """Print the average-hash array of the image stored at *location*.

    The hash is computed with ``imagehash.average_hash`` using *hash_size*.
    *similarity* is accepted but not used by this function.
    """
    with Image.open(location) as picture:
        fingerprint = imagehash.average_hash(picture, hash_size)
    bits = fingerprint.hash
    print(bits)
def find_similar(self, location: str, similarity: int = 80) -> None:
    """Print the files in ``self.dirname`` that resemble the image at *location*.

    Each file directly inside ``self.dirname`` (no recursion) is hashed with
    ``imagehash.average_hash`` at ``self.hash_size`` and compared with the
    hash of the reference image.  A file is reported when the number of
    differing hash cells does not exceed the limit derived from *similarity*.

    Args:
        location: path of the reference image.
        similarity: required similarity in percent; higher values allow
            fewer differing hash cells.
    """
    filenames = sorted(os.listdir(self.dirname))

    # Turn the percentage into the maximum count of hash cells that may
    # differ, out of hash_size ** 2 cells in total.
    threshold = 1 - similarity / 100
    diff_limit = int(threshold * (self.hash_size ** 2))

    with Image.open(location) as img:
        hash1 = imagehash.average_hash(img, self.hash_size).hash

    print(f"Finding Similar Images to {location} Now!\n")
    for image in filenames:
        candidate = os.path.join(self.dirname, image)
        # os.listdir also returns sub-directories; they cannot be opened
        # as images.
        if not os.path.isfile(candidate):
            continue
        try:
            with Image.open(candidate) as img:
                hash2 = imagehash.average_hash(img, self.hash_size).hash
        except OSError:
            # The listing is unfiltered, so non-image or corrupt files are
            # expected here; skip them instead of aborting the search.
            continue
        if np.count_nonzero(hash1 != hash2) <= diff_limit:
            print(f"{image} image found {similarity}% similar to {location}")
if __name__ == '__main__':
    # Command-line entry point: scan the directory given by --source for
    # duplicate photos.
    import types

    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--source", required=True, help="Source path containing photos to compare.", type=str)
    # argparse's type=bool converts any non-empty string, including "False",
    # to True.  Parse the text explicitly instead; a bare "-a" still means
    # True, and "-a True" / "-a False" keep working.
    parser.add_argument(
        "-a", "--autodelete", required=False, nargs="?", const=True, default=False,
        type=lambda text: text.strip().lower() in ("1", "true", "t", "yes", "y"),
        help="Auto-delete duplicates?",
    )
    my_args = parser.parse_args()
    # NOTE: --autodelete is parsed but nothing in this script consumes it.

    # No DuplicateRemover class is defined or imported in this file, so the
    # original call raised NameError.  find_duplicates only reads `dirname`
    # and `hash_size` from its first argument, so a plain namespace is
    # enough.  hash_size=8 mirrors the default used by test_hash — TODO
    # confirm this is the intended value.
    dr = types.SimpleNamespace(dirname=my_args.source, hash_size=8)
    find_duplicates(dr)