Skip to content

Commit

Permalink
Merge branch 'master' into staging
Browse files Browse the repository at this point in the history
  • Loading branch information
torzdf committed May 16, 2019
2 parents c4e24d6 + 8d1502c commit f078071
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 123 deletions.
30 changes: 17 additions & 13 deletions lib/vgg_face.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import cv2
import numpy as np
from fastcluster import linkage
from scipy.spatial.distance import pdist, squareform

from lib.utils import GetModel

Expand All @@ -24,28 +23,36 @@ class VGGFace():
""" VGG Face feature extraction.
Input images should be in BGR Order """

def __init__(self):
logger.debug("Initializing %s", self.__class__.__name__)
def __init__(self, backend="CPU"):
logger.debug("Initializing %s: (backend: %s)", self.__class__.__name__, backend)
git_model_id = 7
model_filename = ["vgg_face_v1.caffemodel", "vgg_face_v1.prototxt"]
self.input_size = 224
# Average image provided in http://www.robots.ox.ac.uk/~vgg/software/vgg_face/
self.average_img = [129.1863, 104.7624, 93.5940]

self.model = self.get_model(git_model_id, model_filename)
self.model = self.get_model(git_model_id, model_filename, backend)
logger.debug("Initialized %s", self.__class__.__name__)

# <<< GET MODEL >>> #
@staticmethod
def get_model(git_model_id, model_filename):
def get_model(self, git_model_id, model_filename, backend):
""" Check if model is available, if not, download and unzip it """
root_path = os.path.abspath(os.path.dirname(sys.argv[0]))
cache_path = os.path.join(root_path, "plugins", "extract", ".cache")
model = GetModel(model_filename, cache_path, git_model_id).model_path
model = cv2.dnn.readNetFromCaffe(model[1], model[0]) # pylint: disable=no-member
model.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) # pylint: disable=no-member
model.setPreferableTarget(self.get_backend(backend))
return model

@staticmethod
def get_backend(backend):
    """ Return the cv2 DNN target constant for the requested backend.

    Parameters
    ----------
    backend: str
        Name of the target, e.g. "CPU" or "OPENCL". Matching is
        case-insensitive; the name is looked up on ``cv2.dnn`` as
        ``DNN_TARGET_<NAME>``.

    Returns
    -------
    The value of the matching ``cv2.dnn.DNN_TARGET_*`` attribute, suitable for
    passing to ``setPreferableTarget``.

    Raises
    ------
    AttributeError
        If ``cv2.dnn`` has no ``DNN_TARGET_<NAME>`` attribute.
    """
    # The cv2 constants are upper-case, so normalise the name first. This lets
    # callers that bypass the CLI (which already upper-cases) pass "opencl".
    backend = backend.upper()
    if backend == "OPENCL":
        logger.info("Using OpenCL backend. If the process runs, you can safely ignore any of "
                    "the failure messages.")
    return getattr(cv2.dnn, "DNN_TARGET_{}".format(backend))  # pylint: disable=no-member

def predict(self, face):
""" Return encodings for given image from vgg_face """
if face.shape[0] != self.input_size:
Expand Down Expand Up @@ -92,12 +99,9 @@ def sorted_similarity(self, predictions, method="ward"):
sorted_similarity transforms a distance matrix into a sorted distance matrix according to
the order implied by the hierarchical tree (dendrogram)
"""
logger.verbose("Calculating pairwise distances")
flat_distance_matrix = pdist(predictions)
distance_matrix = squareform(flat_distance_matrix)
num_predictions = len(distance_matrix)
logger.verbose("Sorting distances")
result_linkage = linkage(flat_distance_matrix, method=method, preserve_input=True)
logger.info("Sorting face distances. Depending on your dataset this may take some time...")
num_predictions = predictions.shape[0]
result_linkage = linkage(predictions, method=method, preserve_input=False)
result_order = self.seriation(result_linkage,
num_predictions,
num_predictions + num_predictions - 2)
Expand Down
69 changes: 42 additions & 27 deletions tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,21 +389,35 @@ def get_argument_list():
argument_list.append({"opts": ('-s', '--sort-by'),
"action": Radio,
"type": str,
"choices": ("blur", "face", "face-cnn",
"face-cnn-dissim", "face-dissim",
"face-yaw", "hist",
"hist-dissim"),
"choices": ("blur", "face", "face-cnn", "face-cnn-dissim",
"face-yaw", "hist", "hist-dissim"),
"dest": 'sort_method',
"default": "hist",
"help": "Sort by method. "
"Choose how images are sorted. "
"Default: hist"})
"help": "R|Sort by method. Choose how images are sorted. "
"\nL|'blur': Sort faces by blurriness."
"\nL|'face': Use VGG Face to sort by face similarity. This "
"uses a pairwise clustering algorithm to check the "
"distances between 4096 features on every face in your set "
"and order them appropriately. WARNING: On very large "
"datasets it is possible to run out of memory performing "
"this calculation."
"\nL|'face-cnn': Sort faces by their landmarks. You can "
"adjust the threshold with the '-t' (--ref_threshold) "
"option."
"\nL|'face-cnn-dissim': Like 'face-cnn' but sorts by "
"dissimilarity."
"\nL|'face-yaw': Sort faces by Yaw (rotation left to right)."
"\nL|'hist': Sort faces by their color histogram. You can "
"adjust the threshold with the '-t' (--ref_threshold) "
"option."
"\nL|'hist-dissim': Like 'hist' but sorts by "
"dissimilarity."
"\nDefault: hist"})

argument_list.append({"opts": ('-g', '--group-by'),
"action": Radio,
"type": str,
"choices": ("blur", "face", "face-cnn",
"face-yaw", "hist"),
"choices": ("blur", "face-cnn", "face-yaw", "hist"),
"dest": 'group_method',
"default": "hist",
"help": "Group by method. "
Expand All @@ -420,24 +434,16 @@ def get_argument_list():
"dest": 'min_threshold',
"default": -1.0,
"help": "Float value. "
"Minimum threshold to use for grouping "
"comparison with 'face' and 'hist' "
"methods. The lower the value the more "
"discriminating the grouping is. "
"Leaving -1.0 will make the program "
"set the default value automatically. "
"For face 0.6 should be enough, with "
"0.5 being very discriminating. "
"For face-cnn 7.2 should be enough, "
"with 4 being very discriminating. "
"For hist 0.3 should be enough, with "
"0.2 being very discriminating. "
"Be careful setting a value that's too "
"low in a directory with many images, "
"as this could result in a lot of "
"directories being created. "
"Defaults: face 0.6, face-cnn 7.2, "
"hist 0.3"})
"Minimum threshold to use for grouping comparison with "
"'face-cnn' and 'hist' methods. The lower the value the "
"more discriminating the grouping is. Leaving -1.0 will "
"allow the program set the default value automatically. "
"For face-cnn 7.2 should be enough, with 4 being very "
"discriminating. For hist 0.3 should be enough, with 0.2 "
"being very discriminating. Be careful setting a value "
"that's too low in a directory with many images, as this "
"could result in a lot of directories being created. "
"Defaults: face-cnn 7.2, hist 0.3"})

argument_list.append({"opts": ('-b', '--bins'),
"action": Slider,
Expand Down Expand Up @@ -466,6 +472,15 @@ def get_argument_list():
"the last bin."
"Default value: 5"})

argument_list.append({"opts": ("-be", "--backend"),
"action": Radio,
"type": str.upper,
"choices": ("CPU", "OPENCL"),
"default": "CPU",
"help": "Backend to use for VGG Face inference. OpenCL is slightly "
"faster but may not be available on all systems. Only used "
"for sort by 'face'."})

argument_list.append({"opts": ('-l', '--log-changes'),
"action": 'store_true',
"dest": 'log_changes',
Expand Down
89 changes: 6 additions & 83 deletions tools/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, arguments):
self.args = arguments
self.changes = None
self.serializer = None
self.vgg_face = VGGFace()
self.vgg_face = None

def process(self):
""" Main processing function of the sort tool """
Expand All @@ -49,13 +49,15 @@ def process(self):
if (self.args.final_process == "folders"
and self.args.min_threshold < 0.0):
method = self.args.group_method.lower()
if method == 'face':
self.args.min_threshold = 0.6
elif method == 'face-cnn':
if method == 'face-cnn':
self.args.min_threshold = 7.2
elif method == 'hist':
self.args.min_threshold = 0.3

# Load VGG Face if sorting by face
if self.args.sort_method.lower() == "face":
self.vgg_face = VGGFace(backend=self.args.backend)

# If logging is enabled, prepare container
if self.args.log_changes:
self.changes = dict()
Expand Down Expand Up @@ -190,19 +192,6 @@ def sort_face(self):
img_list = images[indices]
return img_list

def sort_face_dissim(self):
    """ Sort the images in the input folder by face dissimilarity.

    Every image found in ``self.args.input_dir`` is read with cv2 and passed
    through ``self.vgg_face.predict``. The resulting encodings are ordered by
    ``self.vgg_face.sorted_similarity`` using the "complete" linkage method.

    Returns
    -------
    numpy array of the image entries returned by ``self.find_images``,
    re-ordered by the computed indices.
    """
    source_dir = self.args.input_dir

    logger.info("Sorting by face dissimilarity...")
    filenames = np.array(self.find_images(source_dir))

    # One encoding per image, in the same order as `filenames`
    encodings = []
    for filename in tqdm(filenames, desc="loading", file=sys.stdout):
        encodings.append(self.vgg_face.predict(cv2.imread(filename)))
    encodings = np.array(encodings)

    logger.info("Sorting. Depending on ths size of your dataset, this may take a while...")
    order = self.vgg_face.sorted_similarity(encodings, method="complete")
    return filenames[order]

def sort_face_cnn(self):
""" Sort by CNN similarity """
self.launch_aligner()
Expand Down Expand Up @@ -375,57 +364,6 @@ def group_blur(self, img_list):

return bins

def group_face(self, img_list):
    """ Group images into bins by face similarity.

    Parameters
    ----------
    img_list: list
        Items are indexable pairs: ``item[0]`` is the image file path and
        ``item[1]`` is the face encoding for that image (``None`` or empty
        when there is no face).

    Returns
    -------
    list of lists of file paths. ``bins[0]`` always holds the images without
    a face encoding; every further bin holds the images of one face group.
    """
    logger.info("Grouping by face similarity...")

    # Groups are of the form: group_num -> list of reference encodings.
    # Group numbers start at 1 so that a group number is also the index of
    # that group's bin (bin 0 is reserved for non-faces). Numbering the
    # groups from 0 would send matched faces to the bin before their own.
    reference_groups = dict()

    # Bins array, where index is the group number and value is
    # an array containing the file paths to the images in that group.
    # The first group (0), is always the non-face group.
    bins = [[]]

    # Comparison threshold used to decide how similar
    # faces have to be to be grouped together.
    min_threshold = self.args.min_threshold

    # Iterate over every image, including the first one
    for item in tqdm(img_list, desc="Grouping", file=sys.stdout):
        filename, f1encs = item[0], item[1]

        # Check if current image is a face, if not then
        # add it immediately to the non-face list.
        if f1encs is None or len(f1encs) == 0:
            bins[0].append(filename)
            continue

        best_key, best_score = -1, float("inf")
        for key, references in reference_groups.items():
            # Non-faces are never added to reference_groups, so the
            # references need no face check. A failed comparison is
            # scored as "no match" rather than aborting the grouping.
            try:
                score = self.get_avg_score_faces(f1encs, references)
            except (TypeError, ValueError, ZeroDivisionError):
                score = float("inf")
            if score < best_score:
                best_key, best_score = key, score

        if best_score < min_threshold:
            # Close enough to an existing group: join it
            reference_groups[best_key].append(f1encs)
            bins[best_key].append(filename)
        else:
            # Start a new group whose number equals its bin index
            reference_groups[len(bins)] = [f1encs]
            bins.append([filename])

    return bins

def group_face_cnn(self, img_list):
""" Group into bins by CNN face similarity """
logger.info("Grouping by face-cnn similarity...")
Expand Down Expand Up @@ -638,12 +576,6 @@ def reload_images(self, group_method, img_list):
tqdm(self.find_images(input_dir),
desc="Reloading",
file=sys.stdout)]
elif group_method == 'group_face':
temp_list = [
[img, self.vgg_face.predict(cv2.imread(img))]
for img in tqdm(self.find_images(input_dir),
desc="Reloading",
file=sys.stdout)]
elif group_method == 'group_face_cnn':
self.launch_aligner()
temp_list = []
Expand Down Expand Up @@ -826,15 +758,6 @@ def get_avg_score_hist(img1, references):
scores.append(score)
return sum(scores) / len(scores)

def get_avg_score_faces(self, f1encs, references):
    """ Return the average score between a face and a set of reference faces.

    Parameters
    ----------
    f1encs:
        Encoding of the face being scored.
    references:
        Sized iterable of reference encodings. Each one is compared with
        ``f1encs`` via ``self.vgg_face.find_cosine_similiarity``.

    Returns
    -------
    The arithmetic mean of the individual scores.

    Raises
    ------
    ZeroDivisionError
        If ``references`` is empty. This is kept deliberately: ``group_face``
        catches it and treats the comparison as "no match".
    """
    # The attribute lookup stays inside the comprehension so that an empty
    # `references` never touches self.vgg_face (which may still be None).
    scores = [self.vgg_face.find_cosine_similiarity(f1encs, f2encs)
              for f2encs in references]
    return sum(scores) / len(scores)

@staticmethod
def get_avg_score_faces_cnn(fl1, references):
""" Return the average CNN similarity score
Expand Down

0 comments on commit f078071

Please sign in to comment.