resize functionality moved to represent module
we were handling resizing in extract_faces. with this commit
we move it to the representation module to provide separation
of concerns.
serengil committed Apr 7, 2024
1 parent 42ee298 commit 1078be9
Showing 9 changed files with 152 additions and 171 deletions.
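
In practice, callers that relied on extract_faces returning fixed-size faces must now resize explicitly. A minimal migration sketch (not part of the commit, image path hypothetical); after this commit the returned faces are RGB arrays normalized to [0, 1] at their detected size:

from deepface import DeepFace
from deepface.modules import preprocessing

# before this commit: DeepFace.extract_faces(img_path="img.jpg", target_size=(224, 224))
face_objs = DeepFace.extract_faces(img_path="img.jpg", detector_backend="opencv")

for face_obj in face_objs:
    face = face_obj["face"]  # RGB, float values in [0, 1], detected size
    # resize_image pads with black pixels and returns a 4D batch
    batch = preprocessing.resize_image(img=face, target_size=(224, 224))
    print(batch.shape)  # (1, 224, 224, 3)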
8 changes: 1 addition & 7 deletions deepface/DeepFace.py
@@ -2,7 +2,7 @@
 import os
 import warnings
 import logging
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Union, Optional

 # this has to be set before importing tensorflow
 os.environ["TF_USE_LEGACY_KERAS"] = "1"
@@ -439,7 +439,6 @@ def stream(

 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
@@ -453,9 +452,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.

-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image (default is (224, 224)).
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
@@ -485,13 +481,11 @@ def extract_faces(

     return detection.extract_faces(
         img_path=img_path,
-        target_size=target_size,
         detector_backend=detector_backend,
         enforce_detection=enforce_detection,
         align=align,
         expand_percentage=expand_percentage,
         grayscale=grayscale,
-        human_readable=True,
     )


121 changes: 64 additions & 57 deletions deepface/modules/demography.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm

 # project dependencies
-from deepface.modules import modeling, detection
+from deepface.modules import modeling, detection, preprocessing
 from deepface.extendedmodels import Gender, Race, Emotion


@@ -118,7 +118,6 @@ def analyze(

     img_objs = detection.extract_faces(
         img_path=img_path,
-        target_size=(224, 224),
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
@@ -130,60 +129,68 @@ def analyze(
         img_content = img_obj["face"]
         img_region = img_obj["facial_area"]
         img_confidence = img_obj["confidence"]
-        if img_content.shape[0] > 0 and img_content.shape[1] > 0:
-            obj = {}
-            # facial attribute analysis
-            pbar = tqdm(
-                range(0, len(actions)),
-                desc="Finding actions",
-                disable=silent if len(actions) > 1 else True,
-            )
-            for index in pbar:
-                action = actions[index]
-                pbar.set_description(f"Action: {action}")
-
-                if action == "emotion":
-                    emotion_predictions = modeling.build_model("Emotion").predict(img_content)
-                    sum_of_predictions = emotion_predictions.sum()
-
-                    obj["emotion"] = {}
-                    for i, emotion_label in enumerate(Emotion.labels):
-                        emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
-                        obj["emotion"][emotion_label] = emotion_prediction
-
-                    obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
-
-                elif action == "age":
-                    apparent_age = modeling.build_model("Age").predict(img_content)
-                    # int cast is for exception - object of type 'float32' is not JSON serializable
-                    obj["age"] = int(apparent_age)
-
-                elif action == "gender":
-                    gender_predictions = modeling.build_model("Gender").predict(img_content)
-                    obj["gender"] = {}
-                    for i, gender_label in enumerate(Gender.labels):
-                        gender_prediction = 100 * gender_predictions[i]
-                        obj["gender"][gender_label] = gender_prediction
-
-                    obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
-
-                elif action == "race":
-                    race_predictions = modeling.build_model("Race").predict(img_content)
-                    sum_of_predictions = race_predictions.sum()
-
-                    obj["race"] = {}
-                    for i, race_label in enumerate(Race.labels):
-                        race_prediction = 100 * race_predictions[i] / sum_of_predictions
-                        obj["race"][race_label] = race_prediction
-
-                    obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
-
-            # -----------------------------
-            # mention facial areas
-            obj["region"] = img_region
-            # include image confidence
-            obj["face_confidence"] = img_confidence
-
-            resp_objects.append(obj)
+        if img_content.shape[0] == 0 or img_content.shape[1] == 0:
+            continue
+
+        # rgb to bgr
+        img_content = img_content[:, :, ::-1]
+
+        # resize input image
+        img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))
+
+        obj = {}
+        # facial attribute analysis
+        pbar = tqdm(
+            range(0, len(actions)),
+            desc="Finding actions",
+            disable=silent if len(actions) > 1 else True,
+        )
+        for index in pbar:
+            action = actions[index]
+            pbar.set_description(f"Action: {action}")
+
+            if action == "emotion":
+                emotion_predictions = modeling.build_model("Emotion").predict(img_content)
+                sum_of_predictions = emotion_predictions.sum()
+
+                obj["emotion"] = {}
+                for i, emotion_label in enumerate(Emotion.labels):
+                    emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
+                    obj["emotion"][emotion_label] = emotion_prediction
+
+                obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
+
+            elif action == "age":
+                apparent_age = modeling.build_model("Age").predict(img_content)
+                # int cast is for exception - object of type 'float32' is not JSON serializable
+                obj["age"] = int(apparent_age)
+
+            elif action == "gender":
+                gender_predictions = modeling.build_model("Gender").predict(img_content)
+                obj["gender"] = {}
+                for i, gender_label in enumerate(Gender.labels):
+                    gender_prediction = 100 * gender_predictions[i]
+                    obj["gender"][gender_label] = gender_prediction
+
+                obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
+
+            elif action == "race":
+                race_predictions = modeling.build_model("Race").predict(img_content)
+                sum_of_predictions = race_predictions.sum()
+
+                obj["race"] = {}
+                for i, race_label in enumerate(Race.labels):
+                    race_prediction = 100 * race_predictions[i] / sum_of_predictions
+                    obj["race"][race_label] = race_prediction
+
+                obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
+
+        # -----------------------------
+        # mention facial areas
+        obj["region"] = img_region
+        # include image confidence
+        obj["face_confidence"] = img_confidence
+
+        resp_objects.append(obj)

     return resp_objects
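
As a standalone illustration of the percentage scoring used in the emotion, gender, and race branches above (plain numpy; the label list and raw model outputs here are hypothetical stand-ins):

import numpy as np

labels = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
predictions = np.array([0.05, 0.01, 0.04, 0.70, 0.05, 0.05, 0.10])

scores = {
    label: 100 * predictions[i] / predictions.sum()  # scale raw outputs to percentages
    for i, label in enumerate(labels)
}
dominant = labels[int(np.argmax(predictions))]

print(scores["happy"])  # 70.0
print(dominant)         # happy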
70 changes: 4 additions & 66 deletions deepface/modules/detection.py
@@ -1,5 +1,5 @@
 # built-in dependencies
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Tuple, Union

 # 3rd part dependencies
 import numpy as np
@@ -10,30 +10,20 @@
 from deepface.modules import preprocessing
 from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.detectors import DetectorWrapper
-from deepface.commons import package_utils
 from deepface.commons.logger import Logger

 logger = Logger(module="deepface/modules/detection.py")

 # pylint: disable=no-else-raise


-tf_major_version = package_utils.get_tf_major_version()
-if tf_major_version == 1:
-    from keras.preprocessing import image
-elif tf_major_version == 2:
-    from tensorflow.keras.preprocessing import image
-
-
 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
     expand_percentage: int = 0,
     grayscale: bool = False,
-    human_readable=False,
 ) -> List[Dict[str, Any]]:
     """
     Extract faces from a given image
@@ -42,9 +32,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.

-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image.
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
@@ -58,13 +45,10 @@ def extract_faces(
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).

-        human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
-            or 4D BGR for ML models (default is False).
-
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:

-        - "face" (np.ndarray): The detected face as a NumPy array.
+        - "face" (np.ndarray): The detected face as a NumPy array in RGB format.

         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
@@ -122,57 +106,11 @@ def extract_faces(
         if grayscale is True:
             current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

-        # resize and padding
-        if target_size is not None:
-            factor_0 = target_size[0] / current_img.shape[0]
-            factor_1 = target_size[1] / current_img.shape[1]
-            factor = min(factor_0, factor_1)
-
-            dsize = (
-                int(current_img.shape[1] * factor),
-                int(current_img.shape[0] * factor),
-            )
-            current_img = cv2.resize(current_img, dsize)
-
-            diff_0 = target_size[0] - current_img.shape[0]
-            diff_1 = target_size[1] - current_img.shape[1]
-            if grayscale is False:
-                # Put the base image in the middle of the padded image
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                        (0, 0),
-                    ),
-                    "constant",
-                )
-            else:
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                    ),
-                    "constant",
-                )
-
-            # double check: if target image is not still the same size with target.
-            if current_img.shape[0:2] != target_size:
-                current_img = cv2.resize(current_img, target_size)
-
-        # normalizing the image pixels
-        # what this line doing? must?
-        img_pixels = image.img_to_array(current_img)
-        img_pixels = np.expand_dims(img_pixels, axis=0)
-        img_pixels /= 255  # normalize input in [0, 1]
-        # discard expanded dimension
-        if human_readable is True and len(img_pixels.shape) == 4:
-            img_pixels = img_pixels[0]
+        current_img = current_img / 255  # normalize input in [0, 1]

         resp_objs.append(
             {
-                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
+                "face": current_img[:, :, ::-1],
                 "facial_area": {
                     "x": int(current_region.x),
                     "y": int(current_region.y),
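With the resize block and the keras-dependent img_to_array step gone, detection.extract_faces now always returns each face crop as a 3D RGB float array in [0, 1] at its detected size. A sketch of converting one back to a savable OpenCV image (hypothetical input path, assuming it contains a detectable face):

import cv2
import numpy as np
from deepface.modules import detection

face_objs = detection.extract_faces(img_path="crowd.jpg")
for i, face_obj in enumerate(face_objs):
    rgb = face_obj["face"]                          # RGB, float in [0, 1]
    bgr = (rgb[:, :, ::-1] * 255).astype(np.uint8)  # back to uint8 BGR for OpenCV
    cv2.imwrite(f"face_{i}.png", bgr)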
61 changes: 59 additions & 2 deletions deepface/modules/preprocessing.py
@@ -11,6 +11,16 @@
 import requests
 from PIL import Image

+# project dependencies
+from deepface.commons import package_utils
+
+
+tf_major_version = package_utils.get_tf_major_version()
+if tf_major_version == 1:
+    from keras.preprocessing import image
+elif tf_major_version == 2:
+    from tensorflow.keras.preprocessing import image
+

 def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
     """
@@ -66,8 +76,8 @@ def load_image_from_web(url: str) -> np.ndarray:
     response = requests.get(url, stream=True, timeout=60)
     response.raise_for_status()
     image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
-    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
-    return image
+    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
+    return img


 def load_base64(uri: str) -> np.ndarray:
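The local variable rename in load_image_from_web (image to img) presumably avoids shadowing the keras image module this file now imports at the top; the function's behavior is unchanged.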
@@ -157,3 +167,50 @@ def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
         raise ValueError(f"unimplemented normalization type - {normalization}")

     return img


+def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
+    """
+    Resize an image to expected size of a ml model with adding black pixels.
+    Args:
+        img (np.ndarray): pre-loaded image as numpy array
+        target_size (tuple): input shape of ml model
+    Returns:
+        img (np.ndarray): resized input image
+    """
+    factor_0 = target_size[0] / img.shape[0]
+    factor_1 = target_size[1] / img.shape[1]
+    factor = min(factor_0, factor_1)
+
+    dsize = (
+        int(img.shape[1] * factor),
+        int(img.shape[0] * factor),
+    )
+    img = cv2.resize(img, dsize)
+
+    diff_0 = target_size[0] - img.shape[0]
+    diff_1 = target_size[1] - img.shape[1]
+
+    # Put the base image in the middle of the padded image
+    img = np.pad(
+        img,
+        (
+            (diff_0 // 2, diff_0 - diff_0 // 2),
+            (diff_1 // 2, diff_1 - diff_1 // 2),
+            (0, 0),
+        ),
+        "constant",
+    )
+
+    # double check: if target image is not still the same size with target.
+    if img.shape[0:2] != target_size:
+        img = cv2.resize(img, target_size)
+
+    # make it 4-dimensional how ML models expect
+    img = image.img_to_array(img)
+    img = np.expand_dims(img, axis=0)
+
+    if img.max() > 1:
+        img = (img.astype(np.float32) / 255.0).astype(np.float32)
+
+    return img
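
A worked sketch of the padding behavior under assumed shapes: a 100x200 input resized to (224, 224) uses factor min(224/100, 224/200) = 1.12, so it is first scaled to 112x224, then padded with 56 black rows above and below before the batch dimension is added:

import numpy as np
from deepface.modules import preprocessing

img = np.random.rand(100, 200, 3)  # hypothetical face crop, already in [0, 1]
out = preprocessing.resize_image(img=img, target_size=(224, 224))
print(out.shape)  # (1, 224, 224, 3)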
(Diffs for the remaining changed files did not load and are not shown.)
