resize functionality moved to represent module
we were handling resizing in extract_faces. with this commit
we move it to the representation module to provide separation
of concerns.
serengil committed Apr 7, 2024
1 parent 42ee298 commit 1078be9
Showing 9 changed files with 152 additions and 171 deletions.
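
In practice, callers that relied on extract_faces returning fixed-size faces must now resize explicitly. A minimal migration sketch (not part of the commit, image path hypothetical); after this commit the returned faces are RGB arrays normalized to [0, 1] at their detected size:

from deepface import DeepFace
from deepface.modules import preprocessing

# before this commit: DeepFace.extract_faces(img_path="img.jpg", target_size=(224, 224))
face_objs = DeepFace.extract_faces(img_path="img.jpg", detector_backend="opencv")

for face_obj in face_objs:
    face = face_obj["face"]  # RGB, float values in [0, 1], detected size
    # resize_image pads with black pixels and returns a 4D batch
    batch = preprocessing.resize_image(img=face, target_size=(224, 224))
    print(batch.shape)  # (1, 224, 224, 3)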
8 changes: 1 addition & 7 deletions deepface/DeepFace.py
@@ -2,7 +2,7 @@
 import os
 import warnings
 import logging
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Union, Optional

 # this has to be set before importing tensorflow
 os.environ["TF_USE_LEGACY_KERAS"] = "1"
@@ -439,7 +439,6 @@ def stream(

 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
@@ -453,9 +452,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.

-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image (default is (224, 224)).
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
@@ -485,13 +481,11 @@ def extract_faces(

     return detection.extract_faces(
         img_path=img_path,
-        target_size=target_size,
         detector_backend=detector_backend,
         enforce_detection=enforce_detection,
         align=align,
         expand_percentage=expand_percentage,
         grayscale=grayscale,
-        human_readable=True,
     )


121 changes: 64 additions & 57 deletions deepface/modules/demography.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm

 # project dependencies
-from deepface.modules import modeling, detection
+from deepface.modules import modeling, detection, preprocessing
 from deepface.extendedmodels import Gender, Race, Emotion


@@ -118,7 +118,6 @@ def analyze(

     img_objs = detection.extract_faces(
         img_path=img_path,
-        target_size=(224, 224),
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
@@ -130,60 +129,68 @@ def analyze(
         img_content = img_obj["face"]
         img_region = img_obj["facial_area"]
         img_confidence = img_obj["confidence"]
-        if img_content.shape[0] > 0 and img_content.shape[1] > 0:
-            obj = {}
-            # facial attribute analysis
-            pbar = tqdm(
-                range(0, len(actions)),
-                desc="Finding actions",
-                disable=silent if len(actions) > 1 else True,
-            )
-            for index in pbar:
-                action = actions[index]
-                pbar.set_description(f"Action: {action}")
-
-                if action == "emotion":
-                    emotion_predictions = modeling.build_model("Emotion").predict(img_content)
-                    sum_of_predictions = emotion_predictions.sum()
-
-                    obj["emotion"] = {}
-                    for i, emotion_label in enumerate(Emotion.labels):
-                        emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
-                        obj["emotion"][emotion_label] = emotion_prediction
-
-                    obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
-
-                elif action == "age":
-                    apparent_age = modeling.build_model("Age").predict(img_content)
-                    # int cast is for exception - object of type 'float32' is not JSON serializable
-                    obj["age"] = int(apparent_age)
-
-                elif action == "gender":
-                    gender_predictions = modeling.build_model("Gender").predict(img_content)
-                    obj["gender"] = {}
-                    for i, gender_label in enumerate(Gender.labels):
-                        gender_prediction = 100 * gender_predictions[i]
-                        obj["gender"][gender_label] = gender_prediction
-
-                    obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
-
-                elif action == "race":
-                    race_predictions = modeling.build_model("Race").predict(img_content)
-                    sum_of_predictions = race_predictions.sum()
-
-                    obj["race"] = {}
-                    for i, race_label in enumerate(Race.labels):
-                        race_prediction = 100 * race_predictions[i] / sum_of_predictions
-                        obj["race"][race_label] = race_prediction
-
-                    obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
-
-            # -----------------------------
-            # mention facial areas
-            obj["region"] = img_region
-            # include image confidence
-            obj["face_confidence"] = img_confidence
-
-            resp_objects.append(obj)
+        if img_content.shape[0] == 0 or img_content.shape[1] == 0:
+            continue
+
+        # rgb to bgr
+        img_content = img_content[:, :, ::-1]
+
+        # resize input image
+        img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))
+
+        obj = {}
+        # facial attribute analysis
+        pbar = tqdm(
+            range(0, len(actions)),
+            desc="Finding actions",
+            disable=silent if len(actions) > 1 else True,
+        )
+        for index in pbar:
+            action = actions[index]
+            pbar.set_description(f"Action: {action}")
+
+            if action == "emotion":
+                emotion_predictions = modeling.build_model("Emotion").predict(img_content)
+                sum_of_predictions = emotion_predictions.sum()
+
+                obj["emotion"] = {}
+                for i, emotion_label in enumerate(Emotion.labels):
+                    emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
+                    obj["emotion"][emotion_label] = emotion_prediction
+
+                obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
+
+            elif action == "age":
+                apparent_age = modeling.build_model("Age").predict(img_content)
+                # int cast is for exception - object of type 'float32' is not JSON serializable
+                obj["age"] = int(apparent_age)
+
+            elif action == "gender":
+                gender_predictions = modeling.build_model("Gender").predict(img_content)
+                obj["gender"] = {}
+                for i, gender_label in enumerate(Gender.labels):
+                    gender_prediction = 100 * gender_predictions[i]
+                    obj["gender"][gender_label] = gender_prediction
+
+                obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
+
+            elif action == "race":
+                race_predictions = modeling.build_model("Race").predict(img_content)
+                sum_of_predictions = race_predictions.sum()
+
+                obj["race"] = {}
+                for i, race_label in enumerate(Race.labels):
+                    race_prediction = 100 * race_predictions[i] / sum_of_predictions
+                    obj["race"][race_label] = race_prediction
+
+                obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
+
+        # -----------------------------
+        # mention facial areas
+        obj["region"] = img_region
+        # include image confidence
+        obj["face_confidence"] = img_confidence
+
+        resp_objects.append(obj)

     return resp_objects
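
As a standalone illustration of the percentage scoring used in the emotion, gender, and race branches above (plain numpy; the label list and raw model outputs here are hypothetical stand-ins):

import numpy as np

labels = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
predictions = np.array([0.05, 0.01, 0.04, 0.70, 0.05, 0.05, 0.10])

scores = {
    label: 100 * predictions[i] / predictions.sum()  # scale raw outputs to percentages
    for i, label in enumerate(labels)
}
dominant = labels[int(np.argmax(predictions))]

print(scores["happy"])  # 70.0
print(dominant)         # happy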
70 changes: 4 additions & 66 deletions deepface/modules/detection.py
@@ -1,5 +1,5 @@
 # built-in dependencies
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Tuple, Union

 # 3rd part dependencies
 import numpy as np
@@ -10,30 +10,20 @@
 from deepface.modules import preprocessing
 from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.detectors import DetectorWrapper
-from deepface.commons import package_utils
 from deepface.commons.logger import Logger

 logger = Logger(module="deepface/modules/detection.py")

 # pylint: disable=no-else-raise


-tf_major_version = package_utils.get_tf_major_version()
-if tf_major_version == 1:
-    from keras.preprocessing import image
-elif tf_major_version == 2:
-    from tensorflow.keras.preprocessing import image
-
-
 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
     expand_percentage: int = 0,
     grayscale: bool = False,
-    human_readable=False,
 ) -> List[Dict[str, Any]]:
     """
     Extract faces from a given image
@@ -42,9 +32,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.

-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image.
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
@@ -58,13 +45,10 @@ def extract_faces(
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).

-        human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
-            or 4D BGR for ML models (default is False).
-
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:

-        - "face" (np.ndarray): The detected face as a NumPy array.
+        - "face" (np.ndarray): The detected face as a NumPy array in RGB format.

         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
@@ -122,57 +106,11 @@ def extract_faces(
         if grayscale is True:
             current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

-        # resize and padding
-        if target_size is not None:
-            factor_0 = target_size[0] / current_img.shape[0]
-            factor_1 = target_size[1] / current_img.shape[1]
-            factor = min(factor_0, factor_1)
-
-            dsize = (
-                int(current_img.shape[1] * factor),
-                int(current_img.shape[0] * factor),
-            )
-            current_img = cv2.resize(current_img, dsize)
-
-            diff_0 = target_size[0] - current_img.shape[0]
-            diff_1 = target_size[1] - current_img.shape[1]
-            if grayscale is False:
-                # Put the base image in the middle of the padded image
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                        (0, 0),
-                    ),
-                    "constant",
-                )
-            else:
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                    ),
-                    "constant",
-                )
-
-            # double check: if target image is not still the same size with target.
-            if current_img.shape[0:2] != target_size:
-                current_img = cv2.resize(current_img, target_size)
-
-        # normalizing the image pixels
-        # what this line doing? must?
-        img_pixels = image.img_to_array(current_img)
-        img_pixels = np.expand_dims(img_pixels, axis=0)
-        img_pixels /= 255  # normalize input in [0, 1]
-        # discard expanded dimension
-        if human_readable is True and len(img_pixels.shape) == 4:
-            img_pixels = img_pixels[0]
+        current_img = current_img / 255  # normalize input in [0, 1]

         resp_objs.append(
             {
-                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
+                "face": current_img[:, :, ::-1],
                 "facial_area": {
                     "x": int(current_region.x),
                     "y": int(current_region.y),
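With the resize block and the keras-dependent img_to_array step gone, detection.extract_faces now always returns each face crop as a 3D RGB float array in [0, 1] at its detected size. A sketch of converting one back to a savable OpenCV image (hypothetical input path, assuming it contains a detectable face):

import cv2
import numpy as np
from deepface.modules import detection

face_objs = detection.extract_faces(img_path="crowd.jpg")
for i, face_obj in enumerate(face_objs):
    rgb = face_obj["face"]                          # RGB, float in [0, 1]
    bgr = (rgb[:, :, ::-1] * 255).astype(np.uint8)  # back to uint8 BGR for OpenCV
    cv2.imwrite(f"face_{i}.png", bgr)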
61 changes: 59 additions & 2 deletions deepface/modules/preprocessing.py
@@ -11,6 +11,16 @@
 import requests
 from PIL import Image

+# project dependencies
+from deepface.commons import package_utils
+
+
+tf_major_version = package_utils.get_tf_major_version()
+if tf_major_version == 1:
+    from keras.preprocessing import image
+elif tf_major_version == 2:
+    from tensorflow.keras.preprocessing import image
+

 def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
     """
@@ -66,8 +76,8 @@ def load_image_from_web(url: str) -> np.ndarray:
     response = requests.get(url, stream=True, timeout=60)
     response.raise_for_status()
     image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
-    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
-    return image
+    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
+    return img


 def load_base64(uri: str) -> np.ndarray:
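The local variable rename in load_image_from_web (image to img) presumably avoids shadowing the keras image module this file now imports at the top; the function's behavior is unchanged.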
@@ -157,3 +167,50 @@ def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
         raise ValueError(f"unimplemented normalization type - {normalization}")

     return img


+def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
+    """
+    Resize an image to expected size of a ml model with adding black pixels.
+    Args:
+        img (np.ndarray): pre-loaded image as numpy array
+        target_size (tuple): input shape of ml model
+    Returns:
+        img (np.ndarray): resized input image
+    """
+    factor_0 = target_size[0] / img.shape[0]
+    factor_1 = target_size[1] / img.shape[1]
+    factor = min(factor_0, factor_1)
+
+    dsize = (
+        int(img.shape[1] * factor),
+        int(img.shape[0] * factor),
+    )
+    img = cv2.resize(img, dsize)
+
+    diff_0 = target_size[0] - img.shape[0]
+    diff_1 = target_size[1] - img.shape[1]
+
+    # Put the base image in the middle of the padded image
+    img = np.pad(
+        img,
+        (
+            (diff_0 // 2, diff_0 - diff_0 // 2),
+            (diff_1 // 2, diff_1 - diff_1 // 2),
+            (0, 0),
+        ),
+        "constant",
+    )
+
+    # double check: if target image is not still the same size with target.
+    if img.shape[0:2] != target_size:
+        img = cv2.resize(img, target_size)
+
+    # make it 4-dimensional how ML models expect
+    img = image.img_to_array(img)
+    img = np.expand_dims(img, axis=0)
+
+    if img.max() > 1:
+        img = (img.astype(np.float32) / 255.0).astype(np.float32)
+
+    return img
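
A worked sketch of the padding behavior under assumed shapes: a 100x200 input resized to (224, 224) uses factor min(224/100, 224/200) = 1.12, so it is first scaled to 112x224, then padded with 56 black rows above and below before the batch dimension is added:

import numpy as np
from deepface.modules import preprocessing

img = np.random.rand(100, 200, 3)  # hypothetical face crop, already in [0, 1]
out = preprocessing.resize_image(img=img, target_size=(224, 224))
print(out.shape)  # (1, 224, 224, 3)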
(Diffs for the remaining changed files did not load and are not shown.)
