update face and head detection

mayujie · mayujie · commit eeb5b8459edc · 2020-08-08T13:53:24.000+02:00
diff --git a/src/face_detection.py b/src/face_detection.py
@@ -2,45 +2,135 @@
 This is a sample class for a model. You may choose to use it as-is or make any changes to it.
 This has been provided just to give you an idea of how to structure your model class.
 '''
+import os
+
+import cv2
+import numpy as np
+from openvino.inference_engine import IECore
 
-class Model_X:
+class FaceDetectionModel:
     '''
     Class for the Face Detection Model.
+
+    Wraps an OpenVINO IR face detector: load_model() prepares the network
+    and predict() returns the first detected face cropped from a frame.
     '''
     def __init__(self, model_name, device='CPU', extensions=None):
         '''
-        TODO: Use this to set your instance variables.
+        Record where the model lives and which device to run it on.
+
+        model_name: path to the model's IR .xml file; the .bin weights
+            file is looked up under the same base name.
+        device: device name handed to the Inference Engine calls.
+        extensions: optional path to a CPU extension library, or None.
         '''
-        raise NotImplementedError
+        self.model_name = model_name
+        self.device = device
+        self.extensions = extensions
+        self.model_structure = self.model_name  # model xml file
+        # splitext strips only the final suffix, so dots elsewhere in the
+        # path (e.g. "./models/face.xml") do not truncate the weights path.
+        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
+        self.plugin = None
+        self.network = None
+        self.exec_net = None
+        self.input_name = None
+        self.input_shape = None
+        self.output_names = None
+        self.output_shape = None
 
     def load_model(self):
         '''
-        TODO: You will need to complete this method.
-        This method is for loading the model to the device specified by the user.
-        If your model requires any Plugins, this is where you can load them.
+        Read the IR files, verify layer support and load the network.
+
+        On CPU, if some layers are unsupported the extension library from
+        self.extensions is added and the check is repeated. The process
+        exits with status 1 when no extension was given or when layers are
+        still unsupported afterwards.
         '''
-        raise NotImplementedError
+        self.plugin = IECore()
+        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
+
+        unsupported_layers = self._unsupported_layers()
+        # As before, only the CPU device gets the extension fallback.
+        if unsupported_layers and self.device == 'CPU':
+            print('unsupported layers found:{}'.format(unsupported_layers))
+            if self.extensions is None:
+                print("Give the path of cpu extension")
+                raise SystemExit(1)
+            print("Adding cpu_extension")
+            self.plugin.add_extension(self.extensions, self.device)
+            # Query again now that the extension is registered.
+            if self._unsupported_layers():
+                print("After adding the extensions still unsupported layers found")
+                raise SystemExit(1)
+            print("After adding the extension the issue is resolved")
+
+        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)
+
+        # The first input and first output are the ones used for inference.
+        self.input_name = next(iter(self.network.inputs))
+        self.input_shape = self.network.inputs[self.input_name].shape
+        self.output_names = next(iter(self.network.outputs))
+        self.output_shape = self.network.outputs[self.output_names].shape
+
+    def _unsupported_layers(self):
+        '''
+        Return the names of network layers the target device cannot run.
+        '''
+        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
+        return [name for name in self.network.layers.keys() if name not in supported_layers]
 
-    def predict(self, image):
+    def predict(self, image, prob_threshold=0.6):
         '''
-        TODO: You will need to complete this method.
-        This method is meant for running predictions on the input image.
+        Detect faces in a frame and crop the first one above the threshold.
+
+        image: frame indexed as [row, column, channel].
+        prob_threshold: minimum confidence for a detection to be kept.
+
+        Returns (cropped_face, box) where box is an int32 array
+        [x_min, y_min, x_max, y_max] in pixels, or (0, 0) when no
+        detection passes the threshold.
         '''
-        raise NotImplementedError
+        img_processed = self.preprocess_input(image)
+        outputs = self.exec_net.infer({self.input_name: img_processed})
+        coords = self.preprocess_output(outputs, prob_threshold)
+        if len(coords) == 0:
+            return 0, 0
+
+        h, w = image.shape[:2]
+        limits = np.array([w, h, w, h])
+        # Box values are fractions of the frame size; scale them to pixels.
+        box = np.array(coords[0]) * limits
+        # Clamp to the frame: a negative index would slice from the far end
+        # of the array and produce a wrong crop.
+        box = np.clip(box, 0, limits).astype(np.int32)
+        cropped_face = image[box[1]:box[3], box[0]:box[2]]
+        return cropped_face, box
 
     def check_model(self):
-        raise NotImplementedError
+        '''
+        Placeholder from the template; layer support is checked in load_model().
+        '''
 
     def preprocess_input(self, image):
-    '''
-    Before feeding the data into the model for inference,
-    you might have to preprocess it. This function is where you can do that.
-    '''
-        raise NotImplementedError
+        '''
+        Resize a frame to the network input size and reorder it to BxCxHxW.
+        '''
+        # input_shape is read as [B, C, H, W]; cv2.resize takes (width, height).
+        image_resized = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
+        # HxWxC -> CxHxW, then prepend the batch axis.
+        return np.expand_dims(image_resized.transpose((2, 0, 1)), axis=0)
 
-    def preprocess_output(self, outputs):
-    '''
-    Before feeding the output of this model to the next model,
-    you might have to preprocess the output. This function is where you can do that.
-    '''
-        raise NotImplementedError
+    def preprocess_output(self, outputs, prob_threshold=0.6):
+        '''
+        Keep the boxes of detections whose confidence exceeds the threshold.
+
+        outputs: dict returned by infer(); each detection row is read as
+            [.., .., confidence, x_min, y_min, x_max, y_max].
+        prob_threshold: minimum confidence for a detection to be kept.
+
+        Returns a list of [x_min, y_min, x_max, y_max] lists, in model order.
+        '''
+        detections = outputs[self.output_names][0][0]
+        return [list(det[3:7]) for det in detections if det[2] > prob_threshold]
diff --git a/src/head_pose_estimation.py b/src/head_pose_estimation.py
@@ -2,45 +2,118 @@
 This is a sample class for a model. You may choose to use it as-is or make any changes to it.
 This has been provided just to give you an idea of how to structure your model class.
 '''
+import os
+
+import cv2
+import numpy as np
+from openvino.inference_engine import IECore
 
-class Model_X:
+class HeadPoseEstimationModel:
     '''
-    Class for the Face Detection Model.
+    Class for the Head Pose Estimation Model.
+
+    Wraps an OpenVINO IR head pose estimator: load_model() prepares the
+    network and predict() returns [yaw, pitch, roll] for an input image.
     '''
     def __init__(self, model_name, device='CPU', extensions=None):
         '''
-        TODO: Use this to set your instance variables.
+        Record where the model lives and which device to run it on.
+
+        model_name: path to the model's IR .xml file; the .bin weights
+            file is looked up under the same base name.
+        device: device name handed to the Inference Engine calls.
+        extensions: optional path to a CPU extension library, or None.
         '''
-        raise NotImplementedError
+        self.model_name = model_name
+        self.device = device
+        self.extensions = extensions
+        self.model_structure = self.model_name
+        # splitext strips only the final suffix, so dots elsewhere in the
+        # path (e.g. "./models/pose.xml") do not truncate the weights path.
+        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
+        self.plugin = None
+        self.network = None
+        self.exec_net = None
+        self.input_name = None
+        self.input_shape = None
+        self.output_names = None
 
     def load_model(self):
         '''
-        TODO: You will need to complete this method.
-        This method is for loading the model to the device specified by the user.
-        If your model requires any Plugins, this is where you can load them.
+        Read the IR files, verify layer support and load the network.
+
+        On CPU, if some layers are unsupported the extension library from
+        self.extensions is added and the check is repeated. The process
+        exits with status 1 when no extension was given or when layers are
+        still unsupported afterwards.
         '''
-        raise NotImplementedError
+        self.plugin = IECore()
+        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
+
+        unsupported_layers = self._unsupported_layers()
+        # As before, only the CPU device gets the extension fallback.
+        if unsupported_layers and self.device == 'CPU':
+            print("unsupported layers found{}".format(unsupported_layers))
+            if self.extensions is None:
+                print("Give the path of cpu extension")
+                raise SystemExit(1)
+            print("Adding cpu_extension")
+            self.plugin.add_extension(self.extensions, self.device)
+            # Query again now that the extension is registered.
+            if self._unsupported_layers():
+                print("After adding the extensions still unsupported layers found")
+                raise SystemExit(1)
+            print("After adding the extension the issue is resolved")
+
+        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)
+
+        self.input_name = next(iter(self.network.inputs))
+        self.input_shape = self.network.inputs[self.input_name].shape
+        self.output_names = list(self.network.outputs.keys())
+
+    def _unsupported_layers(self):
+        '''
+        Return the names of network layers the target device cannot run.
+        '''
+        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
+        return [name for name in self.network.layers.keys() if name not in supported_layers]
 
     def predict(self, image):
         '''
-        TODO: You will need to complete this method.
-        This method is meant for running predictions on the input image.
+        Run the head pose network on one image.
+
+        image: image indexed as [row, column, channel].
+
+        Returns [yaw, pitch, roll] as produced by preprocess_output().
         '''
-        raise NotImplementedError
+        img_processed = self.preprocess_input(image)
+        outputs = self.exec_net.infer({self.input_name: img_processed})
+        return self.preprocess_output(outputs)
 
     def check_model(self):
-        raise NotImplementedError
+        '''
+        Placeholder from the template; layer support is checked in load_model().
+        '''
 
     def preprocess_input(self, image):
-    '''
-    Before feeding the data into the model for inference,
-    you might have to preprocess it. This function is where you can do that.
-    '''
-        raise NotImplementedError
+        '''
+        Resize an image to the network input size and reorder it to BxCxHxW.
+        '''
+        # input_shape is read as [B, C, H, W]; cv2.resize takes (width, height).
+        image_resized = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
+        # HxWxC -> CxHxW, then prepend the batch axis.
+        return np.expand_dims(image_resized.transpose((2, 0, 1)), axis=0)
 
     def preprocess_output(self, outputs):
-    '''
-    Before feeding the output of this model to the next model,
-    you might have to preprocess the output. This function is where you can do that.
-    '''
-        raise NotImplementedError
+        '''
+        Pull the three head pose angles out of the raw inference result.
+
+        outputs: dict returned by infer(), read by these output layer names
+            (each documented with shape [1, 1], in degrees):
+            "angle_y_fc" - yaw, "angle_p_fc" - pitch, "angle_r_fc" - roll.
+        https://docs.openvinotoolkit.org/latest/omz_models_intel_head_pose_estimation_adas_0001_description_head_pose_estimation_adas_0001.html
+
+        Returns [yaw, pitch, roll] as Python floats.
+        '''
+        layer_names = ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')
+        return [outputs[name].tolist()[0][0] for name in layer_names]