Merge remote-tracking branch 'origin/main' into metaphys

# Conflicts:
#	dataset/dataset_loader.py
#	models.py
chenyouxin113 · Aug 9, 2021 · 8ae3e39
2 parents da5c373 + d5ee802
commit 8ae3e39
Show file tree

Hide file tree

Showing 10 changed files with 113 additions and 42 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ On-Device Contactless Vitals Measurement](https://papers.nips.cc/paper/2020/file
   + need to verification
 - [ ] DeepPhys + LSTM
 - [x] [3D physNet :  Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks](https://arxiv.org/abs/1905.02419)
-
+- [x] [2D phsyNet + LSTM](https://arxiv.org/abs/1905.02419)
 
 ## file list
 
@@ -71,5 +71,5 @@ wagon0004@tvstorm.com &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;wlst
 ## Funding
  This work was supported by the ICT R&D program of MSIP/IITP. [2021(2021-0-00900), Adaptive Federated Learning in Dynamic Heterogeneous Environment]
 
-##reference
+## reference
 1. [ZitongYu/PhysNet](https://github.com/ZitongYu/PhysNet)
diff --git a/dataset/dataset_loader.py b/dataset/dataset_loader.py
@@ -1,6 +1,3 @@
-import glob
-import os
-
 import h5py
 import numpy as np
 
@@ -41,7 +38,7 @@ def dataset_loader(save_root_path: str = "/media/hdd1/dy_dataset/",
         dataset = DeepPhysDataset(appearance_data=np.asarray(appearance_data),
                                   motion_data=np.asarray(motion_data),
                                   target=np.asarray(target_data))
-    elif model_name == "PhysNet":
+    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
         hpy_file = h5py.File(save_root_path + model_name + "_" + dataset_name + "_" + option + ".hdf5", "r")
         video_data = []
         label_data = []

diff --git a/main.py b/main.py
@@ -179,7 +179,7 @@
                            + params["dataset_name"] + "_" + str(epoch) + "_"
                            + str(min_val_loss) + '.pth')
 
-    if epoch + 1 == hyper_params["epochs"] or epoch % 10 == 0:
+    if epoch + 1 == hyper_params["epochs"] or epoch % 3 == 0:
         if __TIME__ and epoch == 0:
             start_time = time.time()
         with tqdm(test_loader, desc="test ", total=len(test_loader)) as tepoch:
@@ -193,7 +193,7 @@
                     loss = criterion(outputs, target)
                     running_loss += loss.item()
                     tepoch.set_postfix(loss=running_loss / (params["train_batch_size"] / params["test_batch_size"]))
-                    if model_params["name"] == "PhysNet":
+                    if model_params["name"] == "PhysNet" or model_params["name"] == "PhysNet_LSTM":
                         inference_array.extend(normalize(outputs.cpu().numpy()[0]))
                         target_array.extend(normalize(target.cpu().numpy()[0]))
                     else:

diff --git a/models.py b/models.py
@@ -1,10 +1,13 @@
+import torchinfo
 import torchsummary
 
 from log import log_warning, log_info
 from nets.models.DeepPhys import DeepPhys
 from nets.models.DeepPhys_DA import DeepPhys_DA
 from nets.models.PhysNet import PhysNet
 from nets.models.MetaPhys import TSCAN
+from nets.models.PhysNet import PhysNet_2DCNN_LSTM
+
 
 def get_model(model_name: str = "DeepPhys"):
     """
@@ -19,6 +22,8 @@ def get_model(model_name: str = "DeepPhys"):
         return PhysNet()
     elif model_name == "MetaPhys":
         return TSCAN()
+    elif model_name == "PhysNet_LSTM":
+        return PhysNet_2DCNN_LSTM()
     else:
         log_warning("use implemented model")
         raise NotImplementedError("implement a custom model(%s) in /nets/models/" % model_name)
@@ -46,8 +51,9 @@ def summary(model, model_name):
     log_info("=========================================")
     if model_name == "DeepPhys" or model_name == DeepPhys_DA:
         torchsummary.summary(model, (2, 3, 36, 36))
-    elif model_name == "PhysNet":
-        torchsummary.summary(model, (3, 32, 128, 128))
+    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
+        # torchsummary.summary(model, (3, 32, 128, 128))
+        torchinfo.summary(model,(1, 3, 32, 128, 128))
     elif model_name == "MetaPhys":
         print('rrrr')
     else:

diff --git a/nets/blocks/blocks.py b/nets/blocks/blocks.py
@@ -1,5 +1,17 @@
 import torch
 
+class ConvBlock2D(torch.nn.Module):
+    def __init__(self, in_channel, out_channel, kernel_size, stride, padding):
+        super(ConvBlock2D, self).__init__()
+        self.conv_block_2d = torch.nn.Sequential(
+            torch.nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding),
+            torch.nn.BatchNorm2d(out_channel),
+            torch.nn.ReLU(inplace=True)
+        )
+
+    def forward(self, x):
+        return self.conv_block_2d(x)
+
 
 class DeConvBlock3D(torch.nn.Module):
     def __init__(self, in_channel, out_channel, kernel_size, stride, padding):

diff --git a/nets/blocks/cnn_blocks.py b/nets/blocks/cnn_blocks.py
@@ -0,0 +1,31 @@
+import torch.nn
+
+from nets.blocks.blocks import ConvBlock2D
+
+
+class cnn_blocks(torch.nn.Module):
+    def __init__(self):
+        super(cnn_blocks, self).__init__()
+        self.cnn_blocks = torch.nn.Sequential(
+            ConvBlock2D(3, 16, [5, 5], [1, 1], [2, 2]),
+            torch.nn.MaxPool2d((2, 2), stride=(2, 2)),
+            ConvBlock2D(16, 32, [3, 3], [1, 1], [1, 1]),
+            ConvBlock2D(32, 64, [3, 3], [1, 1], [1, 1]),
+            torch.nn.MaxPool2d((2, 2), stride=(2, 2)),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            torch.nn.MaxPool2d((2, 2), stride=(2, 2)),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            torch.nn.MaxPool2d((2, 2), stride=(2, 2)),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            ConvBlock2D(64, 64, [3, 3], [1, 1], [1, 1]),
+            torch.nn.AdaptiveMaxPool2d(1)
+        )
+
+    def forward(self, x):
+        [batch, channel, length, width, height] = x.shape
+        x = x.reshape(batch * length, channel, width, height)
+        x = self.cnn_blocks(x)
+        return x.reshape(batch, length, -1)
+
diff --git a/nets/models/PhysNet.py b/nets/models/PhysNet.py
@@ -2,6 +2,7 @@
 
 from nets.blocks.decoder_blocks import decoder_block
 from nets.blocks.encoder_blocks import encoder_block
+from nets.blocks.cnn_blocks import cnn_blocks
 
 
 class PhysNet(torch.nn.Module):
@@ -17,3 +18,20 @@ def __init__(self, frames=32):
     def forward(self, x):
         [batch, channel, length, width, height] = x.shape
         return self.physnet(x).view(-1, length)
+
+class PhysNet_2DCNN_LSTM(torch.nn.Module):
+    def __init__(self, frame=32):
+        super(PhysNet_2DCNN_LSTM, self).__init__()
+        self.physnet_lstm = torch.nn.ModuleDict({
+            'cnn_blocks' : cnn_blocks(),
+            'lstm' : torch.nn.LSTM(input_size=64, hidden_size=64, num_layers=2, batch_first=True),
+            'cnn_flatten' : torch.nn.Conv1d(64, 1, 1, stride=1, padding=0)
+        })
+
+    def forward(self, x):
+        [batch, channel, length, width, height] = x.shape
+        x = self.physnet_lstm['cnn_blocks'](x)
+        x,(_,_) = self.physnet_lstm['lstm'](x)
+        x = x.reshape(batch, -1, length)
+        x = self.physnet_lstm['cnn_flatten'](x)
+        return x.reshape(-1, length)
diff --git a/params.json b/params.json
@@ -1,29 +1,33 @@
 {
     "__TIME__" : 1,
     "__PREPROCESSING__" : 0,
-    "__MODEL_SUMMARY__" : 0,
+    "__MODEL_SUMMARY__" : 1,
     "options":{
         "parallel_criterion" : 1,
         "parallel_criterion_comment" : "TODO need to verification"
     },
     "params":
     {
-        "save_root_path": "/media/hdd1/dy/dataset/",
+        "save_root_path": "/media/hdd1/js_dataset/",
         "data_root_path": "/media/hdd1/",
         "dataset_name": "UBFC",
-        "checkpoint_path" : "/media/hdd1/dy/checkpoint/",
-        "train_ratio": 0.8,
+        "checkpoint_path" : "/home/js/Desktop/tmp_local/checkpoint/",
+        "train_ratio": 0.7,
         "train_ratio_comment" : "generate train dataset using train_ratio",
         "validation_ratio": 0.9,
         "validation_ratio_comment" : "split train dataset using validation_ratio",
-        "train_batch_size" : 32,
+        "train_batch_size" : 8,
+        "train_batch_size_comment" :
+                [
+                    "PhysNet_LSTM : 8"
+                ],
         "train_shuffle" : 0,
-        "test_batch_size" : 32,
+        "test_batch_size" : 8,
         "test_shuffle" : 0
     },
     "hyper_params":
     {
-        "loss_fn": "mse",
+        "loss_fn": "neg_pearson",
         "loss_fn_comment":
                [
                    "mse","L1","neg_pearson","multi_margin","bce","huber","cosine_embedding",
@@ -32,26 +36,27 @@
                    "nll","nll2d","pairwise","poisson_nll","smooth_l1","soft_margin",
                    "triplet_margin","triplet_margin_distance"
                ],
-        "optimizer": "ada_delta",
+        "optimizer": "adam",
         "optimizer_comment":
                 [
                     "adam","sgd","rms_prop","ada_delta","ada_grad","ada_max",
                     "ada_mw","a_sgd","lbfgs","n_adam","r_adam","rprop","sparse_adam"
                 ],
-        "learning_rate": 1,
+        "learning_rate": 0.001,
         "learning_rate_comment": [
             "DeepPhys : lr = 1",
             "PhysNet : lr = 0.001"
         ],
-        "epochs" : 30
+        "epochs" : 20
     },
     "model_params":
     {
-        "name": "DeepPhys",
+        "name": "PhysNet_LSTM",
         "name_comment":
                 [
                     "DeepPhys",
-                    "PhysNet"
+                    "PhysNet",
+                    "PhysNet_LSTM"
                 ]
     }
 }
diff --git a/utils/dataset_preprocess.py b/utils/dataset_preprocess.py
@@ -120,7 +120,7 @@ def preprocess_Dataset(path, flag, model_name, return_dict):
     """
     if model_name == "DeepPhys":
         rst, preprocessed_video = Deepphys_preprocess_Video(path + "/vid.avi", flag)
-    elif model_name == "PhysNet":
+    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
         rst, preprocessed_video = PhysNet_preprocess_Video(path + "/vid.avi", flag)
     elif model_name == "MetaPhys":
         rst, preprocessed_video = Deepphys_preprocess_Video(path + "/vid.avi", flag)
@@ -129,7 +129,7 @@ def preprocess_Dataset(path, flag, model_name, return_dict):
 
     if model_name == "DeepPhys":
         preprocessed_label = Deepphys_preprocess_Label(path + "/ground_truth.txt")
-    elif model_name == "PhysNet":
+    elif model_name == "PhysNet" or model_name == "PhysNet_LSTM":
         preprocessed_label = PhysNet_preprocess_Label(path + "/ground_truth.txt")
     elif model_name == "MetaPhys":
         preprocessed_label = Deepphys_preprocess_Label(path + "/ground_truth.txt")

diff --git a/utils/image_preprocess.py b/utils/image_preprocess.py
@@ -1,5 +1,6 @@
 import cv2
 import numpy as np
+from tqdm import tqdm
 from face_recognition import face_locations
 from skimage.util import img_as_float
 
@@ -55,25 +56,26 @@ def PhysNet_preprocess_Video(path, flag):
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     raw_video = np.empty((frame_total, 128, 128, 3))
-    prev_frame = None
     j = 0
-    while cap.isOpened():
-        ret, frame = cap.read()
-        if frame is None:
-            break
-        if flag:
-            rst, crop_frame = faceDetection(frame)
-            if not rst:  # can't detect face
-                return False, None
-        else:
-            crop_frame = frame[:, int(width / 2) - int(height / 2 + 1):int(height / 2) + int(width / 2), :]
-
-        crop_frame = cv2.resize(crop_frame, dsize=(128, 128), interpolation=cv2.INTER_AREA)
-        crop_frame = generate_Floatimage(crop_frame)
-
-        raw_video[j] = crop_frame
-        j += 1
-    cap.release()
+    with tqdm(total=frame_total, position=0, leave=True, desc=path) as pbar:
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if frame is None:
+                break
+            if flag:
+                rst, crop_frame = faceDetection(frame)
+                if not rst:  # can't detect face
+                    return False, None
+            else:
+                crop_frame = frame[:, int(width / 2) - int(height / 2 + 1):int(height / 2) + int(width / 2), :]
+
+            crop_frame = cv2.resize(crop_frame, dsize=(128, 128), interpolation=cv2.INTER_AREA)
+            crop_frame = generate_Floatimage(crop_frame)
+
+            raw_video[j] = crop_frame
+            j += 1
+            pbar.update(1)
+        cap.release()
 
     split_raw_video = np.zeros(((frame_total // 32), 32, 128, 128, 3))
     index = 0