update CA Attention, multi GPU, heatmap, lr, count

bubbliiiing · bubbliiiing · commit cf1218b7a87e · 2022-04-13T18:20:55.000+08:00
diff --git a/nets/attention.py b/nets/attention.py
@@ -98,3 +98,28 @@ def eca_block(input_feature, b=1, gamma=2, name=""):
 
 	output = multiply([input_feature,x])
 	return output
+
+def ca_block(input_feature, ratio=16, name=""):
+	"""Coordinate Attention: gate an NHWC feature map with per-row and per-column sigmoid weights."""
+	# Height and width must be statically known: they are used as the split sizes further down.
+	_, h, w, channel = K.int_shape(input_feature)
+	# Directional average pooling: x_h keeps one descriptor per row, x_w one per column (both use the mean).
+	x_h = Lambda(lambda x: K.mean(x, axis=2, keepdims=True))(input_feature)
+	x_h = Lambda(lambda x: K.permute_dimensions(x, [0, 2, 1, 3]))(x_h)
+	x_w = Lambda(lambda x: K.mean(x, axis=1, keepdims=True))(input_feature)
+	# Concatenate in (h, w) order so the [h, w] split below hands each branch its own descriptors.
+	x_cat_conv_relu = Concatenate(axis=2)([x_h, x_w])
+	x_cat_conv_relu = Conv2D(channel // ratio, kernel_size=1, strides=1, use_bias=False, name = "ca_block_conv1_"+str(name))(x_cat_conv_relu)
+	x_cat_conv_relu = Activation('relu')(x_cat_conv_relu)
+	# Height branch: restore the (batch, h, 1, c) layout so the gate broadcasts over the width axis.
+	x_cat_conv_split_h, x_cat_conv_split_w = Lambda(lambda x: tf.split(x, num_or_size_splits=[h, w], axis=2))(x_cat_conv_relu)
+	x_cat_conv_split_h = Lambda(lambda x: K.permute_dimensions(x, [0, 2, 1, 3]))(x_cat_conv_split_h)
+	x_cat_conv_split_h = Conv2D(channel, kernel_size=1, strides=1, use_bias=False, name = "ca_block_conv2_"+str(name))(x_cat_conv_split_h)
+	x_cat_conv_split_h = Activation('sigmoid')(x_cat_conv_split_h)
+	# Width branch: already laid out as (batch, 1, w, c), so the gate broadcasts over the height axis.
+	x_cat_conv_split_w = Conv2D(channel, kernel_size=1, strides=1, use_bias=False, name = "ca_block_conv3_"+str(name))(x_cat_conv_split_w)
+	x_cat_conv_split_w = Activation('sigmoid')(x_cat_conv_split_w)
+	# Apply both gates multiplicatively; the result has the same shape as input_feature.
+	output = multiply([input_feature, x_cat_conv_split_h])
+	output = multiply([output, x_cat_conv_split_w])
+	return output
diff --git a/nets/yolo.py b/nets/yolo.py
@@ -2,12 +2,12 @@
 from tensorflow.keras.models import Model
 from utils.utils import compose
 
-from nets.attention import cbam_block, eca_block, se_block
+from nets.attention import cbam_block, eca_block, se_block, ca_block
 from nets.CSPdarknet53_tiny import (DarknetConv2D, DarknetConv2D_BN_Leaky,
                                     darknet_body)
 from nets.yolo_training import yolo_loss
 
-attention = [se_block, cbam_block, eca_block]
+attention = [se_block, cbam_block, eca_block, ca_block]
 
 #---------------------------------------------------#
 #   特征层->最后的输出
@@ -20,7 +20,7 @@ def yolo_body(input_shape, anchors_mask, num_classes, phi = 0, weight_decay=5e-4
     #   feat2的shape为13,13,512
     #---------------------------------------------------#
     feat1, feat2 = darknet_body(inputs, weight_decay=weight_decay)
-    if phi >= 1 and phi <= 3:
+    if phi >= 1 and phi <= 4:
         feat1 = attention[phi - 1](feat1, name='feat1')
         feat2 = attention[phi - 1](feat2, name='feat2')
 
@@ -32,7 +32,7 @@ def yolo_body(input_shape, anchors_mask, num_classes, phi = 0, weight_decay=5e-4
     
     # 13,13,256 -> 13,13,128 -> 26,26,128
     P5_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1), weight_decay=weight_decay), UpSampling2D(2))(P5)
-    if phi >= 1 and phi <= 3:
+    if phi >= 1 and phi <= 4:
         P5_upsample = attention[phi - 1](P5_upsample, name='P5_upsample')
 
     # 26,26,256 + 26,26,128 -> 26,26,384
diff --git a/nets/yolo_training.py b/nets/yolo_training.py
@@ -291,7 +291,7 @@ def loop_body(b, ignore_mask):
             loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, tf.shape(ignore_mask)], summarize=100, message='loss: ')
     return loss
 
-def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.1, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.3, step_num = 10):
+def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
     def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
         if iters <= warmup_total_iters:
             # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
diff --git a/predict.py b/predict.py
@@ -19,41 +19,55 @@
     yolo = YOLO()
     #----------------------------------------------------------------------------------------------------------#
     #   mode用于指定测试的模式：
-    #   'predict'表示单张图片预测，如果想对预测过程进行修改，如保存图片，截取对象等，可以先看下方详细的注释
-    #   'video'表示视频检测，可调用摄像头或者视频进行检测，详情查看下方注释。
-    #   'fps'表示测试fps，使用的图片是img里面的street.jpg，详情查看下方注释。
-    #   'dir_predict'表示遍历文件夹进行检测并保存。默认遍历img文件夹，保存img_out文件夹，详情查看下方注释。
+    #   'predict'           表示单张图片预测，如果想对预测过程进行修改，如保存图片，截取对象等，可以先看下方详细的注释
+    #   'video'             表示视频检测，可调用摄像头或者视频进行检测，详情查看下方注释。
+    #   'fps'               表示测试fps，使用的图片是img里面的street.jpg，详情查看下方注释。
+    #   'dir_predict'       表示遍历文件夹进行检测并保存。默认遍历img文件夹，保存img_out文件夹，详情查看下方注释。
+    #   'heatmap'           表示进行预测结果的热力图可视化，详情查看下方注释。
     #----------------------------------------------------------------------------------------------------------#
     mode = "predict"
     #-------------------------------------------------------------------------#
-    #   crop指定了是否在单张图片预测后对目标进行截取
-    #   crop仅在mode='predict'时有效
+    #   crop                指定了是否在单张图片预测后对目标进行截取
+    #   count               指定了是否进行目标的计数
+    #   crop、count仅在mode='predict'时有效
     #-------------------------------------------------------------------------#
     crop            = False
+    count           = False
     #----------------------------------------------------------------------------------------------------------#
-    #   video_path用于指定视频的路径，当video_path=0时表示检测摄像头
-    #   想要检测视频，则设置如video_path = "xxx.mp4"即可，代表读取出根目录下的xxx.mp4文件。
-    #   video_save_path表示视频保存的路径，当video_save_path=""时表示不保存
-    #   想要保存视频，则设置如video_save_path = "yyy.mp4"即可，代表保存为根目录下的yyy.mp4文件。
-    #   video_fps用于保存的视频的fps
+    #   video_path          用于指定视频的路径，当video_path=0时表示检测摄像头
+    #                       想要检测视频，则设置如video_path = "xxx.mp4"即可，代表读取出根目录下的xxx.mp4文件。
+    #   video_save_path     表示视频保存的路径，当video_save_path=""时表示不保存
+    #                       想要保存视频，则设置如video_save_path = "yyy.mp4"即可，代表保存为根目录下的yyy.mp4文件。
+    #   video_fps           用于保存的视频的fps
+    #
     #   video_path、video_save_path和video_fps仅在mode='video'时有效
     #   保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。
     #----------------------------------------------------------------------------------------------------------#
     video_path      = 0
     video_save_path = ""
     video_fps       = 25.0
-    #-------------------------------------------------------------------------#
-    #   test_interval用于指定测量fps的时候，图片检测的次数
-    #   理论上test_interval越大，fps越准确。
-    #-------------------------------------------------------------------------#
+    #----------------------------------------------------------------------------------------------------------#
+    #   test_interval       用于指定测量fps的时候，图片检测的次数。理论上test_interval越大，fps越准确。
+    #   fps_image_path      用于指定测试的fps图片
+    #   
+    #   test_interval和fps_image_path仅在mode='fps'有效
+    #----------------------------------------------------------------------------------------------------------#
     test_interval   = 100
+    fps_image_path  = "img/street.jpg"
     #-------------------------------------------------------------------------#
-    #   dir_origin_path指定了用于检测的图片的文件夹路径
-    #   dir_save_path指定了检测完图片的保存路径
+    #   dir_origin_path     指定了用于检测的图片的文件夹路径
+    #   dir_save_path       指定了检测完图片的保存路径
+    #   
     #   dir_origin_path和dir_save_path仅在mode='dir_predict'时有效
     #-------------------------------------------------------------------------#
     dir_origin_path = "img/"
     dir_save_path   = "img_out/"
+    #-------------------------------------------------------------------------#
+    #   heatmap_save_path   热力图的保存路径，默认保存在model_data下
+    #   
+    #   heatmap_save_path仅在mode='heatmap'有效
+    #-------------------------------------------------------------------------#
+    heatmap_save_path = "model_data/heatmap_vision.png"
 
     if mode == "predict":
         '''
@@ -72,7 +86,7 @@
                 print('Open Error! Try again!')
                 continue
             else:
-                r_image = yolo.detect_image(image, crop = crop)
+                r_image = yolo.detect_image(image, crop = crop, count = count)
                 r_image.show()
 
     elif mode == "video":
@@ -121,16 +135,17 @@
             print("Save processed video to the path :" + video_save_path)
             out.release()
         cv2.destroyAllWindows()
-
+        
     elif mode == "fps":
-        img = Image.open('img/street.jpg')
+        img = Image.open(fps_image_path)
         tact_time = yolo.get_FPS(img, test_interval)
         print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
 
     elif mode == "dir_predict":
         import os
+
         from tqdm import tqdm
-        
+
         img_names = os.listdir(dir_origin_path)
         for img_name in tqdm(img_names):
             if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
@@ -141,5 +156,16 @@
                     os.makedirs(dir_save_path)
                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
 
+    elif mode == "heatmap":
+        while True:
+            img = input('Input image filename:')
+            try:
+                image = Image.open(img)
+            except:
+                print('Open Error! Try again!')
+                continue
+            else:
+                yolo.detect_heatmap(image, heatmap_save_path)
+        
     else:
         raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
diff --git a/train.py b/train.py
@@ -46,6 +46,12 @@
     #----------------------------------------------------#
     eager           = False
     #---------------------------------------------------------------------#
+    #   train_gpu   训练用到的GPU
+    #               默认为第一张卡、双卡为[0, 1]、三卡为[0, 1, 2]
+    #               在使用多GPU时，每个卡上的batch为总batch除以卡的数量。
+    #---------------------------------------------------------------------#
+    train_gpu       = [0,]
+    #---------------------------------------------------------------------#
     #   classes_path    指向model_data下的txt，与自己训练的数据集相关 
     #                   训练前一定要修改classes_path，使其对应自己的数据集
     #---------------------------------------------------------------------#
@@ -87,6 +93,7 @@
     #   phi = 1为SE
     #   phi = 2为CBAM
     #   phi = 3为ECA
+    #   phi = 4为CA
     #-------------------------------#
     phi             = 0
     #------------------------------------------------------------------#
@@ -213,26 +220,53 @@
     train_annotation_path   = '2007_train.txt'
     val_annotation_path     = '2007_val.txt'
 
+    #------------------------------------------------------#
+    #   设置用到的显卡
+    #------------------------------------------------------#
+    os.environ["CUDA_VISIBLE_DEVICES"]  = ','.join(str(x) for x in train_gpu)
+    ngpus_per_node                      = len(train_gpu)
+    
+    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
+    for gpu in gpus:
+        tf.config.experimental.set_memory_growth(gpu, True)
+        
+    strategy = tf.distribute.MirroredStrategy()
+    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
+
     #----------------------------------------------------#
     #   获取classes和anchor
     #----------------------------------------------------#
     class_names, num_classes = get_classes(classes_path)
     anchors, num_anchors     = get_anchors(anchors_path)
 
-    #------------------------------------------------------#
-    #   创建yolo模型
-    #------------------------------------------------------#
-    model_body  = yolo_body((None, None, 3), anchors_mask, num_classes, phi = phi, weight_decay = weight_decay)
-    if model_path != '':
+    if ngpus_per_node > 1:
+        with strategy.scope():
+            #------------------------------------------------------#
+            #   创建yolo模型
+            #------------------------------------------------------#
+            model_body  = yolo_body((input_shape[0], input_shape[1], 3), anchors_mask, num_classes, phi = phi, weight_decay = weight_decay)
+            if model_path != '':
+                #------------------------------------------------------#
+                #   载入预训练权重
+                #------------------------------------------------------#
+                print('Load weights {}.'.format(model_path))
+                model_body.load_weights(model_path, by_name=True, skip_mismatch=True)
+            if not eager:
+                model = get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask, label_smoothing)
+    else:
         #------------------------------------------------------#
-        #   载入预训练权重
+        #   创建yolo模型
         #------------------------------------------------------#
-        print('Load weights {}.'.format(model_path))
-        model_body.load_weights(model_path, by_name=True, skip_mismatch=True)
-
-    if not eager:
-        model = get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask, label_smoothing)
-
+        model_body  = yolo_body((input_shape[0], input_shape[1], 3), anchors_mask, num_classes, phi = phi, weight_decay = weight_decay)
+        if model_path != '':
+            #------------------------------------------------------#
+            #   载入预训练权重
+            #------------------------------------------------------#
+            print('Load weights {}.'.format(model_path))
+            model_body.load_weights(model_path, by_name=True, skip_mismatch=True)
+        if not eager:
+            model = get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask, label_smoothing)
+        
     #---------------------------#
     #   读取数据集对应的txt
     #---------------------------#
@@ -360,7 +394,11 @@
             start_epoch = Init_Epoch
             end_epoch   = Freeze_Epoch if Freeze_Train else UnFreeze_Epoch
 
-            model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+            if ngpus_per_node > 1:
+                with strategy.scope():
+                    model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+            else:
+                model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
             #-------------------------------------------------------------------------------#
             #   训练参数的设置
             #   logging         用于设置tensorboard的保存地址
@@ -417,7 +455,11 @@
 
                 for i in range(len(model_body.layers)): 
                     model_body.layers[i].trainable = True
-                model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+                if ngpus_per_node > 1:
+                    with strategy.scope():
+                        model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+                else:
+                    model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
 
                 epoch_step      = num_train // batch_size
                 epoch_step_val  = num_val // batch_size
diff --git a/yolo.py b/yolo.py