add libra rcnn (PaddlePaddle#198)

add libra rcnn, including * libra loss * libra fpn(bfp) * libra sampling
wanghuancoder · Feb 4, 2020 · 2cda4b2 · 2cda4b2
1 parent 4b05371
commit 2cda4b2
Show file tree

Hide file tree

Showing 12 changed files with 1,114 additions and 124 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
+.ipynb_checkpoints/
 *.py[cod]
 
 # C extensions

diff --git a/configs/libra_rcnn/README.md b/configs/libra_rcnn/README.md
@@ -0,0 +1,23 @@
+# Libra R-CNN: Towards Balanced Learning for Object Detection
+
+## Introduction
+
+- Libra R-CNN: Towards Balanced Learning for Object Detection:
+  [https://arxiv.org/abs/1904.02701](https://arxiv.org/abs/1904.02701)
+
+```
+@inproceedings{pang2019libra,
+  title={Libra R-CNN: Towards Balanced Learning for Object Detection},
+  author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Lin, Dahua},
+  booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
+  year={2019}
+}
+```
+
+
+## Model Zoo
+
+| Backbone                | Type     | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP |                           Download                           |
+| :---------------------- | :-------------:  | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: |
+| ResNet50-vd-BFP         | Faster     |     2     |   1x    |     18.247     |  40.5  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/libra_rcnn_r50_vd_fpn_1x.tar) |
+| ResNet101-vd-BFP         | Faster     |     2     |   1x    |     14.865     |  42.5  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/libra_rcnn_r101_vd_fpn_1x.tar) |
diff --git a/configs/libra_rcnn/libra_rcnn_r101_vd_fpn_1x.yml b/configs/libra_rcnn/libra_rcnn_r101_vd_fpn_1x.yml
@@ -0,0 +1,117 @@
+architecture: FasterRCNN
+max_iters: 90000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
+weights: output/libra_rcnn_r101_vd_fpn_1x/model_final
+metric: COCO
+num_classes: 81
+
+FasterRCNN:
+  backbone: ResNet
+  fpn: BFP
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: LibraBBoxAssigner
+
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+  variant: d
+
+BFP:
+  base_neck:
+    max_level: 6
+    min_level: 2
+    num_chan: 256
+    spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+  refine_level: 2
+  refine_type: nonlocal
+  nonlocal_reduction: 1.0
+
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+
+LibraBBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  bbox_loss: BalancedL1Loss
+
+BalancedL1Loss:
+  alpha: 0.5
+  gamma: 1.5
+  beta: 1.0
+  loss_weight: 1.0
+
+TwoFCHead:
+  mlp_dim: 1024
+
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
diff --git a/configs/libra_rcnn/libra_rcnn_r50_vd_fpn_1x.yml b/configs/libra_rcnn/libra_rcnn_r50_vd_fpn_1x.yml
@@ -0,0 +1,117 @@
+architecture: FasterRCNN
+max_iters: 90000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
+weights: output/libra_rcnn_r50_vd_fpn_1x/model_final
+metric: COCO
+num_classes: 81
+
+FasterRCNN:
+  backbone: ResNet
+  fpn: BFP
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: LibraBBoxAssigner
+
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+  variant: d
+
+BFP:
+  base_neck:
+    max_level: 6
+    min_level: 2
+    num_chan: 256
+    spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+  refine_level: 2
+  refine_type: nonlocal
+  nonlocal_reduction: 1.0
+
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+
+LibraBBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  bbox_loss: BalancedL1Loss
+
+BalancedL1Loss:
+  alpha: 0.5
+  gamma: 1.5
+  beta: 1.0
+  loss_weight: 1.0
+
+TwoFCHead:
+  mlp_dim: 1024
+
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py
@@ -27,6 +27,7 @@
 from . import res2net
 from . import hrnet
 from . import hrfpn
+from . import bfp
 
 from .resnet import *
 from .resnext import *
@@ -40,4 +41,5 @@
 from .cb_resnet import *
 from .res2net import *
 from .hrnet import *
-from .hrfpn import *
+from .hrfpn import *
+from .bfp import *