Skip to content

Commit

Permalink
[Feature] Support HorNet Backbone. (open-mmlab#1013)
Browse files Browse the repository at this point in the history
* add hornet

* add hornet

* add hornet

* add hornet

* add hornet

* add hornet

* add hornet

* fix test for torch before 1.7.0

* del timm

* fix readme

* fix readme

* Update mmcls/models/backbones/hornet.py

Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>

* fix docs

* fix docs

* s -> scale

* fix dims and dpr impl

* fix layer scale

* refactor gnconv

* add dw_cfg

* add convert tools

* update code

* update docs

* update readme

* update URLs

Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>
  • Loading branch information
okotaku and Ezra-Yu authored Sep 27, 2022
1 parent 56589ee commit 1047daa
Show file tree
Hide file tree
Showing 30 changed files with 1,240 additions and 77 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [PoolFormer](https://github.com/open-mmlab/mmclassification/tree/master/configs/poolformer)
- [x] [MViT](https://github.com/open-mmlab/mmclassification/tree/master/configs/mvit)
- [x] [EfficientFormer](https://github.com/open-mmlab/mmclassification/tree/master/configs/efficientformer)
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)

</details>

Expand Down
2 changes: 2 additions & 0 deletions README_zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ pip3 install -e .
- [x] [CSPNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/cspnet)
- [x] [PoolFormer](https://github.com/open-mmlab/mmclassification/tree/master/configs/poolformer)
- [x] [MViT](https://github.com/open-mmlab/mmclassification/tree/master/configs/mvit)
- [x] [EfficientFormer](https://github.com/open-mmlab/mmclassification/tree/master/configs/efficientformer)
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)

</details>

Expand Down
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-base-gf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `base-gf` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='base-gf',
        drop_path_rate=0.5,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=1024,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `base` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='base',
        drop_path_rate=0.5,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=1024,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-large-gf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `large-gf` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='large-gf',
        drop_path_rate=0.2,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=1536,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
17 changes: 17 additions & 0 deletions configs/_base_/models/hornet/hornet-large-gf384.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Model settings: an ImageClassifier using the HorNet `large-gf384` backbone.
# This config defines no `train_cfg`.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='large-gf384',
        drop_path_rate=0.4,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=1536,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-large.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `large` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='large',
        drop_path_rate=0.2,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=1536,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-small-gf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `small-gf` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='small-gf',
        drop_path_rate=0.4,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=768,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-small.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `small` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='small',
        drop_path_rate=0.4,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=768,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-tiny-gf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `tiny-gf` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='tiny-gf',
        drop_path_rate=0.2,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=512,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
21 changes: 21 additions & 0 deletions configs/_base_/models/hornet/hornet-tiny.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Model settings: an ImageClassifier using the HorNet `tiny` backbone.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='HorNet',
        arch='tiny',
        drop_path_rate=0.2,
    ),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        # NOTE(review): presumably the channel width of the backbone's last
        # stage for this arch -- confirm against the HorNet arch settings.
        in_channels=512,
        # Suppress the default init_cfg of LinearClsHead.
        init_cfg=None,
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
        ),
        cal_acc=False,
    ),
    # Initialisation rules, one entry per targeted layer type.
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0),
        dict(type='Constant', layer=['LayerScale'], val=1e-6),
    ],
    # Training-time augments: BatchMixup and BatchCutMix, each with prob=0.5.
    train_cfg=dict(
        augments=[
            dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
            dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5),
        ],
    ),
)
51 changes: 51 additions & 0 deletions configs/hornet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# HorNet

> [HorNet: Efficient High-Order Spatial Interactions with Recursive Gated Convolutions](https://arxiv.org/pdf/2207.14284v2.pdf)
<!-- [ALGORITHM] -->

## Abstract

Recent progress in vision Transformers exhibits great success in various tasks driven by the new spatial modeling mechanism based on dot-product self-attention. In this paper, we show that the key ingredients behind the vision Transformers, namely input-adaptive, long-range and high-order spatial interactions, can also be efficiently implemented with a convolution-based framework. We present the Recursive Gated Convolution ($g^n$Conv) that performs high-order spatial interactions with gated convolutions and recursive designs. The new operation is highly flexible and customizable, which is compatible with various variants of convolution and extends the two-order interactions in self-attention to arbitrary orders without introducing significant extra computation. $g^n$Conv can serve as a plug-and-play module to improve various vision Transformers and convolution-based models. Based on the operation, we construct a new family of generic vision backbones named HorNet. Extensive experiments on ImageNet classification, COCO object detection and ADE20K semantic segmentation show HorNet outperform Swin Transformers and ConvNeXt by a significant margin with similar overall architecture and training configurations. HorNet also shows favorable scalability to more training data and a larger model size. Apart from the effectiveness in visual encoders, we also show $g^n$Conv can be applied to task-specific decoders and consistently improve dense prediction performance with less computation. Our results demonstrate that $g^n$Conv can be a new basic module for visual modeling that effectively combines the merits of both vision Transformers and CNNs. Code is available at https://github.com/raoyongming/HorNet.

<div align=center>
<img src="https://user-images.githubusercontent.com/24734142/188356236-b8e3db94-eaa6-48e9-b323-15e5ba7f2991.png" width="80%"/>
</div>

## Results and models

### ImageNet-1k

| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------------------------------: | :----------------------------------------------------------------: |
| HorNet-T\* | From scratch | 224x224 | 22.41 | 3.98 | 82.84 | 96.24 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-tiny_3rdparty_in1k_20220915-0e8eedff.pth) |
| HorNet-T-GF\* | From scratch | 224x224 | 22.99 | 3.9 | 82.98 | 96.38 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-tiny-gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-tiny-gf_3rdparty_in1k_20220915-4c35a66b.pth) |
| HorNet-S\* | From scratch | 224x224 | 49.53 | 8.83 | 83.79 | 96.75 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-small_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-small_3rdparty_in1k_20220915-5935f60f.pth) |
| HorNet-S-GF\* | From scratch | 224x224 | 50.4 | 8.71 | 83.98 | 96.77 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-small-gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-small-gf_3rdparty_in1k_20220915-649ca492.pth) |
| HorNet-B\* | From scratch | 224x224 | 87.26 | 15.59 | 84.24 | 96.94 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-base_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-base_3rdparty_in1k_20220915-a06176bb.pth) |
| HorNet-B-GF\* | From scratch | 224x224 | 88.42 | 15.42 | 84.32 | 96.95 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-base-gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-base-gf_3rdparty_in1k_20220915-82c06fa7.pth) |

\*Models with \* are converted from [the official repo](https://github.com/raoyongming/HorNet). The config files of these models are provided for validation only. We cannot guarantee the training accuracy of these config files, and we welcome contributions of your reproduction results.

### Pre-trained Models

The pre-trained models on ImageNet-21k are used to fine-tune on the downstream tasks.

| Model | Pretrain | resolution | Params(M) | Flops(G) | Download |
| :--------------: | :----------: | :--------: | :-------: | :------: | :------------------------------------------------------------------------------------------------------------------------: |
| HorNet-L\* | ImageNet-21k | 224x224 | 194.54 | 34.83 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large_3rdparty_in21k_20220909-9ccef421.pth) |
| HorNet-L-GF\* | ImageNet-21k | 224x224 | 196.29 | 34.58 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large-gf_3rdparty_in21k_20220909-3aea3b61.pth) |
| HorNet-L-GF384\* | ImageNet-21k | 384x384 | 201.23 | 101.63 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large-gf384_3rdparty_in21k_20220909-80894290.pth) |

\*Models with * are converted from [the official repo](https://github.com/raoyongming/HorNet).

## Citation

```
@article{rao2022hornet,
title={HorNet: Efficient High-Order Spatial Interactions with Recursive Gated Convolutions},
author={Rao, Yongming and Zhao, Wenliang and Tang, Yansong and Zhou, Jie and Lim, Ser-Lam and Lu, Jiwen},
journal={arXiv preprint arXiv:2207.14284},
year={2022}
}
```
13 changes: 13 additions & 0 deletions configs/hornet/hornet-base-gf_8xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet base-gf on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-base-gf.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b64` in this config's file name.
data = dict(samples_per_gpu=64)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=1.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
13 changes: 13 additions & 0 deletions configs/hornet/hornet-base_8xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet base on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-base.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b64` in this config's file name.
data = dict(samples_per_gpu=64)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
13 changes: 13 additions & 0 deletions configs/hornet/hornet-small-gf_8xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet small-gf on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-small-gf.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b64` in this config's file name.
data = dict(samples_per_gpu=64)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=1.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
13 changes: 13 additions & 0 deletions configs/hornet/hornet-small_8xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet small on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-small.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b64` in this config's file name.
data = dict(samples_per_gpu=64)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
13 changes: 13 additions & 0 deletions configs/hornet/hornet-tiny-gf_8xb128_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet tiny-gf on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-tiny-gf.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b128` in this config's file name.
data = dict(samples_per_gpu=128)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=1.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
13 changes: 13 additions & 0 deletions configs/hornet/hornet-tiny_8xb128_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HorNet tiny on ImageNet-1k: inherit the model, dataset, schedule and
# runtime settings from the base files, then override the values below.
_base_ = [
    '../_base_/models/hornet/hornet-tiny.py',
    '../_base_/datasets/imagenet_bs64_swin_224.py',
    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
    '../_base_/default_runtime.py',
]

# Samples per GPU; matches the `b128` in this config's file name.
data = dict(samples_per_gpu=128)

optimizer = dict(lr=4e-3)
# NOTE: in MMCV configs, `_delete_=True` replaces the inherited
# `optimizer_config` instead of merging into it.
optimizer_config = dict(
    grad_clip=dict(max_norm=100.0),
    _delete_=True,
)

custom_hooks = [
    dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL'),
]
Loading

0 comments on commit 1047daa

Please sign in to comment.