forked from open-mmlab/mmpretrain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Support HorNet Backbone. (open-mmlab#1013)
* add hornet * add hornet * add hornet * add hornet * add hornet * add hornet * add hornet * fix test for torch before 1.7.0 * del timm * fix readme * fix readme * Update mmcls/models/backbones/hornet.py Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com> * fix docs * fix docs * s -> scale * fix dims and dpr impl * fix layer scale * refactor gnconv * add dw_cfg * add convert tools * update code * update docs * update readme * update URLs Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>
- Loading branch information
Showing
30 changed files
with
1,240 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='base-gf') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='base-gf', drop_path_rate=0.5), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=1024, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='base') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='base', drop_path_rate=0.5), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=1024, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='large-gf') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='large-gf', drop_path_rate=0.2), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=1536, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='large-gf384') and a | ||
# LinearClsHead over 1000 classes. No train_cfg (batch augmentations) is | ||
# set in this config. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='large-gf384', drop_path_rate=0.4), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=1536, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='large') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='large', drop_path_rate=0.2), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=1536, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='small-gf') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='small-gf', drop_path_rate=0.4), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=768, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='small') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='small', drop_path_rate=0.4), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=768, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='tiny-gf') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='tiny-gf', drop_path_rate=0.2), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=512, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# model settings | ||
# ImageClassifier built from a HorNet backbone (arch='tiny') and a | ||
# LinearClsHead over 1000 classes. | ||
model = dict( | ||
type='ImageClassifier', | ||
backbone=dict(type='HorNet', arch='tiny', drop_path_rate=0.2), | ||
head=dict( | ||
type='LinearClsHead', | ||
num_classes=1000, | ||
# NOTE(review): presumably the backbone's output channel count for | ||
# this arch -- confirm against the HorNet arch settings. | ||
in_channels=512, | ||
init_cfg=None, # suppress the default init_cfg of LinearClsHead. | ||
loss=dict( | ||
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), | ||
cal_acc=False), | ||
# Initialisation rules selected by layer type. | ||
init_cfg=[ | ||
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), | ||
dict(type='Constant', layer='LayerNorm', val=1., bias=0.), | ||
dict(type='Constant', layer=['LayerScale'], val=1e-6) | ||
], | ||
# Batch augmentations: BatchMixup and BatchCutMix, each with prob=0.5. | ||
train_cfg=dict(augments=[ | ||
dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), | ||
dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) | ||
])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# HorNet | ||
|
||
> [HorNet: Efficient High-Order Spatial Interactions with Recursive Gated Convolutions](https://arxiv.org/pdf/2207.14284v2.pdf) | ||
<!-- [ALGORITHM] --> | ||
|
||
## Abstract | ||
|
||
Recent progress in vision Transformers exhibits great success in various tasks driven by the new spatial modeling mechanism based on dot-product self-attention. In this paper, we show that the key ingredients behind the vision Transformers, namely input-adaptive, long-range and high-order spatial interactions, can also be efficiently implemented with a convolution-based framework. We present the Recursive Gated Convolution (g<sup>n</sup>Conv) that performs high-order spatial interactions with gated convolutions and recursive designs. The new operation is highly flexible and customizable, which is compatible with various variants of convolution and extends the two-order interactions in self-attention to arbitrary orders without introducing significant extra computation. g<sup>n</sup>Conv can serve as a plug-and-play module to improve various vision Transformers and convolution-based models. Based on the operation, we construct a new family of generic vision backbones named HorNet. Extensive experiments on ImageNet classification, COCO object detection and ADE20K semantic segmentation show HorNet outperforms Swin Transformers and ConvNeXt by a significant margin with similar overall architecture and training configurations. HorNet also shows favorable scalability to more training data and a larger model size. Apart from the effectiveness in visual encoders, we also show g<sup>n</sup>Conv can be applied to task-specific decoders and consistently improve dense prediction performance with less computation. Our results demonstrate that g<sup>n</sup>Conv can be a new basic module for visual modeling that effectively combines the merits of both vision Transformers and CNNs. Code is available at https://github.com/raoyongming/HorNet. | ||
|
||
<div align=center> | ||
<img src="https://user-images.githubusercontent.com/24734142/188356236-b8e3db94-eaa6-48e9-b323-15e5ba7f2991.png" width="80%"/> | ||
</div> | ||
|
||
## Results and models | ||
|
||
### ImageNet-1k | ||
|
||
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | | ||
| :-----------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------------------------------: | :----------------------------------------------------------------: | | ||
| HorNet-T\* | From scratch | 224x224 | 22.41 | 3.98 | 82.84 | 96.24 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-tiny_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-tiny_3rdparty_in1k_20220915-0e8eedff.pth) | | ||
| HorNet-T-GF\* | From scratch | 224x224 | 22.99 | 3.9 | 82.98 | 96.38 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-tiny-gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-tiny-gf_3rdparty_in1k_20220915-4c35a66b.pth) | | ||
| HorNet-S\* | From scratch | 224x224 | 49.53 | 8.83 | 83.79 | 96.75 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-small_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-small_3rdparty_in1k_20220915-5935f60f.pth) | | ||
| HorNet-S-GF\* | From scratch | 224x224 | 50.4 | 8.71 | 83.98 | 96.77 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-small-gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-small-gf_3rdparty_in1k_20220915-649ca492.pth) | | ||
| HorNet-B\* | From scratch | 224x224 | 87.26 | 15.59 | 84.24 | 96.94 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-base_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-base_3rdparty_in1k_20220915-a06176bb.pth) | | ||
| HorNet-B-GF\* | From scratch | 224x224 | 88.42 | 15.42 | 84.32 | 96.95 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hornet/hornet-base-gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-base-gf_3rdparty_in1k_20220915-82c06fa7.pth) | | ||
|
||
\*Models marked with \* are converted from [the official repo](https://github.com/raoyongming/HorNet). The config files of these models are provided for validation only. We do not guarantee the training accuracy of these config files, and we welcome contributions of reproduction results. | ||
|
||
### Pre-trained Models | ||
|
||
The models pre-trained on ImageNet-21k are intended for fine-tuning on downstream tasks. | ||
|
||
| Model | Pretrain | resolution | Params(M) | Flops(G) | Download | | ||
| :--------------: | :----------: | :--------: | :-------: | :------: | :------------------------------------------------------------------------------------------------------------------------: | | ||
| HorNet-L\* | ImageNet-21k | 224x224 | 194.54 | 34.83 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large_3rdparty_in21k_20220909-9ccef421.pth) | | ||
| HorNet-L-GF\* | ImageNet-21k | 224x224 | 196.29 | 34.58 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large-gf_3rdparty_in21k_20220909-3aea3b61.pth) | | ||
| HorNet-L-GF384\* | ImageNet-21k | 384x384 | 201.23 | 101.63 | [model](https://download.openmmlab.com/mmclassification/v0/hornet/hornet-large-gf384_3rdparty_in21k_20220909-80894290.pth) | | ||
|
||
\*Models marked with \* are converted from [the official repo](https://github.com/raoyongming/HorNet). | ||
|
||
## Citation | ||
|
||
``` | ||
@article{rao2022hornet, | ||
title={HorNet: Efficient High-Order Spatial Interactions with Recursive Gated Convolutions}, | ||
author={Rao, Yongming and Zhao, Wenliang and Tang, Yansong and Zhou, Jie and Lim, Ser-Nam and Lu, Jiwen}, | ||
journal={arXiv preprint arXiv:2207.14284}, | ||
year={2022} | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-base-gf.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU. | ||
data = dict(samples_per_gpu=64) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=1.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=1.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-base.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU. | ||
data = dict(samples_per_gpu=64) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=5.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=5.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-small-gf.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU. | ||
data = dict(samples_per_gpu=64) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=1.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=1.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-small.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU. | ||
data = dict(samples_per_gpu=64) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=5.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=5.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-tiny-gf.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU (set to 128 here; the dataset base file is the bs64 one). | ||
data = dict(samples_per_gpu=128) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=1.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=1.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Inherit model, dataset, schedule and runtime settings from the base files. | ||
_base_ = [ | ||
'../_base_/models/hornet/hornet-tiny.py', | ||
'../_base_/datasets/imagenet_bs64_swin_224.py', | ||
'../_base_/schedules/imagenet_bs1024_adamw_swin.py', | ||
'../_base_/default_runtime.py', | ||
] | ||
|
||
# Batch size per GPU (set to 128 here; the dataset base file is the bs64 one). | ||
data = dict(samples_per_gpu=128) | ||
|
||
# Set the optimizer learning rate to 4e-3. | ||
optimizer = dict(lr=4e-3) | ||
# Clip gradients at max_norm=100.0. NOTE(review): `_delete_=True` presumably | ||
# replaces the inherited optimizer_config instead of merging -- confirm. | ||
optimizer_config = dict(grad_clip=dict(max_norm=100.0), _delete_=True) | ||
|
||
# Register an EMAHook (momentum=4e-5) at ABOVE_NORMAL priority. | ||
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] |
Oops, something went wrong.