differentiate multi-scale and single-scale configs for segmentation
HannaMao committed Jan 11, 2022
1 parent 64aba99 commit dcb9287
Showing 13 changed files with 384 additions and 2 deletions.
semantic_segmentation/README.md (13 additions, 2 deletions)
@@ -36,17 +36,28 @@ More config files can be found at [`configs/convnext`](configs/convnext).
 
 ## Evaluation
 
-Command format:
+Command format for multi-scale testing:
 ```
 tools/dist_test.sh <CONFIG_PATH> <CHECKPOINT_PATH> <NUM_GPUS> --eval mIoU --aug-test
 ```
 
 For example, evaluate a `ConvNeXt-T` backbone with UperNet:
 ```bash
-bash tools/dist_test.sh configs/convnext/upernet_convnext_tiny_512_160k_ade20k.py \
+bash tools/dist_test.sh configs/convnext/upernet_convnext_tiny_512_160k_ade20k_ms.py \
 https://dl.fbaipublicfiles.com/convnext/ade20k/upernet_convnext_tiny_1k_512x512.pth 4 --eval mIoU --aug-test
 ```
 
+Command format for single-scale testing:
+```
+tools/dist_test.sh <CONFIG_PATH> <CHECKPOINT_PATH> <NUM_GPUS> --eval mIoU
+```
+
+For example, evaluate a `ConvNeXt-T` backbone with UperNet:
+```bash
+bash tools/dist_test.sh configs/convnext/upernet_convnext_tiny_512_160k_ade20k_ss.py \
+https://dl.fbaipublicfiles.com/convnext/ade20k/upernet_convnext_tiny_1k_512x512.pth 4 --eval mIoU
+```
+
 ## Acknowledgment
 
 This code is built using the [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) library, [Timm](https://github.com/rwightman/pytorch-image-models) library, the [BeiT](https://github.com/microsoft/unilm/tree/f8f3df80c65eb5e5fc6d6d3c9bd3137621795d1e/beit) repository, the [Swin](https://github.com/microsoft/Swin-Transformer) repository, [XCiT](https://github.com/facebookresearch/xcit) and the [SETR](https://github.com/fudan-zvg/SETR) repository.
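For context on the `--aug-test` flag: multi-scale evaluation in mmsegmentation is expressed through a `MultiScaleFlipAug` test pipeline. The sketch below shows a typical pipeline of that kind; the image scale and ratios are illustrative assumptions and not necessarily what the new `_ms` configs in this commit use.

```python
# A generic mmsegmentation-style test pipeline with multi-scale, flipped
# inference. Illustrative sketch only; the exact scale and ratios used by
# the new `_ms` configs are not shown here.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # Multi-scale ratios; single-scale testing would use [1.0] and flip=False.
        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
```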
@@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


_base_ = [
    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (512, 512)

model = dict(
    backbone=dict(
        type='ConvNeXt',
        in_chans=3,
        depths=[3, 3, 27, 3],
        dims=[128, 256, 512, 1024],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        out_indices=[0, 1, 2, 3],
    ),
    decode_head=dict(
        in_channels=[128, 256, 512, 1024],
        num_classes=150,
    ),
    auxiliary_head=dict(
        in_channels=512,
        num_classes=150
    ),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
                 lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg={'decay_rate': 0.9,
                                'decay_type': 'stage_wise',
                                'num_layers': 12})

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)

runner = dict(type='IterBasedRunnerAmp')

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
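All of the new configs pair AdamW with `LearningRateDecayOptimizerConstructor` and `decay_type='stage_wise'`, so earlier parts of the backbone train with geometrically smaller learning rates than later ones. The snippet below sketches the usual BEiT-style decay rule as an assumption; the exact stage grouping lives in the repository's optimizer constructor and is not reproduced here.

```python
# Rough sketch of layer-wise learning-rate decay, assuming the common
# BEiT-style rule: scale = decay_rate ** (num_layers + 1 - layer_id).
# How parameters are bucketed into stages is decided by the repository's
# LearningRateDecayOptimizerConstructor, not by this sketch.
base_lr = 1e-4
decay_rate = 0.9
num_layers = 12  # matches paramwise_cfg in the config above

for layer_id in range(num_layers + 2):  # stem ... decode head
    scale = decay_rate ** (num_layers + 1 - layer_id)
    print(f"layer {layer_id:2d}: lr = {base_lr * scale:.2e}")
```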
@@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


_base_ = [
    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k_640x640.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (640, 640)

model = dict(
    backbone=dict(
        type='ConvNeXt',
        in_chans=3,
        depths=[3, 3, 27, 3],
        dims=[128, 256, 512, 1024],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        out_indices=[0, 1, 2, 3],
    ),
    decode_head=dict(
        in_channels=[128, 256, 512, 1024],
        num_classes=150,
    ),
    auxiliary_head=dict(
        in_channels=512,
        num_classes=150
    ),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)),
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
                 lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg={'decay_rate': 0.9,
                                'decay_type': 'stage_wise',
                                'num_layers': 12})

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)

runner = dict(type='IterBasedRunnerAmp')

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
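Evaluation in every config uses `mode='slide'` with a stride of roughly two thirds of the crop size (341 for 512-pixel crops, 426 for 640-pixel crops), so neighbouring windows overlap by about a third. The sketch below shows the window-grid arithmetic that slide-style inference in mmsegmentation performs; the image size is illustrative.

```python
# Sketch: how many overlapping windows 'slide' test mode tiles over an image,
# given the crop_size and stride from the config above. The ceiling-division
# grid mirrors mmsegmentation-style slide inference; numbers are illustrative.
import math

def slide_windows(img_h, img_w, crop=(640, 640), stride=(426, 426)):
    h_grids = max(math.ceil((img_h - crop[0]) / stride[0]), 0) + 1
    w_grids = max(math.ceil((img_w - crop[1]) / stride[1]), 0) + 1
    return h_grids * w_grids

# e.g. a 640 x 2560 resized ADE20K image -> 1 x 6 = 6 crops
print(slide_windows(640, 2560))
```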
@@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


_base_ = [
    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k_640x640.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (640, 640)

model = dict(
    backbone=dict(
        type='ConvNeXt',
        in_chans=3,
        depths=[3, 3, 27, 3],
        dims=[192, 384, 768, 1536],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        out_indices=[0, 1, 2, 3],
    ),
    decode_head=dict(
        in_channels=[192, 384, 768, 1536],
        num_classes=150,
    ),
    auxiliary_head=dict(
        in_channels=768,
        num_classes=150
    ),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)),
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
                 lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg={'decay_rate': 0.9,
                                'decay_type': 'stage_wise',
                                'num_layers': 12})

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)

runner = dict(type='IterBasedRunnerAmp')

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=False,
)
@@ -0,0 +1,61 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


_base_ = [
    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (512, 512)

model = dict(
    backbone=dict(
        in_chans=3,
        depths=[3, 3, 27, 3],
        dims=[96, 192, 384, 768],
        drop_path_rate=0.3,
        layer_scale_init_value=1.0,
        out_indices=[0, 1, 2, 3],
    ),
    decode_head=dict(
        in_channels=[96, 192, 384, 768],
        num_classes=150,
    ),
    auxiliary_head=dict(
        in_channels=384,
        num_classes=150
    ),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
                 lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg={'decay_rate': 0.9,
                                'decay_type': 'stage_wise',
                                'num_layers': 12})

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)

runner = dict(type='IterBasedRunnerAmp')

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
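Unlike most of the other configs, this one leaves out `type='ConvNeXt'` in the backbone dict; with mmcv-style configs, that field is inherited from the `_base_` model file, whereas dicts carrying `_delete_=True` (the `optimizer` and `lr_config` here) replace the base definition instead of being merged into it. A minimal way to check the merged result, assuming the single-scale ConvNeXt-T config path from the README:

```python
# Inspect the fully merged config. The path is the single-scale ConvNeXt-T
# file referenced in the README; any of the new configs would behave the same.
from mmcv import Config

cfg = Config.fromfile('configs/convnext/upernet_convnext_tiny_512_160k_ade20k_ss.py')
print(cfg.model.backbone.type)  # inherited from ../_base_/models/upernet_convnext.py
print(cfg.optimizer.type)       # 'AdamW': _delete_=True discards the base optimizer
```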
@@ -0,0 +1,62 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


_base_ = [
    '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (512, 512)

model = dict(
    backbone=dict(
        type='ConvNeXt',
        in_chans=3,
        depths=[3, 3, 9, 3],
        dims=[96, 192, 384, 768],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        out_indices=[0, 1, 2, 3],
    ),
    decode_head=dict(
        in_channels=[96, 192, 384, 768],
        num_classes=150,
    ),
    auxiliary_head=dict(
        in_channels=384,
        num_classes=150
    ),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
                 lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
                 paramwise_cfg={'decay_rate': 0.9,
                                'decay_type': 'stage_wise',
                                'num_layers': 6})

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

# By default, models are trained on 8 GPUs with 2 images per GPU
data = dict(samples_per_gpu=2)

runner = dict(type='IterBasedRunnerAmp')

# do not use mmdet version fp16
fp16 = None
optimizer_config = dict(
    type="DistOptimizerHook",
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
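Beyond `tools/dist_test.sh`, a quick single-GPU check of one of these configs can go through mmsegmentation's high-level API. This is a hedged sketch: it assumes an mmseg 0.x environment, that the repository's ConvNeXt backbone is registered when run from the repo root, and a locally downloaded checkpoint; the image path is hypothetical.

```python
# Single-image smoke test of a ConvNeXt-T UperNet config (sketch, not the
# repository's evaluation path). Assumes mmseg 0.x and that the custom
# ConvNeXt backbone has been imported/registered.
from mmseg.apis import inference_segmentor, init_segmentor

config = 'configs/convnext/upernet_convnext_tiny_512_160k_ade20k_ss.py'
checkpoint = 'upernet_convnext_tiny_1k_512x512.pth'  # from the URL in the README

model = init_segmentor(config, checkpoint, device='cuda:0')
result = inference_segmentor(model, 'demo/example_ade20k.jpg')  # hypothetical image
print(result[0].shape)  # H x W array of predicted class indices
```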