From 06d40a1c513302e38f15e8f88e10d56e297ed21e Mon Sep 17 00:00:00 2001 From: MengzhangLI Date: Sat, 25 Jun 2022 16:01:50 +0800 Subject: [PATCH] [Feature] Update New SegFormer models (#1705) --- configs/segformer/README.md | 71 ++++++++++++------- configs/segformer/segformer.yml | 48 ++++++------- .../segformer_mit-b0_512x512_160k_ade20k.py | 5 +- ...er_mit-b0_8x1_1024x1024_160k_cityscapes.py | 5 +- .../segformer_mit-b1_512x512_160k_ade20k.py | 4 +- ...er_mit-b1_8x1_1024x1024_160k_cityscapes.py | 3 +- .../segformer_mit-b2_512x512_160k_ade20k.py | 4 +- ...er_mit-b2_8x1_1024x1024_160k_cityscapes.py | 3 +- .../segformer_mit-b3_512x512_160k_ade20k.py | 4 +- ...er_mit-b3_8x1_1024x1024_160k_cityscapes.py | 3 +- .../segformer_mit-b4_512x512_160k_ade20k.py | 4 +- ...er_mit-b4_8x1_1024x1024_160k_cityscapes.py | 3 +- .../segformer_mit-b5_512x512_160k_ade20k.py | 4 +- .../segformer_mit-b5_640x640_160k_ade20k.py | 3 +- ...er_mit-b5_8x1_1024x1024_160k_cityscapes.py | 3 +- 15 files changed, 101 insertions(+), 66 deletions(-) diff --git a/configs/segformer/README.md b/configs/segformer/README.md index 5ac6f36968..dae8ecbc55 100644 --- a/configs/segformer/README.md +++ b/configs/segformer/README.md @@ -35,9 +35,9 @@ We present SegFormer, a simple, efficient yet powerful semantic segmentation fra ## Usage -To use other repositories' pre-trained models, it is necessary to convert keys. +We have provided pretrained models converted from [SegFormer](https://github.com/NVlabs/SegFormer). -We provide a script [`mit2mmseg.py`](../../tools/model_converters/mit2mmseg.py) in the tools directory to convert the key of models from [the official repo](https://github.com/NVlabs/SegFormer) to MMSegmentation style. +If you want to convert keys on your own, we also provide a script [`mit2mmseg.py`](../../tools/model_converters/mit2mmseg.py) in the tools directory to convert the key of models from [the official repo](https://github.com/NVlabs/SegFormer) to MMSegmentation style. 
```shell python tools/model_converters/mit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} @@ -51,28 +51,45 @@ This script convert model from `PRETRAIN_PATH` and store the converted model in | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 51.32 | 37.41 | 38.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json) | -| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 47.66 | 40.97 | 42.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json) | -| Segformer | MIT-B2 | 512x512 | 160000 
| 3.6 | 30.88 | 45.58 | 47.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json) | -| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 22.11 | 47.82 | 48.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json) | -| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 15.45 | 48.46 | 49.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json) | +| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 38.17 | 37.85 | 38.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207-c00b9603.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207.log.json) | +| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 37.80 | 42.13 | 43.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037-c3f39e00.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037.log.json) | +| Segformer | MIT-B2 | 512x512 | 160000 | 3.6 | 26.80 | 46.80 | 48.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047.log.json) | +| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 19.19 | 48.25 | 49.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254-3a4b7363.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254.log.json) | +| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 14.54 | 49.09 | 50.72 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216-4fa4f58f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216.log.json) | | Segformer | MIT-B5 | 512x512 | 160000 | 7.2 | 11.89 | 49.13 | 50.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json) | -| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 11.30 | 49.62 | 50.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json) | +| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 10.60 | 50.19 | 51.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542-940a6bd8.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542.log.json) | -Evaluation with AlignedResize: +Evaluation with `AlignedResize`: | Method | Backbone | Crop Size | Lr schd | mIoU | mIoU(ms+flip) | | --------- | -------- | --------- | ------: | ----: | ------------- | -| Segformer | MIT-B0 | 512x512 | 160000 | 38.1 | 38.57 | -| Segformer | MIT-B1 | 512x512 | 160000 | 41.64 | 42.76 | -| Segformer | MIT-B2 | 512x512 | 160000 | 46.53 | 47.49 | -| Segformer | MIT-B3 | 512x512 | 160000 | 48.46 | 49.14 | -| Segformer | MIT-B4 | 512x512 | 160000 | 49.34 | 50.29 | +| Segformer | MIT-B0 | 512x512 | 160000 | 38.55 | 39.03 | +| Segformer | MIT-B1 | 512x512 | 160000 | 43.26 | 44.11 | +| Segformer | MIT-B2 | 512x512 | 160000 | 47.46 | 48.16 | +| Segformer | MIT-B3 | 512x512 | 160000 | 49.27 | 49.94 | +| Segformer | MIT-B4 | 512x512 | 160000 | 50.23 | 51.10 | | Segformer | MIT-B5 | 512x512 | 160000 | 50.08 | 50.72 | -| Segformer | MIT-B5 | 640x640 | 160000 | 50.58 | 50.8 | +| Segformer | MIT-B5 | 640x640 | 160000 | 51.13 | 51.66 | -We replace `AlignedResize` in original implementatiuon to `Resize + ResizeToMultiple`. If you want to test by -using `AlignedResize`, you can change the dataset pipeline like this: +### Cityscapes + +The lower fps result is caused by the sliding window inference scheme (window size:1024x1024). 
+ +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | +| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | +| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | 81.08 | 
82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | +| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | +| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | +| Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | + +Note: + +Original SegFormer paper uses different `test_pipeline` and image ratios in `ms+flip`. If you want to cite SegFormer original results as benchmark you may modify settings as below: + +- We replace `AlignedResize` in the original implementation with `Resize + ResizeToMultiple`. If you want to test by + using `AlignedResize`, you can change the dataset pipeline like this: ```python test_pipeline = [ @@ -94,15 +111,15 @@ test_pipeline = [ ] ``` -### Cityscapes +- Different from default setting of `ms+flip`, SegFormer original repo adopts [different image ratios](https://github.com/NVlabs/SegFormer/blob/master/tools/test.py#L97-L101) for ADE20K dataset. To re-implement numerical results of `ms+flip`, you can change image ratios in `tools/test.py` like this: -The lower fps result is caused by the sliding window inference scheme (window size:1024x1024). 
+```python +if args.aug_test: + if cfg.data.test.type == 'ADE20KDataset': + # hard code index + cfg.data.test.pipeline[1].img_ratios = [ + 0.75, 0.875, 1.0, 1.125, 1.25 + ] +``` -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | -| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | -| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | -| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | -| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | -| Segformer | 
MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | +- Training of SegFormer is not very stable, which is sensitive to random seeds. diff --git a/configs/segformer/segformer.yml b/configs/segformer/segformer.yml index d28cb16265..7d5d244f8b 100644 --- a/configs/segformer/segformer.yml +++ b/configs/segformer/segformer.yml @@ -22,7 +22,7 @@ Models: crop size: (512,512) lr schd: 160000 inference time (ms/im): - - value: 19.49 + - value: 26.2 hardware: V100 backend: PyTorch batch size: 1 @@ -33,10 +33,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 37.41 - mIoU(ms+flip): 38.34 + mIoU: 37.85 + mIoU(ms+flip): 38.97 Config: configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207-c00b9603.pth - Name: segformer_mit-b1_512x512_160k_ade20k In Collection: Segformer Metadata: @@ -44,7 +44,7 @@ Models: crop size: (512,512) lr schd: 160000 inference time (ms/im): - - value: 20.98 + - value: 26.46 hardware: V100 backend: PyTorch batch size: 1 @@ -55,10 +55,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 40.97 - mIoU(ms+flip): 42.54 + mIoU: 42.13 + mIoU(ms+flip): 43.74 
Config: configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037-c3f39e00.pth - Name: segformer_mit-b2_512x512_160k_ade20k In Collection: Segformer Metadata: @@ -66,7 +66,7 @@ Models: crop size: (512,512) lr schd: 160000 inference time (ms/im): - - value: 32.38 + - value: 37.31 hardware: V100 backend: PyTorch batch size: 1 @@ -77,10 +77,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 45.58 - mIoU(ms+flip): 47.03 + mIoU: 46.8 + mIoU(ms+flip): 48.12 Config: configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth - Name: segformer_mit-b3_512x512_160k_ade20k In Collection: Segformer Metadata: @@ -88,7 +88,7 @@ Models: crop size: (512,512) lr schd: 160000 inference time (ms/im): - - value: 45.23 + - value: 52.11 hardware: V100 backend: PyTorch batch size: 1 @@ -99,10 +99,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 47.82 - mIoU(ms+flip): 48.81 + mIoU: 48.25 + mIoU(ms+flip): 49.58 Config: configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254-3a4b7363.pth - Name: segformer_mit-b4_512x512_160k_ade20k In Collection: Segformer Metadata: @@ -110,7 +110,7 @@ Models: crop size: (512,512) lr schd: 160000 inference time (ms/im): - - value: 64.72 + - value: 68.78 hardware: V100 backend: PyTorch batch size: 1 @@ -121,10 +121,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 48.46 - mIoU(ms+flip): 49.76 + mIoU: 49.09 + mIoU(ms+flip): 50.72 Config: configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216-4fa4f58f.pth - Name: segformer_mit-b5_512x512_160k_ade20k In Collection: Segformer Metadata: @@ -154,7 +154,7 @@ Models: crop size: (640,640) lr schd: 160000 inference time (ms/im): - - value: 88.5 + - value: 94.34 hardware: V100 backend: PyTorch batch size: 1 @@ -165,10 +165,10 @@ Models: - Task: Semantic Segmentation Dataset: ADE20K Metrics: - mIoU: 49.62 - mIoU(ms+flip): 50.36 + mIoU: 50.19 + mIoU(ms+flip): 51.41 Config: configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542-940a6bd8.pth - Name: segformer_mit-b0_8x1_1024x1024_160k_cityscapes In Collection: Segformer Metadata: diff --git a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py 
b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py index 03065a7940..b0af248d93 100644 --- a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py @@ -3,8 +3,9 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] -model = dict( - pretrained='pretrain/mit_b0.pth', decode_head=dict(num_classes=150)) +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa + +model = dict(pretrained=checkpoint, decode_head=dict(num_classes=150)) # optimizer optimizer = dict( diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py index 6444500537..7e2cb2d558 100644 --- a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py @@ -4,9 +4,10 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa + model = dict( - backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b0.pth')), + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) # optimizer diff --git a/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py index 5fce602144..6dee6ddc81 100644 --- a/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py @@ -1,8 +1,10 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa + # model settings model = dict( - pretrained='pretrain/mit_b1.pth', + 
pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[2, 2, 2, 2]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py index a93e33bd88..2a3263ff0d 100644 --- a/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py @@ -1,7 +1,8 @@ _base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa model = dict( backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b1.pth'), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), embed_dims=64), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py index afb24b0170..3c63163b62 100644 --- a/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py @@ -1,8 +1,10 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa + # model settings model = dict( - pretrained='pretrain/mit_b2.pth', + pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 4, 6, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py index fab6be2945..282cc24342 100644 --- a/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py @@ -1,8 +1,9 @@ _base_ = 
['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa model = dict( backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b2.pth'), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), embed_dims=64, num_layers=[3, 4, 6, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py index 52348f6fcc..aa4dc4c271 100644 --- a/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py @@ -1,8 +1,10 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa + # model settings model = dict( - pretrained='pretrain/mit_b3.pth', + pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 4, 18, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py index 479ce04ea1..67d70c1541 100644 --- a/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py @@ -1,8 +1,9 @@ _base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa model = dict( backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b3.pth'), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), embed_dims=64, num_layers=[3, 4, 18, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git 
a/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py index 7b50b75608..f9a026e09c 100644 --- a/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py @@ -1,8 +1,10 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa + # model settings model = dict( - pretrained='pretrain/mit_b4.pth', + pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 8, 27, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py index 808a1eb41b..332d840c8f 100644 --- a/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py @@ -1,8 +1,9 @@ _base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa model = dict( backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b4.pth'), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), embed_dims=64, num_layers=[3, 8, 27, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py index 5212fb1f6a..3175ba50ed 100644 --- a/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py @@ -1,8 +1,10 @@ _base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa + # model 
settings model = dict( - pretrained='pretrain/mit_b5.pth', + pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 6, 40, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py index d21774c4d6..ca3f683ab3 100644 --- a/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py +++ b/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py @@ -37,8 +37,9 @@ test=dict(pipeline=test_pipeline)) # model settings +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa model = dict( - pretrained='pretrain/mit_b5.pth', + pretrained=checkpoint, backbone=dict( embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 6, 40, 3]), decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py index 1c9422d37c..3015aee088 100644 --- a/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py +++ b/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py @@ -1,8 +1,9 @@ _base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa model = dict( backbone=dict( - init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b5.pth'), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), embed_dims=64, num_layers=[3, 6, 40, 3]), decode_head=dict(in_channels=[64, 128, 320, 512]))