Add matting model zoo documentation
hejm37 authored and wangxintao committed Jun 18, 2020
1 parent eab6257 commit 16511dc
Showing 5 changed files with 62 additions and 20 deletions.
12 changes: 9 additions & 3 deletions configs/mattors/dim/README.md
@@ -16,6 +16,12 @@

| Method | SAD | MSE | GRAD | CONN | Download |
|:----------:|:----:|:-----:|:----:|:----:|:--------:|
-| DIM stage1 | 53.8 | 0.017 | 32.7 | 54.5 | [model](TODO) \| [log](TODO) |
-| DIM stage2 | 52.3 | 0.016 | 29.4 | 52.4 | [model](TODO) \| [log](TODO) |
-| DIM stage3 | 50.6 | 0.015 | 29.0 | 50.7 | [model](TODO) \| [log](TODO) |
+| stage1 (paper) | 54.6 | 0.017 | 36.7 | 55.3 | - |
+| stage3 (paper) | **50.4** | **0.014** | 31.0 | 50.8 | - |
+| stage1 (ours) | 53.8 | 0.017 | 32.7 | 54.5 | [model](TODO) \| [log](TODO) |
+| stage2 (ours) | 52.3 | 0.016 | 29.4 | 52.4 | [model](TODO) \| [log](TODO) |
+| stage3 (ours) | 50.6 | 0.015 | **29.0** | **50.7** | [model](TODO) \| [log](TODO) |
+
+> stage1: train the encoder-decoder part without the refinement part. \
+> stage2: fix the encoder-decoder part and train the refinement part. \
+> stage3: fine-tune the whole network.
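
The three stages differ in which parts of the network are trained and whether the refiner output is used at test time. Below is a minimal sketch of how the stage-specific settings could look, based only on the `train_cfg`/`test_cfg` keys shown in the `docs/config_matting.md` diff further down; any key not shown there (such as a flag for freezing the encoder-decoder) is an illustrative assumption, not something taken from this commit.

```python
# Hypothetical per-stage settings for DIM, mirroring the train_cfg / test_cfg
# keys documented in docs/config_matting.md. `train_backbone` is an assumed,
# illustrative key for freezing the encoder-decoder; it is not shown in this commit.

# stage1: train the encoder-decoder part only; the refiner is neither trained nor used.
stage1 = dict(
    train_cfg=dict(train_backbone=True, train_refiner=False),
    test_cfg=dict(refine=False, metrics=['SAD', 'MSE', 'GRAD', 'CONN']))

# stage2: fix (freeze) the encoder-decoder part and train only the refiner.
stage2 = dict(
    train_cfg=dict(train_backbone=False, train_refiner=True),
    test_cfg=dict(refine=True, metrics=['SAD', 'MSE', 'GRAD', 'CONN']))

# stage3: fine-tune the whole network, keeping the refiner at test time.
stage3 = dict(
    train_cfg=dict(train_backbone=True, train_refiner=True),
    test_cfg=dict(refine=True, metrics=['SAD', 'MSE', 'GRAD', 'CONN']))
```
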
6 changes: 4 additions & 2 deletions configs/mattors/gca/README.md
@@ -15,5 +15,7 @@

| Method | SAD | MSE | GRAD | CONN | Download |
|:----------:|:-----:|:------:|:-----:|:-----:|:--------:|
-| baseline | 36.50 | 0.0090 | 17.40 | 34.33 | [model](TODO) \| [log](TODO) |
-| GCA | 34.77 | 0.0080 | 16.33 | 32.20 | [model](TODO) \| [log](TODO) |
+| baseline (paper) | 40.62 | 0.0106 | 21.53 | 38.43 | - |
+| GCA (paper) | 35.28 | 0.0091 | 16.92 | 32.53 | - |
+| baseline (ours) | 36.50 | 0.0090 | 17.40 | 34.33 | [model](TODO) \| [log](TODO) |
+| GCA (ours) | **34.77** | **0.0080** | **16.33** | **32.20** | [model](TODO) \| [log](TODO) |
5 changes: 4 additions & 1 deletion configs/mattors/indexnet/README.md
@@ -15,4 +15,7 @@

| Method | SAD | MSE | GRAD | CONN | Download |
|:----------:|:-----:|:------:|:-----:|:-----:|:--------:|
-| IndexNet | xxxxx | xxxxxx | xxxxx | xxxxx | [model](TODO) \| [log](TODO) |
+| M2O DINs (paper) | **45.8** | **0.013** | 25.9 | **43.7** | - |
+| M2O DINs (ours) | 46.8 | 0.016 | **24.6** | 44.6 | [model](TODO) \| [log](TODO) |
+
+> Note that the best result we obtained with the original [IndexNet repo](https://github.com/poppinace/indexnet_matting) is `SAD: 46.96, MSE: 0.0143, Grad: 29.57, Conn: 46.39`.
26 changes: 13 additions & 13 deletions docs/config_matting.md
@@ -28,7 +28,7 @@ train_cfg = dict( # Config of training DIM model.
    train_refiner=False) # In DIM stage1, the refiner is not trained.
test_cfg = dict( # Config of testing DIM model.
    refine=False, # Whether to use the refiner output as the final output; in stage1, we don't use it.
-    metrics=['SAD', 'MSE']) # The metrics used when testing.
+    metrics=['SAD', 'MSE', 'GRAD', 'CONN']) # The metrics used when testing.

# data settings
dataset_type = 'AdobeComp1kDataset' # Dataset type; this will be used to define the dataset.
@@ -39,7 +39,7 @@ img_norm_cfg = dict( # Image normalization config to normalize the input images
    to_rgb=True) # The channel order of the images used to pre-train the backbone models.
train_pipeline = [ # Training data processing pipeline.
    dict(
-        type='LoadAlpha', # Load alpha matte.
+        type='LoadImageFromFile', # Load alpha matte from file.
        key='alpha', # Key of alpha matte in annotation file. The pipeline will read alpha matte from path `alpha_path`.
        flag='grayscale'), # Load as grayscale image which has shape (height, width).
    dict(
@@ -52,7 +52,8 @@ train_pipeline = [ # Training data processing pipeline.
        type='LoadImageFromFile', # Load image from file.
        key='merged'), # Key of image to load. The pipeline will read merged from path `merged_path`.
    dict(
-        type='CropAroundSemiTransparent', # Crop images around unknown area (semi-transparent area).
+        type='CropAroundUnknown', # Crop images around unknown area (semi-transparent area).
+        keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'], # Images to crop.
        crop_sizes=[320, 480, 640]), # Candidate crop sizes.
    dict(
        type='Flip', # Augmentation pipeline that flips the images.
@@ -82,40 +83,39 @@
]
test_pipeline = [
    dict(
-        type='LoadAlpha', # Load alpha matte.
+        type='LoadImageFromFile', # Load alpha matte.
        key='alpha', # Key of alpha matte in annotation file. The pipeline will read alpha matte from path `alpha_path`.
        flag='grayscale',
        save_original_img=True),
    dict(
        type='LoadImageFromFile', # Load image from file.
        key='trimap', # Key of image to load. The pipeline will read trimap from path `trimap_path`.
        flag='grayscale', # Load as grayscale image which has shape (height, width).
-        save_original_img=True), # Save a copy of alpha matte for calculating metrics. It will be saved with key `ori_alpha`.
+        save_original_img=True), # Save a copy of trimap for calculating metrics. It will be saved with key `ori_trimap`.
    dict(
        type='LoadImageFromFile', # Load image from file.
        key='merged'), # Key of image to load. The pipeline will read merged from path `merged_path`.
    dict(
-        type='Resize', # Pipeline to resize images to align with the downsample factor of the model.
-        keys=['alpha', 'trimap', 'merged'], # Images to be resized.
-        size_factor=32, # Downsample factor of the model. Size of images should be divisible by it.
-        max_size=1600), # Maximum allowed longest size in case of OOM.
+        type='Pad', # Pipeline to pad images to align with the downsample factor of the model.
+        keys=['trimap', 'merged'], # Images to be padded.
+        mode='reflect'), # Mode of the padding.
    dict(
        type='RescaleToZeroOne', # Same as in train_pipeline.
-        keys=['merged', 'alpha', 'ori_alpha']), # Images to be rescaled.
+        keys=['merged', 'ori_alpha']), # Images to be rescaled.
    dict(
        type='Normalize', # Same as in train_pipeline.
        keys=['merged'],
        **img_norm_cfg),
    dict(
        type='Collect', # Same as in train_pipeline.
-        keys=['merged', 'alpha', 'trimap'],
+        keys=['merged', 'trimap'],
        meta_keys=[
-            'merged_path', 'interpolation', 'ori_shape', 'ori_alpha',
+            'merged_path', 'pad', 'merged_ori_shape', 'ori_alpha',
            'ori_trimap'
        ]),
    dict(
        type='ImageToTensor', # Same as in train_pipeline.
-        keys=['merged', 'alpha', 'trimap']),
+        keys=['merged', 'trimap']),
]
data = dict(
    samples_per_gpu=1, # Batch size of a single GPU.
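
`CropAroundUnknown` (which replaces `CropAroundSemiTransparent` above) crops the training images around the unknown, i.e. semi-transparent, region of the alpha matte, using a crop size drawn from the `crop_sizes` candidates. A rough, self-contained numpy sketch of that idea is given below; the real mmediting transform operates on the results dict via its `keys` argument and may handle borders and undersized images differently.

```python
import numpy as np

def crop_around_unknown(imgs, alpha, crop_sizes=(320, 480, 640)):
    """Toy sketch: crop `alpha` and every image in `imgs` around a random
    semi-transparent (unknown) pixel of the alpha matte.

    This only illustrates the idea behind CropAroundUnknown, not the actual transform.
    """
    h, w = alpha.shape
    crop = min(int(np.random.choice(crop_sizes)), h, w)   # clamp to the image size
    ys, xs = np.where((alpha > 0) & (alpha < 1))          # semi-transparent pixels
    if len(ys) == 0:                                      # no unknown region: use the centre
        cy, cx = h // 2, w // 2
    else:
        i = np.random.randint(len(ys))
        cy, cx = ys[i], xs[i]
    top = int(np.clip(cy - crop // 2, 0, h - crop))
    left = int(np.clip(cx - crop // 2, 0, w - crop))
    cropped = [img[top:top + crop, left:left + crop] for img in imgs]
    return cropped, alpha[top:top + crop, left:left + crop]

# Example with dummy arrays standing in for 'merged', 'fg' and 'bg'.
alpha = np.zeros((800, 800), dtype=np.float32)
alpha[300:500, 300:500] = 0.5                             # a semi-transparent square
imgs = [np.zeros((800, 800, 3), dtype=np.float32) for _ in range(3)]
cropped_imgs, cropped_alpha = crop_around_unknown(imgs, alpha)
print(cropped_alpha.shape)                                # e.g. (480, 480)
```
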
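The test pipeline now pads instead of resizing: `Pad` with `mode='reflect'` grows `trimap` and `merged` to the next multiple of the model's downsample factor, and the recorded `pad` meta key lets predictions be cropped back to the original size. Below is a minimal numpy sketch of that padding step, assuming a factor of 32 (the value used by the replaced `Resize` step); the actual `Pad` transform's arguments and stored metadata may differ.

```python
import numpy as np

def pad_to_multiple(img, size_factor=32, mode='reflect'):
    """Reflect-pad an (H, W) or (H, W, C) image so H and W become multiples of size_factor."""
    h, w = img.shape[:2]
    pad_h = (size_factor - h % size_factor) % size_factor
    pad_w = (size_factor - w % size_factor) % size_factor
    pad_width = [(0, pad_h), (0, pad_w)] + [(0, 0)] * (img.ndim - 2)
    # The pad amounts are what a `pad` meta entry would need to record so that
    # the prediction can be cropped back to the original resolution.
    return np.pad(img, pad_width, mode=mode), (pad_h, pad_w)

merged = np.zeros((500, 333, 3), dtype=np.float32)    # dummy merged image
trimap = np.zeros((500, 333), dtype=np.uint8)         # dummy trimap
merged_padded, pad = pad_to_multiple(merged)
trimap_padded, _ = pad_to_multiple(trimap)
print(merged_padded.shape, trimap_padded.shape, pad)  # (512, 352, 3) (512, 352) (12, 19)
```
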
33 changes: 32 additions & 1 deletion docs/model_zoo_matting.md
@@ -1,4 +1,35 @@
# Benchmark and Model Zoo for Matting

## Benchmark
-[TODO]
+
+### Deep Image Matting (DIM)
+
+Please refer to [DIM](https://github.com/open-mmlab/mmediting/blob/master/configs/mattors/dim) for details.
+
+### GCA Matting
+
+Please refer to [GCA](https://github.com/open-mmlab/mmediting/blob/master/configs/mattors/gca) for details.
+
+### IndexNet Matting
+
+Please refer to [IndexNet](https://github.com/open-mmlab/mmediting/blob/master/configs/mattors/indexnet) for details.
+
+
+## Overview
+
+| Method | SAD | MSE | GRAD | CONN |
+|:-------------------:|:-----:|:------:|:-----:|:-----:|
+| DIM | 50.6 | 0.015 | 29.0 | 50.7 |
+| GCA | 34.77 | 0.0080 | 16.33 | 32.20 |
+| IndexNet | 46.8 | 0.016 | 24.6 | 44.6 |
+
+
+## Evaluation Details
+
+### Data
+
+We provide a Python script [preprocess_comp1k_dataset.py](https://github.com/open-mmlab/mmediting/blob/master/tools/preprocess_comp1k_dataset.py) for compositing the foreground images of the Adobe Composition-1k (comp1k) dataset with background images from the MS COCO dataset. The resulting merged images are identical to those produced by the official compositing script from Adobe.
+
+### Evaluation Implementation Details
+
+We provide a Python script [evaluate_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/tools/evaluate_comp1k.py) for evaluating the test results of matting models. The four evaluation metrics (`SAD`, `MSE`, `GRAD` and `CONN`) are calculated in the same way as in the official evaluation script from Adobe. We observe only minor differences between the results of our Python script and the official one, which have no effect on the reported performance.
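
For context on the `### Data` section above: compositing a foreground onto a background with an alpha matte follows the standard matting composition equation `I = alpha * F + (1 - alpha) * B`. The numpy sketch below illustrates only that equation; it is not the interface of `preprocess_comp1k_dataset.py`, whose arguments and handling of resizing are not shown in this commit.

```python
import numpy as np

def composite(fg, bg, alpha):
    """Composite a foreground onto a background using an alpha matte.

    fg, bg:  float arrays in [0, 1] with shape (H, W, 3); bg is assumed to be
             already resized/cropped to the foreground's size.
    alpha:   float array in [0, 1] with shape (H, W).
    """
    alpha = alpha[..., None]                 # broadcast over colour channels
    return alpha * fg + (1.0 - alpha) * bg   # I = alpha * F + (1 - alpha) * B

fg = np.random.rand(240, 320, 3).astype(np.float32)
bg = np.random.rand(240, 320, 3).astype(np.float32)
alpha = np.random.rand(240, 320).astype(np.float32)
merged = composite(fg, bg, alpha)
print(merged.shape)  # (240, 320, 3)
```
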
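Of the four metrics, `SAD` and `MSE` are the easiest to sketch. The snippet below is a rough illustration only: restricting both metrics to the unknown (trimap == 128) region and reporting SAD in units of one thousand follow common practice in the matting literature and are assumptions here, not a description of what `evaluate_comp1k.py` actually does; `GRAD` and `CONN` are omitted because they need gradient and connectivity computations that do not fit a short example.

```python
import numpy as np

def sad_mse(pred_alpha, gt_alpha, trimap):
    """Sum of Absolute Differences and Mean Squared Error over the unknown region.

    pred_alpha, gt_alpha: float arrays in [0, 1] with shape (H, W).
    trimap: uint8 array where the value 128 marks the unknown region.
    The normalisation conventions here are assumptions, not the official script's.
    """
    unknown = trimap == 128
    diff = (pred_alpha - gt_alpha)[unknown]
    sad = np.abs(diff).sum() / 1000.0                # commonly reported in units of 1e3
    mse = (diff ** 2).sum() / max(unknown.sum(), 1)  # averaged over unknown pixels
    return sad, mse

pred = np.random.rand(480, 640).astype(np.float32)
gt = np.random.rand(480, 640).astype(np.float32)
trimap = np.full((480, 640), 128, dtype=np.uint8)
print(sad_mse(pred, gt, trimap))
```
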
