[Api] vis hook and data flow api (#1185)

* vis hook and data flow api
* fix comment
* add TODO for merging and rewriting after MultiDatasetWrapper
open-mmlab · Aug 8, 2022 · ef68320 · ef68320
1 parent 6759bd4
commit ef68320
Show file tree

Hide file tree

Showing 36 changed files with 549 additions and 351 deletions.
diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py
@@ -6,7 +6,14 @@
     param_scheduler=dict(type='ParamSchedulerHook'),
     checkpoint=dict(type='CheckpointHook', interval=1),
     sampler_seed=dict(type='DistSamplerSeedHook'),
-    sync_buffer=dict(type='SyncBuffersHook'))
+    visualization=dict(
+        type='VisualizationHook',
+        interval=1,
+        enable=False,
+        show=False,
+        draw_gt=False,
+        draw_pred=False),
+)
 
 env_cfg = dict(
     cudnn_benchmark=True,

diff --git a/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py
@@ -69,10 +69,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1024, 640), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py b/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py
@@ -68,10 +68,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py b/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py
@@ -73,10 +73,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1080, 736), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py
@@ -50,10 +50,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py
@@ -50,10 +50,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py
@@ -49,10 +49,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py b/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py
@@ -49,10 +49,16 @@
         scale_divisor=1,
         ratio_range=(1.0, 1.0),
         aspect_ratio_range=(1.0, 1.0)),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
@@ -46,10 +46,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py
@@ -63,10 +63,16 @@
         file_client_args=file_client_args,
         color_type='color_ignore_orientation'),
     dict(type='Resize', scale=(1333, 736), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
     dict(
         type='PackTextDetInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/abinet/base.py b/configs/textrecog/abinet/base.py
@@ -76,12 +76,13 @@
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile', file_client_args=file_client_args),
-    dict(type='LoadOCRAnnotations', with_text=True),
     dict(type='Resize', scale=(128, 32)),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/crnn/crnn_academic_dataset.py b/configs/textrecog/crnn/crnn_academic_dataset.py
@@ -38,10 +38,12 @@
         min_width=32,
         max_width=None,
         width_divisor=16),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py b/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py
@@ -39,10 +39,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/master/master_toy_dataset.py b/configs/textrecog/master/master_toy_dataset.py
@@ -35,10 +35,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/nrtr/nrtr_modality_transform_academic.py b/configs/textrecog/nrtr/nrtr_modality_transform_academic.py
@@ -42,10 +42,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/nrtr/nrtr_modality_transform_toy_dataset.py b/configs/textrecog/nrtr/nrtr_modality_transform_toy_dataset.py
@@ -34,10 +34,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py
@@ -42,10 +42,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py
@@ -44,10 +44,12 @@
         max_width=160,
         width_divisor=16),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/configs/textrecog/robust_scanner/robustscanner_r31_academic.py b/configs/textrecog/robust_scanner/robustscanner_r31_academic.py
@@ -36,10 +36,12 @@
         max_width=160,
         width_divisor=4),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 # dataset settings

diff --git a/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py b/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py
@@ -38,10 +38,12 @@
         max_width=160,
         width_divisor=4),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 # dataset settings

diff --git a/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py b/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py
@@ -38,10 +38,12 @@
         max_width=160,
         width_divisor=4),
     dict(type='PadToWidth', width=160),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 # dataset settings

diff --git a/configs/textrecog/satrn/satrn_academic.py b/configs/textrecog/satrn/satrn_academic.py
@@ -58,10 +58,12 @@
 test_pipeline = [
     dict(type='LoadImageFromFile', file_client_args=file_client_args),
     dict(type='Resize', scale=(100, 32), keep_ratio=False),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to do resize data transform
+    dict(type='LoadOCRAnnotations', with_text=True),
     dict(
         type='PackTextRecogInputs',
-        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
-                   'instances'))
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
 ]
 
 train_dataloader = dict(

diff --git a/mmocr/engine/__init__.py b/mmocr/engine/__init__.py
@@ -1,2 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from .hooks import *  # NOQA
 from .runner import *  # NOQA
diff --git a/mmocr/engine/hooks/__init__.py b/mmocr/engine/hooks/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .visualization_hook import VisualizationHook
+
+__all__ = ['VisualizationHook']