mindspore-lab · tonytonglt · Jan 29, 2024 · Jan 29, 2024
diff --git a/configs/rec/svtr/README_CN.md b/configs/rec/svtr/README_CN.md
@@ -36,18 +36,22 @@ Table Format:
 
 <div align="center">
 
-| **模型** | **环境配置** | **平均准确率** | **训练时间** | **FPS** | **配置文件** | **模型权重下载** |
-| :-----: | :-----:  | :-----: | :-----: | :-----: |:--------: | :-----: |
-| SVTR-Tiny      | D910x4-MS1.10-G | 90.23%    | 3638 s/epoch        | 4560 | [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/svtr/svtr_tiny.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/svtr/svtr_tiny-950be1c3.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/svtr/svtr_tiny-950be1c3-86ece8c8.mindir) |
+|    **模型**    |    **环境配置**     | **平均准确率** |   **训练时间**   | **FPS** |                                           **配置文件**                                            |                                                                                       **模型权重下载**                                                                                        |
+|:------------:|:---------------:|:---------:|:------------:|:-------:|:---------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+|  SVTR-Tiny   | D910x4-MS1.10-G |  90.23%   | 3638 s/epoch |  4560   |  [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/svtr/svtr_tiny.yaml)   | [ckpt](https://download.mindspore.cn/toolkits/mindocr/svtr/svtr_tiny-950be1c3.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/svtr/svtr_tiny-950be1c3-86ece8c8.mindir) |
+| SVTR-Tiny-8P | D910x8-MS2.2-G  |  90.32%   | 1646 s/epoch |  9840   | [yaml](https://github.com/mindspore-lab/mindocr/blob/main/configs/rec/svtr/svtr_tiny_8p.yaml) |                                                                                       Coming soon                                                                                       |
+
 </div>
 
 <details open markdown>
   <div align="center">
   <summary>在各个基准数据集上的准确率</summary>
 
-  | **模型** | **IC03_860** | **IC03_867** | **IC13_857** | **IC13_1015** | **IC15_1811** | **IC15_2077** | **IIIT5k_3000** | **SVT** | **SVTP** | **CUTE80** | **平均准确率** |
-  | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: |
-  | SVTR-Tiny  | 95.70% | 95.50% | 95.33% | 93.99% | 83.60% | 79.83% | 94.70% | 91.96% | 85.58% | 86.11% | 90.23% |
+  |    **模型**    | **IC03_860** | **IC03_867** | **IC13_857** | **IC13_1015** | **IC15_1811** | **IC15_2077** | **IIIT5k_3000** | **SVT** | **SVTP** | **CUTE80** | **平均准确率** |
+  |:------------:|:------------:|:------------:|:------------:|:-------------:|:-------------:|:-------------:|:---------------:|:-------:|:--------:|:----------:|:---------:|
+  |  SVTR-Tiny   |    95.70%    |    95.50%    |    95.33%    |    93.99%     |    83.60%     |    79.83%     |     94.70%      | 91.96%  |  85.58%  |   86.11%   |  90.23%   |
+  | SVTR-Tiny-8P |    95.93%    |    95.62%    |    95.33%    |    93.89%     |    84.32%     |    80.55%     |     94.33%      | 90.57%  |  86.20%  |   86.46%   |  90.32%   |
+
   </div>
 </details>
 

diff --git a/configs/rec/svtr/svtr_tiny_8p.yaml b/configs/rec/svtr/svtr_tiny_8p.yaml
@@ -0,0 +1,187 @@
+system:
+  mode: 0 # 0 for graph mode, 1 for pynative mode in MindSpore
+  distribute: True # distributed run; the file name (svtr_tiny_8p) marks this as the 8-device variant
+  amp_level: O2
+  amp_level_infer: O2 # running inference in O2 mode
+  seed: 42
+  log_interval: 100 # NOTE(review): presumably counted in training steps - confirm
+  val_while_train: True # NOTE(review): presumably uses the eval section of this file - confirm
+  drop_overflow_update: False
+  ckpt_save_policy: latest_k
+  ckpt_max_keep: 5 # NOTE(review): presumably the k of latest_k - confirm
+
+common:
+  character_dict_path: &character_dict_path # left empty, so every *character_dict_path alias resolves to null
+  num_classes: &num_classes 37 # num_chars_in_dict + 1
+  max_text_len: &max_text_len 24 # aliased by loss.max_label_len, the train dataset and the label encoders
+  use_space_char: &use_space_char False
+  batch_size: &batch_size 512 # aliased by loss and train.loader; eval.loader uses its own literal 512
+
+model:
+  type: rec
+  transform:
+    name: STN_ON
+    in_channels: 3
+    tps_inputsize: [32, 64]
+    tps_outputsize: [32, 100] # same size as backbone.img_size below
+    num_control_points: 20
+    tps_margins: [0.05, 0.05]
+    stn_activation: none # plain scalar, i.e. the string "none" (not YAML null)
+  backbone:
+    name: SVTRNet
+    pretrained: False
+    img_size: [32, 100] # same size as transform.tps_outputsize above
+    out_channels: 192
+    patch_merging: Conv
+    embed_dim: [64, 128, 256]
+    depth: [3, 6, 3] # sums to 12, the number of entries in mixer below
+    num_heads: [2, 4, 8]
+    mixer:
+      [
+        "Local",
+        "Local",
+        "Local",
+        "Local",
+        "Local",
+        "Local",
+        "Global",
+        "Global",
+        "Global",
+        "Global",
+        "Global",
+        "Global",
+      ]
+    local_mixer: [[7, 11], [7, 11], [7, 11]] # three entries, like embed_dim / depth / num_heads
+    last_stage: True
+    prenorm: False
+  neck:
+    name: Img2Seq
+  head:
+    name: CTCHead
+    out_channels: *num_classes # resolves to 37 (common.num_classes)
+
+postprocess:
+  name: RecCTCLabelDecode
+  character_dict_path: *character_dict_path # null here (common.character_dict_path is empty)
+  use_space_char: *use_space_char # resolves to False (common.use_space_char)
+
+metric:
+  name: RecMetric
+  main_indicator: acc
+  character_dict_path: *character_dict_path # null here (common.character_dict_path is empty)
+  ignore_space: True
+  print_flag: False
+
+loss:
+  name: CTCLoss
+  pred_seq_len: 25 # 100 / 4
+  max_label_len: *max_text_len # this value should be smaller than pred_seq_len (24 < 25 here)
+  batch_size: *batch_size # resolves to 512 (common.batch_size)
+
+scheduler:
+  scheduler: warmup_cosine_decay
+  min_lr: 0.00003 # lr / 100
+  lr: 0.003
+  num_epochs: 30
+  warmup_epochs: 3
+  decay_epochs: 27 # warmup_epochs + decay_epochs = num_epochs (3 + 27 = 30)
+
+optimizer:
+  opt: adamw
+  grouping_strategy: svtr # NOTE(review): presumably a parameter-grouping preset for SVTR - confirm
+  filter_bias_and_bn: False # NOTE(review): may interact with grouping_strategy - confirm which takes precedence
+  weight_decay: 0.05
+
+loss_scaler:
+  type: dynamic
+  loss_scale: 512 # NOTE(review): presumably the initial scale of the dynamic scaler - confirm
+  scale_factor: 2.0 # NOTE(review): presumably the grow/shrink multiplier - confirm
+  scale_window: 1000 # NOTE(review): presumably overflow-free steps before the scale grows - confirm
+
+train:
+  ckpt_save_dir: ./tmp_rec # eval.ckpt_load_path points at best.ckpt inside this directory
+  dataset_sink_mode: False
+  ema: True
+  ema_decay: 0.9999
+  dataset:
+    type: LMDBDataset
+    dataset_root: path/to/data_lmdb_release/ # placeholder - replace with the local dataset root
+    data_dir: training/
+    label_file: null
+    sample_ratio: 1.0
+    shuffle: True
+    filter_max_len: True
+    filter_zero_text_image: True
+    extra_count_if_repeat: True
+    max_text_len: *max_text_len # resolves to 24 (common.max_text_len)
+    character_dict_path: *character_dict_path # null here (common.character_dict_path is empty)
+    label_standandize: True # NOTE(review): spelling kept as-is; confirm it matches the dataset argument name before renaming
+    transform_pipeline:
+      - DecodeImage:
+          img_mode: BGR
+          to_float32: False
+      - SVTRRecAug: # train-only step; the eval pipeline has no augmentation entry
+          aug_type: 0
+      - RecCTCLabelEncode:
+          max_text_len: *max_text_len
+          character_dict_path: *character_dict_path
+          use_space_char: *use_space_char
+          lower: True
+      - SVTRRecResizeImg:
+          image_shape: [64, 256] # NOTE(review): larger than model.transform.tps_inputsize [32, 64]; presumably the STN stage resizes - confirm
+          padding: False
+      - NormalizeImage:
+          bgr_to_rgb: True # DecodeImage above is configured with img_mode: BGR
+          is_hwc: True # ToCHWImage runs after this step
+          mean: [127.0, 127.0, 127.0]
+          std: [127.0, 127.0, 127.0]
+      - ToCHWImage:
+    output_columns: ["image", "text_seq"]
+    net_input_column_index: [0] # NOTE(review): presumably an index into output_columns (0 = "image") - confirm
+    label_column_index: [1] # NOTE(review): presumably an index into output_columns (1 = "text_seq") - confirm
+
+  loader:
+    shuffle: True
+    batch_size: *batch_size # resolves to 512 (common.batch_size)
+    drop_remainder: True
+    max_rowsize: 12
+    num_workers: 4
+
+eval:
+  ckpt_load_path: ./tmp_rec/best.ckpt # lives under train.ckpt_save_dir (./tmp_rec)
+  dataset_sink_mode: False
+  dataset:
+    type: LMDBDataset
+    dataset_root: path/to/data_lmdb_release/ # placeholder - replace with the local dataset root
+    data_dir: validation/
+    label_file: null
+    sample_ratio: 1.0
+    shuffle: False
+    transform_pipeline:
+      - DecodeImage:
+          img_mode: BGR
+          to_float32: False
+      - RecCTCLabelEncode:
+          max_text_len: *max_text_len
+          character_dict_path: *character_dict_path
+          use_space_char: *use_space_char
+          lower: True
+      - SVTRRecResizeImg:
+          image_shape: [64, 256] # same value as the train pipeline
+          padding: False
+      - NormalizeImage:
+          bgr_to_rgb: True
+          is_hwc: True
+          mean: [127.0, 127.0, 127.0]
+          std: [127.0, 127.0, 127.0]
+      - ToCHWImage:
+    output_columns: ["image", "text_padded", "text_length"]
+    net_input_column_index: [0] # NOTE(review): presumably an index into output_columns (0 = "image") - confirm
+    label_column_index: [1, 2] # NOTE(review): presumably "text_padded" and "text_length" - confirm
+
+  loader:
+    shuffle: False
+    batch_size: 512 # literal, not the *batch_size alias; currently the same value as common.batch_size
+    drop_remainder: False # train.loader sets this to True
+    max_rowsize: 12
+    num_workers: 1