mindspore-lab · SamitHuang · May 3, 2023 · Apr 28, 2023
diff --git a/README.md b/README.md
@@ -149,6 +149,8 @@ We give instructions on how to download the following datasets.
 
 - [x] MLT2017 [paper](https://ieeexplore.ieee.org/abstract/document/8270168) [homepage](https://rrc.cvc.uab.es/?ch=8&com=introduction) [download instruction](docs/en/datasets/mlt2017.md)
 
+- [x] MSRA-TD500 [paper](https://ieeexplore.ieee.org/abstract/document/6247787) [homepage](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)) [download instruction](docs/en/datasets/td500.md)
+
 </details>
 
 ### Conversion

diff --git a/README_CN.md b/README_CN.md
@@ -139,6 +139,8 @@ MindOCR集成了MX推理引擎，支持文本检测识别任务，请参考[mx_i
 
 - [x] MLT2017 [论文](https://ieeexplore.ieee.org/abstract/document/8270168) [主页](https://rrc.cvc.uab.es/?ch=8&com=introduction) [下载说明](docs/cn/datasets/mlt2017_CN.md)
 
+- [x] MSRA-TD500 [论文](https://ieeexplore.ieee.org/abstract/document/6247787) [主页](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)) [下载说明](docs/cn/datasets/td500_CN.md)
+
 </details>
 
 ### 转换

diff --git a/docs/cn/datasets/td500_CN.md b/docs/cn/datasets/td500_CN.md
@@ -0,0 +1,49 @@
+[English](../../en/datasets/td500.md) | 中文
+
+# MSRA Text Detection 500 Database (MSRA-TD500)
+
+## 数据下载
+文本检测数据集（MSRA-TD500）[官网](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500))
+
+[下载数据集](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500))
+
+请从上述网站下载数据并解压缩文件。解压文件后，数据结构应该是这样的：
+
+```txt
+MSRA-TD500
+ ├── test
+ │   ├── IMG_0059.gt 
+ │   ├── IMG_0059.JPG
+ │   ├── IMG_0080.gt
+ │   ├── IMG_0080.JPG
+ │   ├── ...
+ ├── train
+ │   ├── IMG_0030.gt 
+ │   ├── IMG_0030.JPG
+ │   ├── IMG_0063.gt
+ │   ├── IMG_0063.JPG
+ │   ├── ...
+```
+
+## 数据准备
+
+### 检测任务
+
+要准备用于文本检测的数据，您可以运行以下命令：
+
+```bash
+python tools/dataset_converters/convert.py \
+    --dataset_name td500 --task det \
+    --image_dir path/to/MSRA-TD500/train/ \
+    --label_dir path/to/MSRA-TD500/train \
+    --output_path path/to/MSRA-TD500/train_det_gt.txt 
+```
+```bash
+python tools/dataset_converters/convert.py \
+    --dataset_name td500 --task det \
+    --image_dir path/to/MSRA-TD500/test/ \
+    --label_dir path/to/MSRA-TD500/test \
+    --output_path path/to/MSRA-TD500/test_det_gt.txt 
+```
+
+运行后，在文件夹 `MSRA-TD500/` 下会生成两个标注文件 `train_det_gt.txt` 和 `test_det_gt.txt`。
diff --git a/docs/en/datasets/td500.md b/docs/en/datasets/td500.md
@@ -0,0 +1,50 @@
+English | [中文](../../cn/datasets/td500_CN.md)
+
+# MSRA Text Detection 500 Database (MSRA-TD500)
+
+## Data Downloading
+MSRA Text Detection 500 Database (MSRA-TD500) [official website](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500))
+
+[download dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500))
+
+Please download the data from the website above and unzip the file.
+After unzipping the file, the data structure should be like:
+
+```txt
+MSRA-TD500
+ ├── test
+ │   ├── IMG_0059.gt 
+ │   ├── IMG_0059.JPG
+ │   ├── IMG_0080.gt
+ │   ├── IMG_0080.JPG
+ │   ├── ...
+ ├── train
+ │   ├── IMG_0030.gt 
+ │   ├── IMG_0030.JPG
+ │   ├── IMG_0063.gt
+ │   ├── IMG_0063.JPG
+ │   ├── ...
+```
+
+## Data Preparation
+
+### Detection Task
+
+To prepare the data for text detection, you can run the following commands:
+
+```bash
+python tools/dataset_converters/convert.py \
+    --dataset_name td500 --task det \
+    --image_dir path/to/MSRA-TD500/train/ \
+    --label_dir path/to/MSRA-TD500/train \
+    --output_path path/to/MSRA-TD500/train_det_gt.txt 
+```
+```bash
+python tools/dataset_converters/convert.py \
+    --dataset_name td500 --task det \
+    --image_dir path/to/MSRA-TD500/test/ \
+    --label_dir path/to/MSRA-TD500/test \
+    --output_path path/to/MSRA-TD500/test_det_gt.txt 
+```
+
+After running these commands, you will find two annotation files, `train_det_gt.txt` and `test_det_gt.txt`, under the folder `MSRA-TD500/`.
diff --git a/tools/convert_datasets.sh b/tools/convert_datasets.sh
@@ -271,3 +271,33 @@ else
           --output_path $DIR/test_det_gt.txt
   fi
 fi
+##########################td500#########################
+DIR="$DATASETS_DIR/td500"
+if  [ ! -d $DIR ] || [  ! "$(ls -A $DIR)"  ]; then
+  echo "td500 is Empty! Skipped."
+else
+  unzip $DIR/MSRA-TD500.zip -d  $DIR/
+  rm $DIR/MSRA-TD500.zip
+
+
+  if test -f "$DIR/MSRA-TD500/train_det_gt.txt"; then
+     echo "$DIR/MSRA-TD500/train_det_gt.txt exists."
+  else
+     python tools/dataset_converters/convert.py \
+          --dataset_name  td500 \
+          --task det \
+          --image_dir $DIR/MSRA-TD500/train/ \
+          --label_dir $DIR/MSRA-TD500/train/ \
+          --output_path $DIR/MSRA-TD500/train_det_gt.txt
+  fi
+  if test -f "$DIR/MSRA-TD500/test_det_gt.txt"; then
+     echo "$DIR/MSRA-TD500/test_det_gt.txt exists."
+  else
+     python tools/dataset_converters/convert.py \
+          --dataset_name  td500 \
+          --task det \
+          --image_dir $DIR/MSRA-TD500/test/ \
+          --label_dir $DIR/MSRA-TD500/test/ \
+          --output_path $DIR/MSRA-TD500/test_det_gt.txt
+  fi
+fi
diff --git a/tools/dataset_converters/convert.py b/tools/dataset_converters/convert.py
@@ -22,8 +22,9 @@
 from mlt2017 import MLT2017_Converter
 from syntext150k import SYNTEXT150K_Converter
 from svt import SVT_Converter
+from td500 import TD500_Converter
 
-supported_datasets = ['ic15', 'totaltext', 'mlt2017', 'syntext150k', 'svt']
+supported_datasets = ['ic15', 'totaltext', 'mlt2017', 'syntext150k', 'svt', 'td500']
 
 
 def convert(dataset_name, task, image_dir, label_path, output_path=None, path_mode='relative'):

diff --git a/tools/dataset_converters/td500.py b/tools/dataset_converters/td500.py
@@ -0,0 +1,62 @@
+import os
+import json
+import glob
+import math
+
+
+def rotate_xy(x, y, center_x, center_y, theta):
+    rotate_x = math.cos(theta) * (x - center_x) - math.sin(theta) * (y - center_y)
+    rotate_y = math.cos(theta) * (y - center_y) + math.sin(theta) * (x - center_x)
+    return center_x + rotate_x, center_y + rotate_y
+
+def det_rotate(x, y, width, height, theta):
+    center_x = x + width / 2
+    center_y = y + height / 2
+
+    x1, y1 = rotate_xy(x, y, center_x, center_y, theta)
+    x2, y2 = rotate_xy(x + width, y, center_x, center_y, theta)
+    x3, y3 = rotate_xy(x + width, y + height, center_x, center_y, theta)
+    x4, y4 = rotate_xy(x, y + height, center_x, center_y, theta)
+    return x1, y1, x2, y2, x3, y3, x4, y4
+
+
+class TD500_Converter(object):
+    '''
+    Format annotation to standard form for MSRA-TD500 dataset.
+    '''
+    def __init__(self, path_mode='relative'):
+        self.path_mode = path_mode
+
+    def convert(self, task='det', image_dir=None, label_path=None, output_path=None):
+        self.label_path = label_path
+        assert os.path.exists(label_path), f'{label_path} no exist!'
+
+        if task == 'det':
+            self._format_det_label(image_dir, self.label_path, output_path)
+        if task == 'rec':
+            raise ValueError("SynText dataset has no cropped word images and recognition labels.")
+
+    def _format_det_label(self, image_dir, label_dir, output_path):
+        label_paths = sorted(glob.glob(os.path.join(label_dir, '*.gt')))
+        with open(output_path, 'w') as out_file:
+            for label_fp in label_paths:
+                label_file_name = os.path.basename(label_fp)
+                img_path = os.path.join(image_dir, label_file_name[:-3] + ".JPG")
+                assert os.path.exists(img_path), f'{img_path} not exist! Please check the input image_dir {image_dir} and names in {label_fp}'
+                label = []
+                if self.path_mode == 'relative':
+                    img_path = os.path.basename(img_path)
+                with open(label_fp, 'r', encoding='utf-8-sig') as f:
+                    for line in f.readlines():
+                        tmp = line.strip("\n").replace("\xef\xbb\xbf", "").split(' ')
+                        x1, y1, x2, y2, x3, y3, x4, y4 = det_rotate(int(tmp[2]), int(tmp[3]), int(tmp[4]),
+                                                                    int(tmp[5]), float(tmp[6]))
+                        s = [[int(x1), int(y1)], [int(x2), int(y2)], [int(x3), int(y3)], [int(x4), int(y4)]]
+                        if tmp[1] == "1":
+                            result = {"transcription": "###", "points": s}
+                        else:
+                            result = {"transcription": tmp[1], "points": s}
+                        label.append(result)
+
+                out_file.write(img_path + '\t' + json.dumps(
+                    label, ensure_ascii=False) + '\n')