merge paddleocr

Liyulingyue · Feb 3, 2021 · 6443988 · 6443988
2 parents 69c85d9 + e4db8a5
commit 6443988
Show file tree

Hide file tree

Showing 57 changed files with 2,311 additions and 209 deletions.
diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
@@ -1031,7 +1031,7 @@ def format_shape(s):
 
         for box in self.result_dic:
             trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False}
-            if trans_dic["label"] is "" and mode == 'Auto':
+            if trans_dic["label"] == "" and mode == 'Auto':
                 continue
             shapes.append(trans_dic)
 
@@ -1450,7 +1450,7 @@ def importDirImages(self, dirpath, isDelete = False):
                 item = QListWidgetItem(closeicon, filename)
             self.fileListWidget.addItem(item)
 
-        print('dirPath in importDirImages is', dirpath)
+        print('DirPath in importDirImages is', dirpath)
         self.iconlist.clear()
         self.additems5(dirpath)
         self.changeFileFolder = True
@@ -1459,7 +1459,6 @@ def importDirImages(self, dirpath, isDelete = False):
         self.reRecogButton.setEnabled(True)
         self.actions.AutoRec.setEnabled(True)
         self.actions.reRec.setEnabled(True)
-        self.actions.saveLabel.setEnabled(True)
 
 
     def openPrevImg(self, _value=False):
@@ -1764,7 +1763,7 @@ def reRecognition(self):
                     QMessageBox.information(self, "Information", msg)
                     return
                 result = self.ocr.ocr(img_crop, cls=True, det=False)
-                if result[0][0] is not '':
+                if result[0][0] != '':
                     result.insert(0, box)
                     print('result in reRec is ', result)
                     self.result_dic.append(result)
@@ -1795,7 +1794,7 @@ def singleRerecognition(self):
             QMessageBox.information(self, "Information", msg)
             return
         result = self.ocr.ocr(img_crop, cls=True, det=False)
-        if result[0][0] is not '':
+        if result[0][0] != '':
             result.insert(0, box)
             print('result in reRec is ', result)
             if result[1][0] == shape.label:
@@ -1862,6 +1861,8 @@ def loadFilestate(self, saveDir):
                 for each in states:
                     file, state = each.split('\t')
                     self.fileStatedict[file] = 1
+                self.actions.saveLabel.setEnabled(True)
+                self.actions.saveRec.setEnabled(True)
 
 
     def saveFilestate(self):
@@ -1919,22 +1920,29 @@ def saveRecResult(self):
 
         rec_gt_dir = os.path.dirname(self.PPlabelpath) + '/rec_gt.txt'
         crop_img_dir = os.path.dirname(self.PPlabelpath) + '/crop_img/'
+        ques_img = []
         if not os.path.exists(crop_img_dir):
             os.mkdir(crop_img_dir)
 
         with open(rec_gt_dir, 'w', encoding='utf-8') as f:
             for key in self.fileStatedict:
                 idx = self.getImglabelidx(key)
-                for i, label in enumerate(self.PPlabel[idx]):
-                    if label['difficult']: continue
+                try:
                     img = cv2.imread(key)
-                    img_crop = get_rotate_crop_image(img, np.array(label['points'], np.float32))
-                    img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_'+str(i)+'.jpg'
-                    cv2.imwrite(crop_img_dir+img_name, img_crop)
-                    f.write('crop_img/'+ img_name + '\t')
-                    f.write(label['transcription'] + '\n')
-
-        QMessageBox.information(self, "Information", "Cropped images has been saved in "+str(crop_img_dir))
+                    for i, label in enumerate(self.PPlabel[idx]):
+                        if label['difficult']: continue
+                        img_crop = get_rotate_crop_image(img, np.array(label['points'], np.float32))
+                        img_name = os.path.splitext(os.path.basename(idx))[0] + '_crop_'+str(i)+'.jpg'
+                        cv2.imwrite(crop_img_dir+img_name, img_crop)
+                        f.write('crop_img/'+ img_name + '\t')
+                        f.write(label['transcription'] + '\n')
+                except Exception as e:
+                    ques_img.append(key)
+                    print("Can not read image ",e)
+        if ques_img:
+            QMessageBox.information(self, "Information", "The following images can not be saved, "
+                                                         "please check the image path and labels.\n" + "".join(str(i)+'\n' for i in ques_img))
+        QMessageBox.information(self, "Information", "Cropped images have been saved in "+str(crop_img_dir))
 
     def speedChoose(self):
         if self.labelDialogOption.isChecked():
@@ -1991,7 +1999,7 @@ def main():
     resource_file = './libs/resources.py'
     if not os.path.exists(resource_file):
         output = os.system('pyrcc5 -o libs/resources.py resources.qrc')
-        assert output is 0, "operate the cmd have some problems ,please check  whether there is a in the lib " \
+        assert output == 0, "operate the cmd have some problems ,please check  whether there is a in the lib " \
                             "directory resources.py "
     import libs.resources
     sys.exit(main())
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
 
 ## Notice
 PaddleOCR supports both dynamic graph and static graph programming paradigms
-- Dynamic graph: dygraph branch (default), **supported by paddle 2.0rc1+ ([installation](./doc/doc_en/installation_en.md))**
+- Dynamic graph: dygraph branch (default), **supported by paddle 2.0.0 ([installation](./doc/doc_en/installation_en.md))**
 - Static graph: develop branch
 
 **Recent updates**

diff --git a/README_ch.md b/README_ch.md
@@ -4,12 +4,12 @@
 PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力使用者训练出更好的模型，并应用落地。
 ## 注意
 PaddleOCR同时支持动态图与静态图两种编程范式
-- 动态图版本：dygraph分支（默认），需将paddle版本升级至2.0rc1+（[快速安装](./doc/doc_ch/installation.md)）
+- 动态图版本：dygraph分支（默认），需将paddle版本升级至2.0.0（[快速安装](./doc/doc_ch/installation.md)）
 - 静态图版本：develop分支
 
 **近期更新**
+- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题，总数162个，每周一都会更新，欢迎大家持续关注。
 - 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课，1月26日、28日、29日晚上19:30，[直播地址](https://live.bilibili.com/21689802)
-- 2021.1.25 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题，总数157个，每周一都会更新，欢迎大家持续关注。
 - 2021.1.21 更新多语言识别模型，目前支持语种超过27种，[多语言模型下载](./doc/doc_ch/models_list.md)，包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等，后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md)，可以批量合成大量与目标场景类似的图像，在多个场景验证，效果明显提升。
 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md)，辅助开发者高效完成标注任务，输出格式与PP-OCR训练任务完美衔接。

diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -1,5 +1,5 @@
 Global:
-  use_gpu: true
+  use_gpu: True
   epoch_num: 72
   log_smooth_window: 20
   print_batch_step: 10
@@ -59,7 +59,7 @@ Metric:
 
 Train:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
@@ -78,7 +78,7 @@ Train:
 
 Eval:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/validation/
     transforms:
       - DecodeImage: # load image

diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -58,7 +58,7 @@ Metric:
 
 Train:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
@@ -77,7 +77,7 @@ Train:
 
 Eval:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/validation/
     transforms:
       - DecodeImage: # load image

diff --git a/configs/rec/rec_mv3_tps_bilstm_att.yml b/configs/rec/rec_mv3_tps_bilstm_att.yml
@@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/rec_mv3_tps_bilstm_att/
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations after the 0th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  character_dict_path: 
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0005
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: RARE
+  Transform:
+    name: TPS
+    num_fiducial: 20
+    loc_lr: 0.1
+    model_name: small
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn 
+    hidden_size: 96
+  Head:
+    name: AttentionHead  
+    hidden_size: 96
+
+
+Loss:
+  name: AttentionLoss
+
+PostProcess:
+  name: AttnLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ../training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - AttnLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ../validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - AttnLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 1
diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -1,5 +1,5 @@
 Global:
-  use_gpu: true
+  use_gpu: True
   epoch_num: 72
   log_smooth_window: 20
   print_batch_step: 10
@@ -63,7 +63,7 @@ Metric:
 
 Train:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
@@ -82,7 +82,7 @@ Train:
 
 Eval:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/validation/
     transforms:
       - DecodeImage: # load image

diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -58,7 +58,7 @@ Metric:
 
 Train:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
@@ -77,7 +77,7 @@ Train:
 
 Eval:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/validation/
     transforms:
       - DecodeImage: # load image

diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -56,7 +56,7 @@ Metric:
 
 Train:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
@@ -75,7 +75,7 @@ Train:
 
 Eval:
   dataset:
-    name: LMDBDateSet
+    name: LMDBDataSet
     data_dir: ./train_data/data_lmdb_release/validation/
     transforms:
       - DecodeImage: # load image