opencv · zihaomu · May 8, 2023 · May 8, 2023 · fengyuentau · May 5, 2023
diff --git a/models/text_detection_db/LICENSE b/models/text_detection_db/LICENSE
@@ -1,3 +1,4 @@
+Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 
                                  Apache License
                            Version 2.0, January 2004
@@ -187,7 +188,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/models/text_detection_db/README.md b/models/text_detection_db/README.md
@@ -1,10 +1,12 @@
-# DB
+# PP-OCRv3 Text Detection
 
-Real-time Scene Text Detection with Differentiable Binarization
+### NOTE: PP-OCRv3 Text Detection is supported with `opencv >= 4.8.0`.
+PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System
 
 Note:
 
-- Models source: [here](https://drive.google.com/drive/folders/1qzNCHfUJOS0NEUOIKn69eCtxdlNPpWbq).
+- Source of the original Paddle model (English): [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar).
+- Source of the original Paddle model (Chinese): [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar).
 - `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
 - `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
 - Visit https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html for more information.
@@ -35,6 +37,6 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 
 ## Reference
 
-- https://arxiv.org/abs/1911.08947
-- https://github.com/MhLiao/DB
+- https://arxiv.org/abs/2206.03001
+- https://github.com/PaddlePaddle/PaddleOCR
 - https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
diff --git a/models/text_detection_db/db.py b/models/text_detection_db/db.py
@@ -7,7 +7,7 @@
 import numpy as np
 import cv2 as cv
 
-class DB:
+class PPOCRv3DB:
     def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
         self._modelPath = modelPath
         self._model = cv.dnn_TextDetectionModel_DB(
@@ -32,7 +32,10 @@ def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygon
         self._model.setUnclipRatio(self._unclipRatio)
         self._model.setMaxCandidates(self._maxCandidates)
 
-        self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
+        self._model.setInputSize(self._inputSize)
+        self._model.setInputMean((123.675, 116.28, 103.53))
+        self._model.setInputScale(1.0/255.0/np.array([0.229, 0.224, 0.225]))
+        self._model.setInputSwapRB(True)
 
     @property
     def name(self):
@@ -46,7 +49,10 @@ def setBackendAndTarget(self, backendId, targetId):
 
     def setInputSize(self, input_size):
         self._inputSize = tuple(input_size)
-        self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
+        self._model.setInputSize(self._inputSize)
+        self._model.setInputMean((123.675, 116.28, 103.53))
+        self._model.setInputScale(1.0/255.0/np.array([0.229, 0.224, 0.225]))
+        self._model.setInputSwapRB(True)
 
     def infer(self, image):
         assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])

diff --git a/models/text_detection_db/demo.py b/models/text_detection_db/demo.py
@@ -9,7 +9,7 @@
 import numpy as np
 import cv2 as cv
 
-from db import DB
+from db import PPOCRv3DB
 
 # Check OpenCV version
 assert cv.__version__ >= "4.7.0", \
@@ -24,11 +24,11 @@
     [cv.dnn.DNN_BACKEND_CANN,   cv.dnn.DNN_TARGET_NPU]
 ]
 
-parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
+parser = argparse.ArgumentParser(description='PP-OCRv3 Text Detection (https://arxiv.org/abs/2206.03001).')
 parser.add_argument('--input', '-i', type=str,
                     help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
-                    help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
+parser.add_argument('--model', '-m', type=str, default='./text_detection_en_ppocrv3_2023may.onnx',
+                    help='Usage: Set model path, defaults to text_detection_en_ppocrv3_2023may.onnx.')
 parser.add_argument('--backend_target', '-bt', type=int, default=0,
                     help='''Choose one of the backend-target pair to run this demo:
                         {:d}: (default) OpenCV implementation + CPU,
@@ -71,7 +71,7 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
     target_id = backend_target_pairs[args.backend_target][1]
 
     # Instantiate DB
-    model = DB(modelPath=args.model,
+    model = PPOCRv3DB(modelPath=args.model,
                inputSize=[args.width, args.height],
                binaryThreshold=args.binary_threshold,
                polygonThreshold=args.polygon_threshold,

diff --git a/models/text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx b/models/text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx
diff --git a/models/text_detection_db/text_detection_DB_TD500_resnet18_2021sep.onnx b/models/text_detection_db/text_detection_DB_TD500_resnet18_2021sep.onnx
diff --git a/models/text_detection_db/text_detection_ch_ppocrv3_2023may.onnx b/models/text_detection_db/text_detection_ch_ppocrv3_2023may.onnx
diff --git a/models/text_detection_db/text_detection_en_ppocrv3_2023may.onnx b/models/text_detection_db/text_detection_en_ppocrv3_2023may.onnx