Skip to content

Commit 334886b

Browse files
committed
remove old DB, and add PPOCRv3DB text detection model.
1 parent d9968b7 commit 334886b

8 files changed

+23
-20
lines changed

models/text_detection_db/LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
12

23
Apache License
34
Version 2.0, January 2004
@@ -187,7 +188,7 @@
187188
same "printed page" as the copyright notice for easier
188189
identification within third-party archives.
189190

190-
Copyright [yyyy] [name of copyright owner]
191+
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
191192

192193
Licensed under the Apache License, Version 2.0 (the "License");
193194
you may not use this file except in compliance with the License.

models/text_detection_db/README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
# DB
1+
# PP-OCRv3 Text Detection
22

3-
Real-time Scene Text Detection with Differentiable Binarization
3+
### NOTE: PP-OCRv3 Text Detection is supported with `opencv >= 4.8.0`.
4+
PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System
45

56
Note:
67

7-
- Models source: [here](https://drive.google.com/drive/folders/1qzNCHfUJOS0NEUOIKn69eCtxdlNPpWbq).
8+
- Original Paddle model source (English): [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar).
9+
- Original Paddle model source (Chinese): [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar).
810
- `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
911
- `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
1012
- Visit https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html for more information.
@@ -35,6 +37,6 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
3537

3638
## Reference
3739

38-
- https://arxiv.org/abs/1911.08947
39-
- https://github.com/MhLiao/DB
40+
- https://arxiv.org/abs/2206.03001
41+
- https://github.com/PaddlePaddle/PaddleOCR
4042
- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html

models/text_detection_db/db.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import cv2 as cv
99

10-
class DB:
10+
class PPOCRv3DB:
1111
def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
1212
self._modelPath = modelPath
1313
self._model = cv.dnn_TextDetectionModel_DB(
@@ -32,7 +32,10 @@ def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygon
3232
self._model.setUnclipRatio(self._unclipRatio)
3333
self._model.setMaxCandidates(self._maxCandidates)
3434

35-
self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
35+
self._model.setInputSize(self._inputSize)
36+
self._model.setInputMean((123.675, 116.28, 103.53))
37+
self._model.setInputScale(1.0/255.0/np.array([0.229, 0.224, 0.225]))
38+
self._model.setInputSwapRB(True)
3639

3740
@property
3841
def name(self):
@@ -46,7 +49,10 @@ def setBackendAndTarget(self, backendId, targetId):
4649

4750
def setInputSize(self, input_size):
4851
self._inputSize = tuple(input_size)
49-
self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
52+
self._model.setInputSize(self._inputSize)
53+
self._model.setInputMean((123.675, 116.28, 103.53))
54+
self._model.setInputScale(1.0/255.0/np.array([0.229, 0.224, 0.225]))
55+
self._model.setInputSwapRB(True)
5056

5157
def infer(self, image):
5258
assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])

models/text_detection_db/demo.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import cv2 as cv
1111

12-
from db import DB
12+
from db import PPOCRv3DB
1313

1414
# Check OpenCV version
1515
assert cv.__version__ >= "4.7.0", \
@@ -24,11 +24,11 @@
2424
[cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
2525
]
2626

27-
parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
27+
parser = argparse.ArgumentParser(description='PP-OCRv3 Text Detection (https://arxiv.org/abs/2206.03001).')
2828
parser.add_argument('--input', '-i', type=str,
2929
help='Usage: Set path to the input image. Omit for using default camera.')
30-
parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
31-
help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
30+
parser.add_argument('--model', '-m', type=str, default='text_detection_en_ppocrv3_2023may.onnx',
31+
help='Usage: Set model path, defaults to text_detection_en_ppocrv3_2023may.onnx.')
3232
parser.add_argument('--backend_target', '-bt', type=int, default=0,
3333
help='''Choose one of the backend-target pair to run this demo:
3434
{:d}: (default) OpenCV implementation + CPU,
@@ -71,7 +71,7 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
7171
target_id = backend_target_pairs[args.backend_target][1]
7272

7373
# Instantiate PPOCRv3DB
74-
model = DB(modelPath=args.model,
74+
model = PPOCRv3DB(modelPath=args.model,
7575
inputSize=[args.width, args.height],
7676
binaryThreshold=args.binary_threshold,
7777
polygonThreshold=args.polygon_threshold,

models/text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx

Lines changed: 0 additions & 3 deletions
This file was deleted.

models/text_detection_db/text_detection_DB_TD500_resnet18_2021sep.onnx

Lines changed: 0 additions & 3 deletions
This file was deleted.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)