Skip to content

Commit

Permalink
Update ocr_paddle.py
Browse files Browse the repository at this point in the history
fix text capitalization
  • Loading branch information
bropines authored Oct 29, 2024
1 parent 41e3ec5 commit b679cf0
Showing 1 changed file with 25 additions and 12 deletions.
37 changes: 25 additions & 12 deletions modules/ocr/ocr_paddle.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,29 @@ def _ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock], *args, **kwa
self.logger.warning('Invalid text block coordinates for target image')
blk.text = ''

def _apply_text_case(self, text: str) -> str:
    """Return ``text`` converted according to ``self.text_case``.

    Recognised modes are ``'Uppercase'``, ``'Capitalize Sentences'`` and
    ``'Lowercase'``. Any other value leaves the text unchanged.

    Args:
        text: The string to convert.

    Returns:
        The converted string, or ``text`` itself for an unrecognised mode.
    """
    mode = self.text_case
    if mode == 'Uppercase':
        return text.upper()
    if mode == 'Capitalize Sentences':
        return self._capitalize_sentences(text)
    if mode == 'Lowercase':
        return text.lower()
    # Unrecognised mode: hand the text back untouched.
    return text

def _capitalize_sentences(self, text: str) -> str:
    """Convert ``text`` to sentence case.

    The text is split wherever sentence-ending punctuation (``.``, ``!``,
    ``?`` or ``…``) is followed by whitespace. Within each sentence the
    first word is capitalised via ``str.capitalize`` and every following
    word is lower-cased. Runs of whitespace inside a sentence collapse to
    a single space, and the sentences are re-joined with single spaces.

    Args:
        text: The string to convert.

    Returns:
        The sentence-cased string.
    """
    rebuilt = []
    for chunk in re.split(r'(?<=[.!?…])\s+', text):
        tokens = chunk.split()
        if tokens:
            head, *tail = tokens
            # A one-word sentence has an empty tail, so no special case
            # is needed for it.
            pieces = [head.capitalize()]
            pieces.extend(token.lower() for token in tail)
            rebuilt.append(' '.join(pieces))
        else:
            # Empty or whitespace-only chunk contributes an empty string.
            rebuilt.append('')
    return ' '.join(rebuilt)

def _process_result(self, result):
try:
if not result or result[0] is None:
Expand All @@ -247,30 +270,20 @@ def _process_result(self, result):
text = line[1][0]
text = re.sub(r'-(?!\w)', '', text)
text = re.sub(r'\s+', ' ', text)
text = self._apply_text_case(text) # Применяем выбранный регистр
text = self._apply_punctuation_and_spacing(text)
texts.append(text.strip())

if not texts:
return ''

text = ' '.join(texts)
text = self._apply_no_uppercase(text)
text = self._apply_punctuation_and_spacing(text)

return text
except Exception as e:
if self.debug_mode:
self.logger.error(f"Error processing OCR result: {str(e)}")
return ''

def _apply_no_uppercase(self, text: str) -> str:
    """Rewrite ``text`` so each sentence starts capitalised and continues in lower case.

    Sentences are delimited by ``.``, ``!``, ``?`` or ``…`` followed by
    whitespace. The first word of each sentence goes through
    ``str.capitalize`` and all later words through ``str.lower``.
    Whitespace runs inside a sentence collapse to one space.

    Args:
        text: The string to convert.

    Returns:
        The re-cased string, with sentences joined by single spaces.
    """
    sentence_break = r'(?<=[.!?…])\s+'
    normalised = []
    for fragment in re.split(sentence_break, text):
        # An empty fragment yields no tokens and therefore an empty string.
        recased = [
            token.capitalize() if position == 0 else token.lower()
            for position, token in enumerate(fragment.split())
        ]
        normalised.append(' '.join(recased))
    return ' '.join(normalised)

def _apply_punctuation_and_spacing(self, text: str) -> str:
text = re.sub(r'\s+([,.!?…])', r'\1', text)
Expand Down

0 comments on commit b679cf0

Please sign in to comment.