From 2a0fc23fdc2a01256c3f216e0f52581e0ff0a68a Mon Sep 17 00:00:00 2001 From: rkcosmos Date: Sun, 23 Aug 2020 11:40:26 +0700 Subject: [PATCH] v1.1.8 --- README.md | 33 ++++--------- easyocr/__init__.py | 2 +- easyocr/character/abq_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/ady_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/ang_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/as_char.txt | 74 ++++++++++++++++++++++++++++++ easyocr/character/ava_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/bh_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/bho_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/che_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/dar_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/gom_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/inh_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/kbd_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/lbe_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/lez_char.txt | 67 +++++++++++++++++++++++++++ easyocr/character/mah_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/mai_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/new_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/sck_char.txt | 84 ++++++++++++++++++++++++++++++++++ easyocr/character/tab_char.txt | 67 +++++++++++++++++++++++++++ easyocr/easyocr.py | 33 ++++++++++--- easyocr/utils.py | 7 ++- releasenotes.md | 6 +++ setup.py | 2 +- 25 files changed, 1464 insertions(+), 35 deletions(-) create mode 100644 easyocr/character/abq_char.txt create mode 100644 easyocr/character/ady_char.txt create mode 100644 easyocr/character/ang_char.txt create mode 100644 easyocr/character/as_char.txt create mode 100644 easyocr/character/ava_char.txt create mode 100644 easyocr/character/bh_char.txt create mode 100644 easyocr/character/bho_char.txt create mode 
100644 easyocr/character/che_char.txt create mode 100644 easyocr/character/dar_char.txt create mode 100644 easyocr/character/gom_char.txt create mode 100644 easyocr/character/inh_char.txt create mode 100644 easyocr/character/kbd_char.txt create mode 100644 easyocr/character/lbe_char.txt create mode 100644 easyocr/character/lez_char.txt create mode 100644 easyocr/character/mah_char.txt create mode 100644 easyocr/character/mai_char.txt create mode 100644 easyocr/character/new_char.txt create mode 100644 easyocr/character/sck_char.txt create mode 100644 easyocr/character/tab_char.txt diff --git a/README.md b/README.md index 88eb776da6..fa2d53a73c 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,18 @@ [![Tweet](https://img.shields.io/twitter/url/https/github.com/JaidedAI/EasyOCR.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20this%20awesome%20library:%20EasyOCR%20https://github.com/JaidedAI/EasyOCR) [![Twitter](https://img.shields.io/badge/twitter-@JaidedAI-blue.svg?style=flat)](https://twitter.com/JaidedAI) -Ready-to-use OCR with 40+ languages supported including Chinese, Japanese, Korean and Thai. +Ready-to-use OCR with 70+ languages supported including Chinese, Japanese, Korean and Thai. ## What's new? 
+- 23 August 2020 - Version 1.1.8 + - Support for 20 new languages: Bengali, Assamese, Abaza, Adyghe, Kabardian, Avar, + Dargwa, Ingush, Chechen, Lak, Lezgian, Tabassaran, Bihari, Maithili, Angika, + Bhojpuri, Magahi, Nagpuri, Newari, Goan Konkani + - Support RGBA input format + - Add `min_size` argument to `readtext` for filtering out small text boxes - 10 August 2020 - Version 1.1.7 - New language support for Tamil - Temporary fix for memory leakage on CPU mode -- 4 August 2020 - Version 1.1.6 - - New language support for Russian, Serbian, Belarusian, Bulgarian, Mongolian, Ukranian (Cyrillic Script) and Arabic, Persian(Farsi), Urdu, Uyghur (Arabic Script) - - Docker file and Ainize demo (thanks @ghandic and @Wook-2) - - Better production friendly with Logger and custom model folder location (By setting ` model_storage_directory` when create `Reader` instance) (thanks @jpotter) - [Read all released notes](https://github.com/JaidedAI/EasyOCR/blob/master/releasenotes.md) ## What's coming next? @@ -31,24 +33,7 @@ Ready-to-use OCR with 40+ languages supported including Chinese, Japanese, Korea ## Supported Languages -We are currently supporting the following 59 languages. 
- -Afrikaans (af), Arabic (ar), Azerbaijani (az), Belarusian (be), Bulgarian (bg), Bosnian (bs), -Simplified Chinese (ch_sim), Traditional Chinese (ch_tra), Czech (cs), Welsh (cy), -Danish (da), German (de), English (en), Spanish (es), Estonian (et), Persian (Farsi) (fa) -French (fr), Irish (ga), Hindi(hi), Croatian (hr), Hungarian (hu), -Indonesian (id), Icelandic (is), Italian (it), Japanese (ja), Korean (ko), -Kurdish (ku), Latin (la), Lithuanian (lt), Latvian (lv), Maori (mi), Mongolian (mn), -Marathi (mr), Malay (ms), Maltese (mt), Nepali (ne), Dutch (nl), Norwegian (no), -Occitan (oc), Polish (pl), Portuguese (pt), Romanian (ro), Russian (ru), -Serbian (cyrillic)(rs_cyrillic), Serbian (latin)(rs_latin), -Slovak (sk) (need revisit), Slovenian (sl), Albanian (sq), Swedish (sv), -Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Turkish (tr), Uyghur (ug), Ukranian(uk), Urdu (ur), -Uzbek (uz), Vietnamese (vi) (need revisit) - -List of characters is in folder [easyocr/character](https://github.com/JaidedAI/EasyOCR/tree/master/easyocr/character). -If you are native speaker of any language and think we should add or remove any character, -please create an issue and/or pull request (like [this one](https://github.com/JaidedAI/EasyOCR/pull/15)). +We are currently supporting 70+ languages. See [list of supported languages](https://www.jaided.ai/easyocr). ## Installation @@ -213,7 +198,7 @@ See [List of languages in development](https://github.com/JaidedAI/EasyOCR/issue > Base class for EasyOCR > > **Parameters** -> * **lang_list** (list) - list of language code you want to recognize, for example ['ch_sim','en']. List of supported language code is [here](#Supported-Languages). +> * **lang_list** (list) - list of language code you want to recognize, for example ['ch_sim','en']. List of supported language code is [here](https://www.jaided.ai/easyocr). 
> * **gpu** (bool, string, default = True) > * **model_storage_directory** (string, default = None) > * **download_enabled** (bool, default = True) diff --git a/easyocr/__init__.py b/easyocr/__init__.py index 5a0df77fa6..b45f80e603 100644 --- a/easyocr/__init__.py +++ b/easyocr/__init__.py @@ -1,3 +1,3 @@ from .easyocr import Reader -__version__ = '1.1.7' +__version__ = '1.1.8' diff --git a/easyocr/character/abq_char.txt b/easyocr/character/abq_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/abq_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/ady_char.txt b/easyocr/character/ady_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/ady_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/ang_char.txt b/easyocr/character/ang_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/ang_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. 
+॰ +ॅ diff --git a/easyocr/character/as_char.txt b/easyocr/character/as_char.txt new file mode 100644 index 0000000000..89ffadfa5f --- /dev/null +++ b/easyocr/character/as_char.txt @@ -0,0 +1,74 @@ +হ +থ +শ +৫ +ক +ও +য +০ +গ +দ +ড় +খ +য় +ঋ +ন +অ +৪ +এ +ব +ঠ +ঢ +৭ +৯ +ধ +ঙ +ট +ঝ +ৎ +ণ +ত +র +২ +চ +ঌ +ড +৬ +ঔ +প +ভ +ম +ঢ় +ঈ +৮ +ঘ +১ +ষ +৩ +ফ +ছ +ল +জ +আ +। +ঊ +ই +স +ঐ +উ +ঞ +া +্ +ু +ী +ে +ং +ি +় +ঁ +ৃ +ো +ূ +ৈ +ৌ +ঃ diff --git a/easyocr/character/ava_char.txt b/easyocr/character/ava_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/ava_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/bh_char.txt b/easyocr/character/bh_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/bh_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. +॰ +ॅ diff --git a/easyocr/character/bho_char.txt b/easyocr/character/bho_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/bho_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. 
+॰ +ॅ diff --git a/easyocr/character/che_char.txt b/easyocr/character/che_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/che_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/dar_char.txt b/easyocr/character/dar_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/dar_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/gom_char.txt b/easyocr/character/gom_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/gom_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. 
+॰ +ॅ diff --git a/easyocr/character/inh_char.txt b/easyocr/character/inh_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/inh_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/kbd_char.txt b/easyocr/character/kbd_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/kbd_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/lbe_char.txt b/easyocr/character/lbe_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/lbe_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/lez_char.txt b/easyocr/character/lez_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/lez_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/character/mah_char.txt b/easyocr/character/mah_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/mah_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ 
+ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. +॰ +ॅ diff --git a/easyocr/character/mai_char.txt b/easyocr/character/mai_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/mai_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. +॰ +ॅ diff --git a/easyocr/character/new_char.txt b/easyocr/character/new_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/new_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. +॰ +ॅ diff --git a/easyocr/character/sck_char.txt b/easyocr/character/sck_char.txt new file mode 100644 index 0000000000..2808c378ce --- /dev/null +++ b/easyocr/character/sck_char.txt @@ -0,0 +1,84 @@ +अ +आ +इ +ई +उ +ऊ +ऋ +ए +ऐ +ऑ +ओ +औ +अं +अः +क +ख +ग +घ +ङ +च +छ +ज +झ +ञ +ट +ठ +ड +ढ +ण +त +थ +द +ध +न +प +फ +ब +भ +म +य +र +ल +व +श +ष +स +ह +ळ +१ +२ +३ +४ +५ +६ +७ +८ +९ +० +ै +ा +ं +े +ि +ो +् +ु +ी +़ +ू +ँ +ृ +ौ +ॉ +ज़ +ड़ +क़ +ढ़ +फ़ +ग़ +ः +ख़ +. 
+॰ +ॅ diff --git a/easyocr/character/tab_char.txt b/easyocr/character/tab_char.txt new file mode 100644 index 0000000000..b10df0cb3b --- /dev/null +++ b/easyocr/character/tab_char.txt @@ -0,0 +1,67 @@ +А +Б +В +Г +Д +Е +Ё +Ж +З +И +Й +К +Л +М +Н +О +П +Р +С +Т +У +Ф +Х +Ц +Ч +Ш +Щ +Ъ +Ы +Ь +Э +Ю +Я +а +б +в +г +д +е +ё +ж +з +и +й +к +л +м +н +о +п +р +с +т +у +ф +х +ц +ч +ш +щ +ъ +ы +ь +э +ю +я +I diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index 3de850b1a0..66494c7ed0 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -3,7 +3,7 @@ from .detection import get_detector, get_textbox from .imgproc import loadImage from .recognition import get_recognizer, get_text -from .utils import group_text_box, get_image_list, calculate_md5, get_paragraph, download_and_unzip, printProgressBar +from .utils import group_text_box, get_image_list, calculate_md5, get_paragraph, download_and_unzip, printProgressBar, diff from bidi.algorithm import get_display import numpy as np import cv2 @@ -37,10 +37,12 @@ 'nl','no','oc','pl','pt','ro','rs_latin','sk','sl','sq',\ 'sv','sw','tl','tr','uz','vi'] arabic_lang_list = ['ar','fa','ug','ur'] -cyrillic_lang_list = ['ru','rs_cyrillic','be','bg','uk','mn'] -devanagari_lang_list = ['hi','mr','ne'] +bengali_lang_list = ['bn','as'] +cyrillic_lang_list = ['ru','rs_cyrillic','be','bg','uk','mn','abq','ady','kbd',\ + 'ava','dar','inh','che','lbe','lez','tab'] +devanagari_lang_list = ['hi','mr','ne','bh','mai','ang','bho','mah','sck','new','gom'] -all_lang_list = latin_lang_list + arabic_lang_list+ cyrillic_lang_list + devanagari_lang_list + ['th','ch_sim','ch_tra','ja','ko','ta'] +all_lang_list = latin_lang_list + arabic_lang_list+ cyrillic_lang_list + devanagari_lang_list + bengali_lang_list + ['th','ch_sim','ch_tra','ja','ko','ta'] imgH = 64 input_channel = 1 output_channel = 512 @@ -63,6 +65,7 @@ 'cyrillic.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/cyrillic.zip', '5a046f7be2a4f7da6ed50740f487efa8'), 
'arabic.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/arabic.zip', '993074555550e4e06a6077d55ff0449a'), 'tamil.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.7/tamil.zip', '4b93972fdacdcdabe6d57097025d4dc2'), + 'bengali.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.8/bengali.zip', 'cea9e897e2c0576b62cbb1554997ce1c'), } class Reader(object): @@ -129,6 +132,10 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, download_e self.model_lang = 'tamil' if set(lang_list) - set(['ta','en']) != set(): raise ValueError('Tamil is only compatible with English, try lang_list=["ta","en"]') + elif set(lang_list) & set(bengali_lang_list): + self.model_lang = 'bengali' + if set(lang_list) - set(bengali_lang_list+['en']) != set(): + raise ValueError('Bengali is only compatible with English, try lang_list=["bn","as","en"]') elif set(lang_list) & set(arabic_lang_list): self.model_lang = 'arabic' if set(lang_list) - set(arabic_lang_list+['en']) != set(): @@ -163,6 +170,10 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, download_e devanagari_char = '.ँंःअअंअःआइईउऊऋएऐऑओऔकखगघङचछजझञटठडढणतथदधनऩपफबभमयरऱलळवशषसह़ािीुूृॅेैॉोौ्ॐ॒क़ख़ग़ज़ड़ढ़फ़ॠ।०१२३४५६७८९॰' self.character = number+ symbol + en_char + devanagari_char model_file = 'devanagari.pth' + elif self.model_lang == 'bengali': + bn_char = '।ঁংঃঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ািীুূৃেৈোৌ্ৎড়ঢ়য়০১২৩৪৫৬৭৮৯' + self.character = number+ symbol + en_char + bn_char + model_file = 'bengali.pth' elif self.model_lang == 'chinese_tra': char_file = os.path.join(BASE_PATH, 'character', "ch_tra_char.txt") with open(char_file, "r", encoding = "utf-8-sig") as input_file: @@ -279,7 +290,7 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None, download_e def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,\ workers = 0, allowlist = None, blocklist = None, detail = 1,\ - paragraph = False,\ + 
paragraph = False, min_size = 20,\ contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,\ text_threshold = 0.7, low_text = 0.4, link_threshold = 0.4,\ canvas_size = 2560, mag_ratio = 1.,\ @@ -309,15 +320,23 @@ def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,\ if len(image.shape) == 2: # grayscale img_cv_grey = image img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) - elif len(image.shape) == 3: # BGRscale + elif len(image.shape) == 3 and image.shape[2] == 3: # BGRscale img = image img_cv_grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + elif len(image.shape) == 3 and image.shape[2] == 4: # RGBAscale + img = image[:,:,:3] + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + img_cv_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + else: + LOGGER.warning('Invalid input type. Suppoting format = string(file path or url), bytes, numpy array') text_box = get_textbox(self.detector, img, canvas_size, mag_ratio, text_threshold,\ link_threshold, low_text, False, self.device) horizontal_list, free_list = group_text_box(text_box, slope_ths, ycenter_ths, height_ths, width_ths, add_margin) - # should add filter to screen small box out + if min_size: + horizontal_list = [i for i in horizontal_list if max(i[1]-i[0],i[3]-i[2]) > min_size] + free_list = [i for i in free_list if max(diff([c[0] for c in i]), diff([c[1] for c in i]))>min_size] image_list, max_width = get_image_list(horizontal_list, free_list, img_cv_grey, model_height = imgH) diff --git a/easyocr/utils.py b/easyocr/utils.py index b1eab581ea..5e9a4ab77e 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -503,6 +503,9 @@ def calculate_md5(fname): hash_md5.update(chunk) return hash_md5.hexdigest() +def diff(input_list): + return max(input_list)-min(input_list) + def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'): # create basic attributes box_group = [] @@ -589,5 +592,5 @@ def progress_hook(count, blockSize, totalSize): filledLength = int(length * progress) bar = fill * filledLength + 
'-' * (length - filledLength) print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd) - - return progress_hook \ No newline at end of file + + return progress_hook diff --git a/releasenotes.md b/releasenotes.md index 2204cc6ef4..d4a3794869 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -1,4 +1,10 @@ +- 23 August 2020 - Version 1.1.8 + - Support for 20 new languages: Bengali, Assamese, Abaza, Adyghe, Kabardian, Avar, + Dargwa, Ingush, Chechen, Lak, Lezgian, Tabassaran, Bihari, Maithili, Angika, + Bhojpuri, Magahi, Nagpuri, Newari, Goan Konkani + - Support RGBA input format + - Add `min_size` argument to `readtext` for filtering out small text boxes - 10 August 2020 - Version 1.1.7 - New language support for Tamil - Temporary fix for memory leakage on CPU mode diff --git a/setup.py b/setup.py index 8061fa6639..39a6d21890 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def readme(): name='easyocr', packages=['easyocr'], include_package_data=True, - version='1.1.7', + version='1.1.8', install_requires=requirements, entry_points={"console_scripts": ["easyocr= easyocr.cli:main"]}, license='Apache License 2.0',