1- from piper .base .executors import FastAPIExecutor , FastAPITesseractExecutor , VirtualEnvExecutor
2- from fastapi .responses import JSONResponse
3-
4- from pydantic import BaseModel
5- from loguru import logger
61import json
7- import spacy
82import sys
3+
4+ import spacy
5+ from fastapi .responses import JSONResponse
6+ from loguru import logger
7+ from pydantic import BaseModel
8+
9+ from piper .base .executors import FastAPIExecutor , FastAPITesseractExecutor
910from piper .configurations import get_configuration
1011from piper .utils import tesrct_utils as tu
1112
12-
# Register an additional loguru sink: records at INFO and above also go to
# "file.log", with the rotation threshold given as '5 MB' and loguru's
# backtrace/diagnose options switched on for logged exceptions.
logger.add(
    "file.log",
    level="INFO",
    backtrace=True,
    diagnose=True,
    rotation='5 MB',
)
1414
1515
class StringValue(BaseModel):
    '''
    Pydantic model carrying a single string in its ``value`` field.
    '''

    value: str
1818
19+
class BytesObject(BaseModel):
    '''
    Pydantic model carrying raw bytes in its ``value`` field.
    '''

    value: bytes
2122
23+
class ListOfStringsObject(BaseModel):
    '''
    Pydantic model carrying a list in its ``value`` field.

    The annotation is the bare ``list`` type, so element types are not
    constrained by the model itself.
    '''

    value: list
2426
27+
2528class TestMessageAdder (FastAPIExecutor ):
2629
2730 def __init__ (self , appender = "TEST" , ** kwargs ):
@@ -30,18 +33,19 @@ def __init__(self, appender="TEST", **kwargs):
3033
async def run(self, message: StringValue) -> StringValue:
    '''
    Return a new StringValue whose text is ``message.value`` followed by
    this executor's ``appender``.
    '''
    combined = message.value + self.appender
    return StringValue(value=combined)
33-
36+
3437
3538class TesseractRecognizer (FastAPITesseractExecutor ):
3639 '''
3740 Tesseract OCR implementation service
3841 '''
42+
def __init__(self, **kwargs):
    '''
    Initialise the base executor with ``kwargs`` and remember the
    tesseract settings (``ts_config``) from the project configuration.
    '''
    super().__init__(**kwargs)
    self.ts_config = get_configuration().ts_config
4347
44- def set_config_ (self , config_ ):
48+ def set_config_ (self , config_ ):
4549 if 'ts_lang' not in config_ .keys ():
4650 logger .error (f'tesseract config keys must contains ts_lang, keys { config_ .keys ()} ' )
4751 logger .error (f'tesseract config did not set' )
@@ -59,23 +63,24 @@ async def sconfig(self, conf) -> ListOfStringsObject:
5963 # conf = '12'
6064 logger .info (f'request to set config to { conf } ' )
6165 self .set_config_ (conf )
62- return JSONResponse (content = {'text' :'OK' })
63-
64- async def recognize (self , file_content : BytesObject , suf : str ) -> ListOfStringsObject :
66+ return JSONResponse (content = {'text' : 'OK' })
67+
async def recognize(self, file_content: BytesObject, suf: str) -> ListOfStringsObject:
    '''
    Pass the uploaded content to ``tu.bytes_handler`` and return its result.

    :param file_content: payload forwarded unchanged to ``tu.bytes_handler``
    :param suf: file suffix, forwarded alongside the payload
    :return: a JSONResponse whose content is whatever ``tu.bytes_handler``
        produced for the current ``self.ts_config``
    '''
    content_type = type(file_content)
    logger.info(f'file_content {content_type}, file suffix is {suf}')

    active_config = self.ts_config
    logger.info(f'current tesseract config is {active_config}')
    recognized = tu.bytes_handler(file_content, suf, active_config)
    logger.info(f'img_bytes_handler return {type(recognized)} object')
    return JSONResponse(content=recognized)
7175
72- async def ner (self , txt : str ):
async def ner(self, txt: str):
    '''
    Extract named entities from ``txt`` using one of the available spaCy models.

    A new SpacyNER helper is created on every call. When it reports at least
    one available model, one of them is selected before extraction.

    :param txt: text to analyse
    :return: the JSONResponse produced by ``SpacyNER.extract_named_ents``; if
        that call returns something that is not already a JSONResponse, the
        value is wrapped in one
    '''
    sn = SpacyNER()
    if sn.available_models:
        # available_models is created as a set, which does not support
        # indexing ([0] raises TypeError); take one member via iteration.
        # For a set this is an arbitrary member, not a guaranteed "first".
        dummy_model = next(iter(sn.available_models))
        sn.set_model(dummy_model)

    result = sn.extract_named_ents(txt)
    # extract_named_ents already returns a JSONResponse on success; wrapping
    # it again would fail because a response object is not JSON-serialisable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
7882
83+
7984# class ModelNameNotInList(BaseException):
8085# def __init__(self, msg):
8186# # pass
@@ -86,6 +91,7 @@ class SpacyNER():
8691 '''
8792 Spacy NER service
8893 '''
94+
8995 def __init__ (self ):
9096 cfg = get_configuration ()
9197 self .available_models = set ()
@@ -102,34 +108,32 @@ def __init__(self):
102108 logger .error (f'catch exception { e } ' )
103109 sys .exit ()
104110
105-
def set_model(self, cur_model):
    '''
    Load the spaCy model ``cur_model`` and store it in ``self.nlp``.

    :param cur_model: model name; must be a member of ``self.available_models``
    :raises ValueError: if ``cur_model`` is not in ``self.available_models``
        (``self.nlp`` is reset to None before raising)

    A failure while loading is logged and leaves ``self.nlp`` set to None;
    it is deliberately not re-raised.
    '''
    if cur_model not in self.available_models:
        msg = f'there is not {cur_model} in available_models set: {self.available_models}'
        logger.error(msg)
        self.nlp = None
        raise ValueError(msg)

    try:
        nlp = spacy.load(cur_model)
        # f-string so the model name is interpolated; loguru leaves a plain
        # message untouched when no format arguments are passed, so without
        # the prefix the literal text "{cur_model}" was logged.
        logger.info(f'spacy nlp object created with model {cur_model}')
    except Exception as e:
        logger.error(f'catch exception {e}')
        if isinstance(e, OSError):
            # OSError is treated here as "model package not downloaded".
            logger.error(f'you must download spacy model {cur_model}')
        nlp = None
        logger.info('spacy nlp object DID NOT create')

    self.nlp = nlp
125129
def extract_named_ents(self, txt: str):
    '''
    Run the loaded spaCy pipeline over ``txt`` and collect its entities.

    :param txt: text to analyse
    :return: when ``self.nlp`` is set, a JSONResponse whose content is a list
        of ``(entity text, entity label)`` pairs; otherwise an error is
        logged and the code visible here returns nothing
    '''
    logger.debug(f'got data type {type(txt)} and data <<{txt}>> for NER')
    if self.nlp:
        entities = [(ent.text, ent.label_) for ent in self.nlp(txt).ents]
        return JSONResponse(content=entities)
    else:
        logger.error(f'nlp object didn`t create. you should use set_model(model_name)')
0 commit comments