Skip to content

Commit 0a1e28f

Browse files
committed
add PiperOperator
1 parent 1db596f commit 0a1e28f

File tree

1 file changed

+176
-0
lines changed

1 file changed

+176
-0
lines changed

tests/PiperOperator.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import requests
2+
import json
3+
import os
4+
import sys
5+
from pprint import pprint
6+
from loguru import logger
7+
8+
# root_dir = os.path.realpath(os.path.pardir)
9+
# logger.info(f'root dir is {root_dir}')
10+
# sys.path.insert(1, root_dir)
11+
# from piper.utils import tesrct_utils as tu
12+
13+
14+
HEADERS = {"Content-Type": "application/json"}
15+
NER_RESPONSE_KEY = 'body'
16+
17+
class PiperOperatorException(Exception):
    """Base class for the errors defined by this module.

    Derives from ``Exception`` (not ``BaseException``) so that generic
    ``except Exception`` handlers treat it like any other application error.

    Args:
        msg: Human-readable description of the failure.
    """

    def __init__(self, msg):
        # Forward the message to the base class; the previous ``pass`` body
        # dropped it, which left ``str(exc)`` and ``exc.args`` empty.
        super().__init__(msg)
21+
22+
23+
24+
class FileLoadException(PiperOperatorException):
    """Error describing a file that could not be loaded.

    Args:
        fn: Name of the file; kept on the instance as ``fn``.
    """

    def __init__(self, fn):
        message = f'file {fn} can`t be loaded'
        # Keep the offending file name available to handlers.
        self.fn = fn
        super().__init__(message)
28+
29+
30+
class JSONGetKeyException(PiperOperatorException):
    """Error describing a key that could not be read from a JSON payload.

    Args:
        key: The key that was requested; kept on the instance as ``key``.
    """

    def __init__(self, key):
        message = f'can`t get JSON key {key}'
        # Keep the requested key available to handlers.
        self.key = key
        super().__init__(message)
34+
35+
36+
class NoAvailableModelsException(PiperOperatorException):
    """Error carrying the fixed message that there are no spacy models."""

    def __init__(self):
        message = 'there are no spacy models'
        super().__init__(message)
39+
40+
41+
def get_data_by_key_from_response(cur_response, k):
    """Return the value stored under ``k`` in a response's JSON body.

    Args:
        cur_response: Object exposing ``json()``; it is called once to
            decode the body.
        k: Key to look up in the decoded JSON object.

    Returns:
        The value mapped to ``k``. It may be falsy (for example an empty
        list); only a missing key is treated as an error.

    Raises:
        JSONGetKeyException: If the decoded body is not a JSON object or
            does not contain ``k``.
    """
    payload = cur_response.json()
    # The previous guard joined its two conditions with ``and``, so a missing
    # key in a non-empty body was never reported and ``None`` was returned.
    # The isinstance check also covers ``null`` and list bodies, which used
    # to fail with AttributeError on ``.keys()``.
    if not isinstance(payload, dict) or k not in payload:
        raise JSONGetKeyException(k)
    return payload[k]
47+
48+
def get_data_by_key_from_url(url, key, post=True, data=None, file_name=""):
    """Call ``url`` and return one key of the JSON reply (or the reply itself).

    Args:
        url: Endpoint to call.
        key: Key to extract from the JSON reply. For POST requests an empty
            key means "return the response object unchanged".
        post: When true a POST is sent, otherwise a GET.
        data: Request body passed to ``requests`` for non-file requests.
        file_name: Path of a file to upload as multipart field ``file``;
            only used for POST requests, and takes precedence over ``data``.

    Returns:
        The extracted value, the response object (POST with an empty key),
        or ``None`` when any error occurred. Errors are logged, not raised.
    """
    try:
        if post:
            if file_name:
                logger.info(f'filename is {file_name}')
                # The context manager closes the upload handle even when the
                # request fails; it was previously left open.
                with open(file_name, 'rb') as upload:
                    # NOTE(review): verify=False turns off TLS certificate
                    # checks. Kept to preserve behaviour - confirm it is
                    # really needed.
                    cur_response = requests.post(url, files={'file': upload}, verify=False)
            else:
                cur_response = requests.post(url, headers=HEADERS, data=data)

            logger.debug(f'url is {url}, response is {cur_response}, content is {cur_response.content}')
            cur_response.raise_for_status()
            if not key:
                return cur_response

            logger.debug(f'try to get value for key {key}')
            val = get_data_by_key_from_response(cur_response, key)
            logger.debug(f'value for key is {val}')
            return val

        cur_response = requests.get(url, headers=HEADERS, data=data)
        cur_response.raise_for_status()
        return get_data_by_key_from_response(cur_response, key)

    except requests.exceptions.ConnectionError as ce:
        logger.exception(f'can`t connect to url: {ce}')

    except JSONGetKeyException as cjke:
        logger.exception(f'can`t get key from response: {cjke}')

    except Exception as e:
        logger.exception(f'error while processing url {url}: {e}')

    # Every handled failure ends here; callers must be ready for None.
    return None
86+
87+
88+
class PiperNLPWorker:
    """Simple client class showing how to use the piper NLPProcessor.

    All requests go through ``get_data_by_key_from_url``.

    Args:
        base_url: Root URL of the service, with or without a trailing slash.
    """

    def __init__(self, base_url):
        self.base_url = base_url
        # Strip trailing slashes so the endpoints below never contain '//'.
        root = base_url.rstrip('/')

        # RECOGNIZE endpoints
        self.url_tsrct_cfg = f'{root}/set_config'
        self.url_rcg = f'{root}/recognize'

        # NER endpoints
        # list all available spaCy models
        self.url_spacy_all_models = f'{root}/get_ner_models'
        # select the current spaCy model
        self.url_spacy_set_model = f'{root}/set_ner_model'
        # extract named entities from text
        self.url_spacy_get_NE = f'{root}/extract_named_ents'

    def get_available_ner_models(self):
        """Return the ``available_models`` value reported by the service."""
        return get_data_by_key_from_url(self.url_spacy_all_models, 'available_models', post=False)

    def set_current_spacy_model(self, model):
        """POST ``model`` as ``model_name``; return whatever the helper returns."""
        payload = json.dumps({'model_name': model})
        return get_data_by_key_from_url(self.url_spacy_set_model, '', post=True, data=payload)

    def get_named_ent_from_text(self, txt):
        """Return the named entities the service extracts from ``txt``.

        Returns:
            The JSON-decoded entity payload, or ``[]`` when it is empty.

        Raises:
            JSONGetKeyException: If no usable ``result`` object came back or
                it lacks the ``NER_RESPONSE_KEY`` entry.
        """
        payload = json.dumps({'txt': txt})
        # Sent as a GET carrying a body, exactly as before.
        resp = get_data_by_key_from_url(self.url_spacy_get_NE, 'result', post=False, data=payload)
        # The message used to print the response in place of the URL.
        logger.debug(f'url is {self.url_spacy_get_NE}, response is {resp}')

        # ``resp`` is not a dict (e.g. None) when the request did not yield
        # one; report that as a missing key rather than crashing on ``.keys()``.
        if not isinstance(resp, dict) or NER_RESPONSE_KEY not in resp:
            raise JSONGetKeyException(NER_RESPONSE_KEY)

        named_ents = resp.get(NER_RESPONSE_KEY)
        if not named_ents:
            logger.info(f'NER result is empty: {named_ents}')
            return []
        return json.loads(named_ents)

    def get_text_from_file(self, fn):
        """Upload file ``fn`` for recognition and return the ``text`` value.

        Returns:
            The recognised text, or ``None`` when the call failed.
        """
        try:
            return get_data_by_key_from_url(self.url_rcg, 'text', post=True, file_name=fn)
        except Exception as e:
            logger.error(f'error while extract text from file {fn}')
            logger.exception(e)
            return None

    def set_tesseract_config(self, conf):
        """POST ``conf`` as JSON to the config endpoint; return the helper's result."""
        return get_data_by_key_from_url(self.url_tsrct_cfg, '', post=True, data=json.dumps(conf))
139+
140+
if __name__ == '__main__':
    # Demo run: exercises each PiperNLPWorker call against a local service.
    piper_worker = PiperNLPWorker('http://localhost:8788')

    amodels = piper_worker.get_available_ner_models()
    print('all models', amodels)

    # Alternative: take the first reported model with ``model = amodels[0]``.
    model = 'en_core_web_sm'
    ok = piper_worker.set_current_spacy_model(model)
    if ok:
        print('model set!')
    else:
        print('model does not set')
        # Exit with a non-zero status; a bare sys.exit() reported success.
        sys.exit(1)

    txt = 'The Alraigo Incident occurred on 6th June 1983, when a lost British Royal Navy Sea Harrier fighter aircraft landed on the deck of a Spanish container ship.[1][2] Its pilot, Sub-lieutenant Ian Watson, was a junior Royal Navy Pilot undertaking his first NATO exercise from HMS Illustrious, which was operating off the coast of Portugal. Watson was launched in a pair of aircraft tasked with locating a French aircraft carrier under combat conditions including radio-silence and radar switched off.'
    try:
        resp = piper_worker.get_named_ent_from_text(txt)
    except JSONGetKeyException as e:
        logger.exception(e)

    # NOTE(review): developer-specific absolute path; adjust before running.
    txt = piper_worker.get_text_from_file('/home/pavel/repo/piper_new/piper/tests/ocr_data.pdf')
    logger.info(f'txt {txt}')

    ts_conf = {
        'ts_lang': 'eng',
        'ts_config_row': '--oem 1 --psm 6',
    }

    resp = piper_worker.set_tesseract_config(ts_conf)
    logger.info(resp)
175+
176+

0 commit comments

Comments
 (0)