forked from elebumm/RedditVideoMakerBot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
posttextparser.py
35 lines (27 loc) · 844 Bytes
/
posttextparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import re
import time
from typing import List
import spacy
from utils.console import print_step
from utils.voice import sanitize_text
# working good
def posttextparser(obj, *, tried: bool = False) -> List[str]:
    """Split a post's text into a list of sentences using spaCy.

    Newlines in ``obj`` are replaced with spaces, the text is run through
    the ``en_core_web_sm`` spaCy pipeline, and each detected sentence is
    kept only if ``sanitize_text`` returns a truthy value for it.

    If the model cannot be loaded, one download attempt is made and the
    function retries once.

    Args:
        obj: The post text to split (a ``str``).
        tried: Internal retry flag; ``True`` means the model download was
            already attempted, so a second load failure is re-raised.

    Returns:
        The text of every sentence that passed the ``sanitize_text`` check,
        in document order.

    Raises:
        OSError: If the spaCy model still cannot be loaded after the
            download attempt.
    """
    text: str = obj.replace("\n", " ")

    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        if not tried:
            # Imported locally: these are only needed on the recovery path.
            import importlib
            import subprocess
            import sys

            # Use the interpreter that is running this code so the model is
            # installed into the same environment; a bare "python" on PATH
            # may be a different interpreter (or absent).
            # check=False: a failed download is detected by the retry below.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                check=False,
            )
            # Let the import system notice the package installed just now.
            importlib.invalidate_caches()
            return posttextparser(obj, tried=True)
        print_step(
            "The spacy model can't load. You need to install it with the command \npython -m spacy download en_core_web_sm "
        )
        # Re-raise the active OSError with its original traceback.
        raise

    doc = nlp(text)
    return [
        sentence.text for sentence in doc.sents if sanitize_text(sentence.text)
    ]