File tree Expand file tree Collapse file tree 2 files changed +8
-3
lines changed
Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -8,6 +8,8 @@ RUN apt-get update -y \
88 && apt-get clean \
99 && rm -rf /var/lib/apt/lists/*
1010
11+ RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
12+
1113RUN pip install -U --no-cache-dir \
1214 setuptools \
1315 wheel \
@@ -22,6 +24,8 @@ RUN pip install -U --no-cache-dir \
2224 bottle \
2325#spacy
2426 spacy \
27+ #stanza integration for spacy
28+ spacy-stanza \
2529#chinese reading
2630 pinyin \
2731#subtitle file parser
@@ -33,7 +37,6 @@ RUN python3 -m spacy download de_core_news_sm \
3337 && python3 -m spacy download nb_core_news_sm \
3438 && python3 -m spacy download es_core_news_sm \
3539 && python3 -m spacy download nl_core_news_sm \
36- && python3 -m spacy download fi_core_news_sm \
3740 && python3 -m spacy download fr_core_news_sm \
3841 && python3 -m spacy download it_core_news_sm \
3942 && python3 -m spacy download sv_core_news_sm \
@@ -48,5 +51,6 @@ RUN python3 -m spacy download de_core_news_sm \
4851 && python3 -m spacy download pt_core_news_sm \
4952 && python3 -m spacy download ro_core_news_sm \
5053 && python3 -m spacy download sl_core_news_sm \
51- && python3 -m spacy download xx_ent_wiki_sm
54+ && python3 -m spacy download xx_ent_wiki_sm \
55+ && python3 -c 'import stanza; stanza.download("fi")'
5256
Original file line number Diff line number Diff line change 2222import shutil
2323import subprocess
2424from newspaper import Article
25+ import spacy_stanza
2526
2627# create empty spaCy models
2728multi_nlp = None
@@ -122,7 +123,7 @@ def getTokenizerDoc(language, words):
122123 if language == 'finnish' :
123124 global finnish_nlp
124125 if finnish_nlp == None :
125- finnish_nlp = spacy . load ( "fi_core_news_sm " , disable = [ 'ner' , 'parser' ] )
126+ finnish_nlp = spacy_stanza . load_pipeline ( "fi " , processors = "tokenize,lemma" )
126127 finnish_nlp .add_pipe ("custom_sentence_splitter" , first = True )
127128 doc = finnish_nlp (words )
128129
You can’t perform that action at this time.
0 commit comments