@@ -47,41 +47,6 @@ def generate_words_sorted(loader_obj, text):
47
47
os .system (wordcommand )
48
48
49
49
50
def old_spacy_tagger(loader_obj, text):
    """Tag words with Spacy

    Reads the records stored one per line in ``text["raw"]``, groups the
    ``word``, ``sent`` and ``punct`` records by the first six components of
    their philo id, runs every group through ``loader_obj.nlp`` and prints the
    annotated records to ``text["raw"] + ".tmp"``.

    Records of any other type are never added to a group and are therefore
    not printed to the temporary file.

    Args:
        loader_obj: object exposing the Spacy pipeline as ``nlp``; both
            ``nlp.vocab`` and ``nlp(...)`` are used.
        text: mapping whose ``"raw"`` entry is the path of the file to tag.
    """
    with open(text["raw"] + ".tmp", "w", encoding="utf8") as tmp_file:
        with open(text["raw"], encoding="utf8") as fh:
            sentence = []
            current_sent_id = None
            for line in fh:
                # NOTE(review): the delimiter is kept exactly as it appears in
                # the reviewed source (tab followed by a space) -- confirm it
                # matches the raw file format.
                philo_type, word, philo_id, attrib = line.split("\t ")
                if philo_type not in ("word", "sent", "punct"):
                    continue
                id_parts = philo_id.split()
                sent_id = " ".join(id_parts[:6])
                record = Record(philo_type, word, id_parts)
                record.attrib = loads(attrib)
                # A change of sentence id closes the group collected so far.
                if current_sent_id is not None and sent_id != current_sent_id:
                    _tag_and_write_sentence(loader_obj, sentence, tmp_file)
                    sentence = []
                sentence.append(record)
                current_sent_id = sent_id
            # The last group is not followed by an id change; flush it here.
            if sentence:
                _tag_and_write_sentence(loader_obj, sentence, tmp_file)


def _tag_and_write_sentence(loader_obj, sentence, tmp_file):
    """Annotate one group of records with Spacy output and print them.

    A doc is built from the ``name`` of every record so that the pipeline
    returns exactly one token per record; each record then receives the
    ``pos``, ``tag``, ``ent_type`` and ``lemma`` of its token before being
    printed to ``tmp_file``.
    """
    spacy_sentence = SpacyDoc(loader_obj.nlp.vocab, [r.name for r in sentence])
    parsed_sentence = loader_obj.nlp(spacy_sentence)
    for saved_record, parsed_word in zip(sentence, parsed_sentence):
        saved_record.attrib["pos"] = parsed_word.pos_
        saved_record.attrib["tag"] = parsed_word.tag_
        saved_record.attrib["ent_type"] = parsed_word.ent_type_
        saved_record.attrib["lemma"] = parsed_word.lemma_
        print(saved_record, file=tmp_file)
83
-
84
-
85
50
def spacy_tagger (loader_obj , text ):
86
51
"""Tag words with Spacy"""
87
52
0 commit comments