File tree 2 files changed +11
-4
lines changed
2 files changed +11
-4
lines changed Original file line number Diff line number Diff line change @@ -146,6 +146,8 @@ def convert_response_to_doc(doc, semgrex_response):
146
146
mwt_tokens = []
147
147
for word_start_idx , word in enumerate (tokens ):
148
148
if not word ["is_first_mwt" ]:
149
+ if word ["is_mwt" ]:
150
+ word [MISC ] = None
149
151
mwt_tokens .append (word )
150
152
continue
151
153
word_end_idx = word_start_idx + 1
@@ -159,6 +161,7 @@ def convert_response_to_doc(doc, semgrex_response):
159
161
# use the SpaceAfter=No (or not) from the last word in the token
160
162
MISC : tokens [word_end_idx - 1 ][MISC ],
161
163
}
164
+ word [MISC ] = None
162
165
mwt_tokens .append (mwt_token_entry )
163
166
mwt_tokens .append (word )
164
167
@@ -167,7 +170,11 @@ def convert_response_to_doc(doc, semgrex_response):
167
170
168
171
# TODO: look at word.parent to see if it is part of an MWT
169
172
# once that's done, the beginning words of an MWT do not need SpaceAfter=No any more (it is implied)
170
- word_text = [word .text if (word_idx == len (sentence .words ) - 1 or (word .misc and "SpaceAfter=No" in word .misc )) else word .text + " "
173
+ word_text = [word .text if (word_idx == len (sentence .words ) - 1 or
174
+ (word .misc and "SpaceAfter=No" in word .misc ) or
175
+ word .id != word .parent .id [- 1 ] or
176
+ (word .parent .misc and "SpaceAfter=No" in word .parent .misc ))
177
+ else word .text + " "
171
178
for word_idx , word in enumerate (sentence .words )]
172
179
sentence_text = "" .join (word_text )
173
180
Original file line number Diff line number Diff line change @@ -106,7 +106,7 @@ def test_ssurgeon_different_length():
106
106
# text = It's not yours!
107
107
# comment = negation
108
108
1-2 It's _ _ _ _ _ _ _ _
109
- 1 It it PRON PRP Number=Sing|Person=2|PronType=Prs 4 nsubj _ SpaceAfter=No
109
+ 1 It it PRON PRP Number=Sing|Person=2|PronType=Prs 4 nsubj _ _
110
110
2 's be AUX VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 4 cop _ _
111
111
3 not not PART RB Polarity=Neg 4 advmod _ _
112
112
4 yours yours PRON PRP Gender=Neut|Number=Sing|Person=2|Poss=Yes|PronType=Prs 0 root _ SpaceAfter=No
@@ -150,8 +150,8 @@ def test_ssurgeon_become_mwt():
150
150
2 of of ADP IN _ 4 case _ _
151
151
3 “ " PUNCT `` _ 4 punct _ SpaceAfter=No
152
152
4-5 NCRC4ME’s _ _ _ _ _ _ _ SpaceAfter=No
153
- 4 NCRC4ME NCRC4ME PROPN NNP Number=Sing 1 compound _ SpaceAfter=No
154
- 5 ’s 's PART POS _ 4 case _ SpaceAfter=No
153
+ 4 NCRC4ME NCRC4ME PROPN NNP Number=Sing 1 compound _ _
154
+ 5 ’s 's PART POS _ 4 case _ _
155
155
6 ” " PUNCT '' _ 4 punct _ _
156
156
"""
157
157
You can’t perform that action at this time.
0 commit comments