@@ -153,7 +153,7 @@ def wrap_up_doc():
153
153
# multi-token expressions
154
154
if '-' in str_token_id :
155
155
# this will be in the range token, not the word itself
156
- if token .get ('misc' , defaultdict ()).get ('NewPar' ) == 'Yes' :
156
+ if token .get ('misc' , defaultdict ()).get ('NewPar' ) == True :
157
157
new_paragraph_mid_sentence ()
158
158
# ignore ranges otherwise during token parsing
159
159
continue
@@ -166,7 +166,7 @@ def wrap_up_doc():
166
166
'upos' : token ['upostag' ], # universal pos
167
167
'xpos' : token ['xpostag' ], # language-specific pos
168
168
'features' : OrderedDict ({
169
- 'Overt' : 'Yes'
169
+ 'Overt' : True
170
170
})
171
171
}
172
172
if token .get ('feats' ):
@@ -177,12 +177,12 @@ def wrap_up_doc():
177
177
if 'Mseg' in t ['misc' ]:
178
178
t ['morphemes' ] = t ['misc' ]['Mseg' ].split ('-' )
179
179
# new paragraph in the middle of a sentence
180
- if t ['misc' ].get ('NewPar' ) == 'Yes' :
180
+ if t ['misc' ].get ('NewPar' ) == True :
181
181
new_paragraph_mid_sentence ()
182
182
183
183
# non-overt tokens are represented as decimal ids in conll
184
184
if '.' in str_token_id :
185
- t ['features' ]['Overt' ] = 'No'
185
+ t ['features' ]['Overt' ] = False
186
186
187
187
# bookkeeping
188
188
token_lookup [(sent_num , str_token_id )] = token_id
0 commit comments