@@ -131,7 +131,6 @@ def cluster_id(self):
131
131
132
132
@property
133
133
def mentions (self ):
134
- #TODO return sorted(self._mentions, key=lambda x:...
135
134
return self ._mentions
136
135
137
136
def create_mention (self , head = None , mention_words = None , mention_span = None ):
@@ -167,6 +166,7 @@ def create_mention(self, head=None, mention_words=None, mention_span=None):
167
166
mention .words = mention_words
168
167
if mention_span :
169
168
mention .span = mention_span
169
+ self ._mentions .sort ()
170
170
return mention
171
171
172
172
# TODO or should we create a BridgingLinks instance with a fake src_mention?
@@ -330,6 +330,17 @@ def store_coref_to_misc(doc):
330
330
for key in list (node .misc ):
331
331
if any (re .match (attr + r'(\[\d+\])?$' , key ) for attr in attrs ):
332
332
del node .misc [key ]
333
+ # doc._coref_clusters is a dict, which is insertion ordered in Python 3.7+.
334
+ # The insertion order is sorted according to CorefCluster.__lt__ (see a few lines above).
335
+ # However, new clusters could be added meanwhile or some clusters edited,
336
+ # so we need to sort the clusters again before storing to MISC.
337
+ # We also need to make sure cluster.mentions are sorted in each cluster
338
+ # because the ordering of clusters is defined by the first mention in each cluster.
339
+ # Ordering of mentions within a cluster can be changed when e.g. changing the span
340
+ # of a given mention or reordering words within a sentence and in such events
341
+ # Udapi currently does not automatically update the ordering of clusters.
342
+ for cluster in doc ._coref_clusters .values ():
343
+ cluster ._mentions .sort ()
333
344
for cluster in sorted (doc ._coref_clusters .values ()):
334
345
for mention in cluster .mentions :
335
346
head = mention .head
0 commit comments