Skip to content

Commit f0af0dd

Browse files
committed
cluster.mentions should be sorted
at least after loading and before storing to CoNLL-U because guaranteeing always-sorted mentions would be too difficult.
1 parent a559ea2 commit f0af0dd

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

udapi/core/coref.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ def cluster_id(self):
131131

132132
@property
133133
def mentions(self):
134-
#TODO return sorted(self._mentions, key=lambda x:...
135134
return self._mentions
136135

137136
def create_mention(self, head=None, mention_words=None, mention_span=None):
@@ -167,6 +166,7 @@ def create_mention(self, head=None, mention_words=None, mention_span=None):
167166
mention.words = mention_words
168167
if mention_span:
169168
mention.span = mention_span
169+
self._mentions.sort()
170170
return mention
171171

172172
# TODO or should we create a BridgingLinks instance with a fake src_mention?
@@ -330,6 +330,17 @@ def store_coref_to_misc(doc):
330330
for key in list(node.misc):
331331
if any(re.match(attr + r'(\[\d+\])?$', key) for attr in attrs):
332332
del node.misc[key]
333+
# doc._coref_clusters is a dict, which is insertion ordered in Python 3.7+.
334+
# The insertion order is sorted according to CorefCluster.__lt__ (see a few lines above).
335+
# However, new clusters could be added meanwhile or some clusters edited,
336+
# so we need to sort the clusters again before storing to MISC.
337+
# We also need to make sure cluster.mentions are sorted in each cluster
338+
# because the ordering of clusters is defined by the first mention in each cluster.
339+
# Ordering of mentions within a cluster can be changed when e.g. changing the span
340+
# of a given mention or reordering words within a sentence, and in such events
341+
# Udapi currently does not automatically update the ordering of clusters.
342+
for cluster in doc._coref_clusters.values():
343+
cluster._mentions.sort()
333344
for cluster in sorted(doc._coref_clusters.values()):
334345
for mention in cluster.mentions:
335346
head = mention.head

0 commit comments

Comments
 (0)