Janky patch to avoid errors from predicting states which are effectively OOV - EMPTY in particular is treated by the CompositeVocab as 'leave this blank'. The better fix would be to remove those states from the output layer entirely.

AngledLuffa · AngledLuffa · commit cc7f10de4485 · 2023-10-02T20:56:26.000-07:00
diff --git a/stanza/models/common/vocab.py b/stanza/models/common/vocab.py
@@ -13,6 +13,7 @@
 ROOT = '<ROOT>'
 ROOT_ID = 3
 VOCAB_PREFIX = [PAD, UNK, EMPTY, ROOT]
+VOCAB_PREFIX_SIZE = len(VOCAB_PREFIX)
 
 class BaseVocab:
     """ A base class for common vocabulary operations. Each subclass should at least 
diff --git a/stanza/models/ner/trainer.py b/stanza/models/ner/trainer.py
@@ -9,7 +9,7 @@
 
 from stanza.models.common.foundation_cache import NoTransformerFoundationCache
 from stanza.models.common.trainer import Trainer as BaseTrainer
-from stanza.models.common.vocab import VOCAB_PREFIX
+from stanza.models.common.vocab import VOCAB_PREFIX, VOCAB_PREFIX_SIZE
 from stanza.models.common import utils, loss
 from stanza.models.ner.model import NERTagger
 from stanza.models.ner.vocab import MultiVocab
@@ -129,6 +129,8 @@ def predict(self, batch, unsort=True):
         for i in range(batch_size):
             # for each tag column in the output, decode the tag assignments
             tags = [viterbi_decode(x[i, :sentlens[i]], y)[0] for x, y in zip(logits, trans)]
+            # TODO: this is to patch that the model can sometimes predict < "O"
+            tags = [[x if x >= VOCAB_PREFIX_SIZE else VOCAB_PREFIX_SIZE for x in y] for y in tags]
             # that gives us N lists of |sent| tags, whereas we want |sent| lists of N tags
             tags = list(zip(*tags))
             # now unmap that to the tags in the vocab