Skip to content

Commit

Permalink
fix: mismatch between building vocabs
Browse files Browse the repository at this point in the history
  • Loading branch information
vejvarm committed Nov 30, 2023
1 parent 0e6f387 commit 4d4b171
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def _build_vocabs_streaming(self):
specials=[O, PAD_TOKEN],
vocab_cache=self.vocab_cache.joinpath("ner_vocab.pkl"))
vocabs[COREF] = self._counter_to_vocab(self.counters[COREF],
- specials=['0', PAD_TOKEN],
+ specials=[NA_TOKEN, PAD_TOKEN],
vocab_cache=self.vocab_cache.joinpath("coref_vocab.pkl"))
vocabs[PREDICATE_POINTER] = self._counter_to_vocab(self.counters[PREDICATE_POINTER],
specials=[NA_TOKEN, PAD_TOKEN],
Expand All @@ -229,7 +229,7 @@ def _build_vocabs_streaming(self):
specials=[NA_TOKEN, PAD_TOKEN],
vocab_cache=self.vocab_cache.joinpath("type_vocab.pkl"))
vocabs[ENTITY] = self._counter_to_vocab(self.counters[ENTITY],
- specials=[PAD_TOKEN, NA_TOKEN],
+ specials=[NA_TOKEN, PAD_TOKEN],
vocab_cache=self.vocab_cache.joinpath("ent_vocab.pkl"))

return vocabs
Expand Down

0 comments on commit 4d4b171

Please sign in to comment.