`save_word2vec_format` / `load_word2vec_format` fails for FastText models #1743
Closed
Description
Description
Saving word vectors with `save_word2vec_format` and loading them back with `load_word2vec_format` fails for both native FastText models and models loaded using the wrapper.
Steps/Code/Corpus to Reproduce
Example:
from gensim.models import fasttext as ft
from gensim.models.wrappers import fasttext as ft_wrapper
from gensim.models.word2vec import Text8Corpus
corpus = Text8Corpus('gensim/test/test_data/lee_background.cor')
native_model = ft.FastText()
native_model.build_vocab(corpus)
print(native_model.wv.most_similar('wars'))
>>> # prints results
print(native_model.wv['wars'])
>>> # prints results
native_model.wv.save_word2vec_format('test.wv')
wv = ft_wrapper.FastTextKeyedVectors.load_word2vec_format('test.wv')
print(wv.most_similar('wars'))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-43e12f136081> in <module>()
----> 1 print(wv.most_similar('wars'))
~/Projects/gensim/gensim/gensim/models/keyedvectors.py in most_similar(self, positive, negative, topn, restrict_vocab, indexer)
318 negative = []
319
--> 320 self.init_sims()
321
322 if isinstance(positive, string_types) and not negative:
~/Projects/gensim/gensim/gensim/models/wrappers/fasttext.py in init_sims(self, replace)
125 else:
126 self.syn0_ngrams_norm = \
--> 127 (self.syn0_ngrams / sqrt((self.syn0_ngrams ** 2).sum(-1))[..., newaxis]).astype(REAL)
128
129 def __contains__(self, word):
TypeError: unsupported operand type(s) for ** or pow(): 'NoneType' and 'int'
print(wv['wars'])
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-6-ce05f767b013> in <module>()
----> 1 print(wv['wars'])
~/Projects/gensim/gensim/gensim/models/keyedvectors.py in __getitem__(self, words)
601 if isinstance(words, string_types):
602 # allow calls like trained_model['office'], as a shorthand for trained_model[['office']]
--> 603 return self.word_vec(words)
604
605 return vstack([self.word_vec(word) for word in words])
~/Projects/gensim/gensim/gensim/models/wrappers/fasttext.py in word_vec(self, word, use_norm)
91 return super(FastTextKeyedVectors, self).word_vec(word, use_norm)
92 else:
---> 93 word_vec = np.zeros(self.syn0_ngrams.shape[1], dtype=np.float32)
94 ngrams = compute_ngrams(word, self.min_n, self.max_n)
95 ngrams = [ng for ng in ngrams if ng in self.ngrams]
AttributeError: 'NoneType' object has no attribute 'shape'
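From a quick glance, it looks like this results from the changes made to `FastTextKeyedVectors` during the native implementation of FastText, where two different matrices - `syn0_vocab` and `syn0_ngrams` - were created. After `load_word2vec_format`, `syn0_ngrams` is still `None`, which is what both tracebacks above fail on.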
That said, I'm not sure `save_word2vec_format` is even suitable for FastText, since the ngram vectors aren't stored to disk.