[WIP] Added method to restrict vocab of Word2Vec most similar search #481

Closed
wants to merge 6 commits into from
Removed old most_similar method, renamed new method
jimgoo committed Oct 27, 2015
commit 51d0bc2e0e5eb90a6132d3817cda546c04ebfa08
62 changes: 0 additions & 62 deletions gensim/models/word2vec.py
@@ -1130,68 +1130,6 @@ def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None):

If topn is False, most_similar returns the vector of similarity scores.

`restrict_vocab` is an optional integer which limits the range of vectors which
are searched for most-similar values. For example, restrict_vocab=10000 would
only check the first 10000 word vectors in the vocabulary order. (This may be
meaningful if you've sorted the vocabulary by descending frequency.)

Example::

>>> trained_model.most_similar(positive=['woman', 'king'], negative=['man'])
[('queen', 0.50882536), ...]

"""
self.init_sims()

if isinstance(positive, string_types) and not negative:
# allow calls like most_similar('dog'), as a shorthand for most_similar(['dog'])
positive = [positive]

# add weights for each word, if not already present; default to 1.0 for positive and -1.0 for negative words
positive = [
(word, 1.0) if isinstance(word, string_types + (ndarray,)) else word
for word in positive
]
negative = [
(word, -1.0) if isinstance(word, string_types + (ndarray,)) else word
for word in negative
]

# compute the weighted average of all words
all_words, mean = set(), []
for word, weight in positive + negative:
if isinstance(word, ndarray):
mean.append(weight * word)
elif word in self.vocab:
mean.append(weight * self.syn0norm[self.vocab[word].index])
all_words.add(self.vocab[word].index)
else:
raise KeyError("word '%s' not in vocabulary" % word)
if not mean:
raise ValueError("cannot compute similarity with no input")
mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL)

limited = self.syn0norm if restrict_vocab is None else self.syn0norm[:restrict_vocab]
dists = dot(limited, mean)
if not topn:
return dists
best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True)
# ignore (don't return) words from the input
result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words]
return result[:topn]
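
For context, here is a minimal usage sketch of the method being removed, assuming a trained model previously saved to disk (the model path is hypothetical):

    from gensim.models import Word2Vec

    # Hypothetical path to a previously trained and saved model.
    model = Word2Vec.load('my_model.w2v')

    # Classic analogy query: vector('king') - vector('man') + vector('woman').
    model.most_similar(positive=['woman', 'king'], negative=['man'], topn=3)

    # restrict_vocab=10000 scores only the first 10000 vectors, which is
    # meaningful when the vocabulary is sorted by descending frequency.
    model.most_similar('dog', topn=5, restrict_vocab=10000)

    # topn=False returns the raw vector of similarity scores instead of a list.
    scores = model.most_similar('dog', topn=False)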

def most_similar_in_list(self, positive=[], negative=[], topn=10, restrict_vocab=None):
"""
Find the top-N most similar words. Positive words contribute positively towards the
similarity, negative words negatively.

This method computes cosine similarity between a simple mean of the projection
weight vectors of the given words and the vectors for each word in the model.
The method corresponds to the `word-analogy` and `distance` scripts in the original
word2vec implementation.

If topn is False, most_similar_in_list returns the vector of similarity scores.

`restrict_vocab` is an optional integer which limits the range of vectors which
are searched for most-similar values. For example, restrict_vocab=10000 would
only check the first 10000 word vectors in the vocabulary order. (This may be
meaningful if you've sorted the vocabulary by descending frequency.)
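
The core computation removed above, a unit-normalized weighted mean dotted against an (optionally restricted) matrix of unit vectors, can be summarized in a self-contained sketch; the toy vocabulary and the most_similar_sketch helper are illustrative, not part of gensim:

    import numpy as np

    # Toy stand-ins for the model internals: 7 words, 4-dimensional vectors,
    # rows unit-normalized like syn0norm.
    vocab = ['the', 'king', 'queen', 'man', 'woman', 'dog', 'cat']
    index = {w: i for i, w in enumerate(vocab)}
    vectors = np.random.rand(len(vocab), 4).astype(np.float32)
    vectors /= np.linalg.norm(vectors, axis=1, keepdims=True)

    def most_similar_sketch(positive, negative=(), topn=3, restrict_vocab=None):
        # Weight positive words +1.0 and negative words -1.0, then average.
        weighted = [vectors[index[w]] for w in positive]
        weighted += [-vectors[index[w]] for w in negative]
        mean = np.mean(weighted, axis=0)
        mean /= np.linalg.norm(mean)  # unit-normalize, like matutils.unitvec

        # Optionally restrict the search to the first restrict_vocab rows.
        limited = vectors if restrict_vocab is None else vectors[:restrict_vocab]
        dists = limited.dot(mean)  # cosine similarity: rows are unit vectors

        # Rank descending and drop the query words themselves.
        query = {index[w] for w in positive} | {index[w] for w in negative}
        best = np.argsort(-dists)
        return [(vocab[i], float(dists[i])) for i in best if i not in query][:topn]

    print(most_similar_sketch(['woman', 'king'], ['man']))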