Skip to content

Commit

Permalink
Line lengths pep8 fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
shubhvachher committed Apr 11, 2017
1 parent 2b69e3b commit 59feafd
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 40 deletions.
58 changes: 36 additions & 22 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,56 +345,66 @@ def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, i
result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words]
return result[:topn]

def most_similar_among(self, positive=[], negative=[], topn=10, words_list=None, indexer=None,
def most_similar_among(self, positive=[], negative=[],
topn=10, words_list=None, indexer=None,
suppress_warnings=False):
"""
Find the top-N most similar words among words_list to given words. Positive words
contribute positively towards the similarity, negative words negatively.
Find the top-N most similar words among words_list to given words.
Positive words contribute positively towards the similarity,
negative words negatively.
Please refer to docs of most_similar function.
If topn is False, most_similar returns the vector of similarity scores for all words
in vocabulary of model, restricted by the supplied words_list.
If topn is False, most_similar returns the vector of similarity scores
for all words in vocabulary of model, restricted by the supplied words_list.
'words_list' should be a list/set of words. The returned word similarities will only
contain similarity scores for those words that are in words_list (and in trained vocabulary).
'words_list' should be a list/set of words. The returned word similarities
will only contain similarity scores for those words that are in words_list
(and in trained vocabulary).
If some words in words_list are not in vocabulary then a warning is issued to the user.
If some words in words_list are not in vocabulary then a warning is
issued to the user.
Warnings can be suppressed by setting the suppress_warnings flag.
Example::
>>> trained_model.most_similar_among(positive=['man'], topn=1, words_list=['woman','random_word'])
>>> trained_model.most_similar_among(positive=['man'], topn=1,
words_list=['woman','random_word'])
[('woman', 0.75882536)]
"""

if isinstance(words_list, int):
raise ValueError("words_list must be a set/list of words. Maybe you wanted the \
most_similar function.")
raise ValueError("words_list must be a set/list of words. \
Maybe you wanted the most_similar function.")
elif isinstance(words_list, list) or isinstance(words_list, set):
pass
else: # This is triggered for empty words_list parameter
raise ValueError("words_list must be set/list of words. Maybe you wanted the \
most_similar function. Please read doc string")
raise ValueError("words_list must be set/list of words. \
Maybe you wanted the most_similar function. \
Please read doc string")

if type(topn) is not int:
if topn is False:
pass
else:
if suppress_warnings is False:
logger.warning("topn needs to either be a number or False. \
Please read docstring. Displaying all similarities!")
Please read docstring. \
Displaying all similarities!")
topn = len(self.index2word)

self.init_sims()

if isinstance(positive, string_types) and negative is False:
# allow calls like most_similar('dog'), as a shorthand for most_similar(['dog'])
# allow calls like most_similar('dog'),
# as a shorthand for most_similar(['dog'])
positive = [positive]

# add weights for each word, if not already present; default to 1.0 for positive and -1.0 for negative words
# add weights for each word, if not already present;
# default to 1.0 for positive and -1.0 for negative words
positive = [
(word, 1.0) if isinstance(word, string_types + (ndarray,)) else word
for word in positive
Expand Down Expand Up @@ -426,20 +436,24 @@ def most_similar_among(self, positive=[], negative=[], topn=10, words_list=None,
words_to_use = vocabulary_words.intersection(words_list)

if not words_to_use:
raise ValueError("None of the words in words_list exist in current vocabulary")
raise ValueError("None of the words in words_list \
exist in current vocabulary")

if suppress_warnings is False:
missing_words = words_list.difference(vocabulary_words)
if not missing_words: # missing_words is empty
pass
else:
logger.warning("The following words are not in trained vocabulary : %s", str(missing_words))
logger.info("This warning is expensive to calculate, especially for large words_list. \
If you would rather not remove the missing_words from words_list \
please set the suppress_warnings flag.")
logger.warning("The following words are not in \
trained vocabulary : %s", str(missing_words))
logger.info("This warning is expensive to calculate, \
especially for large words_list. \
If you would rather not remove the missing_words \
from words_list please set the \
suppress_warnings flag.")

words_list_indices = [self.vocab[word].index for word in words_to_use]
# limited = self.syn0norm[words_list_indices] #syn0norm is an ndarray so this indexing works
# limited = self.syn0norm[words_list_indices]
# Storing 'limited' might add a huge memory overhead so we avoid doing that

dists = dot(self.syn0norm[words_list_indices], mean)
Expand Down
43 changes: 25 additions & 18 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import itertools
import bz2
import sys
import warnings

import numpy as np

Expand Down Expand Up @@ -470,49 +469,57 @@ def test_most_similar_among(self, l):
CBOW model is used here.
"""

model = word2vec.Word2Vec(sentences, size=2, sg=0, min_count=1, hs=1, negative=0)
model = word2vec.Word2Vec(sentences, size=2, sg=0, min_count=1,
hs=1, negative=0)

# Testing Error in case of absent words_list
self.assertRaises(ValueError, model.wv.most_similar_among, positive=['graph'])
self.assertRaises(ValueError, model.wv.most_similar_among,
positive=['graph'])

words_in_voc = model.wv.index2word[:5]

# Testing logs for warnings
model.wv.most_similar_among('graph', \
words_list=words_in_voc+['random_word'], \
topn="some_gibberish_not_number_or_False")
model.wv.most_similar_among('graph',
words_list=words_in_voc+['random_word'],
topn="some_gibberish_not_number_or_False")

self.assertIn("topn needs to either be a number or False", str(l))
self.assertIn("The following words are not in trained vocabulary", str(l))
self.assertIn("This warning is expensive to calculate", str(l))

l.clear()

# Check if warnings are suppressed upon setting suppress_warnings flag
model.wv.most_similar_among('graph', \
words_list=words_in_voc+['random_word'], \
topn="some_gibberish_not_number_or_False", \
suppress_warnings=True)
model.wv.most_similar_among('graph',
words_list=words_in_voc+['random_word'],
topn="some_gibberish_not_number_or_False",
suppress_warnings=True)
self.assertIn("No logging captured", str(l))

# Check functionality
sims = model.wv.most_similar_among('graph', words_list=words_in_voc)
sims2 = model.wv.most_similar_among('graph', words_list=words_in_voc+['random_word'], \
suppress_warnings=True)
sims2 = model.wv.most_similar_among('graph',
words_list=words_in_voc+['random_word'],
suppress_warnings=True)
self.assertEqual(sims, sims2)

# Results by vector
graph_vector = model.wv.syn0norm[model.wv.vocab['graph'].index]
sims3 = model.wv.most_similar_among(positive = [graph_vector], words_list=words_in_voc)
sims3 = model.wv.most_similar_among(positive = [graph_vector],
words_list=words_in_voc)
sims3 = [(w, sim) for w, sim in sims3 if w != 'graph'] # ignore 'graph' itself
self.assertEqual(sims, sims3)

sims4 = model.wv.most_similar_among('graph', words_list=model.wv.index2word, \
topn=False) # Returns all possible similarities
sims5 = model.wv.most_similar_among('graph', words_list=model.wv.index2word, \
topn=len(model.wv.vocab))
sims4 = model.wv.most_similar_among('graph',
words_list=model.wv.index2word,
topn=False) # Returns all possible similarities
sims5 = model.wv.most_similar_among('graph',
words_list=model.wv.index2word,
topn=len(model.wv.vocab))
self.assertEqual(sims4, sims5)
self.assertEqual(len(sims4), len(model.wv.vocab)-1)
# Subtracting one as the word itself is not returned in most_similar calculation
# Subtracting one as the word itself is not returned
# in most_similar calculation

def test_cosmul(self):
model = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=1, negative=0)
Expand Down

0 comments on commit 59feafd

Please sign in to comment.