From ff0c66e9012295fac785a2289ab318e057e72542 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Sun, 18 May 2014 22:06:22 +0200
Subject: [PATCH 1/8] add missing cython files to MANIFEST.in

---
 MANIFEST.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index d514fa4993..5f682d46b4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -7,4 +7,6 @@ include CHANGELOG.txt
 include COPYING
 include COPYING.LESSER
 include ez_setup.py
+include gensim/models/voidptr.h
 include gensim/models/word2vec_inner.pyx
+include gensim_addons/models/word2vec_inner.pyx

From 1f297e20214f08ff651863110874e776356da880 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Sun, 18 May 2014 22:10:20 +0200
Subject: [PATCH 2/8] re #197: fix typo

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 36acee42d8..7945b6038d 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@
 pkg_desc = 'Python framework for fast Vector Space Modelling'
 
 # there is a bug in python2.5, preventing distutils from using any non-ascii characters :( http://bugs.python.org/issue2562
-pkg_author = 'Radim Rehurek', # u'Radim Řehůřek', # <- should really be this...
+pkg_author = 'Radim Rehurek' # u'Radim Řehůřek', # <- should really be this...
 pkg_author_email = 'radimrehurek@seznam.cz'
 pkg_url = 'http://radimrehurek.com/gensim'
 pkg_download_url = 'http://pypi.python.org/pypi/gensim'

From be148f298fd43d973bb620ec65d90fcdb5680f0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Wed, 21 May 2014 10:26:35 +0200
Subject: [PATCH 3/8] use miniconda in travis CI

---
 .travis.yml | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f70a39267e..6c205e8e74 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,10 +5,15 @@ python:
   - "3.3"
   - "3.4"
 before_install:
-    - sudo apt-get update -qq
-    - sudo apt-get install -qq libatlas-dev liblapack-dev gfortran
-    - travis_wait pip install --quiet numpy
-    - travis_wait pip install --quiet scipy
+  - wget 'http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh' -O miniconda.sh
+  - chmod +x miniconda.sh
+  - ./miniconda.sh -b
+  - export PATH=/home/travis/miniconda/bin:$PATH
+  - conda update --yes conda
+  # The next couple of lines fix a multiprocessing crash on Travis; they are not specific to Miniconda
+  - sudo rm -rf /dev/shm
+  - sudo ln -s /run/shm /dev/shm
 install:
-    - python setup.py install
+  - conda install --yes python=$TRAVIS_PYTHON_VERSION pip atlas numpy scipy
+  - python setup.py install
 script: python setup.py test

From 1d2d19b0bea40d5b68506c40520307014c99f152 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Wed, 21 May 2014 10:47:45 +0200
Subject: [PATCH 4/8] create conda env in travis CI

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 6c205e8e74..4490d19536 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,6 +14,7 @@ before_install:
   - sudo rm -rf /dev/shm
   - sudo ln -s /run/shm /dev/shm
 install:
-  - conda install --yes python=$TRAVIS_PYTHON_VERSION pip atlas numpy scipy
+  - conda create --yes -n gensim-test python=$TRAVIS_PYTHON_VERSION pip atlas numpy scipy
+  - source activate gensim-test
   - python setup.py install
 script: python setup.py test

From 04365588788d29518928df4568e75642086ccad3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Mon, 26 May 2014 16:56:56 +0200
Subject: [PATCH 5/8] improve matutils docs

---
 gensim/matutils.py | 48 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/gensim/matutils.py b/gensim/matutils.py
index 584f59adc9..961c9b9269 100644
--- a/gensim/matutils.py
+++ b/gensim/matutils.py
@@ -70,11 +70,16 @@ def argsort(x, topn=None):
 
 def corpus2csc(corpus, num_terms=None, dtype=numpy.float64, num_docs=None, num_nnz=None, printprogress=0):
     """
-    Convert corpus into a sparse matrix, in scipy.sparse.csc_matrix format,
+    Convert a streamed corpus into a sparse matrix, in scipy.sparse.csc_matrix format,
     with documents as columns.
 
     If the number of terms, documents and non-zero elements is known, you can pass
     them here as parameters and a more memory efficient code path will be taken.
+
+    The input corpus may be a non-repeatable stream (generator).
+
+    This is the mirror function to `Sparse2Corpus`.
+
     """
     try:
         # if the input corpus has the `num_nnz`, `num_docs` and `num_terms` attributes
@@ -151,7 +156,7 @@ def ismatrix(m):
 
 
 def any2sparse(vec, eps=1e-9):
-    """Convert a numpy/scipy vector into gensim format (list of 2-tuples)."""
+    """Convert a numpy/scipy vector into gensim document format (=list of 2-tuples)."""
     if isinstance(vec, numpy.ndarray):
         return dense2vec(vec, eps)
     if scipy.sparse.issparse(vec):
@@ -160,15 +165,25 @@ def any2sparse(vec, eps=1e-9):
 
 
 def scipy2sparse(vec, eps=1e-9):
-    """Convert a scipy.sparse vector to gensim format (list of 2-tuples)."""
+    """Convert a scipy.sparse vector into gensim document format (=list of 2-tuples)."""
     vec = vec.tocsr()
     assert vec.shape[0] == 1
     return [(int(pos), float(val)) for pos, val in zip(vec.indices, vec.data) if numpy.abs(val) > eps]
 
 
 class Scipy2Corpus(object):
+    """
+    Convert a sequence of dense/sparse vectors into a streamed gensim corpus object.
+
+    This class is the mirror of the `corpus2csc` function.
+
+    """
     def __init__(self, vecs):
-        """Convert a sequence of dense/sparse vector to a gensim corpus object."""
+        """
+        `vecs` is a sequence of dense and/or sparse vectors, such as a 2d numpy array,
+        or a scipy.sparse.csc_matrix, or any sequence containing a mix of 1d numpy/scipy vectors.
+
+        """
         self.vecs = vecs
 
     def __iter__(self):
@@ -184,8 +199,11 @@ def __len__(self):
 
 def sparse2full(doc, length):
     """
-    Convert a document in sparse corpus format (sequence of 2-tuples) into a dense
+    Convert a document in sparse document format (=sequence of 2-tuples) into a dense
     numpy array (of size `length`).
+
+    This is the mirror function to `full2sparse`.
+
     """
     result = numpy.zeros(length, dtype=numpy.float32) # fill with zeroes (default value)
     doc = dict(doc)
@@ -196,9 +214,12 @@ def sparse2full(doc, length):
 
 def full2sparse(vec, eps=1e-9):
     """
-    Convert a dense numpy array into the sparse corpus format (sequence of 2-tuples).
+    Convert a dense numpy array into the sparse document format (=sequence of 2-tuples).
 
     Values of magnitude < `eps` are treated as zero (ignored).
+
+    This is the mirror function to `sparse2full`.
+
     """
     vec = numpy.asarray(vec, dtype=float)
     nnz = numpy.nonzero(abs(vec) > eps)[0]
@@ -209,7 +230,8 @@ def full2sparse(vec, eps=1e-9):
 
 def full2sparse_clipped(vec, topn, eps=1e-9):
     """
-    Like `full2sparse`, but only return the `topn` greatest elements (not all).
+    Like `full2sparse`, but only return the `topn` elements of the greatest magnitude (abs).
+
     """
     # use numpy.argsort and only form tuples that are actually returned.
     # this is about 40x faster than explicitly forming all 2-tuples to run sort() or heapq.nlargest() on.
@@ -225,10 +247,12 @@ def corpus2dense(corpus, num_terms, num_docs=None, dtype=numpy.float32):
     """
     Convert corpus into a dense numpy array (documents will be columns). You
     must supply the number of features `num_terms`, because dimensionality
-    cannot be deduced from sparse vectors alone.
+    cannot be deduced from the sparse vectors alone.
 
     You can optionally supply `num_docs` (=the corpus length) as well, so that
-    a more memory efficient code path is taken.
+    a more memory-efficient code path is taken.
+
+    This is the mirror function to `Dense2Corpus`.
 
     """
     if num_docs is not None:
@@ -249,6 +273,9 @@ class Dense2Corpus(object):
 
     No data copy is made (changes to the underlying matrix imply changes in the
     corpus).
+
+    This class is the mirror of the `corpus2dense` function.
+
     """
     def __init__(self, dense, documents_columns=True):
         if documents_columns:
@@ -268,6 +295,9 @@ def __len__(self):
 class Sparse2Corpus(object):
     """
     Convert a matrix in scipy.sparse format into a streaming gensim corpus.
+
+    This class is the mirror of the `corpus2csc` function.
+
     """
     def __init__(self, sparse, documents_columns=True):
         if documents_columns:

From 6c8f478811cad9ea4138de3594bf355b610eb5f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Fri, 30 May 2014 19:00:21 +0200
Subject: [PATCH 6/8] increase max word2vec sentence length to 10k * was: max
 1k tokens per sentence (rest is ignored)

---
 gensim/models/word2vec_inner.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx
index b0f8c86e23..a7266a577e 100755
--- a/gensim/models/word2vec_inner.pyx
+++ b/gensim/models/word2vec_inner.pyx
@@ -22,7 +22,7 @@ from scipy.linalg.blas import fblas
 REAL = np.float32
 ctypedef np.float32_t REAL_t
 
-DEF MAX_SENTENCE_LEN = 1000
+DEF MAX_SENTENCE_LEN = 10000
 
 ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil
 ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil

From 668d701e465f7a1f617172be6cea9a16d760a715 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Fri, 30 May 2014 19:01:16 +0200
Subject: [PATCH 7/8] improve docs

---
 docs/src/gensim_theme/layout.html |  2 +-
 gensim/models/ldamodel.py         |  6 +++---
 gensim/models/word2vec.py         | 14 ++++++++------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/docs/src/gensim_theme/layout.html b/docs/src/gensim_theme/layout.html
index cedf9d1905..ac9900ad21 100644
--- a/docs/src/gensim_theme/layout.html
+++ b/docs/src/gensim_theme/layout.html
@@ -80,7 +80,7 @@ <h1 class="h1gensim">
             <div class="consulting-banner">
               <h3><a href="http://radimrehurek.com/">Get Expert Help</a></h3>
               <p>• machine learning, NLP, data mining</p>
-              <p>• custom system design, development, optimizations</p>
+              <p>• custom SW design, development, optimizations</p>
               <p>• tech trainings &amp; IT consulting</p>
             </div>
           </div>
diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py
index f8f9fe662c..04c3ac3adc 100644
--- a/gensim/models/ldamodel.py
+++ b/gensim/models/ldamodel.py
@@ -391,8 +391,8 @@ def inference(self, chunk, collect_sstats=False):
                 sstats[:, ids] += numpy.outer(expElogthetad.T, cts / phinorm)
 
         if len(chunk) > 1:
-            logger.info("%i/%i documents converged within %i iterations" %
-                         (converged, len(chunk), self.iterations))
+            logger.debug("%i/%i documents converged within %i iterations" %
+                (converged, len(chunk), self.iterations))
 
         if collect_sstats:
             # This step finishes computing the sufficient statistics for the
@@ -518,7 +518,7 @@ def update(self, corpus, chunksize=None, decay=None, passes=None, update_every=N
         logger.info("running %s LDA training, %s topics, %i passes over "
                     "the supplied corpus of %i documents, updating model once "
                     "every %i documents, evaluating perplexity every %i documents, "
-                    "iterating %i with a convergence threshold of %i" %
+                    "iterating %ix with a convergence threshold of %f" %
                     (updatetype, self.num_topics, passes, lencorpus,
                         updateafter, evalafter, iterations,
                         gamma_threshold))
diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
index 35b1270799..f35c7957be 100755
--- a/gensim/models/word2vec.py
+++ b/gensim/models/word2vec.py
@@ -805,18 +805,20 @@ def __iter__(self):
 
 
 class LineSentence(object):
+    """Simple format: one sentence = one line; words already preprocessed and separated by whitespace."""
     def __init__(self, source):
-        """Simple format: one sentence = one line; words already preprocessed and separated by whitespace.
-
-        source can be either a string or a file object
+        """
+        `source` can be either a string or a file object.
 
-        Thus, one can use this for just plain files:
+        Example::
 
             sentences = LineSentence('myfile.txt')
 
-        Or for compressed files:
+        Or for compressed files::
+
+            sentences = LineSentence('compressed_text.txt.bz2')
+            sentences = LineSentence('compressed_text.txt.gz')
 
-            sentences = LineSentence(bz2.BZ2File('compressed_text.bz2'))
         """
         self.source = source
 

From 9b67a4a0117fd3220f32be116646c1160a02790e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= <radimrehurek@seznam.cz>
Date: Wed, 4 Jun 2014 15:25:16 +0200
Subject: [PATCH 8/8] bump version: 0.10.0

---
 docs/src/conf.py | 4 ++--
 setup.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/conf.py b/docs/src/conf.py
index 87207929b9..3d5400367e 100644
--- a/docs/src/conf.py
+++ b/docs/src/conf.py
@@ -52,9 +52,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.10.0rc1'
+version = '0.10.0'
 # The full version, including alpha/beta/rc tags.
-release = '0.10.0rc1'
+release = '0.10.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/setup.py b/setup.py
index 7945b6038d..3d5ced683b 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
 
 # Commonly used information
 pkg_name = 'gensim'
-pkg_ver = '0.10.0rc1'
+pkg_ver = '0.10.0'
 pkg_desc = 'Python framework for fast Vector Space Modelling'
 
 # there is a bug in python2.5, preventing distutils from using any non-ascii characters :( http://bugs.python.org/issue2562