Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added random_seed parameter to make LsiModel reproducible #3194

Merged
merged 6 commits into from
Oct 24, 2021
Prev Previous commit
Next Next commit
Fixed code style: hanging indents in functions and methods, along with trailing commas
  • Loading branch information
parashardhapola committed Jul 19, 2021
commit 06ab30bcb7236b30f12da1e9f3b59651fe15c3cf
21 changes: 13 additions & 8 deletions gensim/models/lsimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* distributed computing for very large corpora, making use of a cluster of
machines

Wall-clock `performance on the English Wikipedia <http://radimrehurek.com/gensim/wiki.html>`_
Wall-clock `performance on the English Wikipedia <https://radimrehurek.com/gensim/wiki.html>`_
(2G corpus positions, 3.2M documents, 100K features, 0.5G non-zero entries in the final TF-IDF matrix),
requesting the top 400 LSI factors:

Expand Down Expand Up @@ -162,8 +162,11 @@ class Projection(utils.SaveLoad):
via :meth:`~gensim.models.lsimodel.Projection.merge`. This is how incremental training actually happens.

"""
def __init__(self, m, k, docs=None, use_svdlibc=False, power_iters=P2_EXTRA_ITERS,
extra_dims=P2_EXTRA_DIMS, dtype=np.float64, random_seed=None):

def __init__(
self, m, k, docs=None, use_svdlibc=False, power_iters=P2_EXTRA_ITERS,
extra_dims=P2_EXTRA_DIMS, dtype=np.float64, random_seed=None,
):
"""Construct the (U, S) projection from a corpus.

Parameters
Expand Down Expand Up @@ -359,9 +362,9 @@ class LsiModel(interfaces.TransformationABC, basemodel.BaseTopicModel):

def __init__(
self, corpus=None, num_topics=200, id2word=None, chunksize=20000,
decay=1.0, distributed=False, onepass=True,
power_iters=P2_EXTRA_ITERS, extra_samples=P2_EXTRA_DIMS, dtype=np.float64, random_seed=None,
):
decay=1.0, distributed=False, onepass=True, power_iters=P2_EXTRA_ITERS,
extra_samples=P2_EXTRA_DIMS, dtype=np.float64, random_seed=None,
):
"""Build an LSI model.

Parameters
Expand Down Expand Up @@ -876,8 +879,10 @@ def print_debug(id2token, u, s, topics, num_words=10, num_neg=None):
logger.info('topic #%s(%.3f): %s, ..., %s', topic, s[topic], ', '.join(pos), ', '.join(neg))


def stochastic_svd(corpus, rank, num_terms, chunksize=20000, extra_dims=None,
power_iters=0, dtype=np.float64, eps=1e-6, random_seed=None):
def stochastic_svd(
corpus, rank, num_terms, chunksize=20000, extra_dims=None,
power_iters=0, dtype=np.float64, eps=1e-6, random_seed=None,
):
"""Run truncated Singular Value Decomposition (SVD) on a sparse input.

Parameters
Expand Down