From 7f1ea09b59554111c0d3256524c3c79849d325ca Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 10 Feb 2024 19:53:27 +0000 Subject: [PATCH] Deployed to GitHub Pages --- dev/_modules/tlda/third_order_cumulant.html | 49 ++++++++++--------- dev/_modules/tlda/tlda_wrapper.html | 54 ++++++++++----------- dev/modules/generated/tlda.TLDA.html | 4 +- dev/searchindex.js | 2 +- 4 files changed, 55 insertions(+), 54 deletions(-) diff --git a/dev/_modules/tlda/third_order_cumulant.html b/dev/_modules/tlda/third_order_cumulant.html index 4eee064..0a61133 100644 --- a/dev/_modules/tlda/third_order_cumulant.html +++ b/dev/_modules/tlda/third_order_cumulant.html @@ -119,18 +119,18 @@
def loss_rec(factor, theta):
'''Inputs:
- factor: (n_topics x n_topics): whitened factors from the SGD
+ factor: (n_topics x n_topics): whitened factors from the SGD
cumulant: Whitened M3 (n_topics x n_topicsx n_topics)
- theta: othogonalization penalty term (scalar)
- output:
+ theta: othogonalization penalty term (scalar)
+ output:
orthogonality loss:
-
- '''
+
+ '''
rec = tl.cp_to_tensor((None, [factor]*3))
- ortho_loss = (1 + theta)/2*tl.norm(rec, 2)**2
+ ortho_loss = (1 + theta)/2*tl.norm(rec, 2)**2
- return ortho_loss
+ return ortho_loss
@@ -139,7 +139,7 @@ Source code for tlda.third_order_cumulant
"""
Class to compute the third order cumulant
"""
- def __init__(self, n_topic, alpha_0, n_iter_train, n_iter_test, batch_size,
+ def __init__(self, n_topic, alpha_0, n_iter_train, n_iter_test, batch_size,
learning_rate, gamma_shape=1.0,
theta=1, ortho_loss_criterion=1000, seed=None, n_eigenvec=None,
learning_rate_criterion = 1e-5): # we could try to find a more informative name for alpha_0
@@ -148,13 +148,13 @@ Source code for tlda.third_order_cumulant
Parameters
----------
- n_topic :
- alpha_0 :
+ n_topic :
+ alpha_0 :
n_iter_train : int
n_iter_test : int
batch_size : int
learning_rate : float
- cumulant :
+ cumulant :
"""
rng = tl.check_random_state(seed)
@@ -170,23 +170,23 @@ Source code for tlda.third_order_cumulant
n_eigenvec = self.n_topic
self.n_eigenvec = n_eigenvec
self.learning_rate_criterion = learning_rate_criterion
-
+
# initializing the orthogonality loss
ortho_loss = ortho_loss_criterion+1
# Finding optimal starting values based on orthonormal inits:
while ortho_loss >= ortho_loss_criterion:
init_values = tl.tensor(rng.uniform(-1, 1, size=(n_eigenvec, n_topic)))
-
+
# init_values has shape (n_eigenvec, min(n_topic, n_eigenvec)) = (n_eigenvec, n_topic)
init_values, _ = tl.qr(init_values, mode='reduced')
ortho_loss = loss_rec(init_values, self.theta)
self.theta -= 0.01
-
+
self.factors_ = init_values
-
-
+
+
[docs]
def partial_fit(self, X_batch, learning_rate = None):
@@ -218,10 +218,10 @@ Source code for tlda.third_order_cumulant
X : ndarray of shape (number_documents, num_topics) equal to the whitened
word counts in each document in the documents used to update the factors
'''
- tol = self.learning_rate_criterion
+ tol = self.learning_rate_criterion
i = 1
max_diff = tol+1
-
+
while (i <= 10 or max_diff >= tol) and i < self.n_iter_train:
prev_fac = tl.copy(self.factors_)
for j in range(0, len(X), self.batch_size):
@@ -233,9 +233,10 @@ Source code for tlda.third_order_cumulant
i += 1
if verbose and i%5 ==0:
print(str(i)+"'th iteration complete. Maximum change in factors: "+str(max_diff))
-
+
del X
- print("Total iterations: " + str(i))
+ if verbose:
+ print("Total iterations: " + str(i))
@@ -260,9 +261,9 @@ Source code for tlda.third_order_cumulant
n_topics = self.n_topic
n_docs = X_test.shape[0]
- gammad = tl.gamma(self.gamma_shape, scale= 1.0/self.gamma_shape, size = (n_docs,n_topics))
+ gammad = tl.gamma(self.gamma_shape, scale= 1.0/self.gamma_shape, size = (n_docs,n_topics))
exp_elogthetad = tl.exp(dirichlet_expectation(gammad)) #ndocs, n_topic
-
+
epsilon = tl.finfo(gammad.dtype).eps
phinorm = (tl.matmul(exp_elogthetad,adjusted_factors.T) + epsilon) #ndoc X nwords
max_gamma_change = 1.0
@@ -274,7 +275,7 @@ Source code for tlda.third_order_cumulant
x_phi_norm = X_test / phinorm
x_phi_norm_factors = tl.matmul(x_phi_norm, adjusted_factors)
gammad = ((exp_elogthetad * (x_phi_norm_factors)) + weights) # estimate for the variational mixing param
- exp_elogthetad = tl.exp(dirichlet_expectation(gammad))
+ exp_elogthetad = tl.exp(dirichlet_expectation(gammad))
phinorm = (tl.matmul(exp_elogthetad,adjusted_factors.T) + epsilon)
mean_gamma_change_pdoc = tl.sum(tl.abs(gammad - lastgamma),axis=1) / n_topics
@@ -282,7 +283,7 @@ Source code for tlda.third_order_cumulant
i += 1
print("End Document Topic Prediction Iteration " + str(i) +" out of "+str(self.n_iter_test))
print("Current Maximal Change:" + str(max_gamma_change))
-
+
del X_test
return gammad
diff --git a/dev/_modules/tlda/tlda_wrapper.html b/dev/_modules/tlda/tlda_wrapper.html
index d756a29..0ac6bc8 100644
--- a/dev/_modules/tlda/tlda_wrapper.html
+++ b/dev/_modules/tlda/tlda_wrapper.html
@@ -119,8 +119,8 @@ Source code for tlda.tlda_wrapper
"""
Class to learn topic-word distribution from a corpus of documents
"""
- def __init__(self, n_topic, alpha_0, n_iter_train, n_iter_test, learning_rate,
- pca_batch_size=10000, third_order_cumulant_batch=1000 , gamma_shape=1.0, smoothing=1e-6,
+ def __init__(self, n_topic, alpha_0, n_iter_train, n_iter_test, learning_rate,
+ pca_batch_size=10000, third_order_cumulant_batch=1000 , gamma_shape=1.0, smoothing=1e-6,
theta=1, ortho_loss_criterion=1000, n_eigenvec = None, random_seed=None):
"""
Parameters
@@ -141,7 +141,7 @@ Source code for tlda.tlda_wrapper
if n_eigenvec is None:
n_eigenvec = n_topic
self.n_eigenvec = n_eigenvec
-
+
self.weights_ = tl.ones(self.n_topic)
self.vocab = 0
self.n_documents = 0
@@ -156,7 +156,7 @@ Source code for tlda.tlda_wrapper
[docs]
def fit(self, X, order = None):
"""
- Compute the word-topic distribution for the entire dataset at once. Assumes that the whole dataset and
+ Compute the word-topic distribution for the entire dataset at once. Assumes that the whole dataset and
the tensors required to compute its word-topic distribution fit in memory.
Parameters
@@ -167,18 +167,18 @@ Source code for tlda.tlda_wrapper
self.n_documents = X.shape[0]
self.vocab = X.shape[1]
self.mean = tl.mean(X, axis=0)
-
+
if order is None or order == 2:
self.second_order.fit(X - self.mean)
-
+
if order is None or order == 3:
X_whit = self.second_order.transform(X - self.mean)
self.third_order.fit(X_whit,verbose=False)
del X_whit
-
+
del X
-
+
def _partial_fit_first_order(self, X_batch):
if self.mean is None:
self.vocab = X_batch.shape[1]
@@ -192,13 +192,13 @@ Source code for tlda.tlda_wrapper
for j in range(0, len(X_batch), self.second_order.batch_size):
y = X_batch[j:j+self.second_order.batch_size]
self.second_order.partial_fit(y - self.mean)
- del y
+ del y
del X_batch
-
+
def _partial_fit_third_order(self, X_batch):
for j in range(0, len(X_batch), self.third_order_cumulant_batch):
y = X_batch[j:j+self.third_order_cumulant_batch]
- self.third_order.partial_fit(y)
+ self.third_order.partial_fit(y)
del y
del X_batch
@@ -209,7 +209,7 @@ Source code for tlda.tlda_wrapper
Update the word-topic distribution using a batch of documents. For a given batch, the
first and second order cumulants need to be fit once, but the third order cumulant should
be fit many times.
-
+
Parameters
----------
X_batch : tensor of shape (batch_size, self.vocab)
@@ -223,7 +223,7 @@ Source code for tlda.tlda_wrapper
"""
if not hasattr(self, "seen_batches"):
self.seen_batches = dict()
-
+
if batch_index in self.seen_batches:
# We've seen the batch at least once
if self.seen_batches[batch_index] != 0:
@@ -243,7 +243,7 @@ Source code for tlda.tlda_wrapper
pickle.dump(X_batch, open(Path(save_folder).joinpath(save_file).as_posix(), 'wb'))
else:
self.seen_batches[batch_index] = 1
-
+
self._partial_fit_third_order(X_batch)
else:
@@ -260,11 +260,11 @@ Source code for tlda.tlda_wrapper
"""
Update the word-topic distribution using a batch of documents in a fully online version. Meant for very large datasets,
since we only do one gradient update for each batch in the third order cumulant calculation.
-
+
Parameters
----------
X_batch : tensor of shape (batch_size, self.vocab)
- """
+ """
self._partial_fit_first_order(X_batch)
self._partial_fit_second_order(X_batch)
X_whit = self.second_order.transform(X_batch - self.mean)
@@ -275,7 +275,7 @@ Source code for tlda.tlda_wrapper
def _unwhiten_factors(self):
"""Unwhitens self.third_order.factors_, then uncenters and unnormalizes"""
- factors_unwhitened = self.second_order.reverse_transform(self.third_order.factors_.T).T
+ factors_unwhitened = self.second_order.reverse_transform(self.third_order.factors_.T).T
# Un-centers the data
factors_unwhitened += tl.reshape(self.mean,(self.vocab,1))
@@ -283,7 +283,7 @@ Source code for tlda.tlda_wrapper
# Save unwhitened factors before postprocessing
self.unwhitened_factors_raw_ = tl.copy(factors_unwhitened)
-
+
# Smoothing
factors_unwhitened *= (1. - self.smoothing)
factors_unwhitened += (self.smoothing / factors_unwhitened.shape[1])
@@ -291,12 +291,12 @@ Source code for tlda.tlda_wrapper
# Calculate the eigenvalues from the whitened factors
eig_vals = tl.tensor([tl.norm(k)**3 for k in self.third_order.factors_ ])
alpha = eig_vals**(-2)
- # Recover the topic weights
+ # Recover the topic weights
alpha_norm = (alpha / alpha.sum()) * self.alpha_0
self.weights_ = tl.tensor(alpha_norm)
# Normalize the factors
-
+
factors_unwhitened /= factors_unwhitened.sum(axis=0)
return factors_unwhitened
@@ -305,7 +305,7 @@ Source code for tlda.tlda_wrapper
"""Unwhitened learned factors of shape (n_topic, vocabulary_size)
On the first call, this will compute and store the unwhitened factors.
- Subsequent calls will simply return the stored value.
+ Subsequent calls will simply return the stored value.
"""
if self.unwhitened_factors_ is None:
self.unwhitened_factors_ = self._unwhiten_factors()
@@ -313,22 +313,22 @@ Source code for tlda.tlda_wrapper
[docs]
- def transform(self, X=None, predict=True):
+ def transform(self, X=None, predict=False):
"""
Transform the document-word matrix of a set of documents into a word-topic distribution and topic-distribution when predict=True.
Parameters
- ----------
- X : tensor of shape (n_documents , self.vocab)
+ ----------
+ X : tensor of shape (n_documents , self.vocab)
set of documetns to predict topic distribution
- predict : indicate whether to return topic-document distribution and word-topic distribution or just word-topic distribution.
+ predict : indicate whether to return topic-document distribution and word-topic distribution or just word-topic distribution.
"""
- self.third_order.unwhitened_factors_ = self.unwhitened_factors_
+ self.third_order.unwhitened_factors_ = self.unwhitened_factors
if predict:
predicted_topics = self.third_order.predict(X, self.unwhitened_factors_raw_, self.weights_)
return predicted_topics
-
+
return predicted_topics
diff --git a/dev/modules/generated/tlda.TLDA.html b/dev/modules/generated/tlda.TLDA.html
index 55d42e1..eddb14b 100644
--- a/dev/modules/generated/tlda.TLDA.html
+++ b/dev/modules/generated/tlda.TLDA.html
@@ -183,7 +183,7 @@
fit(X, order=None)[source]
-Compute the word-topic distribution for the entire dataset at once. Assumes that the whole dataset and
+
Compute the word-topic distribution for the entire dataset at once. Assumes that the whole dataset and
the tensors required to compute its word-topic distribution fit in memory.
- Parameters:
@@ -240,7 +240,7 @@
-
-transform(X=None, predict=True)[source]
+transform(X=None, predict=False)[source]
Transform the document-word matrix of a set of documents into a word-topic distribution and topic-distribution when predict=True.
- Parameters:
diff --git a/dev/searchindex.js b/dev/searchindex.js
index 19a8306..a2dced5 100644
--- a/dev/searchindex.js
+++ b/dev/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["index", "install", "modules/api", "modules/generated/tlda.SecondOrderCumulant", "modules/generated/tlda.TLDA", "modules/generated/tlda.ThirdOrderCumulant", "user_guide/index", "user_guide/tlda"], "filenames": ["index.rst", "install.rst", "modules/api.rst", "modules/generated/tlda.SecondOrderCumulant.rst", "modules/generated/tlda.TLDA.rst", "modules/generated/tlda.ThirdOrderCumulant.rst", "user_guide/index.rst", "user_guide/tlda.rst"], "titles": ["Tensor LDA in PyTorch", "Installing TLDA", "API reference", "tlda
.SecondOrderCumulant", "tlda
.TLDA", "tlda
.ThirdOrderCumulant", "User guide", "Tensor LDA"], "terms": {"tensor": [0, 2, 3, 4, 5, 6], "lda": [0, 1, 2, 6], "pytorch": [0, 1], "A": 0, "scalabl": 0, "gpu": 0, "acceler": 0, "onlin": [0, 4], "built": 0, "tensorli": [0, 1], "As": 0, "batch": [0, 3, 4, 5], "document": [0, 3, 4, 5], "arriv": 0, "thei": 0, "ar": [0, 1], "first": [0, 4], "pre": 0, "process": 0, "The": [0, 1], "result": [0, 1], "term": 0, "matrix": [0, 4], "i": [0, 1, 4], "center": [0, 3], "whiten": [0, 3, 4, 5], "transform": [0, 3, 4], "updat": [0, 1, 3, 4, 5], "us": [0, 1, 3, 4, 5], "x": [0, 3, 4, 5], "final": 0, "third": [0, 3, 4, 5], "order": [0, 3, 4, 5], "moment": [0, 4], "directli": [0, 5], "factor": [0, 4, 5], "form": 0, "thi": [0, 1, 4], "learn": [0, 1, 4], "can": [0, 1], "unwhiten": [0, 3, 4], "uncent": 0, "recov": 0, "classic": 0, "solut": 0, "topic": [0, 4, 5], "get": [0, 3], "start": [0, 1], "packag": 1, "call": [1, 4], "provid": 1, "all": [1, 3, 4], "tool": 1, "oper": 1, "onc": [1, 4], "you": 1, "import": 1, "need": [1, 4], "have": 1, "python": [1, 2], "3": 1, "well": 1, "numpi": 1, "scipi": 1, "torch": 1, "If": [1, 4], "gener": 1, "want": 1, "pain": 1, "free": 1, "experi": 1, "anaconda": 1, "distribiut": 1, "It": 1, "come": 1, "ship": 1, "readi": 1, "simpli": [1, 4], "your": 1, "termin": 1, "u": 1, "option": [1, 5], "cd": 1, "git": 1, "http": 1, "com": 1, "requir": [1, 4], "easili": 1, "r": 1, "txt": 1, "Then": 1, "here": 1, "edit": 1, "mode": 1, "e": 1, "equival": 1, "uni": 1, "an": 1, "part": 1, "pytest": 1, "depend": 1, "doc": 1, "requirements_doc": 1, "now": 1, "html": 1, "make": [1, 5], "main": 1, "page": 1, "index": [1, 4], "tlda": 2, "class": [3, 4, 5], "n_eigenvec": [3, 4, 5], "alpha_0": [3, 4, 5], "batch_siz": [3, 4, 5], "sourc": [3, 4, 5], "comput": [3, 4, 5], "cumul": [3, 4, 5], "method": [3, 4, 5], "fit": [3, 4, 5], "entir": [3, 4], "data": 3, "project": 3, "weight": [3, 5], "singular": 3, "vector": 3, "scale": 3, "explain": 3, "varianc": 3, "input": 3, "dataset": [3, 4], "paramet": [3, 4, 5], "shape": [3, 4, 5], "n_sampl": 3, "vocabulary_s": [3, 4, 5], "contain": 3, "partial_fit": [3, 4, 5], "x_batch": [3, 4, 5], "accordingli": 3, "some": 3, "pca": 3, "model": 3, "sampl": 3, "return": [3, 4, 5], "whitened_x": 3, "self": [3, 4], "reverse_transform": 3, "unwhitened_x": 3, "n_topic": [4, 5], "n_iter_train": [4, 5], "n_iter_test": [4, 5], "learning_r": [4, 5], "pca_batch_s": 4, "10000": 4, "third_order_cumulant_batch": 4, "1000": [4, 5], "gamma_shap": [4, 5], "1": [4, 5], "0": [4, 5], "smooth": 4, "1e": [4, 5], "06": 4, "theta": [4, 5], "ortho_loss_criterion": [4, 5], "none": [4, 5], "random_se": 4, "word": [4, 5], "distribut": [4, 5], "from": [4, 5], "corpu": 4, "attribut": 4, "unwhitened_factor": 4, "assum": 4, "whole": 4, "its": 4, "memori": 4, "size": 4, "n_document": 4, "vocab": 4, "batch_index": 4, "save_fold": 4, "For": 4, "given": 4, "second": 4, "should": 4, "mani": 4, "time": 4, "int": 4, "current": 4, "know": 4, "whether": 4, "just": 4, "str": 4, "default": 4, "folder": 4, "which": 4, "store": 4, "recomput": 4, "each": [4, 5], "iter": [4, 5], "instead": 4, "being": 4, "catch": 4, "partial_fit_onlin": 4, "fulli": 4, "version": 4, "meant": 4, "veri": 4, "larg": 4, "sinc": 4, "we": 4, "onli": 4, "do": 4, "one": 4, "gradient": [4, 5], "calcul": 4, "properti": 4, "On": 4, "subsequ": 4, "valu": 4, "predict": [4, 5], "true": [4, 5], "set": 4, "when": 4, "documetn": 4, "indic": 4, "seed": 5, "learning_rate_criterion": 5, "05": 5, "stochast": 5, "descent": 5, "ndarrai": 5, "number_docu": 5, "num_top": 5, "equal": 5, "count": 5, "verbos": 5, "bool": 5, "print": 5, "inform": 5, "about": 5, "everi": 5, "200th": 5, "x_test": 5, "adjusted_factor": 5, "infer": 5, "non": 5, "neg": 5, "test": 5, "gammad": 5, "number_top": 5, "normal": 5}, "objects": {"": [[2, 0, 0, "-", "tlda"]], "tlda": [[3, 1, 1, "", "SecondOrderCumulant"], [4, 1, 1, "", "TLDA"], [5, 1, 1, "", "ThirdOrderCumulant"]], "tlda.SecondOrderCumulant": [[3, 2, 1, "", "fit"], [3, 2, 1, "", "partial_fit"], [3, 2, 1, "", "reverse_transform"], [3, 2, 1, "", "transform"]], "tlda.TLDA": [[4, 2, 1, "", "fit"], [4, 2, 1, "", "partial_fit"], [4, 2, 1, "", "partial_fit_online"], [4, 2, 1, "", "transform"], [4, 3, 1, "", "unwhitened_factors"]], "tlda.ThirdOrderCumulant": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "partial_fit"], [5, 2, 1, "", "predict"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"]}, "titleterms": {"instal": 1, "tlda": [1, 3, 4, 5], "pre": 1, "requisit": 1, "pip": 1, "recommend": 1, "clone": 1, "github": 1, "repositori": 1, "run": 1, "test": 1, "build": 1, "document": 1, "api": 2, "refer": 2, "secondordercumul": 3, "thirdordercumul": 5, "user": 6, "guid": 6, "tensor": 7, "lda": 7}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Installing TLDA": [[1, "installing-tlda"]], "Pre-requisite": [[1, "pre-requisite"]], "Installing with pip (recommended)": [[1, "installing-with-pip-recommended"]], "Cloning the github repository": [[1, "cloning-the-github-repository"]], "Running the tests": [[1, "running-the-tests"]], "Building the documentation": [[1, "building-the-documentation"]], "API reference": [[2, "api-reference"]], "tlda.SecondOrderCumulant": [[3, "tlda-secondordercumulant"]], "tlda.TLDA": [[4, "tlda-tlda"]], "tlda.ThirdOrderCumulant": [[5, "tlda-thirdordercumulant"]], "User guide": [[6, "user-guide"]], "Tensor LDA": [[7, "tensor-lda"]]}, "indexentries": {"module": [[2, "module-tlda"]], "tlda": [[2, "module-tlda"]], "secondordercumulant (class in tlda)": [[3, "tlda.SecondOrderCumulant"]], "fit() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.fit"]], "partial_fit() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.partial_fit"]], "reverse_transform() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.reverse_transform"]], "transform() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.transform"]], "tlda (class in tlda)": [[4, "tlda.TLDA"]], "fit() (tlda.tlda method)": [[4, "tlda.TLDA.fit"]], "partial_fit() (tlda.tlda method)": [[4, "tlda.TLDA.partial_fit"]], "partial_fit_online() (tlda.tlda method)": [[4, "tlda.TLDA.partial_fit_online"]], "transform() (tlda.tlda method)": [[4, "tlda.TLDA.transform"]], "unwhitened_factors (tlda.tlda property)": [[4, "tlda.TLDA.unwhitened_factors"]], "thirdordercumulant (class in tlda)": [[5, "tlda.ThirdOrderCumulant"]], "fit() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.fit"]], "partial_fit() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.partial_fit"]], "predict() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.predict"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["index", "install", "modules/api", "modules/generated/tlda.SecondOrderCumulant", "modules/generated/tlda.TLDA", "modules/generated/tlda.ThirdOrderCumulant", "user_guide/index", "user_guide/tlda"], "filenames": ["index.rst", "install.rst", "modules/api.rst", "modules/generated/tlda.SecondOrderCumulant.rst", "modules/generated/tlda.TLDA.rst", "modules/generated/tlda.ThirdOrderCumulant.rst", "user_guide/index.rst", "user_guide/tlda.rst"], "titles": ["Tensor LDA in PyTorch", "Installing TLDA", "API reference", "tlda
.SecondOrderCumulant", "tlda
.TLDA", "tlda
.ThirdOrderCumulant", "User guide", "Tensor LDA"], "terms": {"tensor": [0, 2, 3, 4, 5, 6], "lda": [0, 1, 2, 6], "pytorch": [0, 1], "A": 0, "scalabl": 0, "gpu": 0, "acceler": 0, "onlin": [0, 4], "built": 0, "tensorli": [0, 1], "As": 0, "batch": [0, 3, 4, 5], "document": [0, 3, 4, 5], "arriv": 0, "thei": 0, "ar": [0, 1], "first": [0, 4], "pre": 0, "process": 0, "The": [0, 1], "result": [0, 1], "term": 0, "matrix": [0, 4], "i": [0, 1, 4], "center": [0, 3], "whiten": [0, 3, 4, 5], "transform": [0, 3, 4], "updat": [0, 1, 3, 4, 5], "us": [0, 1, 3, 4, 5], "x": [0, 3, 4, 5], "final": 0, "third": [0, 3, 4, 5], "order": [0, 3, 4, 5], "moment": [0, 4], "directli": [0, 5], "factor": [0, 4, 5], "form": 0, "thi": [0, 1, 4], "learn": [0, 1, 4], "can": [0, 1], "unwhiten": [0, 3, 4], "uncent": 0, "recov": 0, "classic": 0, "solut": 0, "topic": [0, 4, 5], "get": [0, 3], "start": [0, 1], "packag": 1, "call": [1, 4], "provid": 1, "all": [1, 3, 4], "tool": 1, "oper": 1, "onc": [1, 4], "you": 1, "import": 1, "need": [1, 4], "have": 1, "python": [1, 2], "3": 1, "well": 1, "numpi": 1, "scipi": 1, "torch": 1, "If": [1, 4], "gener": 1, "want": 1, "pain": 1, "free": 1, "experi": 1, "anaconda": 1, "distribiut": 1, "It": 1, "come": 1, "ship": 1, "readi": 1, "simpli": [1, 4], "your": 1, "termin": 1, "u": 1, "option": [1, 5], "cd": 1, "git": 1, "http": 1, "com": 1, "requir": [1, 4], "easili": 1, "r": 1, "txt": 1, "Then": 1, "here": 1, "edit": 1, "mode": 1, "e": 1, "equival": 1, "uni": 1, "an": 1, "part": 1, "pytest": 1, "depend": 1, "doc": 1, "requirements_doc": 1, "now": 1, "html": 1, "make": [1, 5], "main": 1, "page": 1, "index": [1, 4], "tlda": 2, "class": [3, 4, 5], "n_eigenvec": [3, 4, 5], "alpha_0": [3, 4, 5], "batch_siz": [3, 4, 5], "sourc": [3, 4, 5], "comput": [3, 4, 5], "cumul": [3, 4, 5], "method": [3, 4, 5], "fit": [3, 4, 5], "entir": [3, 4], "data": 3, "project": 3, "weight": [3, 5], "singular": 3, "vector": 3, "scale": 3, "explain": 3, "varianc": 3, "input": 3, "dataset": [3, 4], "paramet": [3, 4, 5], "shape": [3, 4, 5], "n_sampl": 3, "vocabulary_s": [3, 4, 5], "contain": 3, "partial_fit": [3, 4, 5], "x_batch": [3, 4, 5], "accordingli": 3, "some": 3, "pca": 3, "model": 3, "sampl": 3, "return": [3, 4, 5], "whitened_x": 3, "self": [3, 4], "reverse_transform": 3, "unwhitened_x": 3, "n_topic": [4, 5], "n_iter_train": [4, 5], "n_iter_test": [4, 5], "learning_r": [4, 5], "pca_batch_s": 4, "10000": 4, "third_order_cumulant_batch": 4, "1000": [4, 5], "gamma_shap": [4, 5], "1": [4, 5], "0": [4, 5], "smooth": 4, "1e": [4, 5], "06": 4, "theta": [4, 5], "ortho_loss_criterion": [4, 5], "none": [4, 5], "random_se": 4, "word": [4, 5], "distribut": [4, 5], "from": [4, 5], "corpu": 4, "attribut": 4, "unwhitened_factor": 4, "assum": 4, "whole": 4, "its": 4, "memori": 4, "size": 4, "n_document": 4, "vocab": 4, "batch_index": 4, "save_fold": 4, "For": 4, "given": 4, "second": 4, "should": 4, "mani": 4, "time": 4, "int": 4, "current": 4, "know": 4, "whether": 4, "just": 4, "str": 4, "default": 4, "folder": 4, "which": 4, "store": 4, "recomput": 4, "each": [4, 5], "iter": [4, 5], "instead": 4, "being": 4, "catch": 4, "partial_fit_onlin": 4, "fulli": 4, "version": 4, "meant": 4, "veri": 4, "larg": 4, "sinc": 4, "we": 4, "onli": 4, "do": 4, "one": 4, "gradient": [4, 5], "calcul": 4, "properti": 4, "On": 4, "subsequ": 4, "valu": 4, "predict": [4, 5], "fals": 4, "set": 4, "when": 4, "true": [4, 5], "documetn": 4, "indic": 4, "seed": 5, "learning_rate_criterion": 5, "05": 5, "stochast": 5, "descent": 5, "ndarrai": 5, "number_docu": 5, "num_top": 5, "equal": 5, "count": 5, "verbos": 5, "bool": 5, "print": 5, "inform": 5, "about": 5, "everi": 5, "200th": 5, "x_test": 5, "adjusted_factor": 5, "infer": 5, "non": 5, "neg": 5, "test": 5, "gammad": 5, "number_top": 5, "normal": 5}, "objects": {"": [[2, 0, 0, "-", "tlda"]], "tlda": [[3, 1, 1, "", "SecondOrderCumulant"], [4, 1, 1, "", "TLDA"], [5, 1, 1, "", "ThirdOrderCumulant"]], "tlda.SecondOrderCumulant": [[3, 2, 1, "", "fit"], [3, 2, 1, "", "partial_fit"], [3, 2, 1, "", "reverse_transform"], [3, 2, 1, "", "transform"]], "tlda.TLDA": [[4, 2, 1, "", "fit"], [4, 2, 1, "", "partial_fit"], [4, 2, 1, "", "partial_fit_online"], [4, 2, 1, "", "transform"], [4, 3, 1, "", "unwhitened_factors"]], "tlda.ThirdOrderCumulant": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "partial_fit"], [5, 2, 1, "", "predict"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"]}, "titleterms": {"instal": 1, "tlda": [1, 3, 4, 5], "pre": 1, "requisit": 1, "pip": 1, "recommend": 1, "clone": 1, "github": 1, "repositori": 1, "run": 1, "test": 1, "build": 1, "document": 1, "api": 2, "refer": 2, "secondordercumul": 3, "thirdordercumul": 5, "user": 6, "guid": 6, "tensor": 7, "lda": 7}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 60}, "alltitles": {"Installing TLDA": [[1, "installing-tlda"]], "Pre-requisite": [[1, "pre-requisite"]], "Installing with pip (recommended)": [[1, "installing-with-pip-recommended"]], "Cloning the github repository": [[1, "cloning-the-github-repository"]], "Running the tests": [[1, "running-the-tests"]], "Building the documentation": [[1, "building-the-documentation"]], "API reference": [[2, "api-reference"]], "tlda.SecondOrderCumulant": [[3, "tlda-secondordercumulant"]], "tlda.TLDA": [[4, "tlda-tlda"]], "tlda.ThirdOrderCumulant": [[5, "tlda-thirdordercumulant"]], "User guide": [[6, "user-guide"]], "Tensor LDA": [[7, "tensor-lda"]]}, "indexentries": {"module": [[2, "module-tlda"]], "tlda": [[2, "module-tlda"]], "secondordercumulant (class in tlda)": [[3, "tlda.SecondOrderCumulant"]], "fit() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.fit"]], "partial_fit() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.partial_fit"]], "reverse_transform() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.reverse_transform"]], "transform() (tlda.secondordercumulant method)": [[3, "tlda.SecondOrderCumulant.transform"]], "tlda (class in tlda)": [[4, "tlda.TLDA"]], "fit() (tlda.tlda method)": [[4, "tlda.TLDA.fit"]], "partial_fit() (tlda.tlda method)": [[4, "tlda.TLDA.partial_fit"]], "partial_fit_online() (tlda.tlda method)": [[4, "tlda.TLDA.partial_fit_online"]], "transform() (tlda.tlda method)": [[4, "tlda.TLDA.transform"]], "unwhitened_factors (tlda.tlda property)": [[4, "tlda.TLDA.unwhitened_factors"]], "thirdordercumulant (class in tlda)": [[5, "tlda.ThirdOrderCumulant"]], "fit() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.fit"]], "partial_fit() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.partial_fit"]], "predict() (tlda.thirdordercumulant method)": [[5, "tlda.ThirdOrderCumulant.predict"]]}})
\ No newline at end of file