diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 4a41e03..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2022 James Foster
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0cc90ca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,71 @@
+# Markov Chain Cubature for Bayesian Inference
+
+This repository contains code for simulating Markov chains as a cloud of particles using cubature formulae.
+
+The `presentation` folder contains slides describing the methodology.
+
+The `scripts` folder contains the code.
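+
+At each iteration, every particle takes a Langevin-type gradient step in which the Gaussian noise is replaced by a small set of cubature points, and the enlarged particle cloud is then recombined back to (roughly) its original size using a ball tree. Below is a minimal sketch of that loop, reusing the `produce_local_cubature` and `propagate_measure` helpers from `scripts/gmm_cubature.py` (run it from the `scripts` folder); the target distribution and the particle/step counts here are placeholder choices, not the settings used in the scripts:
+
+```python
+import numpy as np
+import torch
+from sklearn.neighbors import BallTree
+
+from gmm_cubature import produce_local_cubature, propagate_measure
+
+dimension, n_particles, n_steps, step_size = 2, 1024, 1000, 0.1
+
+# Placeholder target and initial cloud; the scripts use a Gaussian mixture as the target.
+target = torch.distributions.MultivariateNormal(torch.zeros(dimension), torch.eye(dimension))
+prior = torch.distributions.MultivariateNormal(4.0 * torch.ones(dimension), torch.eye(dimension))
+particles = prior.sample(torch.Size([n_particles]))
+
+local_cubature = produce_local_cubature(dimension)   # +/- rows of a Hadamard matrix
+
+for _ in range(n_steps):
+    # Each particle is split into len(local_cubature) children.
+    cloud = propagate_measure(particles, target, local_cubature,
+                              step_size, np.sqrt(2.0 * step_size))
+    # Recombine: keep one randomly chosen particle per leaf of a ball tree.
+    tree = BallTree(cloud, leaf_size=len(local_cubature))
+    _, indices, nodes, _ = tree.get_arrays()
+    keep = [np.random.choice(indices[nd[0]:nd[1]]) for nd in nodes if nd[2]]
+    particles = torch.Tensor(cloud[keep])
+```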
+
+# Examples
+
+## 2D Gaussian Mixture
+
+Running this from the `scripts` folder applies the cubature algorithm to a 2D Gaussian mixture:
+
+```shell
+python gmm_cubature.py
+```
+
+The output looks something like this:
+
+```shell
+Time: 18.5 seconds
+Number of particles: 1024
+
+Cubature mean vector
+[-4.13376 -1.8908577]
+
+Cubature covariance matrix
+[[ 2.31224477 1.89332097]
+ [ 1.89332097 10.73603521]]
+
+Actual mean vector
+[-4.148839 -1.8847313]
+
+Actual variances
+[2.2467885 10.953863]
+
+Difference in means
+0.016276047
+```
+
+An illustration of the resulting particle configurations is given below (iterations 0, 25, 100, 250, 500 and 1000; the images are in the `img` folder):
+
+![Iteration 0](img/0.png)
+![Iteration 25](img/25.png)
+![Iteration 100](img/100.png)
+![Iteration 250](img/250.png)
+![Iteration 500](img/500.png)
+![Iteration 1000](img/1000.png)
+
+## Bayesian Logistic Regression
+
+This runs the algorithm for Bayesian logistic regression on the binary Covertype dataset (see the [SVGD paper](https://proceedings.neurips.cc/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Paper.pdf) for details); again, run it from the `scripts` folder:
+
+```shell
+python bayesian_logisitc_regression_cubature.py
+```
+
+The output looks something like this:
+
+```shell
+Time: 1137.0 seconds
+Number of particles: 256
+[accuracy, log-likelihood]
+[0.7573040282953107, -0.5628809048706431]
+```
+
+## License
+
+The content of `presentation/markov_chain_cubature_presentation.pdf` is licensed under the Creative Commons Attribution 4.0 International license, and the Python code is licensed under the MIT license.
\ No newline at end of file
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..12540a8
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1,3 @@
+We did not create the datasets in this folder ourselves. Please let us know of any conflicts of interest.
+
+The binary Covertype dataset is from the [LIBSVM Data Repository](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html).
\ No newline at end of file
diff --git a/data/covertype.mat b/data/covertype.mat
new file mode 100644
index 0000000..af9bd30
Binary files /dev/null and b/data/covertype.mat differ
diff --git a/img/0.png b/img/0.png
new file mode 100644
index 0000000..fcf8c26
Binary files /dev/null and b/img/0.png differ
diff --git a/img/100.png b/img/100.png
new file mode 100644
index 0000000..dc3945c
Binary files /dev/null and b/img/100.png differ
diff --git a/img/1000.png b/img/1000.png
new file mode 100644
index 0000000..89c2a5a
Binary files /dev/null and b/img/1000.png differ
diff --git a/img/25.png b/img/25.png
new file mode 100644
index 0000000..5ccfd36
Binary files /dev/null and b/img/25.png differ
diff --git a/img/250.png b/img/250.png
new file mode 100644
index 0000000..8df4467
Binary files /dev/null and b/img/250.png differ
diff --git a/img/500.png b/img/500.png
new file mode 100644
index 0000000..683f099
Binary files /dev/null and b/img/500.png differ
diff --git a/presentation/markov_chain_cubature_presentation.pdf b/presentation/markov_chain_cubature_presentation.pdf
new file mode 100644
index 0000000..a632fd5
Binary files /dev/null and b/presentation/markov_chain_cubature_presentation.pdf differ
diff --git a/scripts/bayesian_logisitc_regression_cubature.py b/scripts/bayesian_logisitc_regression_cubature.py
new file mode 100644
index 0000000..4f477d2
--- /dev/null
+++ b/scripts/bayesian_logisitc_regression_cubature.py
@@ -0,0 +1,165 @@
+# This code is adapted from https://github.com/dilinwang820/Stein-Variational-Gradient-Descent/blob/master/python/bayesian_logistic_regression.py
+
+import numpy as np
+import scipy.io
+from sklearn.model_selection import train_test_split
+import numpy.matlib as nm
+import torch
+from scipy.linalg import hadamard
+from sklearn.neighbors import BallTree
+import time
+
+'''
+ Example of Bayesian Logistic Regression (the same setting as Gershman et al. 2012):
+ The observed data D = {X, y} consist of N binary class labels,
+ y_t \in {-1,+1}, and d covariates for each datapoint, X_t \in R^d.
+ The hidden variables \theta = {w, \alpha} consist of d regression coefficients w_k \in R,
+ and a precision parameter \alpha \in R_+. We assume the following model:
+ p(\alpha) = Gamma(\alpha; a, b)
+ p(w_k | a) = N(w_k; 0, \alpha^-1)
+ p(y_t = 1| x_t, w) = 1 / (1+exp(-w^T x_t))
+'''
+class BayesianLR:
+ def __init__(self, X, Y, batchsize=100, a0=1, b0=0.01):
+ self.X, self.Y = X, Y
+ # TODO. Y in \in{+1, -1}
+ self.batchsize = min(batchsize, X.shape[0])
+ self.a0, self.b0 = a0, b0
+
+ self.N = X.shape[0]
+ self.permutation = np.random.permutation(self.N)
+ self.iter = 0
+
+
+ def dlnprob(self, theta):
+
+ if self.batchsize > 0:
+ batch = [ i % self.N for i in range(self.iter * self.batchsize, (self.iter + 1) * self.batchsize) ]
+ ridx = self.permutation[batch]
+ self.iter += 1
+ else:
+ ridx = np.random.permutation(self.X.shape[0])
+
+ Xs = self.X[ridx, :]
+ Ys = self.Y[ridx]
+
+ w = theta[:, :-1] # logistic weights
+ alpha = np.exp(theta[:, -1]) # the last column is logalpha
+ d = w.shape[1]
+
+ wt = np.multiply((alpha / 2), np.sum(w ** 2, axis=1))
+
+ coff = np.matmul(Xs, w.T)
+ y_hat = 1.0 / (1.0 + np.exp(-1 * coff))
+
+ dw_data = np.matmul(((nm.repmat(np.vstack(Ys), 1, theta.shape[0]) + 1) / 2.0 - y_hat).T, Xs) # Y \in {-1,1}
+ dw_prior = -np.multiply(nm.repmat(np.vstack(alpha), 1, d) , w)
+ dw = dw_data * 1.0 * self.X.shape[0] / Xs.shape[0] + dw_prior # re-scale
+
+ dalpha = d / 2.0 - wt + (self.a0 - 1) - self.b0 * alpha + 1 # the last term is the jacobian term
+
+ return torch.Tensor(np.hstack([dw, np.vstack(dalpha)])) # % first order derivative
+
+ def evaluation(self, theta, X_test, y_test):
+ theta = theta[:, :-1]
+ M, n_test = theta.shape[0], len(y_test)
+
+ prob = np.zeros([n_test, M])
+ for t in range(M):
+ coff = np.multiply(y_test, np.sum(-1 * np.multiply(nm.repmat(theta[t, :], n_test, 1), X_test), axis=1))
+ prob[:, t] = np.divide(np.ones(n_test), (1 + np.exp(coff)))
+
+ prob = np.mean(prob, axis=1)
+ acc = np.mean(prob > 0.5)
+ llh = np.mean(np.log(prob))
+ return [acc, llh]
+
+
+def propagate_measure(measure, gradlnprob, local_cubature, step_size, root_step_size):
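+    # Move every particle by a tamed (THεO POULA) gradient step of size step_size
+    # and split it into len(local_cubature) children, each shifted by
+    # root_step_size times one cubature vector; the returned cloud therefore has
+    # measure.size(0) * len(local_cubature) points.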
+ n = measure.size(0)
+ new_measure = root_step_size*torch.Tensor(local_cubature).repeat(n, 1)
+ gradients = gradlnprob(np.array(measure))
+
+ # THεO POULA adjustment
+ abs_gradient = torch.abs(gradients)
+
+ gradients = torch.div(gradients, 1.0 + root_step_size*abs_gradient)
+ gradients = gradients + torch.div(root_step_size*gradients, 1.0 + abs_gradient)
+
+ next_points = (measure + gradients*step_size).repeat_interleave(len(local_cubature), dim=0)
+ new_measure += next_points
+
+ return new_measure
+
+def produce_local_cubature(dimension):
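+    # Rows of a Hadamard matrix (entries +1/-1) together with their negations have
+    # zero mean, identity covariance and vanishing third moments, so they form a
+    # degree-3 cubature for the standard Gaussian. The sqrt(2) factor, combined
+    # with root_step_size = sqrt(step_size) in propagate_measure, reproduces the
+    # sqrt(2*step_size) scaling of the Langevin diffusion term. Columns are
+    # truncated when the dimension is not a power of two, which keeps these moments.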
+ max_power = np.ceil(np.log2(dimension))
+ max_dim = int(2**max_power)
+ hadamard_matrix = hadamard(max_dim)
+ if (dimension == max_dim):
+ return np.sqrt(2.0)*np.vstack((hadamard_matrix, -hadamard_matrix))
+ else:
+ new_matrix = hadamard_matrix[:, :dimension]
+ return np.sqrt(2.0)*np.vstack((new_matrix, -new_matrix))
+
+
+if __name__ == '__main__':
+ data = scipy.io.loadmat('../data/covertype.mat')
+
+ X_input = data['covtype'][:, 1:]
+ y_input = data['covtype'][:, 0]
+ y_input[y_input == 2] = -1
+
+ N = X_input.shape[0]
+ X_input = np.hstack([X_input, np.ones([N, 1])])
+ d = X_input.shape[1]
+ dimension = d + 1
+
+ # split the dataset into training and testing
+ X_train, X_test, y_train, y_test = train_test_split(X_input, y_input, test_size=0.2, random_state=42)
+
+ a0, b0 = 1, 0.01 #hyperparameters
+ model = BayesianLR(X_train, y_train, 100, a0, b0) # batchsize = 100
+
+ # initialization
+ number_of_points = 1024
+    cubature_measure = np.zeros([number_of_points, dimension])
+    alpha0 = np.random.gamma(a0, b0, number_of_points)
+
+ for i in range(number_of_points):
+ cubature_measure[i, :] = np.hstack([np.random.normal(0, np.sqrt(1 / alpha0[i]), d), np.log(alpha0[i])])
+
+ cubature_measure = torch.Tensor(cubature_measure)
+ local_cubature = produce_local_cubature(dimension)
+
+ points_per_recombine = 128
+ number_of_halving = 4
+ number_of_steps = 1500
+ step_size = 0.01
+ root_step_size = np.sqrt(step_size)
+
+ tic = time.time()
+
+ for i in range(number_of_steps):
+ # Propagate cubature measure
+ big_measure = propagate_measure(cubature_measure, model.dlnprob, local_cubature, step_size, root_step_size)
+
+ # Partition particles into subsets using a ball tree
+        ball_tree = BallTree(big_measure, leaf_size=int(0.5*points_per_recombine))
+ _, indices, nodes, _ = ball_tree.get_arrays()
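+        # get_arrays() returns (data, idx_array, node_data, node_bounds); each
+        # node_data entry is (idx_start, idx_end, is_leaf, radius), so nd[2]
+        # below selects the leaf nodes.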
+
+ # Resample particles
+ new_indices = [np.random.choice(indices[nd[0]: nd[1]]) for nd in nodes if nd[2]]
+ cubature_measure = torch.Tensor(big_measure[new_indices])
+
+ if (i+1) % 100 == 0:
+ print('iter ' + str(i+1))
+
+ toc = time.time()
+
+ print("Time: %.1f seconds" % (toc - tic))
+
+ print("Number of particles: ", len(cubature_measure))
+
+ print("[accuracy, log-likelihood]")
+ print(model.evaluation(cubature_measure, X_test, y_test))
+
\ No newline at end of file
diff --git a/scripts/bayesian_logistic_regression_svgd.py b/scripts/bayesian_logistic_regression_svgd.py
new file mode 100644
index 0000000..042044f
--- /dev/null
+++ b/scripts/bayesian_logistic_regression_svgd.py
@@ -0,0 +1,109 @@
+# This code is taken from https://github.com/dilinwang820/Stein-Variational-Gradient-Descent/blob/master/python/bayesian_logistic_regression.py
+
+import numpy as np
+import scipy.io
+from sklearn.model_selection import train_test_split
+import numpy.matlib as nm
+from svgd import SVGD
+
+import time
+
+'''
+ Example of Bayesian Logistic Regression (the same setting as Gershman et al. 2012):
+ The observed data D = {X, y} consist of N binary class labels,
+ y_t \in {-1,+1}, and d covariates for each datapoint, X_t \in R^d.
+ The hidden variables \theta = {w, \alpha} consist of d regression coefficients w_k \in R,
+ and a precision parameter \alpha \in R_+. We assume the following model:
+ p(\alpha) = Gamma(\alpha; a, b)
+ p(w_k | a) = N(w_k; 0, \alpha^-1)
+ p(y_t = 1| x_t, w) = 1 / (1+exp(-w^T x_t))
+'''
+class BayesianLR:
+ def __init__(self, X, Y, batchsize=100, a0=1, b0=0.01):
+ self.X, self.Y = X, Y
+ # TODO. Y in \in{+1, -1}
+ self.batchsize = min(batchsize, X.shape[0])
+ self.a0, self.b0 = a0, b0
+
+ self.N = X.shape[0]
+ self.permutation = np.random.permutation(self.N)
+ self.iter = 0
+
+
+ def dlnprob(self, theta):
+
+ if self.batchsize > 0:
+ batch = [ i % self.N for i in range(self.iter * self.batchsize, (self.iter + 1) * self.batchsize) ]
+ ridx = self.permutation[batch]
+ self.iter += 1
+ else:
+ ridx = np.random.permutation(self.X.shape[0])
+
+ Xs = self.X[ridx, :]
+ Ys = self.Y[ridx]
+
+ w = theta[:, :-1] # logistic weights
+ alpha = np.exp(theta[:, -1]) # the last column is logalpha
+ d = w.shape[1]
+
+ wt = np.multiply((alpha / 2), np.sum(w ** 2, axis=1))
+
+ coff = np.matmul(Xs, w.T)
+ y_hat = 1.0 / (1.0 + np.exp(-1 * coff))
+
+ dw_data = np.matmul(((nm.repmat(np.vstack(Ys), 1, theta.shape[0]) + 1) / 2.0 - y_hat).T, Xs) # Y \in {-1,1}
+ dw_prior = -np.multiply(nm.repmat(np.vstack(alpha), 1, d) , w)
+ dw = dw_data * 1.0 * self.X.shape[0] / Xs.shape[0] + dw_prior # re-scale
+
+ dalpha = d / 2.0 - wt + (self.a0 - 1) - self.b0 * alpha + 1 # the last term is the jacobian term
+
+ return np.hstack([dw, np.vstack(dalpha)]) # % first order derivative
+
+ def evaluation(self, theta, X_test, y_test):
+ theta = theta[:, :-1]
+ M, n_test = theta.shape[0], len(y_test)
+
+ prob = np.zeros([n_test, M])
+ for t in range(M):
+ coff = np.multiply(y_test, np.sum(-1 * np.multiply(nm.repmat(theta[t, :], n_test, 1), X_test), axis=1))
+ prob[:, t] = np.divide(np.ones(n_test), (1 + np.exp(coff)))
+
+ prob = np.mean(prob, axis=1)
+ acc = np.mean(prob > 0.5)
+ llh = np.mean(np.log(prob))
+ return [acc, llh]
+
+if __name__ == '__main__':
+ data = scipy.io.loadmat('../data/covertype.mat')
+
+ X_input = data['covtype'][:, 1:]
+ y_input = data['covtype'][:, 0]
+ y_input[y_input == 2] = -1
+
+ N = X_input.shape[0]
+ X_input = np.hstack([X_input, np.ones([N, 1])])
+ d = X_input.shape[1]
+ D = d + 1
+
+ # split the dataset into training and testing
+ X_train, X_test, y_train, y_test = train_test_split(X_input, y_input, test_size=0.2, random_state=42)
+
+ a0, b0 = 1, 0.01 #hyper-parameters
+ model = BayesianLR(X_train, y_train, 100, a0, b0) # batchsize = 100
+
+ # initialization
+ M = 256 # number of particles
+ theta0 = np.zeros([M, D]);
+ alpha0 = np.random.gamma(a0, b0, M);
+ for i in range(M):
+ theta0[i, :] = np.hstack([np.random.normal(0, np.sqrt(1 / alpha0[i]), d), np.log(alpha0[i])])
+
+ tic = time.time()
+ theta = SVGD().update(x0=theta0, lnprob=model.dlnprob, bandwidth=-1, n_iter=6000, stepsize=0.05, alpha=0.9, debug=True)
+
+ toc = time.time()
+
+ print("Time: %.1f seconds" % (toc - tic))
+
+ print('[accuracy, log-likelihood]')
+ print(model.evaluation(theta, X_test, y_test))
diff --git a/scripts/gmm.py b/scripts/gmm.py
new file mode 100644
index 0000000..c409964
--- /dev/null
+++ b/scripts/gmm.py
@@ -0,0 +1,148 @@
+import torch
+from torch import distributions as D
+from torch.nn import functional as F
+
+
+class MixtureSameFamily(D.Distribution):
+ """ Mixture (same-family) distribution.
+
+ The `MixtureSameFamily` distribution implements a (batch of) mixture
+ distribution where all components are from different parameterizations of
+ the same distribution type. It is parameterized by a `Categorical`
+ "selecting distribution" (over `k` components) and a components
+ distribution, i.e., a `Distribution` with a rightmost batch shape
+ (equal to `[k]`) which indexes each (batch of) component.
+ """
+
+ def __init__(self,
+ mixture_distribution,
+ components_distribution,
+ validate_args=None):
+ """ Construct a 'MixtureSameFamily' distribution
+
+ Args::
+ mixture_distribution: `torch.distributions.Categorical`-like
+ instance. Manages the probability of selecting components.
+ The number of categories must match the rightmost batch
+ dimension of the `components_distribution`. Must have either
+ scalar `batch_shape` or `batch_shape` matching
+ `components_distribution.batch_shape[:-1]`
+ components_distribution: `torch.distributions.Distribution`-like
+ instance. Right-most batch dimension indexes components.
+
+ Examples::
+ # Construct Gaussian Mixture Model in 1D consisting of 5 equally
+ # weighted normal distributions
+ >>> mix = D.Categorical(torch.ones(5,))
+ >>> comp = D.Normal(torch.randn(5,), torch.rand(5,))
+ >>> gmm = MixtureSameFamily(mix, comp)
+
+        # Construct Gaussian Mixture Model in 2D consisting of 5 equally
+ # weighted bivariate normal distributions
+ >>> mix = D.Categorical(torch.ones(5,))
+ >>> comp = D.Independent(D.Normal(
+ torch.randn(5,2), torch.rand(5,2)), 1)
+ >>> gmm = MixtureSameFamily(mix, comp)
+
+ # Construct a batch of 3 Gaussian Mixture Models in 2D each
+ # consisting of 5 random weighted bivariate normal distributions
+ >>> mix = D.Categorical(torch.rand(3,5))
+ >>> comp = D.Independent(D.Normal(
+ torch.randn(3,5,2), torch.rand(3,5,2)), 1)
+ >>> gmm = MixtureSameFamily(mix, comp)
+
+ """
+ self._mixture_distribution = mixture_distribution
+ self._components_distribution = components_distribution
+
+ if not isinstance(self._mixture_distribution, D.Categorical):
+ raise ValueError(" The Mixture distribution needs to be an "
+ " instance of torch.distribtutions.Categorical")
+
+ if not isinstance(self._components_distribution, D.Distribution):
+ raise ValueError("The Component distribution need to be an "
+ "instance of torch.distributions.Distribution")
+
+ # Check that batch size matches
+ mdbs = self._mixture_distribution.batch_shape
+ cdbs = self._components_distribution.batch_shape[:-1]
+ if len(mdbs) != 0 and mdbs != cdbs:
+ raise ValueError("`mixture_distribution.batch_shape` ({0}) is not "
+ "compatible with `components_distribution."
+ "batch_shape`({1})".format(mdbs, cdbs))
+
+ # Check that the number of mixture components matches
+ km = self._mixture_distribution.logits.shape[-1]
+ kc = self._components_distribution.batch_shape[-1]
+ if km is not None and kc is not None and km != kc:
+ raise ValueError("`mixture_distribution components` ({0}) does not"
+ " equal `components_distribution.batch_shape[-1]`"
+ " ({1})".format(km, kc))
+ self._num_components = km
+
+ event_shape = self._components_distribution.event_shape
+ self._event_ndims = len(event_shape)
+ super(MixtureSameFamily, self).__init__(batch_shape=cdbs,
+ event_shape=event_shape,
+ validate_args=validate_args)
+
+ @property
+ def mixture_distribution(self):
+ return self._mixture_distribution
+
+ @property
+ def components_distribution(self):
+ return self._components_distribution
+
+ @property
+ def mean(self):
+ probs = self._pad_mixture_dimensions(self.mixture_distribution.probs)
+ return torch.sum(probs * self.components_distribution.mean,
+ dim=-1-self._event_ndims) # [B, E]
+
+ @property
+ def variance(self):
+ # Law of total variance: Var(Y) = E[Var(Y|X)] + Var(E[Y|X])
+ probs = self._pad_mixture_dimensions(self.mixture_distribution.probs)
+ mean_cond_var = torch.sum(probs*self.components_distribution.variance,
+ dim=-1-self._event_ndims)
+ var_cond_mean = torch.sum(probs * (self.components_distribution.mean -
+ self._pad(self.mean)).pow(2.0),
+ dim=-1-self._event_ndims)
+ return mean_cond_var + var_cond_mean
+
+ def log_prob(self, x):
+ x = self._pad(x)
+ log_prob_x = self.components_distribution.log_prob(x) # [S, B, k]
+ log_mix_prob = torch.log_softmax(self.mixture_distribution.logits,
+ dim=-1) # [B, k]
+ return torch.logsumexp(log_prob_x + log_mix_prob, dim=-1) # [S, B]
+
+ def sample(self, sample_shape=torch.Size()):
+ with torch.no_grad():
+ # [n, B]
+ mix_sample = self.mixture_distribution.sample(sample_shape)
+ # [n, B, k, E]
+ comp_sample = self.components_distribution.sample(sample_shape)
+ # [n, B, k]
+ mask = F.one_hot(mix_sample, self._num_components)
+ # [n, B, k, [1]*E]
+ mask = self._pad_mixture_dimensions(mask)
+ return torch.sum(comp_sample * mask.float(),
+ dim=-1-self._event_ndims)
+
+ def _pad(self, x):
+ d = len(x.shape) - self._event_ndims
+ s = x.shape
+ x = x.reshape(*s[:d], 1, *s[d:])
+ return x
+
+ def _pad_mixture_dimensions(self, x):
+ dist_batch_ndims = self.batch_shape.numel()
+ cat_batch_ndims = self.mixture_distribution.batch_shape.numel()
+ pad_ndims = 0 if cat_batch_ndims == 1 else \
+ dist_batch_ndims - cat_batch_ndims
+ s = x.shape
+ x = torch.reshape(x, shape=(*s[:-1], *(pad_ndims*[1]),
+ *s[-1:], *(self._event_ndims*[1])))
+ return x
\ No newline at end of file
diff --git a/scripts/gmm_cubature.py b/scripts/gmm_cubature.py
new file mode 100644
index 0000000..70271a1
--- /dev/null
+++ b/scripts/gmm_cubature.py
@@ -0,0 +1,160 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+plt.rc('text', usetex=True)
+plt.rc('text.latex', preamble=r'\usepackage{amsmath}')
+from sklearn.neighbors import BallTree
+from gmm import MixtureSameFamily
+import torch
+import torch.autograd as autograd
+from scipy.linalg import hadamard
+import argparse
+import time
+
+def produce_local_cubature(dimension):
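+    # Rows of a Hadamard matrix (entries +1/-1) together with their negations have
+    # zero mean, identity covariance and vanishing third moments, so they form a
+    # degree-3 cubature for the standard Gaussian; propagate_measure scales them
+    # by sqrt(2*step_size) so that they stand in for the Brownian increment of a
+    # Langevin step. Columns are truncated when the dimension is not a power of two.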
+ max_power = np.ceil(np.log2(dimension))
+ max_dim = int(2**max_power)
+ hadamard_matrix = hadamard(max_dim)
+ if (dimension == max_dim):
+ return np.vstack((hadamard_matrix, -hadamard_matrix))
+ else:
+ new_matrix = hadamard_matrix[:, :dimension]
+ return np.vstack((new_matrix, -new_matrix))
+
+def grad_log_prob(P, X):
+ X = X.detach().requires_grad_(True)
+
+ log_prob = P.log_prob(X)
+
+ return autograd.grad(log_prob.sum(), X)[0]
+
+def propagate_measure(measure, target_dist, local_cubature, step_size, root_two_step_size):
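+    # One cubature step: evaluate the gradient of the target's log density at each
+    # particle via autograd, move the particle by step_size * gradient, and split it
+    # into len(local_cubature) children, each shifted by root_two_step_size times one
+    # cubature vector; the returned cloud has n * len(local_cubature) points.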
+ n = measure.size(0)
+ new_measure = root_two_step_size*torch.Tensor(local_cubature).repeat(n, 1)
+ X = measure.detach().requires_grad_()
+ target_dist.log_prob(X).backward(torch.ones(n))
+ grads = X.grad
+ next_points = (measure + grads*step_size).repeat_interleave(len(local_cubature), dim=0)
+ new_measure += next_points
+ return new_measure
+
+def plot_output(P, X, d=7.0, step=0.1, save=False, i=None):
+ xv, yv = torch.meshgrid([
+ torch.arange(-(d+2.0), (d-2.0), step),
+ torch.arange(-(d+2.0), d, step)
+ ], indexing="ij")
+ pos_xy = torch.cat((xv.unsqueeze(-1), yv.unsqueeze(-1)), dim=-1)
+ p_xy = P.log_prob(pos_xy.to(device)).exp().unsqueeze(-1).cpu()
+
+ df = torch.cat([pos_xy, p_xy], dim=-1).numpy()
+ df = pd.DataFrame({
+ 'x': df[:, :, 0].ravel(),
+ 'y': df[:, :, 1].ravel(),
+ 'p': df[:, :, 2].ravel(),
+ })
+
+    mu_hat = X.mean(0)
+    S = np.cov(X.T, bias=True)
+ mu = P.mean.numpy()
+ Sigma = P.variance.numpy()
+
+ mnorm = np.linalg.norm(mu_hat - mu)
+ Snorm = np.linalg.norm(np.diag(S) - Sigma)
+
+ fig1, ax2 = plt.subplots(constrained_layout=True)
+ ax2.scatter(X[:, 0], X[:, 1], c='r', s=5.)
+ ax2.set_aspect("equal")
+ plt.title(r'Iter %d; $\lVert \mu - \widehat{\mu} \rVert = %.3f; \lVert \Sigma - \widehat{\Sigma} \rVert = %.3f$' % (i, mnorm, Snorm))
+ if save and i is not None:
+ plt.savefig("../img/%d.png" % i)
+ plt.close()
+ else:
+ plt.show()
+
+
+if __name__ == "__main__":
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ parser = argparse.ArgumentParser(
+ description="Run cubature-based Langevin algorithm on a 2D Gaussian mixture model")
+ parser.add_argument("--stsize", type=float, default=.1, help="step size")
+ parser.add_argument("--nsteps", type=int, default=1000,
+ help="number of steps")
+ parser.add_argument("--npoints", type=int, default=1000,
+ help="number of points (power of two)")
+ args = parser.parse_args()
+
+ step_size = args.stsize
+ dimension = 2
+ number_of_steps = args.nsteps
+ number_of_points = args.npoints
+ no_of_recombines = number_of_points
+ number_of_halving = int(np.log2(dimension)) + 1
+ log_no_of_recombines = int(np.log2(no_of_recombines))
+ no_of_recombines = 2**log_no_of_recombines
+
+ if dimension != int(2**(number_of_halving - 1)):
+ number_of_halving = int(np.log2(dimension)) + 2
+
+ root_two_step_size = np.sqrt(2.0*step_size)
+ points_per_recombine = (number_of_points * pow(2, number_of_halving))/no_of_recombines
+
+ """
+ Set up initial measure and target
+ """
+ np.random.seed(1337)
+ torch.manual_seed(1337)
+
+ mix = torch.distributions.Categorical(torch.Tensor([.2, .5, .3]))
+ comp = torch.distributions.Independent(torch.distributions.Normal(3.*torch.randn(3,2), torch.Tensor([[1.5, .6], [1., 1.], [1.1, 1.6]])), 1)
+ measure = MixtureSameFamily(mix, comp, validate_args=False)
+
+ prior_mean = torch.Tensor([4.0] * dimension).to(device)
+ prior_cov = torch.Tensor([1.0] * dimension).diag().to(device)
+ prior = torch.distributions.MultivariateNormal(
+ prior_mean, covariance_matrix=prior_cov)
+
+ prior_samples = prior.sample(torch.Size([number_of_points])).to(device)
+
+ cubature_measure = prior_samples.clone()
+
+ local_cubature = produce_local_cubature(dimension)
+
+ tic = time.time()
+ for i in range(number_of_steps):
+ #if i % 25 == 0:
+ # plot_output(measure, cubature_measure, d=10., step=.1, save=True, i=i)
+
+ # Propagate cubature measure
+ big_measure = propagate_measure(cubature_measure, measure, local_cubature, step_size, root_two_step_size)
+
+ # Partition particles into subsets using a ball tree
+        ball_tree = BallTree(big_measure, leaf_size=int(0.5*points_per_recombine))
+ _, indices, nodes, _ = ball_tree.get_arrays()
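+        # node_data entries from get_arrays() are (idx_start, idx_end, is_leaf, radius):
+        # one randomly chosen particle is kept per leaf node.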
+
+ # Resample particles
+ new_indices = [np.random.choice(indices[nd[0]: nd[1]]) for nd in nodes if nd[2]]
+
+ cubature_measure = torch.Tensor(big_measure[new_indices])
+
+ toc = time.time()
+ print("Time: %.1f seconds" % (toc - tic))
+ #plot_output(measure, cubature_measure, 10.0, .1, save=True, i=1000)
+
+ cubature_measure = np.array(cubature_measure)
+
+ print("Number of particles: ", len(cubature_measure))
+ print("")
+ print("Cubature mean vector")
+ print(cubature_measure.mean(0))
+ print("")
+ print("Cubature covariance matrix")
+ print(np.cov(cubature_measure.T, bias=True))
+ print("")
+ print("Actual mean vector")
+ print(np.array(measure.mean))
+ print("")
+ print("Actual variances")
+ print(np.array(measure.variance))
+ print("")
+ print("Difference in means")
+ print(np.linalg.norm(cubature_measure.mean(0) - np.array(measure.mean)))
\ No newline at end of file
diff --git a/scripts/gmm_mcmc.py b/scripts/gmm_mcmc.py
new file mode 100644
index 0000000..d33c00b
--- /dev/null
+++ b/scripts/gmm_mcmc.py
@@ -0,0 +1,120 @@
+import matplotlib.pyplot as plt
+plt.rc('text', usetex=True)
+plt.rc('text.latex', preamble=r'\usepackage{amsmath}')
+
+from gmm import MixtureSameFamily
+import torch
+import numpy as np
+import time
+import pandas as pd
+import argparse
+
+def plot_output(P, X, d=7.0, step=0.1, save=False, i=None):
+ xv, yv = torch.meshgrid([
+ torch.arange(-d, d, step),
+ torch.arange(-d, d, step)
+ ], indexing="ij")
+ pos_xy = torch.cat((xv.unsqueeze(-1), yv.unsqueeze(-1)), dim=-1)
+ p_xy = P.log_prob(pos_xy.to(device)).exp().unsqueeze(-1).cpu()
+
+ df = torch.cat([pos_xy, p_xy], dim=-1).numpy()
+ df = pd.DataFrame({
+ 'x': df[:, :, 0].ravel(),
+ 'y': df[:, :, 1].ravel(),
+ 'p': df[:, :, 2].ravel(),
+ })
+
+    mu_hat = X.mean(0)
+    S = np.cov(X.T, bias=True)
+ mu = P.mean.numpy()
+ Sigma = P.variance.numpy()
+
+ mnorm = np.linalg.norm(mu_hat - mu)
+ Snorm = np.linalg.norm(np.diag(S) - Sigma)
+
+ fig1, ax2 = plt.subplots(constrained_layout=True)
+
+ ax2.scatter(X[:, 0], X[:, 1], c='r', s=5.)
+ ax2.set_aspect("equal")
+ plt.title(r'Iter %d; $\lVert \mu - \hat{\mu} \rVert = %.3f; \lVert \Sigma - S \rVert = %.3f$' % (i, mnorm, Snorm))
+ if save and i is not None:
+ plt.savefig("../img/%d.png" % i)
+ plt.close()
+ else:
+ plt.show()
+
+if __name__ == "__main__":
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ parser = argparse.ArgumentParser(
+ description="Run unadjusted Langevin algorithm on a 2D Gaussian mixture")
+ parser.add_argument("--stsize", type=float, default=.1, help="step size")
+ parser.add_argument("--nsteps", type=int, default=1001000,
+ help="number of steps")
+ parser.add_argument("--burnin", type=int, default=1000,
+ help="burn-in period")
+ args = parser.parse_args()
+
+ step_size = args.stsize
+ number_of_steps = args.nsteps
+ burn_in = args.burnin
+ dimension = 2
+
+ root_two_step_size = np.sqrt(2.0*step_size)
+
+ """
+ Set up initial measure and target
+ """
+ np.random.seed(1337)
+ torch.manual_seed(1337)
+
+ mix = torch.distributions.Categorical(torch.Tensor([.2, .5, .3]))
+ comp = torch.distributions.Independent(torch.distributions.Normal(3.*torch.randn(3,2), torch.Tensor([[1.5, .6], [1., 1.], [1.1, 1.6]])), 1)
+ measure = MixtureSameFamily(mix, comp, validate_args=False)
+
+ prior_mean = torch.Tensor([4.0] * dimension).to(device)
+ prior_cov = torch.Tensor([1.0] * dimension).diag().to(device)
+ prior = torch.distributions.MultivariateNormal(prior_mean, covariance_matrix=prior_cov)
+
+ tic = time.time()
+
+ current_point = prior.sample().to(device)
+ mu0 = np.zeros(dimension)
+ Sigma0 = np.eye(dimension)
+ standard_normal = torch.distributions.MultivariateNormal(torch.Tensor(mu0).to(device),
+ covariance_matrix=torch.Tensor(Sigma0).to(device))
+
+ points = torch.zeros(number_of_steps - burn_in, dimension)
+ points[0] = current_point
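+    # Unadjusted Langevin algorithm: x_{k+1} = x_k + step_size * grad log pi(x_k)
+    # + sqrt(2 * step_size) * w_k, with w_k a standard Gaussian sample.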
+ for i in range(number_of_steps):
+ X = current_point.detach().requires_grad_()
+ measure.log_prob(X).backward()
+ w = standard_normal.sample().to(device)
+ current_point = current_point + (X.grad)*step_size + root_two_step_size*w
+ if (i >= burn_in):
+ points[i-burn_in] = current_point
+
+ toc = time.time()
+
+ points = np.array(points)
+
+ print("Time: %.1f seconds" % (toc - tic))
+
+ print("Number of samples: ", len(points))
+ print("")
+ print("Empircal mean vector")
+ print(points.mean(0))
+ print("")
+ print("Empircal covariance matrix")
+ print(np.cov(points.T, bias=True))
+ print("")
+ print("Actual mean vector")
+ print(np.array(measure.mean))
+ print("")
+ print("Actual variances")
+ print(np.array(measure.variance))
+ print("")
+ print("Difference in means")
+ print(np.linalg.norm(points.mean(0) - np.array(measure.mean)))
+
+ #plot_output(measure, points, 15., .1, save=True, i=1000)
\ No newline at end of file
diff --git a/scripts/svgd.py b/scripts/svgd.py
new file mode 100644
index 0000000..f1628b7
--- /dev/null
+++ b/scripts/svgd.py
@@ -0,0 +1,57 @@
+# This code is taken from https://github.com/dilinwang820/Stein-Variational-Gradient-Descent/blob/master/python/svgd.py
+
+import numpy as np
+from scipy.spatial.distance import pdist, squareform
+
+class SVGD():
+
+ def __init__(self):
+ pass
+
+ def svgd_kernel(self, theta, h = -1):
+ sq_dist = pdist(theta)
+ pairwise_dists = squareform(sq_dist)**2
+ if h < 0: # if h < 0, using median trick
+ h = np.median(pairwise_dists)
+ h = np.sqrt(0.5 * h / np.log(theta.shape[0]+1))
+
+ # compute the rbf kernel
+ Kxy = np.exp( -pairwise_dists / h**2 / 2)
+
+ dxkxy = -np.matmul(Kxy, theta)
+ sumkxy = np.sum(Kxy, axis=1)
+ for i in range(theta.shape[1]):
+ dxkxy[:, i] = dxkxy[:,i] + np.multiply(theta[:,i],sumkxy)
+ dxkxy = dxkxy / (h**2)
+ return (Kxy, dxkxy)
+
+
+ def update(self, x0, lnprob, n_iter = 1000, stepsize = 1e-3, bandwidth = -1, alpha = 0.9, debug = False):
+ # Check input
+ if x0 is None or lnprob is None:
+ raise ValueError('x0 or lnprob cannot be None!')
+
+ theta = np.copy(x0)
+
+ # adagrad with momentum
+ fudge_factor = 1e-6
+ historical_grad = 0
+ for iter in range(n_iter):
+ if debug and (iter+1) % 1000 == 0:
+ print('iter ' + str(iter+1))
+
+ lnpgrad = lnprob(theta)
+ # calculating the kernel matrix
+ kxy, dxkxy = self.svgd_kernel(theta, h = -1)
+ grad_theta = (np.matmul(kxy, lnpgrad) + dxkxy) / x0.shape[0]
+
+ # adagrad
+ if iter == 0:
+ historical_grad = historical_grad + grad_theta ** 2
+ else:
+ historical_grad = alpha * historical_grad + (1 - alpha) * (grad_theta ** 2)
+ adj_grad = np.divide(grad_theta, fudge_factor+np.sqrt(historical_grad))
+ theta = theta + stepsize * adj_grad
+
+ return theta
+