Commit
Merge branch 'experimental'
nchopin committed Oct 25, 2021
2 parents 3a86f60 + 44457fd commit d389e8a
Showing 44 changed files with 3,156 additions and 2,637 deletions.
12 changes: 10 additions & 2 deletions CHANGELOG
@@ -4,10 +4,18 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

- distributions: multivariate_normal supports varying covariance matrices
- utils: multiprocessing now based on joblib library (works better on Windows)
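The move to joblib can be illustrated with a minimal sketch using plain joblib (not particles' internal wrapper; the worker function and `n_jobs` value below are illustrative):

```python
from joblib import Parallel, delayed

def square(x):
    # Stand-in for CPU-bound work to distribute across workers
    return x * x

# Run the function on four inputs with two workers; joblib picks a
# process backend (loky) that also behaves well on Windows.
results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(4))
print(results)  # [0, 1, 4, 9]
```

joblib preserves the order of the inputs in its output, which keeps results reproducible across backends.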
## [0.3] - 2021-10-25
### Added
- new module: binary_smc
- smc_samplers: waste-free SMC (now default)
- resampling: added killing resampling scheme
- new tutorial notebook: how to define complicated state-space models
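The idea behind waste-free SMC is to keep *every* intermediate state of the MCMC rejuvenation moves as a particle, instead of discarding all but the last one. A self-contained toy sketch of that pooling mechanism, using a random-walk Metropolis kernel targeting a standard normal (all names and constants here are illustrative, not the particles API):

```python
import numpy as np

rng = np.random.default_rng(0)

def rw_metropolis_step(x, logpdf, scale=1.0):
    """One random-walk Metropolis step leaving exp(logpdf) invariant."""
    prop = x + scale * rng.standard_normal()
    if np.log(rng.random()) < logpdf(prop) - logpdf(x):
        return prop
    return x

def waste_free_rejuvenate(starts, logpdf, len_chain):
    """From M resampled starting points, run M chains of length len_chain
    and pool all visited states: the output has M * len_chain particles."""
    pooled = []
    for x in starts:
        for _ in range(len_chain):
            x = rw_metropolis_step(x, logpdf)
            pooled.append(x)
    return np.array(pooled)

log_target = lambda x: -0.5 * x ** 2   # standard normal, up to a constant
M, len_chain = 100, 6                  # pooled size N = M * len_chain
starts = rng.standard_normal(M)        # stand-in for resampled particles
particles_out = waste_free_rejuvenate(starts, log_target, len_chain)
print(particles_out.shape)  # (600,)
```

This is only the pooling idea; the actual smc_samplers implementation also weights and resamples the pooled particles within the surrounding SMC sampler.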

### Changed
- qmc: now based on scipy.stats.qmc (remove Fortran code dependency)
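The new backend is SciPy's public QMC module; a minimal sketch of drawing a scrambled Sobol sequence with it (the dimension and sample size are arbitrary):

```python
import numpy as np
from scipy.stats import qmc

# Scrambled Sobol points in [0, 1)^2; random_base2(m) draws 2**m points,
# which preserves the balance properties of the sequence.
sampler = qmc.Sobol(d=2, scramble=True, seed=42)
u = sampler.random_base2(m=3)
print(u.shape)  # (8, 2)
```

Scrambling randomizes the sequence while keeping its low-discrepancy structure, which is what allows unbiased variance estimates in SQMC.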

## [0.2] - 2021-01-27
Blog post: <https://statisfaction.wordpress.com/2021/01/29/particles-0-2-whats-new-whats-next-your-comments-most-welcome/>
### Added
- new module: datasets (cleaner way to load standard datasets)
- new module: variance_estimators (single-run genealogy based estimators à la
33 changes: 5 additions & 28 deletions INSTALL
@@ -1,7 +1,7 @@
Python 2 vs Python 3
====================

Short version: **particles** only supports Python 3.

Long version: initially, **particles** supported both Python 2 and Python 3.
However, Python 2 has been deprecated, and supporting it has become
@@ -23,6 +23,7 @@ The long version: **particles** requires the following libraries:
* `NumPy <http://www.numpy.org/>`_
* `SciPy <http://www.scipy.org/>`_
* `numba <https://numba.pydata.org/>`_
* `joblib <https://joblib.readthedocs.io/>`_

In addition, it is **strongly recommended** to install the following plotting
libraries:
@@ -57,8 +58,8 @@ import the modified version.

.. _Github: https://github.com/nchopin/particles.git

-Installation: a simpler method
-==============================
+Installation: alternative method
+================================

The package is also available on PyPI_ (the Python package index), so you may
install it by running pip. On a Linux machine::
@@ -70,31 +71,7 @@ install it by running pip. On a Linux machine::
Option ``--user`` lets you install the package in your home directory, rather than
globally for all users.

-The only drawback of this method is that it install only the modules of the
+The only drawback of this method is that it installs only the modules of the
package, and not the additional examples (e.g. the datasets and scripts that
were used to generate the plots found in the book).

-Troubleshooting
-===============
-
-If the two methods above fail, you may try this::
-
-    cd some_folder_of_your_choosing
-    git clone https://github.com/nchopin/particles.git
-    python setup.py install --force
-
-This will install the package, without compiling the little bit of Fortran code
-that is used in module `qmc`. At this stage, you may already use the package if
-you do not use anything related to quasi-Monte Carlo (i.e. option ``qmc`` in
-class `SMC`). In addition, you may compile manually the said piece of Fortran
-as follows::
-
-    pip show particles  # tells you the folder where the package is installed
-    cd where_particles_is_installed/particles
-    f2py3 -c ../src/LowDiscrepancy.f -m lowdiscrepancy
-
-Replace f2py3 by f2py if you use Python 2. Make sure a Fortran compiler is
-installed on your machine (e.g. package gfortran on Ubuntu). Utility `f2py` is
-part of Numpy; see here_.
-
-.. _here: https://docs.scipy.org/doc/numpy/f2py/
4 changes: 3 additions & 1 deletion README.md
@@ -29,7 +29,9 @@ by Nicolas Chopin and Omiros Papaspiliopoulos.
* Exact filtering/smoothing algorithms: **Kalman** (for linear Gaussian models)
and **forward-backward recursions** (for finite hidden Markov models).

-* **SMC samplers**: SMC tempering, IBIS (a.k.a. data tempering).
+* **Standard and waste-free SMC samplers**: SMC tempering, IBIS (a.k.a. data
+  tempering). SMC samplers for binary words (Schäfer and Chopin, 2014), with
+  application to **variable selection**.

* Bayesian parameter inference for state-space models: **PMCMC** (PMMH, Particle Gibbs)
and **SMC^2**.
204 changes: 102 additions & 102 deletions book/smc2/smc2_stochvol_leverage.py
@@ -54,112 +54,112 @@ def qtiles(W, x):
N = 10 ** 3 # re-run with N= 10^4 for the second CDF plots
fks = {}
fk_opts = {'ssm_cls': state_space_models.StochVolLeverage, 'prior': prior,
-           'data': data, 'init_Nx': 100, 'mh_options': {'nsteps': 5},
-           'smc_options': {'qmc': False}, 'ar_to_increase_Nx': 0.1}
+           'data': data, 'init_Nx': 100, 'smc_options': {'qmc': False},
+           'ar_to_increase_Nx': 0.1, 'wastefree': False, 'len_chain': 6}
fks['smc2'] = ssp.SMC2(**fk_opts)
fk_opts['smc_options']['qmc'] = True
fks['smc2_qmc'] = ssp.SMC2(**fk_opts)
fk_opts['ssm_cls'] = state_space_models.StochVol
fks['smc2_sv'] = ssp.SMC2(**fk_opts)

if __name__ == '__main__':
runs = particles.multiSMC(fk=fks, N=N, collect=[Moments(mom_func=qtiles)],
verbose=True, nprocs=0, nruns=25)

# plots
#######
savefigs = True # False if you don't want to save plots as pdfs
plt.style.use('ggplot')

colors = {'smc2': 'gray', 'smc2_qmc': 'black'}
lsts = {'smc2': '--', 'smc2_qmc': '-'}
prefix = 'smc2_sv_lvg_N%i' % N
T = data.shape[0]

# Figure 18.2: variance marginal likelihood vs time
plt.figure()
for k, c in colors.items():
plt.plot(np.var(np.array([r['output'].summaries.logLts
for r in runs if r['fk']==k]), axis=0),
color=c, label=k, linewidth=2, linestyle=lsts[k])
# runs
runs = particles.multiSMC(fk=fks, N=N, collect=[Moments(mom_func=qtiles)],
verbose=True, nprocs=0, nruns=25)

# plots
#######
savefigs = True # False if you don't want to save plots as pdfs
plt.style.use('ggplot')

colors = {'smc2': 'gray', 'smc2_qmc': 'black'}
lsts = {'smc2': '--', 'smc2_qmc': '-'}
prefix = 'smc2_sv_lvg_N%i' % N
T = data.shape[0]

# Figure 18.2: variance marginal likelihood vs time
plt.figure()
for k, c in colors.items():
plt.plot(np.var(np.array([r['output'].summaries.logLts
for r in runs if r['fk']==k]), axis=0),
color=c, label=k, linewidth=2, linestyle=lsts[k])
plt.xlabel(r'$t$')
plt.ylabel('var log-lik')
plt.legend()
if savefigs:
plt.savefig('%s_var_loglik.pdf' % prefix)

# Figure 18.3: model evidence leverage vs basic SV model
plt.figure()
evidence_lvg = np.mean([r['output'].summaries.logLts
for r in runs if r['fk']=='smc2_qmc'], axis=0)
evidence_sv = np.mean([r['output'].summaries.logLts
for r in runs if r['fk']=='smc2_sv'], axis=0)
plt.plot(evidence_lvg - evidence_sv, 'k')
plt.xlabel(r'$t$')
plt.ylabel('diff marginal likelihoods')
if savefigs:
plt.savefig('%s_model_comparison.pdf' % prefix)

# Figure 18.4: sequential learning of parameters
typical_run = [r for r in runs if r['fk']=='smc2_qmc'][0]['output']
plt.figure()
for i, p in enumerate(prior.laws.keys()):
plt.subplot(2, 2, i + 1)
q25, q50, q75 = [[typical_run.summaries.moments[t][p][j] for t in range(T)]
for j in [5, 10, 15]]
plt.fill_between(range(T), q25, q75, color='gray')
plt.plot(range(T), q50, 'k')
plt.title(r'$\%s$' % p)
plt.xlabel(r'$t$')
plt.ylabel('var log-lik')
plt.legend()
if savefigs:
plt.savefig('%s_var_loglik.pdf' % prefix)

# Figure 18.3: model evidence leverage vs basic SV model
plt.figure()
evidence_lvg = np.mean([r['output'].summaries.logLts
for r in runs if r['fk']=='smc2_qmc'], axis=0)
evidence_sv = np.mean([r['output'].summaries.logLts
for r in runs if r['fk']=='smc2_sv'], axis=0)
plt.plot(evidence_lvg - evidence_sv, 'k')
plt.xlabel(r'$t$')
plt.ylabel('diff marginal likelihoods')
if savefigs:
plt.savefig('%s_model_comparison.pdf' % prefix)

# Figure 18.4: sequential learning of parameters
typical_run = [r for r in runs if r['fk']=='smc2_qmc'][0]['output']
plt.figure()
for i, p in enumerate(prior.laws.keys()):
plt.subplot(2, 2, i + 1)
q25, q50, q75 = [[typical_run.summaries.moments[t][p][j] for t in range(T)]
for j in [5, 10, 15]]
plt.fill_between(range(T), q25, q75, color='gray')
plt.plot(range(T), q50, 'k')
plt.title(r'$\%s$' % p)
plt.xlabel(r'$t$')
plt.tight_layout()
if savefigs:
plt.savefig('%s_seq_inference.pdf' % prefix)

# Figure 18.1 (left panel): ESS vs time for a typical run
plt.figure()
typ_run = runs[0]
print(typ_run['fk'])
plt.plot(typ_run['output'].summaries.ESSs, 'k')
plt.xlabel(r'$t$')
plt.ylabel('ESS')
if savefigs:
plt.savefig('%s_ESS.pdf' % prefix)

# Figure 18.5 (and 18.6 with N=10^4): marginal CDFs
def cdf(x, w):
a = np.argsort(x)
cw = np.cumsum(w[a])
return x[a], cw

include_prior = False

smc2s = [r['output'] for r in runs if r['fk']=='smc2']
smc2qmcs = [r['output'] for r in runs if r['fk']=='smc2_qmc']

for i, p in enumerate(['mu', 'rho', 'sigma', 'phi']):
plt.subplot(2, 2, i + 1)
for r in smc2qmcs:
xx, yy = cdf(r.X.theta[p], r.W)
plt.plot(xx, yy, color='black', alpha=0.2)
if include_prior:
xx = np.linspace(xx.min(), xx.max(), 100)
plt.plot(xx, prior.laws[p].cdf(xx), ':')
plt.xlabel(r'$\%s$' % p)
typ_run = smc2qmcs[0]
m = np.average(typ_run.X.theta[p], weights=typ_run.W)
s = np.std(typ_run.X.theta[p])
plt.xlim(m - 2.5 * s, m + 2.5 * s)
plt.tight_layout()
if savefigs:
plt.savefig('%s_cdfs.pdf' % prefix)

# Figure 18.1 (right panel): Nx vs time
plt.figure()
for r in smc2s:
jitter = 5 * random.randn()
plt.plot(np.array(r.X.Nxs) + jitter, 'k', alpha=0.5)
plt.ylim(bottom=0)
plt.xlabel(r'$t$')
plt.ylabel(r'$N_x$ + jitter')
if savefigs:
plt.savefig('%s_Nx_vs_t.pdf' % prefix)
plt.tight_layout()
if savefigs:
plt.savefig('%s_seq_inference.pdf' % prefix)

# Figure 18.1 (left panel): ESS vs time for a typical run
plt.figure()
typ_run = runs[0]
print(typ_run['fk'])
plt.plot(typ_run['output'].summaries.ESSs, 'k')
plt.xlabel(r'$t$')
plt.ylabel('ESS')
if savefigs:
plt.savefig('%s_ESS.pdf' % prefix)

# Figure 18.5 (and 18.6 with N=10^4): marginal CDFs
def cdf(x, w):
a = np.argsort(x)
cw = np.cumsum(w[a])
return x[a], cw

include_prior = False

smc2s = [r['output'] for r in runs if r['fk']=='smc2']
smc2qmcs = [r['output'] for r in runs if r['fk']=='smc2_qmc']

for i, p in enumerate(['mu', 'rho', 'sigma', 'phi']):
plt.subplot(2, 2, i + 1)
for r in smc2qmcs:
xx, yy = cdf(r.X.theta[p], r.W)
plt.plot(xx, yy, color='black', alpha=0.2)
if include_prior:
xx = np.linspace(xx.min(), xx.max(), 100)
plt.plot(xx, prior.laws[p].cdf(xx), ':')
plt.xlabel(r'$\%s$' % p)
typ_run = smc2qmcs[0]
m = np.average(typ_run.X.theta[p], weights=typ_run.W)
s = np.std(typ_run.X.theta[p])
plt.xlim(m - 2.5 * s, m + 2.5 * s)
plt.tight_layout()
if savefigs:
plt.savefig('%s_cdfs.pdf' % prefix)

# Figure 18.1 (right panel): Nx vs time
plt.figure()
for r in smc2s:
jitter = 5 * random.randn()
plt.plot(np.array(r.X.shared['Nxs']) + jitter, 'k', alpha=0.5)
plt.ylim(bottom=0)
plt.xlabel(r'$t$')
plt.ylabel(r'$N_x$ + jitter')
if savefigs:
plt.savefig('%s_Nx_vs_t.pdf' % prefix)