cs8850_14_factorization.html

<!doctype html>
<html lang="en">

  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
    <link href="css/fontawesome-free-6.2.1-web/css/all.css" rel="stylesheet">

    <script src="lib/colorbrewer.v1.min.js" charset="utf-8"></script>
    <script src="lib/colorStringStandalone.js" charset="utf-8"></script>
    <script type="text/javascript" src="lib/jquery-2.2.4.min.js"></script>

    <title>Advanced Machine Learning</title>

    <meta name="description" content="CS8850 GSU class">
    <meta name="author" content="Sergey M Plis">

    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">


    <link rel="stylesheet" href="dist/reset.css">
    <link rel="stylesheet" href="dist/reveal.css">
    <!-- Code syntax highlighting -->
    <link rel="stylesheet" href="plugin/highlight/monokai.css" id="highlight-theme">
    <!-- <link rel="stylesheet" href="lib/css/zenburn.css"> -->
    <link rel="stylesheet" href="css/custom.css">
    <link rel="stylesheet" href="dist/theme/aml.css" id="theme">
    <!-- Printing and PDF exports -->
    <script>
      // Append the print stylesheet to <head>: the PDF-export sheet when the
      // URL query string contains "print-pdf" (any letter case), otherwise the
      // paper sheet.
      // NOTE(review): the paper branch points at a .scss source file, which a
      // browser cannot apply as CSS -- confirm the intended path.
      var link = document.createElement( 'link' );
      link.href = /print-pdf/i.test( window.location.search ) ? 'css/print/pdf.css' : 'css/print/paper.scss';
      link.type = 'text/css';
      link.rel = 'stylesheet';
      document.head.appendChild( link );
    </script>
  </head>

<body>
    <div class="reveal">
      <!-- In between the <div="reveal"> and the <div class="slides">-->
          <!-- <header style="position: absolute; top: 10px; left: 100px; z-index: 500; font-size:100px;background-color: rgba(0,0,0,0); text-align: center !important"></header>  -->
          <!-- In between the <div="reveal"> and the <div class="slides">-->
              <!-- Any section element inside of this container is displayed as a slide -->
              <div class="slides">

	        <section>
	          <section>
	            <p>
	              <h2>Advanced Machine Learning</h2>
                      <h3>14: Matrix Factorization</h3>
	            </p>
	          </section>
	          <section>
	            <h3>Outline for the lecture</h3>
                    <ul>
                      <li class="fragment roll-in"> What are we talking about
                      <li class="fragment roll-in"> Independent Component Analysis
                      <li class="fragment roll-in"> Nonnegative Matrix Factorization
                      <li class="fragment roll-in"> Dictionary Learning
                      <li class="fragment roll-in"> Autoencoders
                      <li class="fragment roll-in"> Take home points
	            </ul>
                  </section>
                </section>
                <!-- -------------------------------------------------------------------------         -->
	        <section>
                  <section>
                    <h2>One Shallow Model</h2>
                  </section>

                  <section>
                    <h3>Matrix Factorization in Linear Algebra</h3>
  <ul style="list-style-type: none; padding: 0; margin: 0;">
    <li class="fragment">
      <img data-src="figures/CR_1.svg" alt="CR_1">
    </li>
    <li class="fragment">
      <img data-src="figures/LU_2.svg" alt="LU_2">
    </li>
    <li class="fragment">
      <img data-src="figures/QR_3.svg" alt="QR_3">
    </li>
    <li class="fragment">
      <img data-src="figures/EIG_4.svg" alt="EIG_4">
    </li>
    <li class="fragment">
      <img data-src="figures/SVD_5.svg" alt="SVD_5">
    </li>
  </ul>
  <style>
    /* Full-width, bullet-less list layout for the stacked factorization
       figures on this slide.
       NOTE(review): these selectors are not limited to this slide -- they
       match every ul / li / li img inside any ".reveal section", and the
       li-level "list-style-type: none" takes precedence over a list-style-type
       inherited from an inline style on the parent ul. Confirm the deck-wide
       effect is intended. */
    .reveal section ul {
      width: 100%;
      max-width: 100%;
      padding: 0;
      margin: 0;
    }
    /* Each list item: no marker, 10px vertical gap between items. */
    .reveal section ul li {
      list-style-type: none;
      margin: 10px 0;
    }
    /* Images inside list items fill the item width and keep their aspect ratio. */
    .reveal section ul li img {
      width: 100%;
      max-width: 100%;
      height: auto;
      display: block;
    }
  </style>
  <div class="slide-footer">
    images from <a href="https://github.com/kenjihiranabe/The-Art-of-Linear-Algebra?tab=readme-ov-file">The Art of Linear Algebra</a>
  </div>

</section>
                  <section>
                    <h3>Factorization can be viewed as a graph</h3>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="800" class="reveal"
                           src="figures/latent_variable_model.png" alt="RBM">
                      $\bm{V} = \bm{W}\bm{H}$
                  </section>

                </section>
                <!-- -------------------------------------------------------------------------         -->
	        <section>
                  <section>
                    <h2>Independent Component Analysis</h2>
                  </section>

                  <section>
                    <h1><i class="fa-solid fa-martini-glass-citrus"></i> Cocktail party!</h1>
                  </section>

<section>
  <row>
    <col30 class="fragment roll-in">
      <h4>sources</h4>
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Bach.png" alt="Bach" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Bach.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Bengio.png" alt="Bengio" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Bengio.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/lecun.png" alt="LeCun" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_LeCun.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Schmidhuber.png" alt="Schmidhuber" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Schmidhuber.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Yudkowski.png" alt="Yudkowski" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Yudkowski.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Hinton.png" alt="Hinton" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Hinton.mp3" style="height: 30px;"></audio>
      </div>
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Jazz.png" alt="Jazz" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/single_channel_Jazz.mp3" style="height: 30px;"></audio>
      </div>
    </col30>
    <col10>
    </col10>
    <col30 class="fragment roll-in">
      <h4>linear mixtures</h4>
      <img src="figures/godfatherAI.png" alt="father" style="width: 100%; margin-right: 5px;">
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture01.mp3" style="height: 30px; width: 100%;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture02.mp3" style="height: 30px; width: 100%;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture03.mp3" style="height: 30px; width: 100%;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture04.mp3" style="height: 30px; width: 100%;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture05.mp3" style="height: 30px; width: 100%;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture06.mp3" style="height: 30px; width: 100%;"></audio>
      </div>
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <audio controls src="figures/cocktail/mixture07.mp3" style="height: 30px; width: 100%;"></audio>
      </div>
      $\bm{X} = \bm{A}\bm{S}$
    </col30>
    <col10>
    </col10>
    <col30 class="fragment roll-in">
      <h4>reconstructions</h4>
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Bach.png" alt="Bach" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources01.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Bengio.png" alt="Bengio" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources02.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/lecun.png" alt="LeCun" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources03.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Schmidhuber.png" alt="Schmidhuber" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources04.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Yudkowski.png" alt="Yudkowski" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources05.mp3" style="height: 30px;"></audio>
      </div>
      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Hinton.png" alt="Hinton" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources06.mp3" style="height: 30px;"></audio>
      </div>

      <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <img src="figures/Jazz.png" alt="Jazz" style="width: 80px; height: 80px; margin-right: 5px; margin-top: -10px;">
        <audio controls src="figures/cocktail/sources07.mp3" style="height: 30px;"></audio>
      </div>

    </col30>
  </row>
</section>

<section>
  <row style="width: 120%; margin-left: -100px;">
    <col50>
      <h3>Linear Independence</h3>
      <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 36px; width: 100%; text-align: left;">
A set of vectors \(\vec{v}_1, \vec{v}_2, \dots, \vec{v}_n\) in a vector space is <b>linearly independent</b> if the only solution to their linear combination being zero is when all coefficients are zero.
      </blockquote>

<div style="font-size: 32px;">
  \begin{align}
  c_1 \vec{v}_1 + c_2 \vec{v}_2 + \dots + c_n \vec{v}_n = 0 \\
  \implies \quad c_1 = c_2 = \dots = c_n = 0
  \end{align}
</div>

      <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;">
        <b>Example:</b> Vectors \((1, 0)\) and \((0, 1)\) are linearly independent.
      </blockquote>

      <div style="font-size: 32px;">
        $\vec{v}_i^T\vec{v}_j = 0$
      </div>

    </col50>
<col10>
  </col10>
    <col50>
      <h3>Statistical Independence</h3>
      <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 36px; width: 100%; text-align: left;">
        Random variables \(X_1, X_2, \dots, X_n\) are <b>statistically independent</b> if the joint probability is the product of individual probabilities.
      </blockquote>

      <div style="font-size: 32px;">
        \(P(X_1, X_2) = P(X_1) \cdot P(X_2)\)
      </div>

      <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;">
        <b>Example:</b> The outcomes of two dice rolls are independent.
      </blockquote>

    </col50>
  </row>
</section>

                  <section>
                    <h1>Independence</h1>

                    <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 36px; width: 100%; text-align: left;">
                      <b>Key Difference:</b> Linear independence deals with vector spaces, while statistical independence focuses on probability distributions.
                    </blockquote>
                  </section>

<section>
  <h2>ICA: Mathematical Setup</h2>

  <ul style="font-size: 36px; text-align: left;">
    <li class="fragment roll-in">Given observed signals \(X\), we assume:
      \[
      X = A \cdot S
      \]
      where \(X\) is the observed data, \(A\) is the unknown mixing matrix, and \(S\) are the independent sources.</li>
    <li class="fragment roll-in">The goal is to recover \(S\) by finding a suitable unmixing matrix \(W\) such that:
      \[
      Y = W \cdot X \quad \text{(where \(Y\) approximates the sources \(S\))}
      \]
    </li>
  </ul>
</section>

                  <section>
                    <h1>identifiability</h1>
                  </section>

                  <section>
                    <h1>fastICA</h1>
                  </section>
<section>
      <h2>Central Limit Theorem (CLT)</h2>
      <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 36px; width: 100%; text-align: left;" class="fragment roll-in">
        The <b>Central Limit Theorem</b> states that the suitably normalized sum (or average) of independent, identically distributed random variables with finite mean and variance tends towards a normal distribution, regardless of the original distribution.
      </blockquote>

      <div style="font-size: 32px;" class="fragment roll-in">
        \[
        Z_n = \frac{1}{\sqrt{n}} \sum_{i=1}^n (X_i - \mu) \quad \xrightarrow{n \to \infty} \quad \mathcal{N}(0, \sigma^2)
        \]
      </div>

      <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;" class="fragment roll-in">
        <b>Example:</b> The average of a large number of dice rolls will follow a normal distribution, even though a single roll is uniformly distributed.
      </blockquote>
</section>

<section>
      <h2>Central Limit Theorem (CLT)</h2>
      <h3>Key Insights</h3>
      <ul style="font-size: 36px; text-align: left;">
        <li class="fragment roll-in">Applies to independent random variables with finite mean and variance.
        <li class="fragment roll-in">Explains why sums or averages often look Gaussian, even if the original variables are not.
        <li class="fragment roll-in">The distribution converges faster with more samples.
      </ul>

        <blockquote style="background-color: #93a1a1; color: #fdf6e3; width: 100%; text-align: left; font-size: 32px;" class="fragment roll-in">
          <b>Mathematical Implication:</b> Mixed signals tend to be more Gaussian, which ICA exploits to recover independent, non-Gaussian sources.
        </blockquote>
</section>
<section>
  <h2>CLT and Statistical Independence</h2>

  <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 36px; width: 100%; text-align: left;" class="fragment roll-in">
    The Central Limit Theorem implies that <b>linear mixtures</b> of independent, non-Gaussian variables tend to be more Gaussian.
  </blockquote>

  <div style="font-size: 32px;" class="fragment roll-in">
    \[
    Z_n = \frac{1}{\sqrt{n}} \sum_{i=1}^n (S_i - \mu) \quad \xrightarrow{n \to \infty} \quad \mathcal{N}(0, \sigma^2)
    \]
  </div>

  <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;" class="fragment roll-in">
    <b>Key Insight:</b> When independent sources mix linearly, the result looks more Gaussian than the original sources.
  </blockquote>
</section>

<section>
  <h2>Connection between CLT and ICA</h2>
  <h3>How ICA Exploits This Connection</h3>

  <ul style="font-size: 36px; text-align: left;">
    <li class="fragment roll-in">ICA assumes that <b>independent sources</b> are often non-Gaussian.</li>
    <li class="fragment roll-in">A linear mixture (observed data) tends to be <b>more Gaussian</b> than the original sources.</li>
    <li class="fragment roll-in">ICA reverses this mixing by searching for components that <b>maximize non-Gaussianity</b>.</li>
  </ul>

  <blockquote style="background-color: #93a1a1; color: #fdf6e3; width: 100%; text-align: left; font-size: 32px;" class="fragment roll-in">
    <b>Takeaway:</b> ICA identifies independent sources by finding non-Gaussian signals within the observed mixtures, which are closer to the original, independent sources.
  </blockquote>
</section>

<section>
  <h4>Nonlinear Transformations Amplify Non-Gaussianity</h4>

  <ul style="font-size: 36px; text-align: left;">
    <li class="fragment roll-in">
      <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;">
        Linear transformations (e.g., rotations or scaling) preserve Gaussianity. Nonlinear transformations distort the data in ways that highlight deviations from Gaussianity.
      </blockquote>
    </li>

    <li class="fragment roll-in">
      <blockquote style="background-color: #93a1a1; color: #fdf6e3; width: 100%; text-align: left;">
        Nonlinear functions like \( \tanh(u) \) or \( u^3 \) react <b>strongly to outliers</b> or higher-order statistics, making non-Gaussian features more prominent.
      </blockquote>
    </li>

    <li class="fragment roll-in">
      <ul style="font-size: 32px; text-align: left;">
        <li class="fragment roll-in">Gaussian distributions have light tails—outliers are rare.</li>
        <li class="fragment roll-in">Nonlinear functions emphasize <b>outliers or rare events</b>, which are common in non-Gaussian data (like sparse signals).</li>
      </ul>
    </li>
  </ul>
</section>

<section>
  <h4>Nonlinear Transformations Amplify Non-Gaussianity</h4>
  <ul style="font-size: 36px; text-align: left;">    
    <li class="fragment roll-in">
      <blockquote style="background-color: #eee8d5; width: 100%; text-align: left;">
        The FastICA algorithm computes expectations using these nonlinear functions, which helps it detect signals that are <b>far from Gaussian.</b>
      </blockquote>
    </li>

    <li class="fragment roll-in">
      <blockquote style="background-color: #93a1a1; color: #fdf6e3; width: 100%; text-align: left;">
        Nonlinear transformations amplify the non-Gaussian properties in the data, making it easier to separate independent components.
      </blockquote>
    </li>
  </ul>
</section>

<section>
  <h2>FastICA: Optimization Steps</h2>

  <ol style="font-size: 36px; text-align: left;">
    <li class="fragment roll-in"><b>Whiten the data</b>: Use PCA to make the data uncorrelated.
      \[
      Z = W_{\text{PCA}} \cdot X
      \]
    </li>

    <li class="fragment roll-in"><b>Choose a non-linearity</b> (e.g., \(g(u) = \tanh(u)\)) to maximize non-Gaussianity.</li>

    <li class="fragment roll-in"><b>Update the weight vector</b> \(w_i\) for each independent component:
      \[
      w_i \leftarrow \mathbb{E} \{Z \cdot g(w_i^T Z)\} - \mathbb{E} \{g'(w_i^T Z)\} \cdot w_i
      \]
    </li>

    <li class="fragment roll-in"><b>Orthogonalize</b> the weight vectors to ensure independence.</li>

    <li class="fragment roll-in"><b>Iterate</b> until convergence (i.e., weight vectors stabilize).</li>
  </ol>
</section>

<section>
  <h2>FastICA: Implementation</h2>

  <pre class="python fragment roll-in" style="width: 99%; font-size: 12pt;">
<code data-trim data-noescape data-line-numbers="*|20|25-26|29">
import numpy as np

# Step 1: Center and whiten the data
def whiten(X):
    """Center X (n_samples, n_features) and rescale it to identity covariance."""
    centered = X - np.mean(X, axis=0)  # Subtract each column's mean
    evals, evecs = np.linalg.eigh(np.cov(centered, rowvar=False))
    inv_std = np.diag(1.0 / np.sqrt(evals))  # 1/sqrt(eigenvalue) per direction
    return centered @ evecs @ inv_std @ evecs.T

# Step 2: Nonlinear function for maximizing non-Gaussianity
def g(u):
    return np.tanh(u)  # Element-wise tanh: the nonlinearity applied to the projections

def g_derivative(u):
    return 1.0 - np.square(np.tanh(u))  # g'(u) = 1 - tanh(u)^2, element-wise

# Step 3: FastICA iteration -- X: (n_samples, n_features); returns (n_components, n_samples)
def fastica(X, n_components, max_iter=100, tol=1e-5):
    X = whiten(X)
    n_samples, n_features = X.shape
    W = np.random.rand(n_components, n_features)  # Initialize random weights
    # Rows of W are the unmixing vectors w_i (needs n_components at most n_features)
    for _ in range(max_iter):
        W_new = (g(X @ W.T).T @ X) / n_samples  # E{g(w_i^T z) z^T}, one row per w_i
        W_new -= np.diag(np.mean(g_derivative(X @ W.T), axis=0)) @ W

        # Decorrelate weights (orthogonalization)
        W_new = np.linalg.qr(W_new.T)[0].T  # QR on the transpose: orthonormal rows

        # Check for convergence: every w_i unchanged up to sign
        converged = np.max(np.abs(np.abs(np.diag(W_new @ W.T)) - 1)) < tol
        W = W_new  # Keep the newest estimate, also on the final iteration
        if converged:
            break

    return W @ X.T  # Recovered signals

# Step 4: Example usage with synthetic data
np.random.seed(0)  # Reproducible random initialisation of W
t = np.linspace(0, 8, 1000)
# Two independent sources: a sinusoid and a square wave at different
# frequencies (a square wave built from the same sinusoid would be a
# function of the first source, so the two would not be independent).
S = np.array([np.sin(2 * t),
              np.sign(np.sin(3 * t))]).T
A = np.array([[1, 1], [0.5, 2]])  # Mixing matrix
X = S @ A.T  # Mixed signals, shape (n_samples, 2)

# Apply FastICA
S_estimated = fastica(X, n_components=2)  # shape (2, n_samples)

# Plot results: sources, mixtures, and recovered signals (one panel each)
import matplotlib.pyplot as plt
fig, axs = plt.subplots(3, 1, figsize=(8, 6))
axs[0].plot(S)
axs[0].set_title('Original Signals')
axs[1].plot(X)
axs[1].set_title('Mixed Signals')
axs[2].plot(S_estimated.T)  # Transposed so each recovered source is one curve
axs[2].set_title('Recovered Signals (FastICA)')
plt.tight_layout()
plt.show()
</code>
  </pre>
</section>
                  <section>
                    <h1>Infomax</h1>
                  </section>

                  <section>
                    <h1>Maximum Likelihood</h1>
                  </section>

                </section>
                <!-- --------------------------------------------------------------------------->
	        <section>
                  <section>
                    <h2>Nonnegative matrix factorization</h2>
                    <div class="slide-footer">
                      <a href="https://www.nature.com/articles/44565">Learning the parts of objects by non-negative matrix factorization</a>
                    </div>
                  </section>

                  <section>
                    <h2>Additive features</h2>
                    <row>
                      <col60>
                        <ul style="list-style-type: disc; font-size: 26pt">
                          <li class="fragment roll-in" data-fragment-index="0"> Features are non-negative and only add up
                          <li class="fragment roll-in" data-fragment-index="1"> Features are unknown: data comes as their combination
                        </ul>
                      </col60>
                      <col>
                      <div style="position:relative; width:640px; height:800px; margin:0 auto;">
                        <img class="fragment current-visible" data-transition="slide fade-out" data-fragment-index="0" width="640" src="figures/addfeatures.png" style="position:absolute;top:0;left:0;border:0; box-shadow: 0px 0px 0px rgba(255, 255, 255, 255);" />
                        <img class="fragment current-visible" data-transition="slide fade-out" data-fragment-index="1" width="640" src="figures/fullface.png" style="position:absolute;top:0;left:0;border:0; box-shadow: 0px 0px 0px rgba(255, 255, 255, 255);" />
                      </div>
                      </col>
                    </row>
                  </section>

                  <section>
                    <h2>NMF Formally</h2>
                    <blockquote style="background-color: #eee8d5; width: 100%; font-size: 22pt" class="fragment" data-fragment-index="0">
                      Find a low rank non-negative approximation to a matrix
                    </blockquote>
                    <ul  style="list-style-type: square; font-size: 32px;">
                      <li class="fragment roll-in" data-fragment-index="1"> Given data $\bm{X}$ find their factorization:
                        \begin{align*}
                        \bm{X} \approx \bm{W}\bm{H}\\
                        \bm{X}_{ij} \ge 0 \mbox{ }\bm{W}_{ij} \ge 0 \mbox{ }\bm{H}_{ij} \ge 0
                        \end{align*}
                      <li class="fragment roll-in" data-fragment-index="2"> Minimize the objective function:
                        \begin{align}\nonumber
                        E = \frac{1}{2}\|\bm{X} - \bm{W}\bm{H}\|_F^2
                        \end{align}
                      <li class="fragment roll-in" data-fragment-index="3"> Ignore other possible objectives
                    </ul>
                  </section>

                  <section>
                    <h3>Gradient Descent</h3>
                    <ul  style="list-style-type: square;  font-size: 32px;">
                      <li class="fragment roll-in" data-fragment-index="0"> Compute the derivative and find its zero
                        \begin{align}\nonumber
                        \frac{\partial E}{\partial \bm{W}} &=&
                        \bm{WHH}^{T} - \bm{XH}^{T}\\\nonumber
                        \frac{\partial E}{\partial \bm{H}} &=&
                        \bm{W}^{T}\bm{WH} - \bm{W}^{T}\bm{X}
                        \end{align}
                      <li class="fragment roll-in" data-fragment-index="1"> Classical solution
                        \begin{align}\nonumber
                        \bm{H} &=& \bm{H} + \bm{\eta} \odot (\bm{W}^T\bm{X} - \bm{W}^T\bm{W}\bm{H})
                        \end{align}
                      <li class="fragment roll-in" data-fragment-index="2"> Exponentiated gradient
                        \begin{align}\nonumber
                        \bm{H} &=& \bm{H}\odot e^{\bm{\eta} \odot (\bm{W}^T\bm{X} - \bm{W}^T\bm{W}\bm{H})}
                        \end{align}
                    </ul>
                  </section>

                  <section>
                    <h2>Multiplicative updates</h2>
                    <row style="font-size: 32px;">
                      <col50>
                      <ul  style="list-style-type: square;">
                        <li class="fragment roll-in" data-fragment-index="0"> Setting the learning rates:
                          \begin{align}
                          \bm{\eta}_{\bm{H}} &= \frac{\bm{H}}{\bm{W}^T\bm{W}\bm{H}}\\
                          \bm{\eta}_{\bm{W}} &= \frac{\bm{W}}{\bm{W}\bm{H}\bm{H}^T}
                          \end{align}
                        <li class="fragment roll-in" data-fragment-index="1">Results in updates:
                          \begin{align*}
                          \bm{H} &=& \bm{H}\odot \frac{\bm{W}^{T}\bm{X}}
			  {\bm{W}^{T}\bm{W}\bm{H}}\\
                          \bm{W} &=& \bm{W}\odot \frac{\bm{X}\bm{H}^{T}}
			  {\bm{W}\bm{H}\bm{H}^{T}}
                          \end{align*}
                      </ul>
                    </col50>
                    <col50>
                      <div class="fragment roll-in" data-fragment-index="2">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 38px; width:100%;">
                          Advantages:
                        </blockquote>
                        <ul>
                          <li> automatic non-negativity constraint satisfaction
                          <li> adaptive learning rate
                          <li> no parameter setting
                        </ul>
                      </div>
                    </col50>
                    </row>
                  </section>

                  <section>
                    <h2>NMF on faces</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="800" class="reveal"
                           src="figures/nmf_example1.svg" alt="nmf faces">
                  </section>

                  <section>
                    <h2>NMF on hyperspectral images</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="800" class="reveal"
                           src="figures/nmf_example2.svg" alt="nmf hyper spectral">
                  </section>

                </section>
                <!-- -------------------------------------------------------------------------         -->
	        <section>
                  <section>
                    <h1>Dictionary Learning</h1>
                  </section>

                  <section>
                    <h2>The problem</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="500"
                           src="figures/dictionary_learning_matrices.svg" alt="DL matrices">
                      \begin{align*}
                      \underset{\vec{\alpha} \in \RR^m}{\min} \frac{1}{2}\|\vec{x} - \bm{D}\vec{\alpha}\|^2_2 + \lambda\phi(\vec{\alpha})
                      \end{align*}
                  </section>

                  <section>
                    <h2>Application: Denoising</h2>
                      <img width="80%"
                           src="figures/DL_denoising.svg" alt="Elad denoising" style="margin-top: -40px;">
                    <div class="slide-footer">
                      <a href="https://www.springer.com/gp/book/9781441970107">
                        Elad, M., 2010. Sparse and redundant representations: from theory to applications in signal and image processing. Springer Science &amp; Business Media.
                      </a>
                    </div>

                  </section>

                  <section>
                    <h2>Application: Compression</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="600"
                           src="figures/DL_compression.svg" alt="Elad compression">
                    <div class="slide-footer">
                      <a href="https://elad.cs.technion.ac.il/wp-content/uploads/2018/02/IEEE_08_Deblocking.pdf">
                        Bryt, O. and Elad, M., 2008, Improving the k-SVD facial image compression using a linear deblocking method.
                        </a>
                    </div>

                  </section>

                </section>
                <!-- -------------------------------------------------------------------------         -->
	        <section>
                  <section>
                    <h1>Autoencoders</h1>
                  </section>

                  <section>
                    <h2>an alternative view of PCA</h2>
                    <row>
                      <col>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="500"
                           src="figures/AE_PCA.svg" alt="AE_PCA">
                      </col>
                      <col50 style="font-size: 28px;">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 38px; width:100%;">
                          Reconstruction error:
                        </blockquote>
                        <ul style="list-style-type: none; font-size: 22px;">
                          <li class="fragment roll-in">
                            \begin{align*}
                            \prob{J}{\bm{X}, \bm{X}^{\prime}} & = \underset{\bm{W}}{\argmin} \|\bm{X} - \bm{X}^{\prime}\|^2
                            \end{align*}
                          <li class="fragment roll-in">
                            \begin{align*}
                            \prob{J}{\bm{X}, \bm{X}^{\prime}} & = \underset{\bm{W}}{\argmin} \|\bm{X} - \bm{W}^T\bm{W}\bm{X}\|^2
                            \end{align*}
                          <li class="fragment roll-in"> Encoder
                            \begin{align*}
                            \bm{W}
                            \end{align*}
                          <li class="fragment roll-in"> Decoder
                            \begin{align*}
                            \bm{W}^T
                            \end{align*}
                        </ul>
                      </col50>
                    </row>
                    <div class="slide-footer">
                      <a href="https://link.springer.com/article/10.1007/BF00332918">Bourlard, H. and Kamp, Y., 1988. Auto-association by multilayer perceptrons and singular value decomposition. Biological Cybernetics, 59(4-5), pp.291-294.</a>
                    </div>
                  </section>

                  <section>
                    <h2>Even this simple model is not convex</h2>
                      <img width="500" src="figures/x_times_y.svg" alt="x times y">
                  </section>

                  <section>
                    <h2>So why limit ourselves: Autoencoder</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="850"
                           src="figures/AE.svg" alt="Autoencoder">
                  </section>

                  <section>
                    <h2>pre-training Autoencoder</h2>
                      <img width="600" src="figures/science_AE.svg" alt="Science Autoencoder">
                    <div class="slide-footer">
                      <a href="https://www.cs.toronto.edu/~hinton/science.pdf">Hinton, G.E. and Salakhutdinov, R.R., 2006. Reducing the dimensionality of data with neural networks. Science, 313(5786), pp.504-507.</a>
                    </div>
                  </section>

                  <section>
                    <h2>pre-training Autoencoder: MNIST</h2>
                    PCA vs. 784-1000-500-250-2 AE
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="850"
                           src="figures/science_AE_MNIST.svg" alt="Science Autoencoder">
                    <div class="slide-footer">
                      <a href="https://www.cs.toronto.edu/~hinton/science.pdf">Hinton, G.E. and Salakhutdinov, R.R., 2006. Reducing the dimensionality of data with neural networks. Science, 313(5786), pp.504-507.</a>
                    </div>
                  </section>

                  <section>
                    <h2>denoising Autoencoder</h2>
                    <row>
                      <col50>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="850"
                           src="figures/filters_corruption_AE.png" alt="Autoencoder">
                      </col50>
                      <col50>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="850"
                           src="figures/filters_corruption_DAE.png" alt="Denoising Autoencoder">
                      </col50>
                    </row>
                    <div class="slide-footer">
                      <a href="https://www.cs.toronto.edu/~larocheh/publications/vincent10a.pdf">Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y. and Manzagol, P.A., 2010. Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion. Journal of Machine Learning Research, 11(Dec), pp.3371-3408.
                      </a>
                    </div>
                  </section>

                </section>


                                <!-- -------------------------------------------------------------------------         -->
	        <section>
                  <section>
                    <h2>Take Home Points</h2>
                  </section>

                  <section>
                    <h2>matrix factorization methods</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="1000"
                           src="figures/factorizations.svg" alt="methods">
                  </section>

                  <section>
                    <h2>Effect of sparsity parameter</h2>
                      <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1); " width="1400"
                           src="figures/sparsity_demo.svg" alt="sparse NMF">

                  </section>

                  <section>
                    <h2>Things to keep in mind</h2>
                    <row  style="font-size: 26px;">
                      <col50 class="fragment roll-in" data-fragment-index="0">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3;">
                          Principal Component Analysis
                        </blockquote>
                        <ul>
                          <li class="fragment roll-in" data-fragment-index="1"> Finds orthogonal axes of maximal variance
                          <li class="fragment roll-in" data-fragment-index="2"> Uses full rank transform
                          <li class="fragment roll-in" data-fragment-index="3"> Can be used for compression when lower variance axes are dropped at reconstruction
                          <li class="fragment roll-in" data-fragment-index="4"> Frequently used to pre-process data
                        </ul>
                      </col50>
                      <col50 class="fragment roll-in" data-fragment-index="5">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 24px; ">
                          Independent Component Analysis
                        </blockquote>
                        <ul>
                          <li class="fragment roll-in" data-fragment-index="6"> A blind source separation problem
                          <li class="fragment roll-in" data-fragment-index="7"> Finds a linear transform that maximizes statistical independence of sources
                          <li class="fragment roll-in" data-fragment-index="8"> Resulting basis is not orthogonal
                          <li class="fragment roll-in" data-fragment-index="9"> Noise is often independent of the rest of the data
                        </ul>
                      </col50>
                    </row>
                    <row style="font-size: 26px;">
                      <col50 class="fragment roll-in" data-fragment-index="10">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3; font-size: 24px;">
                          Nonnegative Matrix Factorization
                        </blockquote>
                        <ul>
                          <li class="fragment roll-in" data-fragment-index="11"> Additive features $\to$ nonnegative problem
                          <li class="fragment roll-in" data-fragment-index="12"> Low rank approximation
                          <li class="fragment roll-in" data-fragment-index="13"> Multiplicative updates
                          <li class="fragment roll-in" data-fragment-index="14"> Nonnegativity leads to sparse solution
                        </ul>
                      </col50>
                      <col50 class="fragment roll-in" data-fragment-index="15">
                        <blockquote style="background-color: #93a1a1; color: #fdf6e3; ">
                          Dictionary Learning
                        </blockquote>
                        <ul>
                          <li class="fragment roll-in" data-fragment-index="16"> Overcomplete dictionary
                          <li class="fragment roll-in" data-fragment-index="17"> Sparse representation of samples
                          <li class="fragment roll-in" data-fragment-index="18"> Only a few bases are involved in encoding each sample
                          <li class="fragment roll-in" data-fragment-index="19"> Uses explicit sparsity constraint
                        </ul>
                      </col50>
                    </row>
                  </section>

                </section>


              </div>

            </div>

            <script src="dist/reveal.js"></script>

            <link rel="stylesheet" href="plugin/highlight/monokai.css">
            <script src="plugin/highlight/highlight.js"></script>
            <script src="plugin/math/math.js"></script>
            <script src="plugin/chalkboard/plugin.js"></script>
            <script src="plugin/notes/notes.js"></script>
            <script src="plugin/zoom/zoom.js"></script>
            <script src="plugin/fullscreen/fullscreen.js"></script>
            <script src="plugin/menu/menu.js"></script>

            <script>
              // Deck configuration. Full list of core reveal.js options:
              // https://revealjs.com/config/
              // `menu`, `chalkboard` and `math` are plugin-specific option groups; see the
              // documentation of the respective plugin for their meaning.

              Reveal.initialize({
                  // history: true,
                  hash: true,
                  margin: 0.01,
                  minScale: 0.01,
                  maxScale: 1.23,

                  menu: {
                      themes: false,
                      openSlideNumber: true,
                      openButton: false,
                  },

                  chalkboard: {
                      boardmarkerWidth: 1,
                      chalkWidth: 2,
                      chalkEffect: 1,
                      toggleNotesButton: false,
                      toggleChalkboardButton: false,
                      // NOTE(review): Reveal.width / Reveal.height are evaluated right here,
                      // i.e. before initialize() has run. Confirm the Reveal object exposes
                      // these properties; if it does not, both values are `undefined`.
                      slideWidth: Reveal.width,
                      slideHeight: Reveal.height,
                      // src: "chalkboards/chalkboard_em2.json",
                      readOnly: false,
                      theme: "blackboard",
                      eraser: { src: "plugin/chalkboard/img/sponge.png", radius: 30},
                  },

                  math: {
                      mathjax: 'https://cdn.jsdelivr.net/gh/mathjax/mathjax@2.7.8/MathJax.js',
                      config: 'TeX-AMS_SVG-full',
                      // pass other options into `MathJax.Hub.Config()`
                      TeX: {
                          // Custom TeX macros used on the slides. A plain string is a macro
                          // without arguments; an array is [definition, number of arguments].
                          // Every TeX backslash has to be doubled inside a JS string literal.
                          Macros: {
                              RR: '\\mathbb{R}',
                              PP: '\\mathbb{P}',
                              EE: '\\mathbb{E}',
                              NN: '\\mathbb{N}',
                              vth: '\\vec{\\theta}',
                              loss: '{\\cal l}',
                              hclass: '{\\cal H}',
                              CD: '{\\cal D}',
                              def: '\\stackrel{\\text{def}}{=}',
                              // \pag{a}{b} typesets pa_{G^a}(b); braces must stay balanced.
                              pag: ['\\text{pa}_{{\\cal G}^{#1}}(#2)', 2],
                              vec: ['\\boldsymbol{\\mathbf #1}', 1],
                              set: [ '\\left\\{#1 \\; : \\; #2\\right\\}', 2 ],
                              bm: ['\\boldsymbol{\\mathbf #1}', 1],
                              argmin: ['\\operatorname{arg\\,min\\,}'],
                              argmax: ['\\operatorname{arg\\,max\\,}'],
                              prob: ["\\mbox{#1$\\left(#2\\right)$}", 2],
                          },
                          // NOTE(review): `loader`, `tex` and `svg` look like MathJax v3-style
                          // options, while the URL above pins MathJax 2.7.8. Confirm whether
                          // they have any effect here.
                          loader: {load: ['[tex]/color']},
                          extensions: ["color.js"],
                          tex: {packages: {'[+]': ['color']}},
                          svg: {
                              fontCache: 'global'
                          }
                      }
                  },

                  // NOTE(review): plugin/fullscreen/fullscreen.js is loaded via a <script> tag
                  // above, but no fullscreen plugin is registered in this list. Confirm whether
                  // that is intentional.
                  plugins: [ RevealMath, RevealChalkboard, RevealHighlight, RevealNotes, RevealZoom, RevealMenu ],

              });

              // Settings applied on top of Reveal.initialize(), in a single configure() call.
              Reveal.configure({
                  fragments: true,      // set false when developing to see everything at once
                  slideNumber: 'c / t', // the only slideNumber value that ever took effect
                  // history: true,
              });

              // Swap the theme stylesheet (the <link id="theme"> element) when one of the
              // theme events fires. NOTE(review): 'darkside' / 'brightside' are not defined in
              // this part of the file; presumably they are slide `data-state` names. Confirm.
              [
                  [ 'darkside', 'dist/theme/aml_dark.css' ],
                  [ 'brightside', 'dist/theme/aml.css' ],
              ].forEach( function( pair ) {
                  var eventName = pair[0];
                  var themeHref = pair[1];
                  Reveal.addEventListener( eventName, function() {
                      document.getElementById('theme').setAttribute('href', themeHref);
                  }, false );
              });

            </script>

            <style type="text/css">
              /* 1. Pin the header/footer <div>s to the corners of their positioned ancestor. */
              #header-left,
              #header-right,
              #footer-left {
                  position: absolute;
              }
              /* top-left corner */
              #header-left {
                  top: 0%;
                  left: 0%;
              }
              /* top-right corner */
              #header-right {
                  top: 0%;
                  right: 0%;
              }
              /* bottom-left corner */
              #footer-left {
                  bottom: 0%;
                  left: 0%;
              }
            </style>

            <!-- // 2. Create hidden header/footer -->
            <div id="hidden" style="display:none;">
              <div id="header">
                <div id="header-left"><h4>CS8850</h4></div>
                <div id="header-right"><h4>Advanced Machine Learning</h4></div>
                <div id="footer-left">
                  <img style="border:0; box-shadow: 0px 0px 0px rgba(150, 150, 255, 1);" width="200"
                       src="figures/valentino.png" alt="robot learning">
                </div>
              </div>
            </div>


            <script type="text/javascript">
              // 3. On Reveal.js ready event, copy header/footer <div> into each `.slide-background` <div>
              var header = $('#header').html();
              if ( window.location.search.match( /print-pdf/gi ) ) {
                  Reveal.addEventListener( 'ready', function( event ) {
                      $('.slide-background').append(header);
                  });
              }
              else {
                  $('div.reveal').append(header);
              }
            </script>

  </body>
</html>