PythonOT
diff --git a/‎ot/bregman.py
Lines changed: 129 additions & 107 deletions b/‎ot/bregman.py
Lines changed: 129 additions & 107 deletions
diff --git a/‎ot/da.py
Lines changed: 280 additions & 219 deletions b/‎ot/da.py
Lines changed: 280 additions & 219 deletions
diff --git a/‎ot/datasets.py
Lines changed: 6 additions & 6 deletions b/‎ot/datasets.py
Lines changed: 6 additions & 6 deletions
diff --git a/‎ot/dr.py
Lines changed: 23 additions & 17 deletions b/‎ot/dr.py
Lines changed: 23 additions & 17 deletions
diff --git a/‎ot/gromov.py
Lines changed: 20 additions & 15 deletions b/‎ot/gromov.py
Lines changed: 20 additions & 15 deletions
@@ -13,7 +13,7 @@
 
 
 def make_1D_gauss(n, m, s):
-    """return a 1D histogram for a gaussian distribution (n bins, mean m and std s)
+    """return a 1D histogram for a gaussian distribution (`n` bins, mean `m` and std `s`)
 
     Parameters
     ----------
@@ -26,7 +26,7 @@ def make_1D_gauss(n, m, s):
 
     Returns
     -------
-    h : ndarray (n,)
+    h : ndarray (`n`,)
         1D histogram for a gaussian distribution
     """
     x = np.arange(n, dtype=np.float64)
@@ -41,7 +41,7 @@ def get_1D_gauss(n, m, sigma):
 
 
 def make_2D_samples_gauss(n, m, sigma, random_state=None):
-    """Return n samples drawn from 2D gaussian N(m,sigma)
+    """Return `n` samples drawn from 2D gaussian :math:`\mathcal{N}(m, \sigma)`
 
     Parameters
     ----------
@@ -59,8 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None):
 
     Returns
     -------
-    X : ndarray, shape (n, 2)
-        n samples drawn from N(m, sigma).
+    X : ndarray, shape (`n`, 2)
+        n samples drawn from :math:`\mathcal{N}(m, \sigma)`.
     """
 
     generator = check_random_state(random_state)
@@ -102,7 +102,7 @@ def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwa
     Returns
     -------
     X : ndarray, shape (n, d)
-        n observation of size d
+        `n` observation of size `d`
     y : ndarray, shape (n,)
         labels of the samples.
     """
 
@@ -22,15 +22,15 @@
 
 
 def dist(x1, x2):
-    """ Compute squared euclidean distance between samples (autograd)
+    r""" Compute squared euclidean distance between samples (autograd)
     """
     x1p2 = np.sum(np.square(x1), 1)
     x2p2 = np.sum(np.square(x2), 1)
     return x1p2.reshape((-1, 1)) + x2p2.reshape((1, -1)) - 2 * np.dot(x1, x2.T)
 
 
 def sinkhorn(w1, w2, M, reg, k):
-    """Sinkhorn algorithm with fixed number of iteration (autograd)
+    r"""Sinkhorn algorithm with fixed number of iteration (autograd)
     """
     K = np.exp(-M / reg)
     ui = np.ones((M.shape[0],))
@@ -43,14 +43,14 @@ def sinkhorn(w1, w2, M, reg, k):
 
 
 def split_classes(X, y):
-    """split samples in X by classes in y
+    r"""split samples in :math:`\mathbf{X}` by classes in :math:`\mathbf{y}`
     """
     lstsclass = np.unique(y)
     return [X[y == i, :].astype(np.float32) for i in lstsclass]
 
 
 def fda(X, y, p=2, reg=1e-16):
-    """Fisher Discriminant Analysis
+    r"""Fisher Discriminant Analysis
 
     Parameters
     ----------
@@ -111,18 +111,19 @@ def proj(X):
 
 def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, normalize=False):
     r"""
-    Wasserstein Discriminant Analysis [11]_
+    Wasserstein Discriminant Analysis :ref:`[11] <references-wda>`
 
     The function solves the following optimization problem:
 
     .. math::
-        P = \\text{arg}\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}
+        \mathbf{P} = \mathop{\arg \min}_\mathbf{P} \quad
+        \frac{\sum\limits_i W(P \mathbf{X}^i, P \mathbf{X}^i)}{\sum\limits_{i, j \neq i} W(P \mathbf{X}^i, P \mathbf{X}^j)}
 
     where :
 
-    - :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
+    - :math:`P` is a linear projection operator in the Stiefel(`p`, `d`) manifold
     - :math:`W` is entropic regularized Wasserstein distances
-    - :math:`X^i` are samples in the dataset corresponding to class i
+    - :math:`\mathbf{X}^i` are samples in the dataset corresponding to class i
 
     Parameters
     ----------
@@ -140,7 +141,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
     P0 : ndarray, shape (d, p)
         Initial starting point for projection.
     normalize : bool, optional
-        Normalise the Wasserstaiun distane by the average distance on P0 (default : False)
+        Normalise the Wasserstaiun distance by the average distance on P0 (default : False)
     verbose : int, optional
         Print information along iterations.
 
@@ -151,6 +152,8 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
     proj : callable
         Projection function including mean centering.
 
+
+    .. _references-wda:
     References
     ----------
     .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
@@ -217,27 +220,28 @@ def proj(X):
 
 def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0):
     r"""
-    Projection Robust Wasserstein Distance [32]
+    Projection Robust Wasserstein Distance :ref:`[32] <references-projection-robust-wasserstein>`
 
     The function solves the following optimization problem:
 
     .. math::
-        \max_{U \in St(d, k)} \min_{\pi \in \Pi(\mu,\nu)} \sum_{i,j} \pi_{i,j} \|U^T(x_i - y_j)\|^2 - reg * H(\pi)
+        \max_{U \in St(d, k)} \ \min_{\pi \in \Pi(\mu,\nu)} \quad \sum_{i,j} \pi_{i,j}
+        \|U^T(\mathbf{x}_i - \mathbf{y}_j)\|^2 - \mathrm{reg} \cdot H(\pi)
 
-    - :math:`U` is a linear projection operator in the Stiefel(d, k) manifold
+    - :math:`U` is a linear projection operator in the Stiefel(`d`, `k`) manifold
     - :math:`H(\pi)` is entropy regularizer
-    - :math:`x_i`, :math:`y_j` are samples of measures \mu and \nu respectively
+    - :math:`\mathbf{x}_i`, :math:`\mathbf{y}_j` are samples of measures :math:`\mu` and :math:`\nu` respectively
 
     Parameters
     ----------
     X : ndarray, shape (n, d)
-        Samples from measure \mu
+        Samples from measure :math:`\mu`
     Y : ndarray, shape (n, d)
-        Samples from measure \nu
+        Samples from measure :math:`\nu`
     a : ndarray, shape (n, )
-        weights for measure \mu
+        weights for measure :math:`\mu`
     b : ndarray, shape (n, )
-        weights for measure \nu
+        weights for measure :math:`\nu`
     tau : float
         stepsize for Riemannian Gradient Descent
     U0 : ndarray, shape (d, p)
@@ -258,6 +262,8 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh
     U : ndarray, shape (d, k)
         Projection operator.
 
+
+    .. _references-projection-robust-wasserstein:
     References
     ----------
     .. [32] Huang, M. , Ma S. & Lai L. (2021).
 
@@ -327,7 +327,8 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
     The function solves the following optimization problem:
 
     .. math::
-        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
     Where :
 
@@ -410,7 +411,8 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
     The function solves the following optimization problem:
 
     .. math::
-        GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        GW = \min_\mathbf{T} \quad \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
     Where :
 
@@ -487,8 +489,8 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
     Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`)
 
     .. math::
-        \gamma = \mathop{\arg \min}_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
-        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        \gamma = \mathop{\arg \min}_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F +
+        \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
         s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
 
@@ -569,7 +571,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
     Computes the FGW distance between two graphs see (see :ref:`[24] <references-fused-gromov-wasserstein2>`)
 
     .. math::
-        \min_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
+        \min_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l}
         L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
         s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
@@ -591,9 +593,9 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
     M : array-like, shape (ns, nt)
         Metric cost matrix between features across domains
     C1 : array-like, shape (ns, ns)
-        Metric cost matrix respresentative of the structure in the source space.
+        Metric cost matrix representative of the structure in the source space.
     C2 : array-like, shape (nt, nt)
-        Metric cost matrix espresentative of the structure in the target space.
+        Metric cost matrix representative of the structure in the target space.
     p :  array-like, shape (ns,)
         Distribution in the source space.
     q :  array-like, shape (nt,)
@@ -612,8 +614,8 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
 
     Returns
     -------
-    gamma : array-like, shape (ns, nt)
-        Optimal transportation matrix for the given parameters.
+    fgw-distance : float
+        Fused gromov wasserstein distance for the given parameters.
     log : dict
         Log dictionary return only if log==True in parameters.
 
@@ -780,7 +782,8 @@ def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
     The function solves the following optimization problem:
 
     .. math::
-        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
         s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
 
@@ -901,7 +904,8 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
     The function solves the following optimization problem:
 
     .. math::
-        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+        \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
 
         s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
 
@@ -1052,7 +1056,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
     The function solves the following optimization problem:
 
     .. math::
-        \mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
+        \mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
 
         s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
 
@@ -1157,7 +1161,8 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
     The function solves the following optimization problem:
 
     .. math::
-        GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
+        GW = \min_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})
+        \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
 
     Where :
 
@@ -1223,7 +1228,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
 
     .. math::
 
-        \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
+        \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
 
     Where :
 
@@ -1336,7 +1341,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
 
     .. math::
 
-        \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
+        \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
 
     Where :