Skip to content

Commit 9c6ac88

Browse files
ncassereau and rflamary authored
[MRG] Docs updates (#298)
* bregman docs * sliced docs * docs partial * unbalanced docs * stochastic docs * plot docs * datasets docs * utils docs * dr docs * dr docs corrected * smooth docs * docs da * pep8 * docs gromov * more space after min and argmin * docs lp * bregman docs * bregman docs mistake corrected * pep8 Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
1 parent e1b67c6 commit 9c6ac88

File tree

14 files changed

+1048
-877
lines changed

14 files changed

+1048
-877
lines changed

ot/bregman.py

Lines changed: 129 additions & 107 deletions
Large diffs are not rendered by default.

ot/da.py

Lines changed: 280 additions & 219 deletions
Large diffs are not rendered by default.

ot/datasets.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
def make_1D_gauss(n, m, s):
16-
"""return a 1D histogram for a gaussian distribution (n bins, mean m and std s)
16+
"""return a 1D histogram for a gaussian distribution (`n` bins, mean `m` and std `s`)
1717
1818
Parameters
1919
----------
@@ -26,7 +26,7 @@ def make_1D_gauss(n, m, s):
2626
2727
Returns
2828
-------
29-
h : ndarray (n,)
29+
h : ndarray (`n`,)
3030
1D histogram for a gaussian distribution
3131
"""
3232
x = np.arange(n, dtype=np.float64)
@@ -41,7 +41,7 @@ def get_1D_gauss(n, m, sigma):
4141

4242

4343
def make_2D_samples_gauss(n, m, sigma, random_state=None):
44-
"""Return n samples drawn from 2D gaussian N(m,sigma)
44+
"""Return `n` samples drawn from 2D gaussian :math:`\mathcal{N}(m, \sigma)`
4545
4646
Parameters
4747
----------
@@ -59,8 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None):
5959
6060
Returns
6161
-------
62-
X : ndarray, shape (n, 2)
63-
n samples drawn from N(m, sigma).
62+
X : ndarray, shape (`n`, 2)
63+
n samples drawn from :math:`\mathcal{N}(m, \sigma)`.
6464
"""
6565

6666
generator = check_random_state(random_state)
@@ -102,7 +102,7 @@ def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwa
102102
Returns
103103
-------
104104
X : ndarray, shape (n, d)
105-
n observation of size d
105+
`n` observations of size `d`
106106
y : ndarray, shape (n,)
107107
labels of the samples.
108108
"""

ot/dr.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@
2222

2323

2424
def dist(x1, x2):
25-
""" Compute squared euclidean distance between samples (autograd)
25+
r""" Compute squared euclidean distance between samples (autograd)
2626
"""
2727
x1p2 = np.sum(np.square(x1), 1)
2828
x2p2 = np.sum(np.square(x2), 1)
2929
return x1p2.reshape((-1, 1)) + x2p2.reshape((1, -1)) - 2 * np.dot(x1, x2.T)
3030

3131

3232
def sinkhorn(w1, w2, M, reg, k):
33-
"""Sinkhorn algorithm with fixed number of iteration (autograd)
33+
r"""Sinkhorn algorithm with a fixed number of iterations (autograd)
3434
"""
3535
K = np.exp(-M / reg)
3636
ui = np.ones((M.shape[0],))
@@ -43,14 +43,14 @@ def sinkhorn(w1, w2, M, reg, k):
4343

4444

4545
def split_classes(X, y):
46-
"""split samples in X by classes in y
46+
r"""split samples in :math:`\mathbf{X}` by classes in :math:`\mathbf{y}`
4747
"""
4848
lstsclass = np.unique(y)
4949
return [X[y == i, :].astype(np.float32) for i in lstsclass]
5050

5151

5252
def fda(X, y, p=2, reg=1e-16):
53-
"""Fisher Discriminant Analysis
53+
r"""Fisher Discriminant Analysis
5454
5555
Parameters
5656
----------
@@ -111,18 +111,19 @@ def proj(X):
111111

112112
def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, normalize=False):
113113
r"""
114-
Wasserstein Discriminant Analysis [11]_
114+
Wasserstein Discriminant Analysis :ref:`[11] <references-wda>`
115115
116116
The function solves the following optimization problem:
117117
118118
.. math::
119-
P = \\text{arg}\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}
119+
\mathbf{P} = \mathop{\arg \min}_\mathbf{P} \quad
120+
\frac{\sum\limits_i W(P \mathbf{X}^i, P \mathbf{X}^i)}{\sum\limits_{i, j \neq i} W(P \mathbf{X}^i, P \mathbf{X}^j)}
120121
121122
where :
122123
123-
- :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
124+
- :math:`P` is a linear projection operator in the Stiefel(`p`, `d`) manifold
124125
- :math:`W` is entropic regularized Wasserstein distances
125-
- :math:`X^i` are samples in the dataset corresponding to class i
126+
- :math:`\mathbf{X}^i` are samples in the dataset corresponding to class i
126127
127128
Parameters
128129
----------
@@ -140,7 +141,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
140141
P0 : ndarray, shape (d, p)
141142
Initial starting point for projection.
142143
normalize : bool, optional
143-
Normalise the Wasserstaiun distane by the average distance on P0 (default : False)
144+
Normalise the Wasserstein distance by the average distance on P0 (default : False)
144145
verbose : int, optional
145146
Print information along iterations.
146147
@@ -151,6 +152,8 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
151152
proj : callable
152153
Projection function including mean centering.
153154
155+
156+
.. _references-wda:
154157
References
155158
----------
156159
.. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
@@ -217,27 +220,28 @@ def proj(X):
217220

218221
def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0):
219222
r"""
220-
Projection Robust Wasserstein Distance [32]
223+
Projection Robust Wasserstein Distance :ref:`[32] <references-projection-robust-wasserstein>`
221224
222225
The function solves the following optimization problem:
223226
224227
.. math::
225-
\max_{U \in St(d, k)} \min_{\pi \in \Pi(\mu,\nu)} \sum_{i,j} \pi_{i,j} \|U^T(x_i - y_j)\|^2 - reg * H(\pi)
228+
\max_{U \in St(d, k)} \ \min_{\pi \in \Pi(\mu,\nu)} \quad \sum_{i,j} \pi_{i,j}
229+
\|U^T(\mathbf{x}_i - \mathbf{y}_j)\|^2 - \mathrm{reg} \cdot H(\pi)
226230
227-
- :math:`U` is a linear projection operator in the Stiefel(d, k) manifold
231+
- :math:`U` is a linear projection operator in the Stiefel(`d`, `k`) manifold
228232
- :math:`H(\pi)` is entropy regularizer
229-
- :math:`x_i`, :math:`y_j` are samples of measures \mu and \nu respectively
233+
- :math:`\mathbf{x}_i`, :math:`\mathbf{y}_j` are samples of measures :math:`\mu` and :math:`\nu` respectively
230234
231235
Parameters
232236
----------
233237
X : ndarray, shape (n, d)
234-
Samples from measure \mu
238+
Samples from measure :math:`\mu`
235239
Y : ndarray, shape (n, d)
236-
Samples from measure \nu
240+
Samples from measure :math:`\nu`
237241
a : ndarray, shape (n, )
238-
weights for measure \mu
242+
weights for measure :math:`\mu`
239243
b : ndarray, shape (n, )
240-
weights for measure \nu
244+
weights for measure :math:`\nu`
241245
tau : float
242246
stepsize for Riemannian Gradient Descent
243247
U0 : ndarray, shape (d, p)
@@ -258,6 +262,8 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh
258262
U : ndarray, shape (d, k)
259263
Projection operator.
260264
265+
266+
.. _references-projection-robust-wasserstein:
261267
References
262268
----------
263269
.. [32] Huang, M. , Ma S. & Lai L. (2021).

ot/gromov.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,8 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
327327
The function solves the following optimization problem:
328328
329329
.. math::
330-
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
330+
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
331+
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
331332
332333
Where :
333334
@@ -410,7 +411,8 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
410411
The function solves the following optimization problem:
411412
412413
.. math::
413-
GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
414+
GW = \min_\mathbf{T} \quad \sum_{i,j,k,l}
415+
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
414416
415417
Where :
416418
@@ -487,8 +489,8 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
487489
Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`)
488490
489491
.. math::
490-
\gamma = \mathop{\arg \min}_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
491-
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
492+
\gamma = \mathop{\arg \min}_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F +
493+
\alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
492494
493495
s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
494496
@@ -569,7 +571,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
569571
Computes the FGW distance between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein2>`)
570572
571573
.. math::
572-
\min_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
574+
\min_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l}
573575
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
574576
575577
s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
@@ -591,9 +593,9 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
591593
M : array-like, shape (ns, nt)
592594
Metric cost matrix between features across domains
593595
C1 : array-like, shape (ns, ns)
594-
Metric cost matrix respresentative of the structure in the source space.
596+
Metric cost matrix representative of the structure in the source space.
595597
C2 : array-like, shape (nt, nt)
596-
Metric cost matrix espresentative of the structure in the target space.
598+
Metric cost matrix representative of the structure in the target space.
597599
p : array-like, shape (ns,)
598600
Distribution in the source space.
599601
q : array-like, shape (nt,)
@@ -612,8 +614,8 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
612614
613615
Returns
614616
-------
615-
gamma : array-like, shape (ns, nt)
616-
Optimal transportation matrix for the given parameters.
617+
fgw-distance : float
618+
Fused gromov wasserstein distance for the given parameters.
617619
log : dict
618620
Log dictionary returned only if log==True in parameters.
619621
@@ -780,7 +782,8 @@ def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
780782
The function solves the following optimization problem:
781783
782784
.. math::
783-
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
785+
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
786+
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
784787
785788
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
786789
@@ -901,7 +904,8 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
901904
The function solves the following optimization problem:
902905
903906
.. math::
904-
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
907+
\mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
908+
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
905909
906910
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
907911
@@ -1052,7 +1056,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
10521056
The function solves the following optimization problem:
10531057
10541058
.. math::
1055-
\mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
1059+
\mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
10561060
10571061
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
10581062
@@ -1157,7 +1161,8 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
11571161
The function solves the following optimization problem:
11581162
11591163
.. math::
1160-
GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
1164+
GW = \min_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})
1165+
\mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
11611166
11621167
Where :
11631168
@@ -1223,7 +1228,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
12231228
12241229
.. math::
12251230
1226-
\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
1231+
\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
12271232
12281233
Where :
12291234
@@ -1336,7 +1341,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
13361341
13371342
.. math::
13381343
1339-
\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
1344+
\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
13401345
13411346
Where :
13421347

0 commit comments

Comments
 (0)