Commit 78b3467

fix example format style
1 parent 26815f0 commit 78b3467

File tree: 1 file changed (+75, -59 lines)

python/paddle/nn/functional/loss.py

Lines changed: 75 additions & 59 deletions
@@ -1102,40 +1102,52 @@ def margin_cross_entropy(logits,
 return_softmax=False,
 reduction='mean'):
     """
+    Margin Loss from ArcFace,
+
     .. math::
 
-        L=-\frac{1}{N}\sum^N_{i=1}\log\frac{e^{s(cos(m_{1}\theta_{y_i}+m_{2})-m_{3})}}{e^{s(cos(m_{1}\theta_{y_i}+m_{2})-m_{3})}+\sum^n_{j=1,j\neq y_i} e^{scos\theta_{y_i}}}
+        L=-\\frac{1}{N}\sum^N_{i=1}\log\\frac{e^{s(cos(m_{1}\\theta_{y_i}+m_{2})-m_{3})}}{e^{s(cos(m_{1}\\theta_{y_i}+m_{2})-m_{3})}+\sum^n_{j=1,j\\neq y_i} e^{scos\\theta_{y_i}}}
 
-    where the :math: `\theta_{y_i}` is the angle between the feature :math: `x` and
+    where the :math: `\\theta_{y_i}` is the angle between the feature :math: `x` and
     the representation of class :math: `i`. The details of ArcFace loss
     could be referred to https://arxiv.org/abs/1801.07698.
 
-    Note that the API supports model parallel and single GPU. And logits.shape[-1] can be different each rank.
+    .. hint::
+        Note that the API supports model parallel and single GPU. And logits.shape[-1] can be different each rank.
 
     Args:
         logits (Tensor): shape[N, local_num_classes], the output of the normalized X multiply the normalized W.
                 The logits is shard_logits when using model parallel.
         label (Tensor): shape[N] or shape[N, 1], the groud truth label.
-        margin1 (float): (1.0), m1 of margin loss.
-        margin2 (float): (0.5), m2 of margin loss.
-        margin3 (float): (0.0), m3 of margin loss.
-        scale (float): (64.0), s of margin loss.
-        group (Group): The abstract representation of group, see paddle.distributed.collective.Group
-        return_softmax (bool): (False), whether return softmax probability.
-        reduction (str): ('mean'), The candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
+        margin1 (float, optional): m1 of margin loss, default value is `1.0`.
+        margin2 (float, optional): m2 of margin loss, default value is `0.5`.
+        margin3 (float, optional): m3 of margin loss, default value is `0.0`.
+        scale (float, optional): s of margin loss, default value is `64.0`.
+        group (Group, optional): The abstract representation of group, see paddle.distributed.collective.Group.
+            Default `None`.
+        return_softmax (bool, optional): Whether return softmax probability. Default value is `False`.
+        reduction (str, optional): The candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
                     If :attr:`reduction` is ``'mean'``, return the average of loss;
                     If :attr:`reduction` is ``'sum'``, return the sum of loss;
                     If :attr:`reduction` is ``'none'``, no reduction will be applied.
+                    Default value is `'mean'`.
 
-    Return:
+    Returns:
         loss (Tensor or Scalar): if reduction==None, shape[N, 1], else shape[1], the cross entropy loss.
         softmax (Tensor): softmax probability. The softmax is shard_softmax when using model parallel.
+        ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
+            `return_softmax` is False, otherwise the tuple \
+            (loss, softmax), softmax is shard_softmax when \
+            using model parallel, otherwise softmax is in \
+            the same shape with input logits.
 
     Examples:
+
     .. code-block:: python
+        :linenos:
+        :caption: for single GPU
 
         # required: gpu
-        # for single GPU
         import paddle
         import numpy as np
         m1 = 1.0
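
The formula documented in this hunk applies the margins (m1, m2, m3) to the target-class angle theta_{y_i} and then takes a scaled softmax cross entropy over all classes. The following single-process NumPy sketch is not part of this commit; the helper name `arcface_margin_loss` and the use of plain NumPy are illustrative assumptions only.

    import numpy as np

    def arcface_margin_loss(X, W, labels, m1=1.0, m2=0.5, m3=0.0, s=64.0):
        # X: [N, d] L2-normalized features, W: [d, C] L2-normalized class centers,
        # so X @ W gives cos(theta_j) for every sample/class pair.
        cos_theta = X @ W
        idx = np.arange(len(labels))
        theta_y = np.arccos(np.clip(cos_theta[idx, labels], -1.0, 1.0))
        # Margin-adjusted target logit: cos(m1 * theta + m2) - m3.
        adjusted = cos_theta.copy()
        adjusted[idx, labels] = np.cos(m1 * theta_y + m2) - m3
        logits = s * adjusted
        # Numerically stable log-softmax cross entropy, averaged over the batch.
        logits = logits - logits.max(axis=1, keepdims=True)
        log_prob = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
        return -log_prob[idx, labels].mean()

    # Toy usage with random unit-normalized inputs.
    rng = np.random.default_rng(0)
    X = rng.standard_normal((2, 4)); X /= np.linalg.norm(X, axis=1, keepdims=True)
    W = rng.standard_normal((4, 4)); W /= np.linalg.norm(W, axis=0, keepdims=True)
    labels = np.array([1, 3])
    print(arcface_margin_loss(X, W, labels))
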
@@ -1182,53 +1194,57 @@ def margin_cross_entropy(logits,
         # [[0.99978819, 0.00000000, 0.00000000, 0.00021181],
         # [0.99992995, 0.00006468, 0.00000000, 0.00000537]])
 
-        ## for multi GPU, test_margin_cross_entropy.py
-        #import paddle
-        #import paddle.distributed as dist
-        #import numpy as np
-        #strategy = dist.fleet.DistributedStrategy()
-        #dist.fleet.init(is_collective=True, strategy=strategy)
-        #rank_id = dist.get_rank()
-        #m1 = 1.0
-        #m2 = 0.5
-        #m3 = 0.0
-        #s = 64.0
-        #batch_size = 2
-        #feature_length = 4
-        #num_class_per_card = [4, 8]
-        #num_classes = np.sum(num_class_per_card)
-
-        #np_label = np.random.randint(0, num_classes, (batch_size,))
-        #label = paddle.to_tensor(np_label, dtype="int64")
-        #label_list = []
-        #dist.all_gather(label_list, label)
-        #label = paddle.concat(label_list, axis=0)
-
-        #X = paddle.randn(
-        #    shape=[batch_size, feature_length],
-        #    dtype='float64')
-        #X_list = []
-        #dist.all_gather(X_list, X)
-        #X = paddle.concat(X_list, axis=0)
-        #X_l2 = paddle.sqrt(paddle.sum(paddle.square(X), axis=1, keepdim=True))
-        #X = paddle.divide(X, X_l2)
-
-        #W = paddle.randn(
-        #    shape=[feature_length, num_class_per_card[rank_id]],
-        #    dtype='float64')
-        #W_l2 = paddle.sqrt(paddle.sum(paddle.square(W), axis=0, keepdim=True))
-        #W = paddle.divide(W, W_l2)
-
-        #logits = paddle.matmul(X, W)
-        #loss, softmax = paddle.nn.functional.margin_cross_entropy(
-        #    logits, label, margin1=m1, margin2=m2, margin3=m3, scale=s, return_softmax=True, reduction=None)
-
-        #print(logits)
-        #print(label)
-        #print(loss)
-        #print(softmax)
-
-        #python -m paddle.distributed.launch --gpus=0,1 test_margin_cross_entropy.py
+    .. code-block:: python
+        :linenos:
+        :caption: for multi GPU, test_margin_cross_entropy.py
+
+        # required: distributed
+        import paddle
+        import paddle.distributed as dist
+        import numpy as np
+        strategy = dist.fleet.DistributedStrategy()
+        dist.fleet.init(is_collective=True, strategy=strategy)
+        rank_id = dist.get_rank()
+        m1 = 1.0
+        m2 = 0.5
+        m3 = 0.0
+        s = 64.0
+        batch_size = 2
+        feature_length = 4
+        num_class_per_card = [4, 8]
+        num_classes = np.sum(num_class_per_card)
+
+        np_label = np.random.randint(0, num_classes, (batch_size,))
+        label = paddle.to_tensor(np_label, dtype="int64")
+        label_list = []
+        dist.all_gather(label_list, label)
+        label = paddle.concat(label_list, axis=0)
+
+        X = paddle.randn(
+            shape=[batch_size, feature_length],
+            dtype='float64')
+        X_list = []
+        dist.all_gather(X_list, X)
+        X = paddle.concat(X_list, axis=0)
+        X_l2 = paddle.sqrt(paddle.sum(paddle.square(X), axis=1, keepdim=True))
+        X = paddle.divide(X, X_l2)
+
+        W = paddle.randn(
+            shape=[feature_length, num_class_per_card[rank_id]],
+            dtype='float64')
+        W_l2 = paddle.sqrt(paddle.sum(paddle.square(W), axis=0, keepdim=True))
+        W = paddle.divide(W, W_l2)
+
+        logits = paddle.matmul(X, W)
+        loss, softmax = paddle.nn.functional.margin_cross_entropy(
+            logits, label, margin1=m1, margin2=m2, margin3=m3, scale=s, return_softmax=True, reduction=None)
+
+        print(logits)
+        print(label)
+        print(loss)
+        print(softmax)
+
+        # python -m paddle.distributed.launch --gpus=0,1 test_margin_cross_entropy.py
         ## for rank0 input
         #Tensor(shape=[4, 4], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
         #    [[ 0.32888934, 0.02408748, -0.02763289, 0.18173063],
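
For readers trying the updated single-GPU example, here is a quick sanity check of the documented return contract. It is not part of this commit; it assumes a CUDA-enabled PaddlePaddle install and uses `paddle.nn.functional.normalize` in place of the manual L2 normalization shown in the docstring.

    # required: gpu
    import numpy as np
    import paddle

    N, C = 2, 4
    X = paddle.nn.functional.normalize(paddle.randn([N, C], dtype='float64'), axis=1)
    W = paddle.nn.functional.normalize(paddle.randn([C, C], dtype='float64'), axis=0)
    logits = paddle.matmul(X, W)   # cosine logits, as in the example
    label = paddle.to_tensor(np.random.randint(0, C, (N,)), dtype='int64')

    loss, softmax = paddle.nn.functional.margin_cross_entropy(
        logits, label, margin1=1.0, margin2=0.5, margin3=0.0, scale=64.0,
        return_softmax=True, reduction=None)

    # With reduction=None the docstring promises loss of shape [N, 1]; the returned
    # softmax is taken over the margin-adjusted logits, so each loss entry should
    # equal -log of the target-class probability.
    print(loss.shape, softmax.shape)
    print(np.allclose(loss.numpy().flatten(),
                      -np.log(softmax.numpy()[np.arange(N), label.numpy()])))
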
