@@ -1102,40 +1102,52 @@ def margin_cross_entropy(logits,
                               return_softmax=False,
                               reduction='mean'):
     """
+    Margin Loss from ArcFace,
+
     .. math::
 
-        L=-\frac{1}{N}\sum^N_{i=1}\log\frac{e^{s(cos(m_{1}\theta_{y_i}+m_{2})-m_{3})}}{e^{s(cos(m_{1}\theta_{y_i}+m_{2})-m_{3})}+\sum^n_{j=1,j\neq y_i} e^{scos\theta_{y_i}}}
+        L=-\\frac{1}{N}\sum^N_{i=1}\log\\frac{e^{s(cos(m_{1}\\theta_{y_i}+m_{2})-m_{3})}}{e^{s(cos(m_{1}\\theta_{y_i}+m_{2})-m_{3})}+\sum^n_{j=1,j\\neq y_i} e^{scos\\theta_{y_i}}}
 
-    where the :math:`\theta_{y_i}` is the angle between the feature :math:`x` and
+    where the :math:`\\theta_{y_i}` is the angle between the feature :math:`x` and
     the representation of class :math:`i`. The details of ArcFace loss
     could be referred to https://arxiv.org/abs/1801.07698.
 
-    Note that the API supports model parallel and single GPU. And logits.shape[-1] can be different each rank.
+    .. hint::
+        Note that the API supports model parallel and single GPU, and logits.shape[-1] can be different at each rank.
 
11151118 Args:
11161119 logits (Tensor): shape[N, local_num_classes], the output of the normalized X multiply the normalized W.
11171120 The logits is shard_logits when using model parallel.
11181121 label (Tensor): shape[N] or shape[N, 1], the groud truth label.
1119- margin1 (float): (1.0), m1 of margin loss.
1120- margin2 (float): (0.5), m2 of margin loss.
1121- margin3 (float): (0.0), m3 of margin loss.
1122- scale (float): (64.0), s of margin loss.
1123- group (Group): The abstract representation of group, see paddle.distributed.collective.Group
1124- return_softmax (bool): (False), whether return softmax probability.
1125- reduction (str): ('mean'), The candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
1122+ margin1 (float, optional): m1 of margin loss, default value is `1.0`.
1123+ margin2 (float, optional): m2 of margin loss, default value is `0.5`.
1124+ margin3 (float, optional): m3 of margin loss, default value is `0.0`.
1125+ scale (float, optional): s of margin loss, default value is `64.0`.
1126+ group (Group, optional): The abstract representation of group, see paddle.distributed.collective.Group.
1127+ Default `None`.
1128+ return_softmax (bool, optional): Whether return softmax probability. Default value is `False`.
1129+ reduction (str, optional): The candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
11261130 If :attr:`reduction` is ``'mean'``, return the average of loss;
11271131 If :attr:`reduction` is ``'sum'``, return the sum of loss;
11281132 If :attr:`reduction` is ``'none'``, no reduction will be applied.
1133+ Default value is `'mean'`.
11291134
-    Return:
+    Returns:
         loss (Tensor or Scalar): if reduction==None, shape[N, 1], else shape[1], the cross entropy loss.
         softmax (Tensor): softmax probability. The softmax is shard_softmax when using model parallel.
+        ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
+            `return_softmax` is False, otherwise the tuple \
+            (loss, softmax), softmax is shard_softmax when \
+            using model parallel, otherwise softmax is in \
+            the same shape as input logits.
 
     Examples:
+
         .. code-block:: python
+            :linenos:
+            :caption: for single GPU
 
             # required: gpu
-            # for single GPU
             import paddle
             import numpy as np
             m1 = 1.0
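
For readers checking the math rather than the diff, here is a reference-only NumPy sketch of the formula in the docstring above: the margin cos(m1*theta + m2) - m3 is applied to the ground-truth class only, the result is scaled by s, and an ordinary softmax cross entropy follows. The helper name margin_cross_entropy_ref, the cosine clipping, and the max-subtraction for stability are assumptions of this sketch, not part of the Paddle API; it returns the per-sample loss, i.e. what reduction=None gives.

    import numpy as np

    def margin_cross_entropy_ref(cosine, label, m1=1.0, m2=0.5, m3=0.0, s=64.0):
        """Reference-only check of the ArcFace-style margin loss (reduction=None)."""
        rows = np.arange(cosine.shape[0])
        # recover the angle to the ground-truth class and apply the margin there only
        theta = np.arccos(np.clip(cosine[rows, label], -1.0, 1.0))
        target = cosine.copy()
        target[rows, label] = np.cos(m1 * theta + m2) - m3
        logits = s * target                                  # scale by s
        logits -= logits.max(axis=1, keepdims=True)          # numerical stability
        softmax = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
        return -np.log(softmax[rows, label]), softmax        # per-sample loss and softmax

    # cosine plays the role of the normalized X multiplied by the normalized W
    cosine = np.random.uniform(-1.0, 1.0, size=(2, 4))
    label = np.array([0, 3])
    loss, prob = margin_cross_entropy_ref(cosine, label)
    print(loss.mean())                                       # roughly what reduction='mean' reports
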
@@ -1182,53 +1194,57 @@ def margin_cross_entropy(logits,
             # [[0.99978819, 0.00000000, 0.00000000, 0.00021181],
             #  [0.99992995, 0.00006468, 0.00000000, 0.00000537]])
 
-            ## for multi GPU, test_margin_cross_entropy.py
-            #import paddle
-            #import paddle.distributed as dist
-            #import numpy as np
-            #strategy = dist.fleet.DistributedStrategy()
-            #dist.fleet.init(is_collective=True, strategy=strategy)
-            #rank_id = dist.get_rank()
-            #m1 = 1.0
-            #m2 = 0.5
-            #m3 = 0.0
-            #s = 64.0
-            #batch_size = 2
-            #feature_length = 4
-            #num_class_per_card = [4, 8]
-            #num_classes = np.sum(num_class_per_card)
-
-            #np_label = np.random.randint(0, num_classes, (batch_size,))
-            #label = paddle.to_tensor(np_label, dtype="int64")
-            #label_list = []
-            #dist.all_gather(label_list, label)
-            #label = paddle.concat(label_list, axis=0)
-
-            #X = paddle.randn(
-            #    shape=[batch_size, feature_length],
-            #    dtype='float64')
-            #X_list = []
-            #dist.all_gather(X_list, X)
-            #X = paddle.concat(X_list, axis=0)
-            #X_l2 = paddle.sqrt(paddle.sum(paddle.square(X), axis=1, keepdim=True))
-            #X = paddle.divide(X, X_l2)
-
-            #W = paddle.randn(
-            #    shape=[feature_length, num_class_per_card[rank_id]],
-            #    dtype='float64')
-            #W_l2 = paddle.sqrt(paddle.sum(paddle.square(W), axis=0, keepdim=True))
-            #W = paddle.divide(W, W_l2)
-
-            #logits = paddle.matmul(X, W)
-            #loss, softmax = paddle.nn.functional.margin_cross_entropy(
-            #    logits, label, margin1=m1, margin2=m2, margin3=m3, scale=s, return_softmax=True, reduction=None)
-
-            #print(logits)
-            #print(label)
-            #print(loss)
-            #print(softmax)
-
-            #python -m paddle.distributed.launch --gpus=0,1 test_margin_cross_entropy.py
+        .. code-block:: python
+            :linenos:
+            :caption: for multi GPU, test_margin_cross_entropy.py
+
+            # required: distributed
+            import paddle
+            import paddle.distributed as dist
+            import numpy as np
+            strategy = dist.fleet.DistributedStrategy()
+            dist.fleet.init(is_collective=True, strategy=strategy)
+            rank_id = dist.get_rank()
+            m1 = 1.0
+            m2 = 0.5
+            m3 = 0.0
+            s = 64.0
+            batch_size = 2
+            feature_length = 4
+            num_class_per_card = [4, 8]
+            num_classes = np.sum(num_class_per_card)
+
+            np_label = np.random.randint(0, num_classes, (batch_size,))
+            label = paddle.to_tensor(np_label, dtype="int64")
+            label_list = []
+            dist.all_gather(label_list, label)
+            label = paddle.concat(label_list, axis=0)
+
+            X = paddle.randn(
+                shape=[batch_size, feature_length],
+                dtype='float64')
+            X_list = []
+            dist.all_gather(X_list, X)
+            X = paddle.concat(X_list, axis=0)
+            X_l2 = paddle.sqrt(paddle.sum(paddle.square(X), axis=1, keepdim=True))
+            X = paddle.divide(X, X_l2)
+
+            W = paddle.randn(
+                shape=[feature_length, num_class_per_card[rank_id]],
+                dtype='float64')
+            W_l2 = paddle.sqrt(paddle.sum(paddle.square(W), axis=0, keepdim=True))
+            W = paddle.divide(W, W_l2)
+
+            logits = paddle.matmul(X, W)
+            loss, softmax = paddle.nn.functional.margin_cross_entropy(
+                logits, label, margin1=m1, margin2=m2, margin3=m3, scale=s, return_softmax=True, reduction=None)
+
+            print(logits)
+            print(label)
+            print(loss)
+            print(softmax)
+
+            # python -m paddle.distributed.launch --gpus=0,1 test_margin_cross_entropy.py
             ## for rank0 input
             #Tensor(shape=[4, 4], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
             #       [[ 0.32888934, 0.02408748, -0.02763289, 0.18173063],
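
As a side note to the model-parallel hint, logits.shape[-1] is allowed to differ per rank because each rank only holds the logit columns of its own class shard; the global softmax denominator is simply the sum of every rank's local exp-sums. The single-process NumPy toy below fakes the two ranks of the example with a plain Python list; there is no real communication, and the actual op additionally handles numerical stability and label shard offsets, so treat this only as an illustration of the idea.

    import numpy as np

    np.random.seed(0)
    batch_size = 2
    num_class_per_card = [4, 8]            # rank 0 owns 4 classes, rank 1 owns 8
    shard_logits = [np.random.randn(batch_size, n) for n in num_class_per_card]

    # what an all_reduce of the per-rank exp-sums would produce
    denominator = sum(np.exp(l).sum(axis=1, keepdims=True) for l in shard_logits)

    # each "rank" only ever materializes the softmax of its own shard
    shard_softmax = [np.exp(l) / denominator for l in shard_logits]

    # concatenating the shards recovers the ordinary full softmax
    full = np.concatenate(shard_logits, axis=1)
    reference = np.exp(full) / np.exp(full).sum(axis=1, keepdims=True)
    assert np.allclose(np.concatenate(shard_softmax, axis=1), reference)
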