Skip to content

Commit 797249b

Browse files
authored
Merge pull request #5494 from lcy-seso/fix_nce
NCE does not need to set its activation.
2 parents 53cb4df + a3a158c commit 797249b

File tree

1 file changed

+35
-30
lines changed
  • python/paddle/trainer_config_helpers

1 file changed

+35
-30
lines changed

python/paddle/trainer_config_helpers/layers.py

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5479,15 +5479,18 @@ def crf_decoding_layer(input,
54795479
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
54805480

54815481

5482-
@wrap_act_default(act=SigmoidActivation())
5482+
"""
5483+
Following are cost Layers.
5484+
"""
5485+
5486+
54835487
@wrap_bias_attr_default(has_bias=True)
54845488
@wrap_param_attr_default()
54855489
@wrap_name_default()
54865490
@layer_support()
54875491
def nce_layer(input,
54885492
label,
54895493
num_classes=None,
5490-
act=None,
54915494
param_attr=None,
54925495
weight=None,
54935496
num_neg_samples=10,
@@ -5496,9 +5499,12 @@ def nce_layer(input,
54965499
bias_attr=None,
54975500
layer_attr=None):
54985501
"""
5499-
Noise-contrastive estimation.
5500-
Implements the method in the following paper:
5501-
A fast and simple algorithm for training neural probabilistic language models.
5502+
Noise-contrastive estimation. This layer implements the method in the
5503+
following paper:
5504+
5505+
Reference:
5506+
A fast and simple algorithm for training neural probabilistic language
5507+
models. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf
55025508
55035509
The example usage is:
55045510
@@ -5510,31 +5516,37 @@ def nce_layer(input,
55105516
55115517
:param name: The name of this layer. It is optional.
55125518
:type name: basestring
5513-
:param input: The input layers. It could be a LayerOutput of list/tuple of LayerOutput.
5519+
:param input: The input layers. It should be a LayerOutput or a list/tuple
5520+
of LayerOutput.
55145521
:type input: LayerOutput | list | tuple | collections.Sequence
5515-
:param label: label layer
5522+
:param label: The ground truth.
55165523
:type label: LayerOutput
5517-
:param weight: weight layer, can be None(default)
5524+
:param weight: The weight layer defines a weight for each sample in the
5525+
mini-batch. The default value is None.
55185526
:type weight: LayerOutput
5519-
:param num_classes: number of classes.
5527+
:param num_classes: The number of classes.
55205528
:type num_classes: int
5521-
:param act: Activation type. SigmoidActivation is the default.
5522-
:type act: BaseActivation
5523-
:param param_attr: The Parameter Attribute|list.
5524-
:type param_attr: ParameterAttribute
5525-
:param num_neg_samples: number of negative samples. Default is 10.
5529+
:param param_attr: The parameter attributes.
5530+
:type param_attr: ParameterAttribute|list
5531+
:param num_neg_samples: The number of sampled negative labels. The default
5532+
value is 10.
55265533
:type num_neg_samples: int
5527-
:param neg_distribution: The distribution for generating the random negative labels.
5528-
A uniform distribution will be used if not provided.
5529-
If not None, its length must be equal to num_classes.
5534+
:param neg_distribution: The discrete noise distribution over the output
5535+
space from which num_neg_samples negative labels
5536+
are sampled. If this parameter is not set, a
5537+
uniform distribution will be used. A user defined
5538+
distribution is a list whose length must be equal
5539+
to the num_classes. Each member of the list defines
5540+
the probability of a class given input x.
55305541
:type neg_distribution: list | tuple | collections.Sequence | None
5531-
:param bias_attr: The bias attribute. If the parameter is set to False or an object
5532-
whose type is not ParameterAttribute, no bias is defined. If the
5533-
parameter is set to True, the bias is initialized to zero.
5542+
:param bias_attr: The attribute for bias. If this parameter is set to False or
5543+
any object whose type is not ParameterAttribute, no bias
5544+
is added. If this parameter is set to True, the bias is
5545+
initialized to zero.
55345546
:type bias_attr: ParameterAttribute | None | bool | Any
55355547
:param layer_attr: Extra Layer Attribute.
55365548
:type layer_attr: ExtraLayerAttribute
5537-
:return: layer name.
5549+
:return: The LayerOutput object.
55385550
:rtype: LayerOutput
55395551
"""
55405552
if isinstance(input, LayerOutput):
@@ -5557,8 +5569,6 @@ def nce_layer(input,
55575569
assert isinstance(neg_distribution, collections.Sequence)
55585570
assert len(neg_distribution) == num_classes
55595571
assert abs(sum(neg_distribution) - 1.0) < 1e-5
5560-
if not isinstance(act, BaseActivation):
5561-
raise TypeError()
55625572

55635573
ipts_for_layer = []
55645574
parents = []
@@ -5580,7 +5590,7 @@ def nce_layer(input,
55805590
type=LayerType.NCE_LAYER,
55815591
num_classes=num_classes,
55825592
neg_sampling_dist=neg_distribution,
5583-
active_type=act.name,
5593+
active_type=SigmoidActivation().name,
55845594
num_neg_samples=num_neg_samples,
55855595
inputs=ipts_for_layer,
55865596
bias=ParamAttr.to_bias(bias_attr),
@@ -5590,12 +5600,7 @@ def nce_layer(input,
55905600
LayerType.NCE_LAYER,
55915601
parents=parents,
55925602
size=l.config.size,
5593-
activation=act)
5594-
5595-
5596-
"""
5597-
following are cost Layers.
5598-
"""
5603+
activation=SigmoidActivation())
55995604

56005605

56015606
@wrap_name_default()

0 commit comments

Comments
 (0)