
Commit 4c134c7

add comments.
1 parent 30725a0 commit 4c134c7

3 files changed, +36 -10 lines changed

paddle/gserver/gradientmachines/RecurrentGradientMachine.h

Lines changed: 32 additions & 6 deletions
@@ -190,7 +190,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
   std::vector<int> ids;
 
   /**
-   * @brief idsProb, log probability of each generated words.
+   * @brief idsProb, log probability of each generated word.
    */
   std::vector<real> idsProb;
 
@@ -472,16 +472,42 @@ class RecurrentGradientMachine : public NeuralNetwork {
   void copyDataOutlinkFrame(size_t machineCur);
 
   /*
-   * @brief In generation, if the layer group has more than 1 outlink, outlinks
-   * except the first one are data outlinks. This function creates the data
-   * outlinks.
-   * @note In beam search, only one generated sequence with the hightest log
-   * probabilites are retained.
+   * @brief In generation, if the layer group has more than one outlink, every
+   * outlink except the first one is a data outlink. In a RecurrentLayerGroup,
+   * each time step is a separate Network, so the outputs of a layer inside
+   * the RecurrentLayerGroup are stored in separate Arguments. If a layer is
+   * specified as an outlink of the RecurrentLayerGroup, this function
+   * collects its outputs from each time step of each generated sequence,
+   * dispersed over those separate Arguments, into a new single Argument that
+   * forms the output of the RecurrentLayerGroup.
    */
   void createDataOutlink();
+
+  /*
+   * @brief Decide how many rows to select, from a given start position, out
+   * of the Matrix that stores the forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not.
+   * @param outArgs: all of the Arguments returned by the forward pass during
+   * the generation process.
+   * @param copySize: the returned result, the number of rows to select from
+   * the Matrix that stores the forward-pass results, from a start position.
+   */
   void createDataOutlinkCopySizeInfo(bool isSeq,
                                      std::vector<Argument>& outArgs,
                                      std::vector<int>& copySize);
+
+  /*
+   * @brief Decide the index of the start row, for each time step of a
+   * generated sequence, in the Matrix that stores the forward-pass results
+   * of the entire beam-search batch.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not.
+   * @param outArgs: all of the Arguments returned by the forward pass during
+   * the generation process.
+   */
   void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
 
   /*
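Taken together, createDataOutlinkCopySizeInfo and createDataOutlinkSelRowsInfo describe a row gather: for each time step, pick a start row and a row count out of the matrix holding the forward-pass results, then concatenate the selected blocks into one output. Below is a minimal NumPy sketch of that gather; the names gather_outlink, step_outputs, sel_rows, and copy_sizes are illustrative stand-ins, not Paddle's API.

import numpy as np

def gather_outlink(step_outputs, sel_rows, copy_sizes):
    # step_outputs: one 2-D array per time step of the layer group.
    # sel_rows[t]:   start row to read from step_outputs[t] (the kind of
    #                index createDataOutlinkSelRowsInfo decides).
    # copy_sizes[t]: number of rows to copy from that start position (the
    #                kind of count createDataOutlinkCopySizeInfo decides;
    #                1 for a non-sequence outlink, the sub-sequence length
    #                for a sequence outlink, mirroring the isSeq flag).
    blocks = [step_outputs[t][sel_rows[t]:sel_rows[t] + copy_sizes[t]]
              for t in range(len(step_outputs))]
    # Concatenating the dispersed blocks yields the single Argument-like
    # output that createDataOutlink assembles.
    return np.concatenate(blocks, axis=0)

# Two time steps: take 1 row from the first matrix, 2 rows from the second.
steps = [np.arange(6.0).reshape(3, 2), np.arange(8.0).reshape(4, 2)]
print(gather_outlink(steps, sel_rows=[1, 0], copy_sizes=[1, 2]))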

paddle/parameter/Argument.cpp

Lines changed: 2 additions & 2 deletions
@@ -352,8 +352,8 @@ void Argument::concat(const std::vector<Argument>& args,
   CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
   for (int j = startPos; j < endPos; ++j) {
     const Argument& arg = args[j - startPos];
-    CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have the "
-                                 << "same dataId";
+    CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
+                                 << "the same dataId.";
     const int srcStartRow = selectRows[j];
     copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
     copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
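The reworded message names the precondition explicitly: every Argument passed to concat must carry the same dataId before any rows are copied. A small Python analogue of that guard, using an assumed stand-in type (Arg below is not Paddle's Argument):

from dataclasses import dataclass
import numpy as np

@dataclass
class Arg:
    data_id: int
    value: np.ndarray

def concat_args(args):
    data_id = args[0].data_id
    for arg in args:
        # Mirrors CHECK_EQ(arg.dataId, dataId): refuse to mix Arguments
        # that come from different data streams.
        assert arg.data_id == data_id, \
            "Arguments to concatenate should have the same dataId."
    return np.concatenate([a.value for a in args], axis=0)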

python/paddle/trainer_config_helpers/networks.py

Lines changed: 2 additions & 2 deletions
@@ -1375,9 +1375,9 @@ def simple_attention(encoded_sequence,
         weight=attention_weight,
         input=encoded_sequence,
         name='%s_scaling' % name)
+
     return pooling_layer(
-        input=scaled, pooling_type=SumPooling(),
-        name="%s_pooling" % name), attention_weight
+        input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
 
 
 def inputs(layers, *args):
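This hunk changes simple_attention's return type: the attention weights are no longer returned alongside the sum-pooled context, so callers that unpacked a two-element tuple must now take a single layer. A hedged usage sketch; parameter names other than encoded_sequence, and the placeholder variables, are assumptions here, not confirmed by this diff:

# Before this commit:
#     context, attention_weight = simple_attention(...)
# After it, only the sum-pooled context layer is returned:
context = simple_attention(
    encoded_sequence=encoded_sequence,   # placeholder layer variables
    encoded_proj=encoded_proj,
    decoder_state=decoder_state)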
