Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 78 additions & 32 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum,
false,
/* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
}
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1,
Expand All @@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
} else {
oneWaySearch(numSequences);
}
if (dataArgsSize_) createDataOutlink(batchMachineIdVec_);
if (dataArgsSize_) createDataOutlink();

size_t size = generator_.ids.size();
generator_.outArg.ids->resize(size);
Expand Down Expand Up @@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
}

batchMachineIdVec_.clear();
batchMachineStartPos_.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
generator_.ids.clear();
Expand Down Expand Up @@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
finalPaths_[i].resize(minFinalPathsSize);
}

batchMachineIdVec_.clear();
generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
if (numResults > 1) {
real* probs = generator_.outArg.in->getData();
int idsProbSaveSize = 0;
for (auto inSeq : finalPaths_) {
for (auto path : inSeq) idsProbSaveSize += path.ids.size();
idsProbSaveSize += inSeq.size();
}
Matrix::resizeOrCreate(
generator_.outArg.value, idsProbSaveSize, 1, false, false);
real* idsProb = generator_.outArg.value->getData();

real* probs = generator_.outArg.in->getData();
size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
Expand All @@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
curPos += genLen;
idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb;

if (!j && dataArgsSize_) {
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(),
path.machineIdVec.end());
}
}
starts[i + 1] = generator_.ids.size();
}
} else {
for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty());
generator_.ids.insert(generator_.ids.begin(),
finalPaths_[i][0].ids.begin(),
finalPaths_[i][0].ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
Path& path = finalPaths_[i][0];
generator_.ids.insert(
generator_.ids.begin(), path.ids.begin(), path.ids.end());
starts[i + 1] = starts[i] + path.ids.size();
}
}
}
Expand All @@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
}
}

void RecurrentGradientMachine::createDataOutlink(
std::vector<int>& machineIdVec) {
size_t seqNum =
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0);
for (size_t i = 0; i < seqNum; ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
: finalPaths_[0][i].ids.size();
starts[i + 1] = starts[i] + seqLen;
/*
 * @brief Rebuild batchMachineIdVec_ by walking every path in finalPaths_ in
 * (outer index, inner index) order and appending one entry per element of
 * the path's machineIdVec.
 *
 * @param isSeq: when false, each path's machineIdVec is appended unchanged.
 * When true, the k-th entry of machineIdVec is used as an index into
 * outArgs[k].sequenceStartPositions and the value found there is appended
 * instead.
 * @param outArgs: Arguments indexed by position within a path; only their
 * sequenceStartPositions are read, and only when isSeq is true.
 * NOTE(review): presumably one Argument per generation time step -- confirm
 * against the caller.
 */
void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
    bool isSeq, std::vector<Argument>& outArgs) {
  batchMachineIdVec_.clear();

  for (const auto& paths : finalPaths_) {
    for (const auto& path : paths) {
      const std::vector<int>& machineIdVec = path.machineIdVec;

      if (!isSeq) {
        // Non-sequence output: the recorded ids are used as they are.
        batchMachineIdVec_.insert(
            batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
        continue;
      }

      // Sequence output: translate each recorded id through the start
      // positions of the Argument at the same position. A dedicated index
      // name is used so that it cannot be confused with any outer index.
      for (size_t step = 0; step < machineIdVec.size(); ++step) {
        const size_t rowId = machineIdVec[step];
        const int* seqPos =
            outArgs[step].sequenceStartPositions->getMutableData(false);
        batchMachineIdVec_.push_back(seqPos[rowId]);
      }
    }
  }
}

/*
 * @brief Fill copySize and batchMachineStartPos_ for one data outlink.
 *
 * copySize gets one entry per path in finalPaths_ (default 1).
 * batchMachineStartPos_ gets one more entry than that and holds the running
 * sum of the paths' ids.size().
 *
 * @param isSeq: when true, copySize[k] is set to the length of one input
 * sequence, taken from outArgs[0].sequenceStartPositions; when false,
 * copySize keeps its default of 1.
 * @param outArgs: only outArgs[0].sequenceStartPositions is read, and only
 * when isSeq is true.
 * @param copySize: output; resized to the total number of paths.
 */
void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
    bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
  // Seed with a size_t so the accumulator has the same width as the lambda's
  // operands on every platform.
  const size_t totalSeqNum = std::accumulate(
      finalPaths_.begin(),
      finalPaths_.end(),
      static_cast<size_t>(0),
      [](size_t a, const std::vector<Path>& b) { return a + b.size(); });
  copySize.resize(totalSeqNum, 1);

  // resize() only value-initializes newly added elements; entries kept from
  // a previous call are overwritten below where the loops reach them.
  batchMachineStartPos_.resize(totalSeqNum + 1, 0);

  if (isSeq) {
    const bool isBeamSearch = getBeamSize() > 1;
    ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
    // The number of input sequences must equal finalPaths_.size() when the
    // beam size is larger than 1, and finalPaths_[0].size() otherwise.
    CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
             isBeamSearch ? finalPaths_.size() : finalPaths_[0].size());
    const int* starts = inputSeqStartPos->getMutableData(false);

    size_t seqId = 0;
    for (size_t i = 0; i < finalPaths_.size(); ++i) {
      for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
        // With a beam the input sequence is selected by the outer index,
        // otherwise by the inner one.
        copySize[seqId] = isBeamSearch ? starts[i + 1] - starts[i]
                                       : starts[j + 1] - starts[j];
        batchMachineStartPos_[seqId + 1] =
            batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
        ++seqId;
      }
    }
  } else {
    // NOTE(review): only the paths in finalPaths_[0] are visited here, while
    // totalSeqNum counts the paths of every entry of finalPaths_. Confirm
    // that finalPaths_ has a single entry whenever this branch runs.
    for (size_t i = 0; i < finalPaths_[0].size(); ++i) {
      batchMachineStartPos_[i + 1] =
          batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
    }
  }
}

void RecurrentGradientMachine::createDataOutlink() {
for (size_t i = 0; i < dataArgsSize_; i++) {
bool isSeq = dataArgsFrame_[i][0].hasSeq();
std::vector<int> copySize;
createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);

dataArgs_[i].concat(dataArgsFrame_[i],
machineIdVec,
starts,
batchMachineIdVec_,
batchMachineStartPos_,
copySize,
useGpu_,
HPPL_STREAM_1,
PASS_TEST);

auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent);
Expand Down
47 changes: 38 additions & 9 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
std::vector<int> ids;

/**
* @brief idsProb, log probability of each generated words.
* @brief idsProb, log probability of each generated word.
*/
std::vector<real> idsProb;

Expand Down Expand Up @@ -472,15 +472,43 @@ class RecurrentGradientMachine : public NeuralNetwork {
void copyDataOutlinkFrame(size_t machineCur);

/*
* @brief In generation, if the layer group has more than 1 outlink, outlinks
* except the first one are data outlinks. This function creates the data
* outlinks.
* @note In beam search, only one generated sequence with the hightest log
* probabilites are retained.
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
* @brief In generation, if the layer group has more than 1 outlink, every
* outlink except the first one is a data outlink. In RecurrentLayerGroup, each
* time step is a separate Network, so the outputs of a layer inside the
* RecurrentLayerGroup are stored in separate Arguments. If a layer is
* specified as an outlink of RecurrentLayerGroup, this function collects
* its outputs at each time step of each generated sequence, which are
* dispersed across separate Arguments, to form a single new Argument as the
* output of RecurrentLayerGroup.
*/
void createDataOutlink(std::vector<int>& machineIdVec);
void createDataOutlink();

/*
* @brief decide how many rows to select, from a given start position, from
* the Matrix that stores the forward pass results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
* @param copySize: the returned result, the number of rows to select, from a
* given start position, from the Matrix that stores the forward pass results.
*/
void createDataOutlinkCopySizeInfo(bool isSeq,
std::vector<Argument>& outArgs,
std::vector<int>& copySize);

/*
* @brief decide the index of the start row for each time step of a generated
* sequence in the Matrix that stores the entire beam search batch's forward
* pass results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
*/
void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);

/*
* @brief used in beam search, connect previous frame to form recurrent link
Expand Down Expand Up @@ -543,6 +571,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
std::vector<int> topIds_;
std::vector<int> seqIds_;
std::vector<int> batchMachineIdVec_;
std::vector<int> batchMachineStartPos_;
std::vector<std::vector<Path>> finalPaths_;
std::vector<real> minFinalPathLogProb_;
BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;
Expand Down
52 changes: 29 additions & 23 deletions paddle/parameter/Argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
void Argument::concat(const std::vector<Argument>& args,
const std::vector<int>& selectRows,
const std::vector<int>& seqStartPos,
const std::vector<int>& copySize,
bool useGpu,
hl_stream_t stream,
PassType passType) {
CHECK(!subSequenceStartPositions)
<< "undefined behavior for subsequence positions";

size_t batchSize = selectRows.size();
size_t batchSize = 0;
for (size_t i = 0; i < copySize.size(); ++i)
batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]);

auto copyArg = [batchSize, stream](MatrixPtr& dst,
MatrixPtr src,
int startRow,
int pos,
int desStartRow,
int srcStartRow,
int size,
bool useGpu) {
if (!src) {
Expand All @@ -300,28 +304,29 @@ void Argument::concat(const std::vector<Argument>& args,
dst->resize(batchSize, width);
}

MatrixPtr tmpMatrix = dst->subMatrix(startRow, size);
tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream);
MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size);
tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream);
};

auto copyIds = [batchSize, stream](IVectorPtr& dst,
const IVectorPtr& src,
int startRow,
int pos,
int desStartRow,
int srcStartRow,
int size,
bool useGpu) {
if (!src) {
dst.reset();
return;
}
IVector::resizeOrCreate(dst, batchSize, useGpu);
dst->subVec(startRow, size)->copyFrom(*src->subVec(pos, size), stream);
dst->subVec(desStartRow, size)
->copyFrom(*src->subVec(srcStartRow, size), stream);
};

auto copyStrs = [batchSize, stream](SVectorPtr& dst,
const SVectorPtr& src,
int startRow,
int pos,
int desStartRow,
int srcStartRow,
int size,
bool useGpu) {
if (!src) {
Expand All @@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args,
} else {
dst->resize(batchSize);
}
std::copy(
src->begin() + pos, src->begin() + pos + size, dst->begin() + startRow);
std::copy(src->begin() + srcStartRow,
src->begin() + srcStartRow + size,
dst->begin() + desStartRow);
};

dataId = args[0].dataId;
CHECK_NE(seqStartPos.size(), 0UL);
size_t sampleNum = seqStartPos.size() - 1;
for (size_t i = 0; i < sampleNum; ++i) {
int desStartRow = 0;
for (size_t i = 0; i < copySize.size(); ++i) {
int startPos = seqStartPos[i];
int endPos = seqStartPos[i + 1];
CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
for (int j = startPos; j < endPos; ++j) {
const Argument& arg = args[j - startPos];
CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have"
<< " same dataId";
const int copySize = 1;
const int rowIdx = selectRows[j];
copyArg(in, arg.in, j, rowIdx, copySize, useGpu);
copyArg(value, arg.value, j, rowIdx, copySize, useGpu);
CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
<< "the same dataId.";
const int srcStartRow = selectRows[j];
copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
if (passType != PASS_TEST) {
copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu);
copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu);
}
copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu);
copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu);
copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu);
copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu);
desStartRow += copySize[i];
}
}
ICpuGpuVector::resizeOrCreate(
Expand Down
1 change: 1 addition & 0 deletions paddle/parameter/Argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ struct Argument {
void concat(const std::vector<Argument>& args,
const std::vector<int>& selectRows,
const std::vector<int>& seqStartPos,
const std::vector<int>& copySize,
bool useGpu,
hl_stream_t stream,
PassType passType);
Expand Down