Skip to content

Nmt model #7340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jan 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
ba088c1
init nmt
Superjomn Nov 22, 2017
c5da31a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 4, 2017
f5e4cb6
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 4, 2017
19dee22
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 4, 2017
a95984b
encoder ready
Superjomn Dec 4, 2017
ec24923
only generation implementation
Superjomn Dec 5, 2017
fffceb8
Merge branch 'feature/nmt-on-while' into feature/nmt-model
Superjomn Dec 5, 2017
3be6422
init python
Superjomn Dec 6, 2017
cf56456
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 6, 2017
d9a321c
remove decoder temporary
Superjomn Dec 6, 2017
4a0567f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 7, 2017
a5b9399
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 7, 2017
d068644
add implementation of decoder
Superjomn Dec 11, 2017
c5275a8
merged
Superjomn Dec 11, 2017
d5784f3
add
Superjomn Dec 11, 2017
a823d46
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 11, 2017
20a105e
clean code
Superjomn Dec 11, 2017
71f5c72
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 18, 2017
9128bb6
restore op_registry.h
Superjomn Dec 18, 2017
8648790
restore op_registry.h
Superjomn Dec 18, 2017
70a4af4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 19, 2017
98dec8f
fix fluid python bugs
Superjomn Dec 20, 2017
eb252c4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Dec 21, 2017
ec42a61
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Superjomn Jan 2, 2018
2088ba2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 9, 2018
8856253
fix beam_search_op
jacquesqiao Jan 9, 2018
f8abb6d
add debug, fix many bugs
jacquesqiao Jan 10, 2018
2520de4
rm enforce
jacquesqiao Jan 10, 2018
8798e09
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 17, 2018
3b6dae6
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 21, 2018
1a5f5fb
beam search can run
jacquesqiao Jan 21, 2018
29aaf32
add sequence expand
jacquesqiao Jan 22, 2018
8ed6d95
clean log
jacquesqiao Jan 22, 2018
16751a0
clean code
jacquesqiao Jan 22, 2018
cd58943
clean debug code
jacquesqiao Jan 22, 2018
0009343
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 23, 2018
dd128f5
clean code
jacquesqiao Jan 23, 2018
3972cbc
clean debug code
jacquesqiao Jan 23, 2018
52e6c17
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Jan 23, 2018
e2e966d
return LoDTensor
jacquesqiao Jan 23, 2018
81a09bd
triger pre-commit
jacquesqiao Jan 23, 2018
06373a7
fix style check
jacquesqiao Jan 23, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/design/ops/sequence_decoder.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ The current `LoDTensor` is designed to store levels of variable-length sequences
The integers in each level represent the begin and end (not inclusive) offset of a sequence **in the underlying tensor**,
let's call this format the **absolute-offset LoD** for clarity.

The relative-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
The absolute-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
```python
[[0, 3, 9]
[0, 2, 3, 3, 3, 9]]
Expand Down Expand Up @@ -119,7 +119,7 @@ def generate():
encoder_ctx_expanded = pd.lod_expand(encoder_ctx, target_word)
decoder_input = pd.fc(
act=pd.activation.Linear(),
input=[target_word, encoder_ctx],
input=[target_word, encoder_ctx_expanded],
size=3 * decoder_dim)
gru_out, cur_mem = pd.gru_step(
decoder_input, mem=decoder_mem, size=decoder_dim)
Expand Down
3 changes: 2 additions & 1 deletion paddle/framework/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,

for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
VLOG(3) << op->DebugStringEx(local_scope);
VLOG(4) << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_);
VLOG(3) << op->DebugStringEx(local_scope);
if (FLAGS_do_memory_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
Expand Down
7 changes: 4 additions & 3 deletions paddle/framework/lod_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ LoD ToAbsOffset(const LoD &in) {
// the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in;
LoD result = in;
for (int level = result.size() - 2; level >= 0; level--) {
for (auto &ele : result[level]) {
ele = result[level + 1][ele];
for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
for (size_t i = 0; i < in[level].size(); ++i) {
size_t index = in[level][i];
result[level][i] = result[level + 1][index];
}
}
return result;
Expand Down
83 changes: 74 additions & 9 deletions paddle/operators/beam_search_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,18 @@ namespace operators {
void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
framework::LoDTensor *selected_ids,
framework::LoDTensor *selected_scores) {
auto abs_lod = framework::ToAbsOffset(ids_->lod());
auto &high_level = abs_lod[lod_level_];

auto items = SelectTopBeamSizeItems();
auto selected_items = ToMap(items);
auto selected_items = ToMap(items, high_level.back());
VLOG(3) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(3) << "offset:" << i;
for (auto &item : selected_items[i]) {
VLOG(3) << ItemToString(item);
}
}
PruneEndidCandidates(pre_ids, &selected_items);
// calculate the output tensor's height
size_t num_instances = std::accumulate(
Expand Down Expand Up @@ -63,11 +73,12 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
low_level.push_back(low_offset);

// fill lod
auto abs_lod = framework::ToAbsOffset(ids_->lod());
auto &high_level = abs_lod[lod_level_];
framework::LoD lod(2);
lod[0].assign(high_level.begin(), high_level.end());
lod[1].assign(low_level.begin(), low_level.end());
if (!framework::CheckLoD(lod)) {
PADDLE_THROW("lod %s is not right", framework::LoDToString(lod));
}
selected_ids->set_lod(lod);
selected_scores->set_lod(lod);
}
Expand All @@ -90,13 +101,11 @@ int BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids,
}

std::vector<std::vector<BeamSearch::Item>> BeamSearch::ToMap(
const std::vector<std::vector<Item>> &items) {
const std::vector<std::vector<Item>> &items, size_t element_num) {
std::vector<std::vector<Item>> result;
result.resize(element_num);
for (auto &entries : items) {
for (const auto &item : entries) {
if (item.offset >= result.size()) {
result.resize(item.offset + 1);
}
result[item.offset].push_back(item);
}
}
Expand All @@ -122,6 +131,14 @@ BeamSearch::SelectTopBeamSizeItems() {
}
result.emplace_back(items);
}
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) {
VLOG(3) << "item set:";
for (auto &item : items) {
VLOG(3) << ItemToString(item);
}
}

return result;
}

Expand Down Expand Up @@ -159,6 +176,22 @@ bool BeamSearch::NextItemSet(std::vector<BeamSearch::Item> *items) {
return true;
}

std::ostream &operator<<(std::ostream &os, const BeamSearch::Item &item) {
  // Render an Item as "{offset: O, id: I, score: S}" for debug logging
  // (used by ItemToString and the VLOG dumps in BeamSearch::operator()).
  os << "{";
  os << "offset: " << item.offset << ", ";
  os << "id: " << item.id << ", ";
  // NOTE: dropped a no-op `<< ""` that followed the score; output unchanged.
  os << "score: " << item.score;
  os << "}";

  return os;
}

std::string ItemToString(const BeamSearch::Item &item) {
  // Materialize the streamed form of `item` (see operator<< above) so it
  // can be handed to logging macros that expect a string.
  std::ostringstream os;
  os << item;
  return os.str();
}

class BeamSearchProtoAndCheckerMaker
: public framework::OpProtoAndCheckerMaker {
public:
Expand Down Expand Up @@ -186,8 +219,40 @@ class BeamSearchProtoAndCheckerMaker
}
};

class BeamSearchInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    // Beam search consumes the previous step's selections plus the current
    // candidates, and must be wired to both result slots; enforce that every
    // argument is present before shape inference proceeds.
    const std::vector<std::string> input_args{"pre_ids", "ids", "scores"};
    for (const std::string &arg : input_args) {
      PADDLE_ENFORCE(context->HasInput(arg),
                     "BeamSearch need input argument '%s'", arg);
    }
    const std::vector<std::string> output_args{"selected_ids",
                                               "selected_scores"};
    for (const std::string &arg : output_args) {
      PADDLE_ENFORCE(context->HasOutput(arg),
                     "BeamSearch need output argument '%s'", arg);
    }
  }
};

class BeamSearchInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc &op_desc,
                  framework::BlockDesc *block) const override {
    // Both beam-search outputs carry LoD information, so mark every variable
    // bound to either output slot as a LoDTensor in the block description.
    for (const std::string &out_name : {"selected_ids", "selected_scores"}) {
      for (const auto &var_name : op_desc.Output(out_name)) {
        block->Var(var_name)->SetType(framework::proto::VarDesc::LOD_TENSOR);
      }
    }
  }
};

} // namespace operators
} // namespace paddle

REGISTER_OP_WITHOUT_GRADIENT(beam_search, paddle::operators::BeamSearchOp,
paddle::operators::BeamSearchProtoAndCheckerMaker);
REGISTER_OPERATOR(beam_search, paddle::operators::BeamSearchOp,
paddle::operators::BeamSearchProtoAndCheckerMaker,
paddle::operators::BeamSearchInferShape,
paddle::operators::BeamSearchInferVarType,
paddle::framework::EmptyGradOpMaker);
14 changes: 6 additions & 8 deletions paddle/operators/beam_search_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ class BeamSearch {
void operator()(const framework::LoDTensor& pre_ids,
framework::LoDTensor* selected_ids,
framework::LoDTensor* selected_scores);

protected:
/*
* The basic items help to sort.
*/
Expand All @@ -155,6 +153,7 @@ class BeamSearch {
score_t score;
};

protected:
/*
* Delete all the records that follows the end token.
*/
Expand All @@ -166,7 +165,7 @@ class BeamSearch {
* NOTE low performance
*/
std::vector<std::vector<Item>> ToMap(
const std::vector<std::vector<Item>>& inputs);
const std::vector<std::vector<Item>>& inputs, size_t element_num);

/*
* For each source, select top beam_size records.
Expand All @@ -187,6 +186,10 @@ class BeamSearch {
int end_id_{0};
};

std::ostream& operator<<(std::ostream& os, const BeamSearch::Item& item);

std::string ItemToString(const BeamSearch::Item& item);

class BeamSearchOp : public framework::OperatorBase {
public:
BeamSearchOp(const std::string& type,
Expand All @@ -203,7 +206,6 @@ class BeamSearchOp : public framework::OperatorBase {

void Run(const framework::Scope& scope,
const platform::Place& dev_place) const override {
LOG(INFO) << "run beam search op";
auto ids_var = scope.FindVar(Input("ids"));
auto scores_var = scope.FindVar(Input("scores"));
auto pre_ids_var = scope.FindVar(Input("pre_ids"));
Expand All @@ -217,10 +219,8 @@ class BeamSearchOp : public framework::OperatorBase {
size_t level = Attr<int>("level");
size_t beam_size = Attr<int>("beam_size");
int end_id = Attr<int>("end_id");
LOG(INFO) << "init beam search";
BeamSearch alg(ids, scores, level, beam_size, end_id);

LOG(INFO) << "after beam search";
auto selected_ids_var = scope.FindVar(Output("selected_ids"));
auto selected_scores_var = scope.FindVar(Output("selected_scores"));
PADDLE_ENFORCE_NOT_NULL(selected_ids_var);
Expand All @@ -229,9 +229,7 @@ class BeamSearchOp : public framework::OperatorBase {
*selected_ids_var->GetMutable<framework::LoDTensor>();
auto& selected_scores_tensor =
*selected_scores_var->GetMutable<framework::LoDTensor>();
LOG(INFO) << "run beam search";
alg(pre_ids, &selected_ids_tensor, &selected_scores_tensor);
LOG(INFO) << "finish beam search";
}
};

Expand Down
1 change: 1 addition & 0 deletions paddle/operators/sequence_expand_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
const T* x_data = x->data<T>();
auto x_dims = x->dims();
auto* y = context.Input<LoDTensor>("Y");
PADDLE_ENFORCE(!y->lod().empty(), "y should have lod");
PADDLE_ENFORCE_EQ(static_cast<size_t>(x_dims[0]),
y->lod().back().size() - 1,
"The size of last lod level in Input(Y)"
Expand Down
7 changes: 4 additions & 3 deletions paddle/operators/top_k_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;

template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
Expand All @@ -33,9 +34,9 @@ class TopkKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override {
// Get the top k elements of each row of input tensor
// FIXME: only deal with matrix(2d tensor).
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
auto* indices = ctx.Output<Tensor>("Indices");
auto* input = ctx.Input<LoDTensor>("X");
auto* output = ctx.Output<LoDTensor>("Out");
auto* indices = ctx.Output<LoDTensor>("Indices");
// k is determined by Attr
const size_t k = static_cast<int>(ctx.Attr<int>("k"));

Expand Down
3 changes: 2 additions & 1 deletion python/paddle/v2/fluid/layer_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def input_dtype(self, input_param_name='input'):
if dtype is None:
dtype = each.dtype
elif dtype != each.dtype:
raise ValueError("Data Type mismatch")
raise ValueError("Data Type mismatch: %d to %d" %
(dtype, each.dtype))
return dtype

def create_parameter(self,
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/v2/fluid/layers/control_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def topk(input, k):
array = fluid.layers.topk(x, k)
"""
helper = LayerHelper('topk', **locals())
topk_out = helper.create_tmp_variable(dtype=input.data_type)
topk_out = helper.create_tmp_variable(dtype=input.dtype)
topk_indices = helper.create_tmp_variable(dtype='int64')
helper.append_op(
type='top_k',
Expand Down
39 changes: 35 additions & 4 deletions python/paddle/v2/fluid/layers/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
'sequence_reshape',
'transpose',
'nce',
'beam_search',
]


Expand Down Expand Up @@ -162,10 +163,8 @@ def fc(input,
tmp = helper.create_tmp_variable(dtype)
helper.append_op(
type="mul",
inputs={
"X": input_var,
"Y": w,
},
inputs={"X": input_var,
"Y": w},
outputs={"Out": tmp},
attrs={"x_num_col_dims": num_flatten_dims,
"y_num_col_dims": 1})
Expand Down Expand Up @@ -1534,6 +1533,38 @@ def sequence_expand(x, y, name=None):
return tmp


def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0):
    '''
    This function implements the beam search algorithm.

    It wraps the C++ `beam_search` operator: at each decoding step the op
    keeps, per source sequence, the top-scoring candidate ids (pruning
    candidates that follow `end_id`) and emits them together with their
    scores as new LoD tensors.

    Args:
        pre_ids (Variable): ids selected at the previous decoding step;
            used by the op to prune finished candidates.
        ids (Variable): candidate ids for the current step.
        scores (Variable): scores accompanying `ids` — presumably the
            (accumulated) log-probabilities of the candidates; confirm
            against the C++ op implementation.
        beam_size (int): number of candidates kept per source sequence.
        end_id (int): token id that marks the end of a generated sequence.
        level (int): LoD level of `ids` consumed by the op. Only the
            default value 0 is assured to work for now (see TODO below).

    Returns:
        tuple: `(selected_ids, selected_scores)` — temporary variables
        holding the chosen candidate ids and their corresponding scores.
    '''
    helper = LayerHelper('beam_search', **locals())
    # Outputs mirror the dtypes of the corresponding inputs.
    score_type = scores.dtype
    id_type = ids.dtype

    selected_scores = helper.create_tmp_variable(dtype=score_type)
    selected_ids = helper.create_tmp_variable(dtype=id_type)

    helper.append_op(
        type='beam_search',
        inputs={
            'pre_ids': pre_ids,
            'ids': ids,
            'scores': scores,
        },
        outputs={
            'selected_ids': selected_ids,
            'selected_scores': selected_scores,
        },
        attrs={
            # TODO(ChunweiYan) to assure other value support
            'level': level,
            'beam_size': beam_size,
            'end_id': end_id,
        })

    return selected_ids, selected_scores


def lstm_unit(x_t,
hidden_t_prev,
cell_t_prev,
Expand Down
Loading