Skip to content

Add Inference example and unit test for rnn_encoder_decoder #8176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions paddle/framework/block_desc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,8 @@ BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
: prog_(prog), desc_(desc) {
need_update_ = true;
for (auto &op : other.ops_) {
ops_.emplace_back(new OpDesc(*op, this));
ops_.emplace_back(new OpDesc(*op->Proto(), prog, this));
}

for (auto &it : other.vars_) {
auto *var = new VarDesc(*it.second);
vars_[it.first].reset(var);
Expand Down
5 changes: 2 additions & 3 deletions paddle/framework/op_desc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,10 @@ OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog, BlockDesc *block)
// restore attrs_
for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
std::string attr_name = attr.name();
// The sub_block referred to by the BLOCK attr hasn't been added
// to ProgramDesc class yet, we skip setting BLOCK attr here.
if (attr.type() != proto::AttrType::BLOCK) {
attrs_[attr_name] = GetAttrValue(attr);
} else {
auto bid = attr.block_idx();
attrs_[attr_name] = prog->MutableBlock(bid);
}
}
this->block_ = block;
Expand Down
21 changes: 20 additions & 1 deletion paddle/framework/program_desc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,37 @@ ProgramDesc::ProgramDesc() {

// Copy constructor. Copies the proto of `o`, builds one BlockDesc per proto
// block from the matching block of `o`, and only then walks every op to set
// its BLOCK attributes via MutableBlock() on this program. The second pass
// runs after all blocks have been created, so every index can be resolved.
ProgramDesc::ProgramDesc(const ProgramDesc &o) {
  desc_ = o.desc_;

  for (int idx = 0; idx < desc_.blocks_size(); ++idx) {
    auto *proto_block = desc_.mutable_blocks(idx);
    blocks_.emplace_back(new BlockDesc(*o.blocks_[idx], proto_block, this));
  }

  for (auto &owned_block : blocks_) {
    for (auto *op_desc : owned_block->AllOps()) {
      for (const auto &attribute : op_desc->Proto()->attrs()) {
        if (attribute.type() != proto::AttrType::BLOCK) {
          continue;
        }
        size_t target_idx = attribute.block_idx();
        op_desc->SetBlockAttr(attribute.name(), *MutableBlock(target_idx));
      }
    }
  }
}

// Builds a ProgramDesc from its protobuf form. The proto is copied into
// desc_, each proto block is wrapped in a BlockDesc, and afterwards every
// BLOCK-typed attribute found on an op is set through SetBlockAttr() using
// the block returned by MutableBlock() for the stored index.
ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) {
  desc_ = desc;

  for (auto &proto_block : *desc_.mutable_blocks()) {
    blocks_.emplace_back(new BlockDesc(this, &proto_block));
  }

  for (auto &owned_block : blocks_) {
    for (auto *op_desc : owned_block->AllOps()) {
      for (const auto &attribute : op_desc->Proto()->attrs()) {
        if (attribute.type() != proto::AttrType::BLOCK) {
          continue;
        }
        size_t target_idx = attribute.block_idx();
        op_desc->SetBlockAttr(attribute.name(), *MutableBlock(target_idx));
      }
    }
  }
}

ProgramDesc::ProgramDesc(const std::string &binary_str) {
Expand Down
84 changes: 67 additions & 17 deletions paddle/framework/prune.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,28 @@ bool IsTarget(const proto::OpDesc& op_desc) {
return false;
}

void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
// Returns the block index carried by the first BLOCK-typed attribute of
// op_desc, or -1 when the op has no BLOCK attribute. Enforces that the
// attribute actually has a block_idx set before reading it.
int GetSubBlockIndex(const proto::OpDesc& op_desc) {
  for (const auto& attribute : op_desc.attrs()) {
    if (attribute.type() != proto::AttrType::BLOCK) {
      continue;
    }
    PADDLE_ENFORCE(attribute.has_block_idx());
    return attribute.block_idx();
  }
  return -1;
}

// True when op_desc has a BLOCK attribute whose index is strictly positive.
// Both "no BLOCK attribute" (-1) and index 0 yield false.
bool HasSubBlock(const proto::OpDesc& op_desc) {
  const int sub_block_idx = GetSubBlockIndex(op_desc);
  return sub_block_idx > 0;
}

// block_id is the idx of the current block in the input desc
// parent_block_id is the idx of the parent of the current block
// in the output desc, -1 means the current block is global block
// dependent_vars is passed recursively from the parent block to
// the child block to help pruning
void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
int block_id, int parent_block_id,
std::set<std::string>& dependent_vars) {
auto& block = input.blocks(block_id);
auto& ops = block.ops();

Expand All @@ -72,19 +89,16 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
expect_fetch = (op_desc.type() == kFetchOpType);
}

std::set<std::string> dependent_vars;
std::vector<bool> should_run;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;

if (IsTarget(op_desc) || HasDependentVar(op_desc, dependent_vars)) {
// insert its input to the dependency graph
for (auto& var : op_desc.inputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.insert(argu);
}
}

should_run.push_back(true);
} else {
should_run.push_back(false);
Expand All @@ -95,45 +109,81 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());

*output = input;
auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
// copy the current block from input to output
auto* block_field = output->mutable_blocks();
*block_field->Add() = input.blocks(block_id);

int output_block_id = output->blocks_size() - 1;
auto* output_block = output->mutable_blocks(output_block_id);
output_block->set_idx(output_block_id);
output_block->set_parent_idx(parent_block_id);

auto* op_field = output_block->mutable_ops();
op_field->Clear();
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
*op_field->Add() = input.blocks(block_id).ops(i);
auto* op = op_field->Add();
*op = input.blocks(block_id).ops(i);
if (HasSubBlock(*op)) {
// create sub_block_dependent_vars here to help prune the sub block
std::set<std::string> sub_block_dependent_vars;
for (auto& var : op->inputs()) {
for (auto& argu : var.arguments()) {
sub_block_dependent_vars.insert(argu);
}
}
for (auto& var : op->outputs()) {
for (auto& argu : var.arguments()) {
sub_block_dependent_vars.insert(argu);
}
}
// GetSubBlockIndex(*op) is the idx of the sub_block in the input desc
// output_block_id is the idx of the current block in the output desc
prune_impl(input, output, GetSubBlockIndex(*op), output_block_id,
sub_block_dependent_vars);
}
}
}

// remove the VarDescs in BlockDesc that are not referenced in
// the pruned OpDescs
std::unordered_map<std::string, proto::VarDesc> var_map;
auto* var_field = output->mutable_blocks(block_id)->mutable_vars();
auto* var_field = output->mutable_blocks(output_block_id)->mutable_vars();
for (const auto& var : *var_field) {
var_map[var.name()] = var;
}

var_field->Clear();
std::set<std::string> var_names;
for (const auto& op : *op_field) {
// add VarDescs of all input arguments for each OpDesc
auto& input_field = op.inputs();
for (auto& input_var : input_field) {
for (auto& arg : input_var.arguments()) {
*var_field->Add() = var_map[arg];
if (var_map.count(arg) != 0) {
var_names.insert(arg);
}
}
}
// add VarDescs of all output arguments for each OpDesc
auto& output_field = op.outputs();
for (auto& output_var : output_field) {
for (auto& arg : output_var.arguments()) {
*var_field->Add() = var_map[arg];
if (var_map.count(arg) != 0) {
var_names.insert(arg);
}
}
}
}

var_field->Clear();
for (const auto& name : var_names) {
*var_field->Add() = var_map[name];
}
}

// TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
// Prunes `input` into `output`, starting from block 0 of `input`.
// NOTE(review): this diff view interleaves the removed 3-argument
// prune_impl call with the added lines; only the 5-argument form matches the
// prune_impl signature shown earlier in this file — confirm which lines are
// live before editing.
void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
prune_impl(input, output, 0);
// Top-level call starts with no dependent variables.
std::set<std::string> dependent_vars;
// Start from an empty block list; prune_impl appends the blocks it keeps.
output->clear_blocks();
// Block 0 is passed parent index -1, which the prune_impl comment describes
// as meaning "the current block is global block".
prune_impl(input, output, 0, -1, dependent_vars);
}

void inference_optimize_impl(const proto::ProgramDesc& input,
Expand Down
1 change: 1 addition & 0 deletions paddle/inference/tests/book/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ endfunction(inference_test)
inference_test(recognize_digits ARGS mlp)
inference_test(image_classification ARGS vgg resnet)
inference_test(label_semantic_roles)
inference_test(rnn_encoder_decoder)
67 changes: 67 additions & 0 deletions paddle/inference/tests/book/test_inference_rnn_encoder_decoder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "test_helper.h"

DEFINE_string(dirname, "", "Directory of the inference model.");

// Inference test for the rnn_encoder_decoder model stored under --dirname.
// Feeds two LoDTensors that share one LoD, runs inference on CPU, and, when
// built with CUDA, runs it again on GPU and compares the two outputs.
TEST(inference, rnn_encoder_decoder) {
  using paddle::framework::LoDTensor;

  if (FLAGS_dirname.empty()) {
    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
  }

  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
  std::string dirname = FLAGS_dirname;

  // Step 0: device initialization through
  // `paddle::framework::InitDevices()` is not done here; in unit tests it is
  // performed by paddle/testing/paddle_gtest_main.cc.

  LoDTensor word_data;
  LoDTensor trg_word;
  paddle::framework::LoD lod{{0, 4, 10}};

  // Both feed tensors are set up with the same LoD and the same int64 bounds.
  SetupLoDTensor(
      word_data, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
  SetupLoDTensor(
      trg_word, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));

  std::vector<LoDTensor*> cpu_feeds{&word_data, &trg_word};

  LoDTensor output1;
  std::vector<LoDTensor*> cpu_fetchs1{&output1};

  // CPU pass
  TestInference<paddle::platform::CPUPlace, float>(
      dirname, cpu_feeds, cpu_fetchs1);
  LOG(INFO) << output1.lod();
  LOG(INFO) << output1.dims();

#ifdef PADDLE_WITH_CUDA
  LoDTensor output2;
  std::vector<LoDTensor*> cpu_fetchs2{&output2};

  // CUDA GPU pass, using the same feeds as the CPU pass
  TestInference<paddle::platform::CUDAPlace, float>(
      dirname, cpu_feeds, cpu_fetchs2);
  LOG(INFO) << output2.lod();
  LOG(INFO) << output2.dims();

  // Compare the CPU result against the GPU result.
  CheckError<float>(output1, output2);
#endif
}
Loading