Merge pull request PaddlePaddle#41 from jiweibo/dev/lite_engine
add lite_engine_test and lite_engine_op_test
Shixiaowei02 committed Oct 21, 2019
2 parents 623ec28 + fe0c182 commit 6150ae0
Showing 10 changed files with 367 additions and 65 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/inference/lite/CMakeLists.txt
@@ -2,5 +2,5 @@ cc_binary(test_leaky_relu SRCS test_leaky_relu.cc DEPS lite_full_static dynload_
cc_library(lite_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_predictor SRCS test_predictor.cc DEPS lite_engine paddle_fluid)
18 changes: 7 additions & 11 deletions paddle/fluid/inference/lite/engine.cc
@@ -15,20 +15,16 @@
#define LITE_WITH_CUDA 1

#include "paddle/fluid/inference/lite/engine.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"

namespace paddle {
namespace inference {
namespace lite {

bool EngineManager::Empty() const {
return engines_.size() == 0;
}
bool EngineManager::Empty() const { return engines_.size() == 0; }

bool EngineManager::Has(const std::string& name) const {
if (engines_.count(name) == 0) {
return false;
return false;
}
return engines_.at(name).get() != nullptr;
}
@@ -37,12 +33,12 @@ paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.memory_from_memory);
paddle::lite::Env<TARGET(kCUDA)>::Init();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places,
cfg.neglected_passes, cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;
}
11 changes: 8 additions & 3 deletions paddle/fluid/inference/lite/engine.h
@@ -15,8 +15,10 @@
#pragma once

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "lite/api/cxx_api.h"

@@ -31,18 +33,21 @@ struct EngineConfig {
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool memory_from_memory{true};
bool model_from_memory{true};
};

class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, const EngineConfig& cfg);
paddle::lite::Predictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();

private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> engines_;
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
engines_;
};

} // namespace lite
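For orientation, here is a minimal sketch (not part of this diff) of how the EngineManager interface above is meant to be driven. The "demo_engine" key and the model/param buffers are placeholders; real buffers come from a serialized ProgramDesc, as in the updated test_engine.cc further down.

#include <string>

#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

// Sketch only: exercises the EngineManager API declared above.
void DemoEngineManagerUsage(const std::string& model_buf,
                            const std::string& param_buf) {
  namespace inflite = paddle::inference::lite;
  inflite::EngineConfig config;
  config.model = model_buf;         // serialized __model__ contents
  config.param = param_buf;         // serialized parameter contents
  config.model_from_memory = true;  // buffers rather than file paths
  config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
  config.valid_places = {
      paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
  };

  auto& manager =
      paddle::inference::Singleton<inflite::EngineManager>::Global();
  paddle::lite::Predictor* engine = manager.Create("demo_engine", config);
  if (manager.Has("demo_engine") && engine != nullptr) {
    engine->Run();  // inputs would be filled via engine->GetInput(i) first
  }
  manager.DeleteAll();  // releases every cached predictor
}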
55 changes: 40 additions & 15 deletions paddle/fluid/inference/lite/tensor_utils.cc
@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <map>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <map>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/engine.h"

namespace paddle {
namespace inference {
@@ -40,7 +40,20 @@ platform::Place GetNativePlace(const TargetType& type) {
}
}

framework::proto::VarType::Type GetNativePrecisionType(const PrecisionType& type) {
PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType_Type_FP32:
return PrecisionType::kFloat;
case framework::proto::VarType_Type_INT8:
return PrecisionType::kInt8;
default:
LOG(FATAL) << "Error precision type.";
return PrecisionType::kUnk;
}
}

framework::proto::VarType::Type GetNativePrecisionType(
const PrecisionType& type) {
switch (type) {
case PrecisionType::kFloat:
return framework::proto::VarType_Type_FP32;
@@ -63,22 +76,27 @@ framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) {
}

void MemoryCopy(const platform::Place& dst_place, void* dst_data,
const platform::Place& src_place, const void* src_data, const size_t size) {
const platform::Place& src_place, const void* src_data,
const size_t size) {
const platform::CPUPlace cpu_place;
const platform::CUDAPlace gpu_place;
if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, cpu_place, src_data, size);
} else {
#ifdef PADDLE_WITH_CUDA
// get device context from pool
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &ctx = *pool.Get(platform::CUDAPlace());
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) && platform::is_gpu_place(src_place)) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& ctx = *pool.Get(platform::CUDAPlace());
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, gpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_cpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_cpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, cpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_gpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, gpu_place, src_data, size, stream);
}
#else
@@ -87,9 +105,14 @@ void MemoryCopy(const platform::Place& dst_place, void* dst_data,
}
}

} // namespace
} // namespace

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid) {
lite->set_precision(GetLitePrecisionType(fluid.type()));
}

template<>
template <>
void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
@@ -98,10 +121,11 @@ void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(size);
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(src.type()));
}

template<>
template <>
void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
@@ -110,7 +134,8 @@ void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const size_t size = static_cast<size_t>(src.numel());
const void* src_data = src.raw_data();
void* dst_data = dst->mutable_data(dst_place, dst->type());
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(dst->type()));
}

} // namespace lite
5 changes: 4 additions & 1 deletion paddle/fluid/inference/lite/tensor_utils.h
@@ -14,9 +14,9 @@

#pragma once

#include "paddle/fluid/framework/tensor.h"
#include "lite/api/paddle_place.h"
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace inference {
@@ -25,6 +25,9 @@ namespace lite {
template <typename DstTensor, typename SrcTensor>
void TensorCopy(DstTensor* dst, const SrcTensor& src);

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid);

} // namespace lite
} // namespace inference
} // namespace paddle
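For context, a minimal sketch (not part of this change) of how InitLiteTensorType and TensorCopy are intended to be used together when feeding a fluid LoDTensor into a lite Tensor. The shape and the wrapper name are illustrative only.

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"

// Sketch only: the intended call pattern for the helpers declared above.
void DemoFluidToLiteCopy(paddle::lite::Tensor* lite_tensor) {
  paddle::framework::LoDTensor fluid_tensor;
  fluid_tensor.Resize(paddle::framework::make_ddim({2, 4}));
  fluid_tensor.mutable_data<float>(paddle::platform::CPUPlace());

  // Align the lite tensor's precision with the fluid tensor's dtype first,
  // then copy shape, LoD and data across the framework boundary.
  paddle::inference::lite::InitLiteTensorType(lite_tensor, fluid_tensor);
  paddle::inference::lite::TensorCopy(lite_tensor, fluid_tensor);
}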
92 changes: 68 additions & 24 deletions paddle/fluid/inference/lite/test_engine.cc
@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ios>
#include <fstream>
#include <gtest/gtest.h>
#include <fstream>
#include <ios>

#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
@@ -23,44 +23,88 @@
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace lite {

namespace {

std::string read_file(const std::string &file) {
std::ifstream ifs(file.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
std::ifstream::pos_type file_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<char> bytes(file_size);
ifs.read(bytes.data(), file_size);
return std::string(bytes.data(), file_size);
void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
const std::string& name,
const std::vector<int64_t>& shape) {
using framework::proto::VarType;
auto* var = block->add_vars();
framework::VarDesc desc(name);
desc.SetType(VarType::LOD_TENSOR);
desc.SetDataType(VarType::FP32);
desc.SetShape(shape);
*var = *desc.Proto();
}

} // namespace
void make_fake_model(std::string* model, std::string* param) {
framework::ProgramDesc program;
auto* block_ = program.Proto()->mutable_blocks(0);
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
LOG(INFO) << "create feed op";
auto* feed0 = block_desc.AppendOp();
feed0->SetType("feed");
feed0->SetInput("X", {"feed"});
feed0->SetOutput("Out", {"x"});
feed0->SetAttr("col", 1);
AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4, 1, 1}));
*block_->add_ops() = *feed0->Proto();
ASSERT_EQ(block_->ops_size(), 1);
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
*model = program.Proto()->SerializeAsString();
}

} // namespace

TEST(EngineManager, Create) {
const std::string unique_key("engine_0");
const std::string model_dir = "/shixiaowei02/models/tmp/__model__";
TEST(EngineManager, manual) {
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
true);

inference::lite::EngineConfig config;
config.model = read_file(model_dir);
config.param = "";
config.prefer_place = {TARGET(kCUDA), PRECISION(kFloat)};
make_fake_model(&(config.model), &(config.param));

const std::string unique_key("engine_0");
config.model_from_memory = true;
config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
config.valid_places = {
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
};

inference::Singleton<inference::lite::EngineManager>::Global()
.Create(unique_key, config);
/*
paddle::lite::Predictor* engine = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>(unique_key));
*/
LOG(INFO) << "Create EngineManager";
inference::Singleton<inference::lite::EngineManager>::Global().Create(
unique_key, config);
LOG(INFO) << "Create EngineManager done";
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
false);
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);

CHECK_NOTNULL(engine_0);
inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key) == nullptr)
<< "the engine_0 should be nullptr";
}

} // namespace lite
1 change: 1 addition & 0 deletions paddle/fluid/operators/lite/CMakeLists.txt
@@ -1 +1,2 @@
op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
28 changes: 18 additions & 10 deletions paddle/fluid/operators/lite/lite_engine_op.h
@@ -38,18 +38,19 @@ class LiteEngineOp : public framework::OperatorBase {
private:
std::vector<std::string> in_names_;
std::vector<std::string> out_names_;
paddle::lite::Predictor* engine_;
paddle::lite::Predictor *engine_;

public:
LiteEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
in_names_ = Inputs("Xs");
out_names_ = Outputs("Ys");
engine_ = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>("engine_key"));
engine_ =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
Attr<std::string>("engine_key"));
}

protected:
@@ -61,15 +62,22 @@ class LiteEngineOp : public framework::OperatorBase {
void Execute(const framework::Scope &scope,
const platform::Place &dev_place) const {
for (size_t i = 0; i < in_names_.size(); i++) {
const framework::LoDTensor& src_t = inference::analysis::GetFromScope<framework::LoDTensor>(scope, in_names_[i]);
paddle::lite::Tensor* dst_t = engine_->GetInput(i);
const framework::LoDTensor &src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i);
inference::lite::InitLiteTensorType(dst_t, src_t);
inference::lite::TensorCopy(dst_t, src_t);
}
engine_->Run();
cudaDeviceSynchronize();
for (size_t i = 0; i < out_names_.size(); i++) {
const paddle::lite::Tensor& src_t = *(engine_->GetOutput(i));
framework::LoDTensor* dst_t = &inference::analysis::GetFromScope<framework::LoDTensor>(scope, out_names_[i]);
const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]);
inference::lite::InitLiteTensorType(
&const_cast<paddle::lite::Tensor &>(src_t), *dst_t);
inference::lite::TensorCopy(dst_t, src_t);
}
}
