Merge pull request PaddlePaddle#41 from jiweibo/dev/lite_engine
add lite_engine_test and lite_engine_op_test
Shixiaowei02 committed Oct 21, 2019
2 parents 623ec28 + fe0c182 commit 6150ae0
Showing 10 changed files with 367 additions and 65 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/inference/lite/CMakeLists.txt
@@ -2,5 +2,5 @@ cc_binary(test_leaky_relu SRCS test_leaky_relu.cc DEPS lite_full_static dynload_
cc_library(lite_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_predictor SRCS test_predictor.cc DEPS lite_engine paddle_fluid)
18 changes: 7 additions & 11 deletions paddle/fluid/inference/lite/engine.cc
@@ -15,20 +15,16 @@
#define LITE_WITH_CUDA 1

#include "paddle/fluid/inference/lite/engine.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"

namespace paddle {
namespace inference {
namespace lite {

bool EngineManager::Empty() const {
return engines_.size() == 0;
}
bool EngineManager::Empty() const { return engines_.size() == 0; }

bool EngineManager::Has(const std::string& name) const {
if (engines_.count(name) == 0) {
return false;
return false;
}
return engines_.at(name).get() != nullptr;
}
@@ -37,12 +33,12 @@ paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.memory_from_memory);
paddle::lite::Env<TARGET(kCUDA)>::Init();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places,
cfg.neglected_passes, cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;
}
11 changes: 8 additions & 3 deletions paddle/fluid/inference/lite/engine.h
@@ -15,8 +15,10 @@
#pragma once

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "lite/api/cxx_api.h"

@@ -31,18 +33,21 @@ struct EngineConfig {
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool memory_from_memory{true};
bool model_from_memory{true};
};

class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, const EngineConfig& cfg);
paddle::lite::Predictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();

private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> engines_;
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
engines_;
};

} // namespace lite
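For orientation, here is a minimal sketch (not part of this diff) of how the EngineManager interface above is meant to be driven. The "demo_engine" key and the model/param buffers are placeholders; real buffers come from a serialized ProgramDesc, as in the updated test_engine.cc further down.

#include <string>

#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

// Sketch only: exercises the EngineManager API declared above.
void DemoEngineManagerUsage(const std::string& model_buf,
                            const std::string& param_buf) {
  namespace inflite = paddle::inference::lite;
  inflite::EngineConfig config;
  config.model = model_buf;         // serialized __model__ contents
  config.param = param_buf;         // serialized parameter contents
  config.model_from_memory = true;  // buffers rather than file paths
  config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
  config.valid_places = {
      paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
  };

  auto& manager =
      paddle::inference::Singleton<inflite::EngineManager>::Global();
  paddle::lite::Predictor* engine = manager.Create("demo_engine", config);
  if (manager.Has("demo_engine") && engine != nullptr) {
    engine->Run();  // inputs would be filled via engine->GetInput(i) first
  }
  manager.DeleteAll();  // releases every cached predictor
}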
55 changes: 40 additions & 15 deletions paddle/fluid/inference/lite/tensor_utils.cc
@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <map>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <map>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/engine.h"

namespace paddle {
namespace inference {
@@ -40,7 +40,20 @@ platform::Place GetNativePlace(const TargetType& type) {
}
}

framework::proto::VarType::Type GetNativePrecisionType(const PrecisionType& type) {
PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType_Type_FP32:
return PrecisionType::kFloat;
case framework::proto::VarType_Type_INT8:
return PrecisionType::kInt8;
default:
LOG(FATAL) << "Error precision type.";
return PrecisionType::kUnk;
}
}

framework::proto::VarType::Type GetNativePrecisionType(
const PrecisionType& type) {
switch (type) {
case PrecisionType::kFloat:
return framework::proto::VarType_Type_FP32;
@@ -63,22 +76,27 @@ framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) {
}

void MemoryCopy(const platform::Place& dst_place, void* dst_data,
const platform::Place& src_place, const void* src_data, const size_t size) {
const platform::Place& src_place, const void* src_data,
const size_t size) {
const platform::CPUPlace cpu_place;
const platform::CUDAPlace gpu_place;
if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, cpu_place, src_data, size);
} else {
#ifdef PADDLE_WITH_CUDA
// get device context from pool
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &ctx = *pool.Get(platform::CUDAPlace());
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) && platform::is_gpu_place(src_place)) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& ctx = *pool.Get(platform::CUDAPlace());
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, gpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_cpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_cpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, cpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_gpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, gpu_place, src_data, size, stream);
}
#else
@@ -87,9 +105,14 @@ void MemoryCopy(const platform::Place& dst_place, void* dst_data,
}
}

} // namespace
} // namespace

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid) {
lite->set_precision(GetLitePrecisionType(fluid.type()));
}

template<>
template <>
void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
@@ -98,10 +121,11 @@ void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(size);
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(src.type()));
}

template<>
template <>
void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
@@ -110,7 +134,8 @@ void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const size_t size = static_cast<size_t>(src.numel());
const void* src_data = src.raw_data();
void* dst_data = dst->mutable_data(dst_place, dst->type());
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(dst->type()));
}

} // namespace lite
5 changes: 4 additions & 1 deletion paddle/fluid/inference/lite/tensor_utils.h
@@ -14,9 +14,9 @@

#pragma once

#include "paddle/fluid/framework/tensor.h"
#include "lite/api/paddle_place.h"
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace inference {
@@ -25,6 +25,9 @@ namespace lite {
template <typename DstTensor, typename SrcTensor>
void TensorCopy(DstTensor* dst, const SrcTensor& src);

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid);

} // namespace lite
} // namespace inference
} // namespace paddle
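For context, a minimal sketch (not part of this change) of how InitLiteTensorType and TensorCopy are intended to be used together when feeding a fluid LoDTensor into a lite Tensor. The shape and the wrapper name are illustrative only.

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"

// Sketch only: the intended call pattern for the helpers declared above.
void DemoFluidToLiteCopy(paddle::lite::Tensor* lite_tensor) {
  paddle::framework::LoDTensor fluid_tensor;
  fluid_tensor.Resize(paddle::framework::make_ddim({2, 4}));
  fluid_tensor.mutable_data<float>(paddle::platform::CPUPlace());

  // Align the lite tensor's precision with the fluid tensor's dtype first,
  // then copy shape, LoD and data across the framework boundary.
  paddle::inference::lite::InitLiteTensorType(lite_tensor, fluid_tensor);
  paddle::inference::lite::TensorCopy(lite_tensor, fluid_tensor);
}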
92 changes: 68 additions & 24 deletions paddle/fluid/inference/lite/test_engine.cc
@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ios>
#include <fstream>
#include <gtest/gtest.h>
#include <fstream>
#include <ios>

#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
@@ -23,44 +23,88 @@
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace lite {

namespace {

std::string read_file(const std::string &file) {
std::ifstream ifs(file.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
std::ifstream::pos_type file_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<char> bytes(file_size);
ifs.read(bytes.data(), file_size);
return std::string(bytes.data(), file_size);
void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
const std::string& name,
const std::vector<int64_t>& shape) {
using framework::proto::VarType;
auto* var = block->add_vars();
framework::VarDesc desc(name);
desc.SetType(VarType::LOD_TENSOR);
desc.SetDataType(VarType::FP32);
desc.SetShape(shape);
*var = *desc.Proto();
}

} // namespace
void make_fake_model(std::string* model, std::string* param) {
framework::ProgramDesc program;
auto* block_ = program.Proto()->mutable_blocks(0);
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
LOG(INFO) << "create feed op";
auto* feed0 = block_desc.AppendOp();
feed0->SetType("feed");
feed0->SetInput("X", {"feed"});
feed0->SetOutput("Out", {"x"});
feed0->SetAttr("col", 1);
AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4, 1, 1}));
*block_->add_ops() = *feed0->Proto();
ASSERT_EQ(block_->ops_size(), 1);
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
*model = program.Proto()->SerializeAsString();
}

} // namespace

TEST(EngineManager, Create) {
const std::string unique_key("engine_0");
const std::string model_dir = "/shixiaowei02/models/tmp/__model__";
TEST(EngineManager, manual) {
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
true);

inference::lite::EngineConfig config;
config.model = read_file(model_dir);
config.param = "";
config.prefer_place = {TARGET(kCUDA), PRECISION(kFloat)};
make_fake_model(&(config.model), &(config.param));

const std::string unique_key("engine_0");
config.model_from_memory = true;
config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
config.valid_places = {
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
};

inference::Singleton<inference::lite::EngineManager>::Global()
.Create(unique_key, config);
/*
paddle::lite::Predictor* engine = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>(unique_key));
*/
LOG(INFO) << "Create EngineManager";
inference::Singleton<inference::lite::EngineManager>::Global().Create(
unique_key, config);
LOG(INFO) << "Create EngineManager done";
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
false);
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);

CHECK_NOTNULL(engine_0);
inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key) == nullptr)
<< "the engine_0 should be nullptr";
}

} // namespace lite
1 change: 1 addition & 0 deletions paddle/fluid/operators/lite/CMakeLists.txt
@@ -1 +1,2 @@
op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
28 changes: 18 additions & 10 deletions paddle/fluid/operators/lite/lite_engine_op.h
@@ -38,18 +38,19 @@ class LiteEngineOp : public framework::OperatorBase {
private:
std::vector<std::string> in_names_;
std::vector<std::string> out_names_;
paddle::lite::Predictor* engine_;
paddle::lite::Predictor *engine_;

public:
LiteEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
in_names_ = Inputs("Xs");
out_names_ = Outputs("Ys");
engine_ = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>("engine_key"));
engine_ =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
Attr<std::string>("engine_key"));
}

protected:
@@ -61,15 +62,22 @@ class LiteEngineOp : public framework::OperatorBase {
void Execute(const framework::Scope &scope,
const platform::Place &dev_place) const {
for (size_t i = 0; i < in_names_.size(); i++) {
const framework::LoDTensor& src_t = inference::analysis::GetFromScope<framework::LoDTensor>(scope, in_names_[i]);
paddle::lite::Tensor* dst_t = engine_->GetInput(i);
const framework::LoDTensor &src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i);
inference::lite::InitLiteTensorType(dst_t, src_t);
inference::lite::TensorCopy(dst_t, src_t);
}
engine_->Run();
cudaDeviceSynchronize();
for (size_t i = 0; i < out_names_.size(); i++) {
const paddle::lite::Tensor& src_t = *(engine_->GetOutput(i));
framework::LoDTensor* dst_t = &inference::analysis::GetFromScope<framework::LoDTensor>(scope, out_names_[i]);
const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]);
inference::lite::InitLiteTensorType(
&const_cast<paddle::lite::Tensor &>(src_t), *dst_t);
inference::lite::TensorCopy(dst_t, src_t);
}
}
