11 changes: 5 additions & 6 deletions cmake/anakin_subgraph.cmake
@@ -1,7 +1,3 @@
if(NOT WITH_GPU)
return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
@@ -16,9 +12,7 @@ find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
DOC "Path to ANAKIN library.")

if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
if(WITH_DSO)
set(ANAKIN_FOUND ON)
endif(WITH_DSO)
else()
set(ANAKIN_FOUND OFF)
endif()
@@ -31,3 +25,8 @@ if(ANAKIN_FOUND)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()

if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
message(STATUS "Compile with anakin subgraph.")
set(ANAKIN_SUBGRAPH ON)
endif()
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
@@ -77,7 +77,7 @@ pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference)

if(ANAKIN_FOUND)
if(ANAKIN_SUBGRAPH)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
endif()

8 changes: 6 additions & 2 deletions paddle/fluid/inference/CMakeLists.txt
@@ -17,7 +17,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()

if (ANAKIN_FOUND)
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()

@@ -43,11 +43,15 @@ if(WITH_MKLDNN)
endif()

set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor)
if (ANAKIN_FOUND)
set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
endif()
set(SHARED_INFERENCE_SRCS
io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${mkldnn_quantizer_src}
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
${ANAKIN_SHARED_INFERENCE_SRCS})

if(WIN32)
sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
2 changes: 1 addition & 1 deletion paddle/fluid/inference/anakin/convert/elementwise.cc
@@ -60,7 +60,7 @@ void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Prod";
std::string elementwise_type = "Mul";
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
std::vector<float> coeff = {1.0, 1.0};
27 changes: 16 additions & 11 deletions paddle/fluid/inference/anakin/convert/op_converter.h
@@ -153,11 +153,12 @@ template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;

#ifdef ANAKIN_X86_PLACE
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
@@ -203,16 +204,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
CPU, ::anakin::saber::X86, precision_type__, \
::anakin::Precision::precision_type__)

#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#else
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#endif

#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
@@ -221,12 +222,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
__attribute__((unused)) = \
Touch_anakin_##op_type__##_##place_type__##_##precision_type__();

#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)

#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#endif
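
A minimal usage sketch (not part of this patch), mirroring test_relu_op.cc further down and assuming a CUDA build: USE_ANAKIN_CONVERTER now expands to the CUDA registration alone, or to CUDA plus X86 when ANAKIN_X86_PLACE is also defined, so callers no longer need a separate USE_CPU_ANAKIN_CONVERTER line.

// Sketch only; assumes the relu converter is registered via
// REGISTER_ANAKIN_OP_CONVERTER in the converter library.
#include "paddle/fluid/inference/anakin/convert/op_converter.h"

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(relu);       // CUDA FP32; also X86 FP32 if ANAKIN_X86_PLACE is defined
USE_INT8_ANAKIN_CONVERTER(relu);  // same place selection at INT8 precision
#endif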
32 changes: 0 additions & 32 deletions paddle/fluid/inference/anakin/convert/test_activation_op.cc
@@ -77,32 +77,6 @@ TEST(swish_op, gpu) {
}
#endif

/*
TEST(sigm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false);
}

TEST(tanh_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
}

TEST(relu6_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("relu6", ctx, false);
}

TEST(swish_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("swish", ctx, false);
}
*/

} // namespace anakin
} // namespace inference
} // namespace paddle
@@ -112,13 +86,7 @@ USE_OP(tanh);
USE_OP(relu6);
USE_OP(swish);

USE_CPU_ANAKIN_CONVERTER(sigmoid);
USE_CPU_ANAKIN_CONVERTER(tanh);
USE_CPU_ANAKIN_CONVERTER(relu6);
USE_CPU_ANAKIN_CONVERTER(swish);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(relu6);
USE_ANAKIN_CONVERTER(swish);
#endif
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
@@ -57,19 +57,16 @@ TEST(affine_channel_op, gpu) {
test_affine_channel_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(affine_channel_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_affine_channel_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(affine_channel);
USE_CPU_ANAKIN_CONVERTER(affine_channel);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(affine_channel);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
@@ -73,19 +73,15 @@ TEST(batch_norm_op, gpu) {
test_batchnorm_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(batch_norm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_batchnorm_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(batch_norm);
USE_CPU_ANAKIN_CONVERTER(batch_norm);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(batch_norm);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_concat_op.cc
@@ -53,19 +53,15 @@ TEST(concat_op, gpu) {
test_concat_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(concat_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_concat_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(concat);
USE_CPU_ANAKIN_CONVERTER(concat);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(concat);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
@@ -60,20 +60,16 @@ TEST(conv2d_op, gpu) {
test_conv2d_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(conv2d_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_conv2d_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(conv2d);
USE_CPU_ANAKIN_CONVERTER(conv2d);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(conv2d);
#endif
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_dropout_op.cc
@@ -54,19 +54,16 @@ TEST(dropout_op, gpu) {
test_dropout_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(dropout_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_dropout_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(dropout);
USE_CPU_ANAKIN_CONVERTER(dropout);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(dropout);
#endif
10 changes: 2 additions & 8 deletions paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
@@ -59,29 +59,23 @@ TEST(elementwise_op, native_mul_gpu) {
test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(elementwise_op, native_add_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false);
}

TEST(elementwise_op, native_mul_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(elementwise_add);
USE_OP(elementwise_mul);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
#endif

USE_CPU_ANAKIN_CONVERTER(elementwise_add);
USE_CPU_ANAKIN_CONVERTER(elementwise_mul);
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_fc_op.cc
@@ -49,19 +49,16 @@ TEST(mul_op, gpu) {
test_mul_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(mul_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_mul_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(mul);
USE_CPU_ANAKIN_CONVERTER(fc);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(fc);
#endif
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_flatten_op.cc
@@ -48,20 +48,17 @@ TEST(flatten_op, gpu) {
test_flatten_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(flatten_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_flatten_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(reshape);
USE_OP_ITSELF(flatten);
USE_CPU_ANAKIN_CONVERTER(flatten);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(flatten);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
@@ -87,7 +87,7 @@ TEST(Pool2dOpConverter, avg_ceil_test) {
test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg");
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(Pool2dOpConverter, normal_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
@@ -110,14 +110,10 @@ TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg");
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(pool2d);
USE_CPU_ANAKIN_CONVERTER(pool2d);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(pool2d);
#endif
5 changes: 0 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_relu_op.cc
@@ -66,10 +66,5 @@ TEST(leaky_relu_op, gpu) {

USE_OP(relu);
USE_OP(leaky_relu);
USE_CPU_ANAKIN_CONVERTER(relu);
USE_CPU_ANAKIN_CONVERTER(leaky_relu);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(leaky_relu);
#endif