Newest #29

Merged · 12 commits · Aug 12, 2022
17 changes: 11 additions & 6 deletions FastDeploy.cmake.in
@@ -31,7 +31,7 @@ if(ENABLE_PADDLE_BACKEND)
find_library(PADDLE_LIB paddle_inference ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/paddle/lib NO_DEFAULT_PATH)
if(WIN32)
set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/mkldnn.lib")
-set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.lib")
+set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5md.lib")
elseif(APPLE)
set(DNNL_LIB "")
set(IOMP_LIB "")
@@ -57,12 +57,17 @@ if(WITH_GPU)
list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})

if (ENABLE_TRT_BACKEND)
-find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH)
-find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH)
-find_library(TRT_CAFFE_LIB nvcaffe_parser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH)
-find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib NO_DEFAULT_PATH)
-list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
+find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+if (NOT WIN32)
+  find_library(TRT_CAFFE_LIB nvcaffe_parser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+  list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
+else()
+  list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
+endif()
endif()

endif()

if(ENABLE_VISION)
222 changes: 65 additions & 157 deletions README.md

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions csrc/fastdeploy/backends/ort/ort_backend.cc
@@ -58,8 +58,6 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
<< std::endl;
option_.use_gpu = false;
} else {
-FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
-                                 std::to_string(option.gpu_id) + ".");
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = option.gpu_id;
session_options_.AppendExecutionProvider_CUDA(cuda_options);
8 changes: 6 additions & 2 deletions csrc/fastdeploy/function/eigen.h
@@ -104,7 +104,9 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
int num_col_dims) {
int rank = tensor.shape.size();
FDASSERT((num_col_dims > 0 && num_col_dims < rank),
"Input dimension number(num_col_dims).");
"Input dimension number(num_col_dims) must be between 0 and %d, "
"but received number is %d.",
rank, num_col_dims);
const int n = SizeToAxis(num_col_dims, tensor.shape);
const int d = SizeFromAxis(num_col_dims, tensor.shape);
return EigenMatrix::From(tensor, {n, d});
@@ -114,7 +116,9 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
int num_col_dims) {
int rank = tensor.shape.size();
FDASSERT((num_col_dims > 0 && num_col_dims < rank),
"Input dimension number(num_col_dims).");
"Input dimension number(num_col_dims) must be between 0 and %d, "
"but received number is %d.",
rank, num_col_dims);
const int n = SizeToAxis(num_col_dims, tensor.shape);
const int d = SizeFromAxis(num_col_dims, tensor.shape);
return EigenMatrix::From(tensor, {n, d});
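Side note on the numbers these new messages print: n and d come from splitting the shape at num_col_dims. Below is a standalone sketch of that arithmetic; SizeToAxis and SizeFromAxis are re-implemented here from their apparent semantics at the Reshape call sites (an assumption, not a copy of the real helpers).

#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed semantics, mirrored from the Reshape call sites above: product of
// the dims before the split axis (rows) and from the split axis on (columns).
int64_t SizeToAxis(int axis, const std::vector<int64_t>& dims) {
  int64_t size = 1;
  for (int i = 0; i < axis; ++i) size *= dims[i];
  return size;
}

int64_t SizeFromAxis(int axis, const std::vector<int64_t>& dims) {
  int64_t size = 1;
  for (size_t i = axis; i < dims.size(); ++i) size *= dims[i];
  return size;
}

int main() {
  const std::vector<int64_t> shape = {2, 3, 4};
  const int num_col_dims = 2;  // must satisfy 0 < num_col_dims < rank (3)
  // n = 2 * 3 = 6 and d = 4, so Reshape views the rank-3 tensor as a
  // 6 x 4 Eigen matrix.
  std::printf("n=%lld d=%lld\n",
              static_cast<long long>(SizeToAxis(num_col_dims, shape)),
              static_cast<long long>(SizeFromAxis(num_col_dims, shape)));
  return 0;
}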
154 changes: 151 additions & 3 deletions csrc/fastdeploy/function/reduce.cc
@@ -14,6 +14,7 @@

#include "fastdeploy/function/reduce.h"

+#include <limits>
#include <set>

#include "fastdeploy/function/eigen.h"
@@ -215,9 +216,139 @@
}
reduce_all = (reduce_all || full_dim);

-FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] {
-                     ReduceKernelImpl<data_t, Functor>(x, out, dims, keep_dim,
-                                                       reduce_all);
+FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ReduceKernelImpl", ([&] {
+                           ReduceKernelImpl<data_t, Functor>(
+                               x, out, dims, keep_dim, reduce_all);
}));
}

enum ArgMinMaxType { kArgMin, kArgMax };

template <typename T, typename Tout, int64_t Rank, ArgMinMaxType argMinMaxValue>
struct ArgMinMaxFunctor {};

#define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \
template <typename T, typename Tout, int64_t Rank> \
struct ArgMinMaxFunctor<T, Tout, Rank, enum_argminmax_value> { \
void operator()(const FDTensor& in, FDTensor* out, \
const std::vector<int64_t>& x_dims, int64_t axis, \
bool keepdims, bool flatten) { \
const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); \
auto in_eigen = EigenTensor<T, Rank>::From(in, x_dims); \
if (keepdims) { \
if (!flatten) { \
auto out_eigen = EigenTensor<Tout, Rank>::From(*out); \
out_eigen.device(dev) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} else { \
auto out_eigen = EigenScalar<Tout>::From(*out); \
out_eigen.device(dev) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} \
} else { \
auto out_eigen = EigenTensor<Tout, Rank - 1>::From(*out); \
out_eigen.device(dev) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} \
} \
}

DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin);
DECLARE_ARG_MIN_MAX_FUNCTOR(argmax, ArgMinMaxType::kArgMax);

template <typename T, typename Tout, ArgMinMaxType EnumArgMinMaxValue>
void ArgMinMaxKernel(const FDTensor& x, FDTensor* out, int64_t axis,
bool keepdims, bool flatten) {
bool new_keepdims = keepdims | flatten;
// if flatten, construct the new dims for the calculation
std::vector<int64_t> x_dims;
int new_axis = axis;
if (flatten) {
x_dims = {x.Numel()};
// if flatten, the axis is simply 0
new_axis = 0;
} else {
x_dims = x.shape;
if (axis < 0) new_axis = axis + x_dims.size();
}
#define CALL_ARG_MINMAX_FUNCTOR(rank) \
ArgMinMaxFunctor<T, Tout, rank, EnumArgMinMaxValue> functor##rank; \
functor##rank(x, out, x_dims, new_axis, new_keepdims, flatten)

switch (x_dims.size()) {
case 1:
CALL_ARG_MINMAX_FUNCTOR(1);
break;
case 2:
CALL_ARG_MINMAX_FUNCTOR(2);
break;
case 3:
CALL_ARG_MINMAX_FUNCTOR(3);
break;
case 4:
CALL_ARG_MINMAX_FUNCTOR(4);
break;
case 5:
CALL_ARG_MINMAX_FUNCTOR(5);
break;
case 6:
CALL_ARG_MINMAX_FUNCTOR(6);
break;
default:
FDASSERT(x_dims.size() <= 6,
"%s operator doesn't supports tensors whose ranks are greater "
"than 6.",
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax"));
break;
#undef CALL_ARG_MINMAX_FUNCTOR
}
}

template <typename T, ArgMinMaxType EnumArgMinMaxValue>
void ArgMinMax(const FDTensor& x, FDTensor* out, int64_t axis,
FDDataType output_dtype, bool keepdims, bool flatten) {
const auto& x_dims = x.shape;
int64_t x_rank = x_dims.size();
FDASSERT(axis >= -x_rank,
"'axis'(%d) must be greater than or equal to -Rank(X)(%d).", axis,
-x_rank);
FDASSERT(axis < x_rank,
"'axis'(%d) must be less than or equal to Rank(X)(%d).", axis,
x_rank);
FDASSERT(output_dtype == FDDataType::INT32 || output_dtype == FDDataType::INT64,
"The attribute of dtype in argmin/argmax must be [%s] or [%s], but "
"received [%s].",
Str(FDDataType::INT32), Str(FDDataType::INT64), Str(output_dtype));
if (axis < 0) axis += x_rank;
if (output_dtype == FDDataType::INT32) {
int64_t all_element_num = 0;
if (flatten) {
all_element_num = x.Numel();

} else {
all_element_num = x_dims[axis];
}
FDASSERT(all_element_num <= std::numeric_limits<int>::max(),
"The element num of the argmin/argmax input at axis is "
"%d, is larger than int32 maximum value:%d, you must "
"set the dtype of argmin/argmax to 'int64'.",
all_element_num, std::numeric_limits<int>::max());
}
std::vector<int64_t> vec;
if (flatten) {
vec.emplace_back(static_cast<int64_t>(1));
} else {
for (int64_t i = 0; i < axis; i++) vec.emplace_back(x_dims[i]);
if (keepdims) {
vec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = axis + 1; i < x_rank; i++) vec.emplace_back(x_dims[i]);
}
out->Allocate(vec, output_dtype);

FD_VISIT_INT_TYPES(output_dtype, "ArgMinMaxKernel", ([&] {
ArgMinMaxKernel<T, data_t, EnumArgMinMaxValue>(
x, out, axis, keepdims, flatten);
}));
}

@@ -255,6 +386,23 @@ void Prod(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<ProdFunctor>(x, out, dims, keep_dim, reduce_all);
}

void ArgMax(const FDTensor& x, FDTensor* out, int64_t axis,
FDDataType output_dtype, bool keep_dim, bool flatten) {
FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] {
ArgMinMax<data_t, kArgMax>(
x, out, axis, output_dtype, keep_dim, flatten);
}));
}

void ArgMin(const FDTensor& x, FDTensor* out, int64_t axis,
FDDataType output_dtype, bool keep_dim, bool flatten) {
FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMaxKernel", ([&] {
ArgMinMax<data_t, kArgMin>(
x, out, axis, output_dtype, keep_dim, flatten);
}));
}

#endif

} // namespace fastdeploy
28 changes: 28 additions & 0 deletions csrc/fastdeploy/function/reduce.h
@@ -96,5 +96,33 @@ FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);

/** Execute the argmax operation on the input FDTensor along the given axis.
@param x The input tensor.
@param out The output tensor which stores the result.
@param axis The axis which will be reduced.
@param output_dtype The data type of output FDTensor, INT64 or INT32,
default to INT64.
@param keep_dim Whether to keep the reduced dims, default false.
@param flatten Whether to flatten FDTensor to get the argmax index, default
false.
*/
FASTDEPLOY_DECL void ArgMax(const FDTensor& x, FDTensor* out, int64_t axis,
FDDataType output_dtype = FDDataType::INT64,
bool keep_dim = false, bool flatten = false);

/** Execute the argmin operation on the input FDTensor along the given axis.
@param x The input tensor.
@param out The output tensor which stores the result.
@param axis The axis which will be reduced.
@param output_dtype The data type of output FDTensor, INT64 or INT32,
default to INT64.
@param keep_dim Whether to keep the reduced dims, default false.
@param flatten Whether to flatten FDTensor to get the argmin index, default
false.
*/
FASTDEPLOY_DECL void ArgMin(const FDTensor& x, FDTensor* out, int64_t axis,
FDDataType output_dtype = FDDataType::INT64,
bool keep_dim = false, bool flatten = false);

#endif
} // namespace fastdeploy
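A minimal usage sketch for the new declarations. Allocate appears in this diff (reduce.cc); the Data() accessor and the fd_tensor.h include path are assumptions, and the numbers are illustrative.

#include <algorithm>

#include "fastdeploy/core/fd_tensor.h"  // assumed header path for FDTensor
#include "fastdeploy/function/reduce.h"

int main() {
  fastdeploy::FDTensor x;
  x.Allocate({2, 3}, fastdeploy::FDDataType::FP32);
  const float vals[6] = {0.1f, 2.5f, 1.0f, 3.0f, 0.2f, 0.7f};
  std::copy(vals, vals + 6, static_cast<float*>(x.Data()));

  // Row-wise argmax along axis 1: out holds {1, 0} with shape {2}.
  fastdeploy::FDTensor out;
  fastdeploy::ArgMax(x, &out, /*axis=*/1);

  // INT32 indices with the reduced dim kept: out2 has shape {2, 1} and
  // holds {0, 1}, since 0.1f and 0.2f are the row minima.
  fastdeploy::FDTensor out2;
  fastdeploy::ArgMin(x, &out2, /*axis=*/1, fastdeploy::FDDataType::INT32,
                     /*keep_dim=*/true);
  return 0;
}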
7 changes: 5 additions & 2 deletions csrc/fastdeploy/function/softmax.cc
@@ -114,8 +114,11 @@ void SoftmaxKernel(const FDTensor& x, FDTensor* out, int axis) {
}

void Softmax(const FDTensor& x, FDTensor* out, int axis) {
-FDASSERT(std::abs(axis) < x.shape.size(),
-         "The given axis should be smaller than the input's dimension");
+FDASSERT(
+    std::abs(axis) < x.shape.size(),
+    "The absolute value of the given axis should be smaller than the "
+    "input's dimension. Expected an absolute axis smaller than %d, but "
+    "received %d.",
+    x.shape.size(), std::abs(axis));
FD_VISIT_FLOAT_TYPES(x.dtype, "SoftmaxKernel",
([&] { SoftmaxKernel<data_t>(x, out, axis); }));
}
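To make the new axis message concrete, a small sketch with the same assumed tensor helpers as the reduce.h example above; the softmax.h include path is likewise an assumption.

#include <algorithm>

#include "fastdeploy/core/fd_tensor.h"    // assumed header path
#include "fastdeploy/function/softmax.h"  // assumed header path

int main() {
  fastdeploy::FDTensor x, probs;
  x.Allocate({2, 3}, fastdeploy::FDDataType::FP32);
  std::fill_n(static_cast<float*>(x.Data()), x.Numel(), 0.f);
  // For a {2, 3} input the assert requires |axis| < 2, so -1, 0, and 1 pass
  // while 2 trips the FDASSERT; -1 presumably selects the last dimension,
  // as the std::abs() check implies. With all-zero input every row of
  // probs becomes {1/3, 1/3, 1/3}.
  fastdeploy::Softmax(x, &probs, /*axis=*/-1);
  return 0;
}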
8 changes: 6 additions & 2 deletions csrc/fastdeploy/function/transpose.cc
@@ -94,10 +94,14 @@ void Transpose(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims) {
size_t dims_size = dims.size();
FDASSERT(dims_size == x.shape.size(),
"The input tensor's dimension should be equal to the dims's size.");
"The input tensor's dimension should be equal to the dims's size. "
"Expect dims size is %d, but receive %d.",
x.shape.size(), dims_size);
std::vector<int> count(dims_size, 0);
for (size_t i = 0; i < dims_size; i++) {
-FDASSERT(dims[i] >= 0, "The dims should be greater than or equal to 0.");
+FDASSERT(dims[i] >= 0,
+         "The dims should be greater than or equal to 0, but received %d.",
+         dims[i]);
FDASSERT(dims[i] < static_cast<int>(dims_size) && ++count[dims[i]] == 1,
"Each element of Attribute axis should be a unique value range "
"from 0 to (dims - 1), where the dims is the axis's size, unique "
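And a matching sketch for the tightened Transpose checks, with the same assumed helpers; the permutation is illustrative.

#include <algorithm>

#include "fastdeploy/core/fd_tensor.h"      // assumed header path
#include "fastdeploy/function/transpose.h"  // assumed header path

int main() {
  fastdeploy::FDTensor x, y;
  x.Allocate({2, 3, 4}, fastdeploy::FDDataType::FP32);
  std::fill_n(static_cast<float*>(x.Data()), x.Numel(), 0.f);
  // dims must be a full permutation of {0, 1, 2}: correct size, non-negative,
  // in range, and duplicate-free, or one of the FDASSERTs above fires.
  fastdeploy::Transpose(x, &y, {2, 0, 1});  // y.shape becomes {4, 2, 3}
  return 0;
}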