diff --git a/modules/perception/common/perception_gflags.cc b/modules/perception/common/perception_gflags.cc index 6350d2ca4f4..009812fd4c8 100644 --- a/modules/perception/common/perception_gflags.cc +++ b/modules/perception/common/perception_gflags.cc @@ -39,10 +39,10 @@ DEFINE_string(work_root, "", "Project work root direcotry."); // lidar_point_pillars DEFINE_int32(gpu_id, 0, "The id of gpu used for inference."); -DEFINE_string(pfe_torch_file, +DEFINE_string(pfe_onnx_file, "/apollo/modules/perception/production/data/perception/lidar/" - "models/detection/point_pillars/pfe.pt", - "The path of pillars feature extractor torch file."); + "models/detection/point_pillars/pfe.onnx", + "The path of PFE onnx file."); DEFINE_string(rpn_onnx_file, "/apollo/modules/perception/production/data/perception/lidar/" "models/detection/point_pillars/rpn.onnx", diff --git a/modules/perception/common/perception_gflags.h b/modules/perception/common/perception_gflags.h index f3639cc50f3..5bcd08f9b3b 100644 --- a/modules/perception/common/perception_gflags.h +++ b/modules/perception/common/perception_gflags.h @@ -33,7 +33,7 @@ DECLARE_string(work_root); // lidar_point_pillars DECLARE_int32(gpu_id); -DECLARE_string(pfe_torch_file); +DECLARE_string(pfe_onnx_file); DECLARE_string(rpn_onnx_file); DECLARE_double(normalizing_factor); DECLARE_int32(num_point_feature); diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/BUILD b/modules/perception/lidar/lib/detection/lidar_point_pillars/BUILD index 696fcae52b6..4cf57f39b93 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/BUILD +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/BUILD @@ -22,12 +22,12 @@ cc_library( ":common", ":nms_cuda", ":params", + ":pfe_cuda", ":postprocess_cuda", ":preprocess_points", ":scatter_cuda", "//cyber/common", "//modules/perception/common:perception_gflags", - "//third_party:libtorch", "@local_config_cuda//cuda:cudart", "@local_config_tensorrt//:tensorrt", ], @@ -93,6 +93,16 @@ cuda_library( ], ) +cuda_library( + name = "pfe_cuda", + srcs = ["pfe_cuda.cu"], + hdrs = ["pfe_cuda.h"], + deps = [ + ":common", + "@local_config_cuda//cuda:cudart", + ], +) + cuda_library( name = "postprocess_cuda", srcs = ["postprocess_cuda.cu"], diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/params.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/params.h index c1532fbab80..7bb042fb58b 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/params.h +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/params.h @@ -37,6 +37,7 @@ class Params { static constexpr int kMaxNumPillars = 30000; static constexpr int kMaxNumPointsPerPillar = 60; static constexpr int kNumPointFeature = 5; // x, y, z, i, delta of time + static constexpr int kNumGatherPointFeature = 9; static constexpr int kNumAnchor = 200 * 140 * 8 + 220 * 140 * 8; static constexpr int kNumOutputBoxFeature = 7; static constexpr int kBatchSize = 1; diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.cu b/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.cu new file mode 100644 index 00000000000..7fafb966e44 --- /dev/null +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.cu @@ -0,0 +1,99 @@ +/****************************************************************************** + * Copyright 2020 The Apollo Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *****************************************************************************/ + +#include "modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.h" + +namespace apollo { +namespace perception { +namespace lidar { + +__global__ void gather_point_feature_kernel( + float* dev_pillar_point_feature, float* dev_num_points_per_pillar, + float* dev_pillar_coors, float* dev_pfe_gather_feature, + int max_num_points_per_pillar, int num_point_feature, + int num_gather_point_feature, float pillar_x_size, float pillar_y_size, + float min_x_range, float min_y_range) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + float point_mean[3]; + for (int point_id = 0; point_id < dev_num_points_per_pillar[tid]; + ++point_id) { + for (int i = 0; i < 3; ++i) { + int feature_id = i + point_id * num_point_feature + + tid * max_num_points_per_pillar * num_point_feature; + point_mean[i] += dev_pillar_point_feature[feature_id]; + } + } + for (int i = 0; i < 3; ++i) { + point_mean[i] = point_mean[i] / dev_num_points_per_pillar[tid]; + } + + float x_offset = pillar_x_size / 2 + min_x_range; + float y_offset = pillar_y_size / 2 + min_y_range; + for (int point_id = 0; point_id < dev_num_points_per_pillar[tid]; + ++point_id) { + int point_head_id = point_id * num_point_feature + + tid * max_num_points_per_pillar * num_point_feature; + int pfe_gather_head_id = point_id * num_gather_point_feature + + tid * max_num_points_per_pillar * num_gather_point_feature; + float point_x = dev_pillar_point_feature[point_head_id]; + float point_y = dev_pillar_point_feature[point_head_id + 1]; + dev_pfe_gather_feature[pfe_gather_head_id] = + sqrt(point_x * point_x + point_y * point_y); + for (int i = 2; i < num_point_feature; ++i) { + dev_pfe_gather_feature[pfe_gather_head_id + i - 1] = + dev_pillar_point_feature[point_head_id + i]; + } + for (int i = 4; i < 7; ++i) { + dev_pfe_gather_feature[pfe_gather_head_id + i] = + dev_pillar_point_feature[point_head_id + i - 4] - point_mean[i - 4]; + } + dev_pfe_gather_feature[pfe_gather_head_id + 7] = + dev_pillar_point_feature[point_head_id] - + (dev_pillar_coors[tid * 4 + 3] * pillar_x_size + x_offset); + dev_pfe_gather_feature[pfe_gather_head_id + 8] = + dev_pillar_point_feature[point_head_id + 1] - + (dev_pillar_coors[tid * 4 + 2] * pillar_y_size + y_offset); + } +} + +PfeCuda::PfeCuda(int max_num_pillars, int max_num_points_per_pillar, + int num_point_feature, int num_gather_point_feature, + float pillar_x_size, float pillar_y_size, float min_x_range, + float min_y_range, int num_threads) + : max_num_pillars_(max_num_pillars), + max_num_points_per_pillar_(max_num_points_per_pillar), + num_point_feature_(num_point_feature), + num_gather_point_feature_(num_gather_point_feature), + pillar_x_size_(pillar_x_size), + pillar_y_size_(pillar_y_size), + min_x_range_(min_x_range), + min_y_range_(min_y_range), + num_threads_(num_threads) {} + +void PfeCuda::GatherPointFeature(float* dev_pillar_point_feature, + float* dev_num_points_per_pillar, + float* dev_pillar_coors, + float* dev_pfe_gather_feature) { + const int num_blocks = DIVUP(max_num_pillars_, num_threads_); + gather_point_feature_kernel<<>>( + dev_pillar_point_feature, dev_num_points_per_pillar, dev_pillar_coors, + dev_pfe_gather_feature, max_num_points_per_pillar_, num_point_feature_, + num_gather_point_feature_, pillar_x_size_, pillar_y_size_, min_x_range_, + min_y_range_); +} +} // namespace lidar +} // namespace perception +} // namespace apollo diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.h new file mode 100644 index 00000000000..48de3eecd60 --- /dev/null +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * Copyright 2020 The Apollo Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *****************************************************************************/ + +#pragma once + +#include "modules/perception/lidar/lib/detection/lidar_point_pillars/common.h" + +namespace apollo { +namespace perception { +namespace lidar { + +class PfeCuda { + public: + PfeCuda(int max_num_pillars, int max_num_points_per_pillar, + int num_point_feature, int num_gather_point_feature, + float pillar_x_size, float pillar_y_size, float min_x_range, + float min_y_range, int num_threads); + ~PfeCuda() = default; + + void GatherPointFeature(float* dev_pillar_point_feature, + float* dev_num_points_per_pillar, + float* dev_pillar_coors, + float* dev_pfe_gather_feature); + + private: + int max_num_pillars_; + int max_num_points_per_pillar_; + int num_point_feature_; + int num_gather_point_feature_; + float pillar_x_size_; + float pillar_y_size_; + float min_x_range_; + float min_y_range_; + int num_threads_; +}; +} // namespace lidar +} // namespace perception +} // namespace apollo diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc index 3d3cd6c16bf..37a8f718f49 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc @@ -29,16 +29,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h" -// headers in STL #include #include #include "cyber/common/log.h" -// headers in local files -#include "modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h" - namespace apollo { namespace perception { namespace lidar { @@ -56,6 +53,7 @@ const int PointPillars::kNumClass = Params::kNumClass; const int PointPillars::kMaxNumPillars = Params::kMaxNumPillars; const int PointPillars::kMaxNumPointsPerPillar = Params::kMaxNumPointsPerPillar; const int PointPillars::kNumPointFeature = Params::kNumPointFeature; +const int PointPillars::kNumGatherPointFeature = Params::kNumGatherPointFeature; const int PointPillars::kGridXSize = static_cast((kMaxXRange - kMinXRange) / kPillarXSize); const int PointPillars::kGridYSize = @@ -101,13 +99,15 @@ const std::vector> PointPillars::kAnchorRo = PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, - const std::string pfe_torch_file, + const std::string pfe_onnx_file, const std::string rpn_onnx_file) : reproduce_result_mode_(reproduce_result_mode), score_threshold_(score_threshold), nms_overlap_threshold_(nms_overlap_threshold), - pfe_torch_file_(pfe_torch_file), - rpn_onnx_file_(rpn_onnx_file) { + pfe_onnx_file_(pfe_onnx_file), + rpn_onnx_file_(rpn_onnx_file), + pfe_engine_(nullptr), + rpn_engine_(nullptr) { if (reproduce_result_mode_) { preprocess_points_ptr_.reset(new PreprocessPoints( kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature, kGridXSize, @@ -124,8 +124,12 @@ PointPillars::PointPillars(const bool reproduce_result_mode, kNumThreads, kNumIndsForScan, kNumAnchor, kMinXRange, kMinYRange, kPillarXSize, kPillarYSize, kGridXSize, kGridYSize)); - scatter_cuda_ptr_.reset( - new ScatterCuda(kNumThreads, kMaxNumPillars, kGridXSize, kGridYSize)); + pfe_cuda_ptr_.reset(new PfeCuda(kMaxNumPillars, kMaxNumPointsPerPillar, + kNumPointFeature, kNumGatherPointFeature, + kPillarXSize, kPillarYSize, kMinXRange, + kMinYRange, kNumThreads)); + + scatter_cuda_ptr_.reset(new ScatterCuda(kNumThreads, kGridXSize, kGridYSize)); const float float_min = std::numeric_limits::lowest(); const float float_max = std::numeric_limits::max(); @@ -135,7 +139,6 @@ PointPillars::PointPillars(const bool reproduce_result_mode, kNumBoxCorners, kNumOutputBoxFeature)); DeviceMemoryMalloc(); - InitTorch(); InitTRT(); InitAnchors(); } @@ -169,9 +172,9 @@ PointPillars::~PointPillars() { GPU_CHECK(cudaFree(dev_box_anchors_max_y_)); GPU_CHECK(cudaFree(dev_anchor_mask_)); + GPU_CHECK(cudaFree(dev_pfe_gather_feature_)); GPU_CHECK(cudaFree(pfe_buffers_[0])); GPU_CHECK(cudaFree(pfe_buffers_[1])); - GPU_CHECK(cudaFree(pfe_buffers_[2])); GPU_CHECK(cudaFree(rpn_buffers_[0])); GPU_CHECK(cudaFree(rpn_buffers_[1])); @@ -194,8 +197,9 @@ PointPillars::~PointPillars() { GPU_CHECK(cudaFree(dev_box_for_nms_)); GPU_CHECK(cudaFree(dev_filter_count_)); + pfe_context_->destroy(); rpn_context_->destroy(); - rpn_runtime_->destroy(); + pfe_engine_->destroy(); rpn_engine_->destroy(); } @@ -234,11 +238,13 @@ void PointPillars::DeviceMemoryMalloc() { // for trt inference // create GPU buffers and a stream + GPU_CHECK(cudaMalloc(reinterpret_cast(&dev_pfe_gather_feature_), + kMaxNumPillars * kMaxNumPointsPerPillar * + kNumGatherPointFeature * sizeof(float))); GPU_CHECK( cudaMalloc(&pfe_buffers_[0], kMaxNumPillars * kMaxNumPointsPerPillar * - kNumPointFeature * sizeof(float))); - GPU_CHECK(cudaMalloc(&pfe_buffers_[1], kMaxNumPillars * sizeof(float))); - GPU_CHECK(cudaMalloc(&pfe_buffers_[2], kMaxNumPillars * 4 * sizeof(float))); + kNumGatherPointFeature * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[1], kMaxNumPillars * 64 * sizeof(float))); GPU_CHECK(cudaMalloc(&rpn_buffers_[0], kRpnInputSize * sizeof(float))); GPU_CHECK(cudaMalloc(&rpn_buffers_[1], kRpnBoxOutputSize * sizeof(float))); @@ -445,48 +451,25 @@ void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px, } } -void PointPillars::InitTorch() { - if (gpu_id_ >= 0) { - device_type_ = torch::kCUDA; - device_id_ = gpu_id_; - } else { - device_type_ = torch::kCPU; - } - - // Init torch net - torch::Device device(device_type_, device_id_); - pfe_net_ = torch::jit::load(pfe_torch_file_, device); -} - void PointPillars::InitTRT() { - // create a TensorRT model from the onnx model and serialize it to a stream - nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr}; - OnnxToTRTModel(rpn_onnx_file_, &rpn_trt_model_stream); - if (rpn_trt_model_stream == nullptr) { - std::cerr << "Failed to load ONNX file " << std::endl; + // create a TensorRT model from the onnx model and load it into an engine + OnnxToTRTModel(pfe_onnx_file_, &pfe_engine_); + OnnxToTRTModel(rpn_onnx_file_, &rpn_engine_); + if (pfe_engine_ == nullptr || rpn_engine_ == nullptr) { + AERROR << "Failed to load ONNX file."; } - // deserialize the engine - rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_); - if (rpn_runtime_ == nullptr) { - std::cerr << "Failed to create TensorRT Runtime object." << std::endl; - } - rpn_engine_ = rpn_runtime_->deserializeCudaEngine( - rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr); - if (rpn_engine_ == nullptr) { - std::cerr << "Failed to create TensorRT Engine." << std::endl; - } - rpn_trt_model_stream->destroy(); + // create execution context from the engine + pfe_context_ = pfe_engine_->createExecutionContext(); rpn_context_ = rpn_engine_->createExecutionContext(); - if (rpn_context_ == nullptr) { - std::cerr << "Failed to create TensorRT Execution Context." << std::endl; + if (pfe_context_ == nullptr || rpn_context_ == nullptr) { + AERROR << "Failed to create TensorRT Execution Context."; } } void PointPillars::OnnxToTRTModel( const std::string& model_file, // name of the onnx model - nvinfer1::IHostMemory** - trt_model_stream) { // output buffer for the TensorRT model + nvinfer1::ICudaEngine** engine_ptr) { int verbosity = static_cast(nvinfer1::ILogger::Severity::kWARNING); // create the builder @@ -497,6 +480,7 @@ void PointPillars::OnnxToTRTModel( nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicit_batch); + // parse onnx model auto parser = nvonnxparser::createParser(*network, g_logger_); if (!parser->parseFromFile(model_file.c_str(), verbosity)) { std::string msg("failed to parse onnx file"); @@ -511,11 +495,8 @@ void PointPillars::OnnxToTRTModel( nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); + *engine_ptr = engine; parser->destroy(); - - // serialize the engine, then close everything down - *trt_model_stream = engine->serialize(); - engine->destroy(); network->destroy(); config->destroy(); builder->destroy(); @@ -616,11 +597,6 @@ void PointPillars::DoInference(const float* in_points_array, const int in_num_points, std::vector* out_detections, std::vector* out_labels) { - if (device_id_ < 0) { - AERROR << "Torch is not using GPU!"; - return; - } - Preprocess(in_points_array, in_num_points); anchor_mask_cuda_ptr_->DoAnchorMaskCuda( @@ -630,39 +606,24 @@ void PointPillars::DoInference(const float* in_points_array, cudaStream_t stream; GPU_CHECK(cudaStreamCreate(&stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_point_feature_, + + GPU_CHECK(cudaMemset(dev_pfe_gather_feature_, 0, + kMaxNumPillars * kMaxNumPointsPerPillar * + kNumGatherPointFeature * sizeof(float))); + pfe_cuda_ptr_->GatherPointFeature(dev_pillar_point_feature_, + dev_num_points_per_pillar_, + dev_pillar_coors_, dev_pfe_gather_feature_); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pfe_gather_feature_, kMaxNumPillars * kMaxNumPointsPerPillar * - kNumPointFeature * sizeof(float), + kNumGatherPointFeature * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_num_points_per_pillar_, - kMaxNumPillars * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_coors_, - kMaxNumPillars * 4 * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - - torch::Tensor tensor_pillar_point_feature = torch::from_blob(pfe_buffers_[0], - {kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature}, - torch::kCUDA); - torch::Tensor tensor_num_points_per_pillar = torch::from_blob( - pfe_buffers_[1], {kMaxNumPillars}, torch::kCUDA); - torch::Tensor tensor_pillar_coors = torch::from_blob(pfe_buffers_[2], - {kMaxNumPillars, 4}, torch::kCUDA); - - torch::Device device(device_type_, device_id_); - tensor_pillar_point_feature.to(device); - tensor_num_points_per_pillar.to(device); - tensor_pillar_coors.to(device); - - auto pfe_output = pfe_net_.forward({tensor_pillar_point_feature, - tensor_num_points_per_pillar, - tensor_pillar_coors}).toTensor(); + pfe_context_->enqueueV2(pfe_buffers_, stream, nullptr); GPU_CHECK( cudaMemset(dev_scattered_feature_, 0, kRpnInputSize * sizeof(float))); scatter_cuda_ptr_->DoScatterCuda( host_pillar_count_[0], dev_x_coors_, dev_y_coors_, - pfe_output.data_ptr(), dev_scattered_feature_); + reinterpret_cast(pfe_buffers_[1]), dev_scattered_feature_); GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_, kBatchSize * kRpnInputSize * sizeof(float), diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h index d22a089c590..9b79c080079 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h @@ -53,13 +53,11 @@ #include "NvInfer.h" #include "NvOnnxParser.h" -#include "torch/script.h" -#include "torch/torch.h" - // headers in local files #include "modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.h" #include "modules/perception/lidar/lib/detection/lidar_point_pillars/common.h" #include "modules/perception/lidar/lib/detection/lidar_point_pillars/params.h" +#include "modules/perception/lidar/lib/detection/lidar_point_pillars/pfe_cuda.h" #include "modules/perception/lidar/lib/detection/lidar_point_pillars/postprocess_cuda.h" #include "modules/perception/lidar/lib/detection/lidar_point_pillars/preprocess_points.h" #include "modules/perception/lidar/lib/detection/lidar_point_pillars/preprocess_points_cuda.h" @@ -118,6 +116,7 @@ class PointPillars { static const int kMaxNumPillars; static const int kMaxNumPointsPerPillar; static const int kNumPointFeature; + static const int kNumGatherPointFeature; static const int kGridXSize; static const int kGridYSize; static const int kGridZSize; @@ -147,7 +146,7 @@ class PointPillars { const bool reproduce_result_mode_; const float score_threshold_; const float nms_overlap_threshold_; - const std::string pfe_torch_file_; + const std::string pfe_onnx_file_; const std::string rpn_onnx_file_; // end initializer list @@ -182,7 +181,9 @@ class PointPillars { float* dev_box_anchors_max_y_; int* dev_anchor_mask_; - void* pfe_buffers_[3]; + float* dev_pfe_gather_feature_; + void* pfe_buffers_[2]; + void* rpn_buffers_[4]; float* dev_scattered_feature_; @@ -204,17 +205,15 @@ class PointPillars { std::unique_ptr preprocess_points_ptr_; std::unique_ptr preprocess_points_cuda_ptr_; std::unique_ptr anchor_mask_cuda_ptr_; + std::unique_ptr pfe_cuda_ptr_; std::unique_ptr scatter_cuda_ptr_; std::unique_ptr postprocess_cuda_ptr_; Logger g_logger_; - int device_id_ = -1; - int gpu_id_ = 0; - torch::DeviceType device_type_; - torch::jit::script::Module pfe_net_; - nvinfer1::IExecutionContext* rpn_context_; - nvinfer1::IRuntime* rpn_runtime_; + nvinfer1::ICudaEngine* pfe_engine_; nvinfer1::ICudaEngine* rpn_engine_; + nvinfer1::IExecutionContext* pfe_context_; + nvinfer1::IExecutionContext* rpn_context_; /** * @brief Memory allocation for device memory @@ -228,12 +227,6 @@ class PointPillars { */ void InitAnchors(); - /** - * @brief Initializing LibTorch net - * @details Called in the constructor - */ - void InitTorch(); - /** * @brief Initializing TensorRT instances * @details Called in the constructor @@ -266,11 +259,11 @@ class PointPillars { /** * @brief Convert ONNX to TensorRT model * @param[in] model_file ONNX model file path - * @param[out] trt_model_stream TensorRT model made out of ONNX model + * @param[out] engine_ptr TensorRT model engine made out of ONNX model * @details Load ONNX model, and convert it to TensorRT model */ void OnnxToTRTModel(const std::string& model_file, - nvinfer1::IHostMemory** trt_model_stream); + nvinfer1::ICudaEngine** engine_ptr); /** * @brief Preproces points @@ -332,13 +325,13 @@ class PointPillars { * reproducible for the same input * @param[in] score_threshold Score threshold for filtering output * @param[in] nms_overlap_threshold IOU threshold for NMS - * @param[in] pfe_torch_file Pillar Feature Extractor Torch file path + * @param[in] pfe_onnx_file Pillar Feature Extractor ONNX file path * @param[in] rpn_onnx_file Region Proposal Network ONNX file path * @details Variables could be changed through point_pillars_detection */ PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, - const std::string pfe_torch_file, + const std::string pfe_onnx_file, const std::string rpn_onnx_file); ~PointPillars(); diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_detection.cc b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_detection.cc index 0d38c00c534..8181802c2dd 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_detection.cc +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_detection.cc @@ -22,7 +22,6 @@ #include #include "cyber/common/log.h" - #include "modules/perception/base/object_pool_types.h" #include "modules/perception/base/point_cloud_util.h" #include "modules/perception/common/perception_gflags.h" @@ -55,7 +54,7 @@ PointPillarsDetection::PointPillarsDetection() bool PointPillarsDetection::Init(const DetectionInitOptions& options) { point_pillars_ptr_.reset(new PointPillars( FLAGS_reproduce_result_mode, FLAGS_score_threshold, - FLAGS_nms_overlap_threshold, FLAGS_pfe_torch_file, FLAGS_rpn_onnx_file)); + FLAGS_nms_overlap_threshold, FLAGS_pfe_onnx_file, FLAGS_rpn_onnx_file)); return true; } diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_test.cc b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_test.cc index c9a6dee0f45..e775e0eb5c2 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_test.cc +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars_test.cc @@ -150,7 +150,7 @@ TestClass::TestClass() point_pillars_ptr_.reset(new PointPillars( reproduce_result_mode, score_threshold, nms_overlap_threshold, - FLAGS_pfe_torch_file, FLAGS_rpn_onnx_file)); + FLAGS_pfe_onnx_file, FLAGS_rpn_onnx_file)); } TestClass::TestClass(const int num_class, const int max_num_pillars, @@ -192,7 +192,7 @@ TestClass::TestClass(const int num_class, const int max_num_pillars, point_pillars_ptr_.reset(new PointPillars( reproduce_result_mode, score_threshold, nms_overlap_threshold, - FLAGS_pfe_torch_file, FLAGS_rpn_onnx_file)); + FLAGS_pfe_onnx_file, FLAGS_rpn_onnx_file)); } void TestClass::Preprocess(const float* in_points_array, int in_num_points, diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.cu b/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.cu index b2c8d349bb1..8264a9b36c6 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.cu +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.cu @@ -38,31 +38,29 @@ namespace perception { namespace lidar { __global__ void scatter_kernel(int *x_coors, int *y_coors, float *pfe_output, - float *scattered_feature, - const int max_num_pillars_, - const int grid_x_size, const int grid_y_size) { + float *scattered_feature, const int grid_x_size, + const int grid_y_size) { int i_pillar = blockIdx.x; int i_feature = threadIdx.x; int x_ind = x_coors[i_pillar]; int y_ind = y_coors[i_pillar]; - float feature = pfe_output[i_feature * max_num_pillars_ + i_pillar]; + float feature = pfe_output[i_pillar * 64 + i_feature]; scattered_feature[i_feature * grid_y_size * grid_x_size + y_ind * grid_x_size + x_ind] = feature; } -ScatterCuda::ScatterCuda(const int num_threads, const int max_num_pillars, - const int grid_x_size, const int grid_y_size) +ScatterCuda::ScatterCuda(const int num_threads, const int grid_x_size, + const int grid_y_size) : num_threads_(num_threads), - max_num_pillars_(max_num_pillars), grid_x_size_(grid_x_size), grid_y_size_(grid_y_size) {} void ScatterCuda::DoScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature) { - scatter_kernel<<>>( - x_coors, y_coors, pfe_output, scattered_feature, max_num_pillars_, - grid_x_size_, grid_y_size_); + scatter_kernel<<>>(x_coors, y_coors, pfe_output, + scattered_feature, + grid_x_size_, grid_y_size_); } } // namespace lidar diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.h index adc36e4cd4c..f6dee1f931a 100644 --- a/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.h +++ b/modules/perception/lidar/lib/detection/lidar_point_pillars/scatter_cuda.h @@ -46,7 +46,6 @@ namespace lidar { class ScatterCuda { private: const int num_threads_; - const int max_num_pillars_; const int grid_x_size_; const int grid_y_size_; @@ -54,13 +53,12 @@ class ScatterCuda { /** * @brief Constructor * @param[in] num_threads The number of threads to launch cuda kernel - * @param[in] max_num_pillars Maximum number of pillars * @param[in] grid_x_size Number of pillars in x-coordinate * @param[in] grid_y_size Number of pillars in y-coordinate * @details Captital variables never change after the compile */ - ScatterCuda(const int num_threads, const int max_num_pillars, - const int grid_x_size, const int grid_y_size); + ScatterCuda(const int num_threads, const int grid_x_size, + const int grid_y_size); /** * @brief Call scatter cuda kernel diff --git a/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.onnx b/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.onnx new file mode 100644 index 00000000000..dc0027294d9 Binary files /dev/null and b/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.onnx differ diff --git a/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.pt b/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.pt deleted file mode 100644 index b813ccad2ce..00000000000 Binary files a/modules/perception/production/data/perception/lidar/models/detection/point_pillars/pfe.pt and /dev/null differ