Perception: upgrade PointPillars model & reduce time delay
jeroldchen authored and jinghaomiao committed Sep 17, 2020
1 parent 0fe6d35 commit 8456b8d
Showing 21 changed files with 206 additions and 182 deletions.
4 changes: 2 additions & 2 deletions modules/perception/base/object.cc
@@ -27,7 +27,7 @@ Object::Object() {
type_probs.resize(static_cast<int>(ObjectType::MAX_OBJECT_TYPE), 0);
sub_type_probs.resize(static_cast<int>(ObjectSubType::MAX_OBJECT_TYPE), 0.0f);
b_cipv = false;
- feature.reset();
+ // feature.reset();
}

void Object::Reset() {
@@ -68,7 +68,7 @@ void Object::Reset() {
radar_supplement.Reset();
camera_supplement.Reset();
fusion_supplement.Reset();
- feature.reset();
+ // feature.reset();
}

std::string Object::ToString() const {
4 changes: 2 additions & 2 deletions modules/perception/base/object.h
@@ -25,7 +25,7 @@
#include "modules/perception/base/object_types.h"
#include "modules/perception/base/point_cloud.h"
#include "modules/perception/base/vehicle_struct.h"
- #include "modules/prediction/proto/feature.pb.h"
+ // #include "modules/prediction/proto/feature.pb.h"

namespace apollo {
namespace perception {
@@ -117,7 +117,7 @@ struct alignas(16) Object {
FusionObjectSupplement fusion_supplement;

// @debug feature to be used for semantic mapping
- std::shared_ptr<apollo::prediction::Feature> feature;
+ // std::shared_ptr<apollo::prediction::Feature> feature;
};

using ObjectPtr = std::shared_ptr<Object>;
6 changes: 3 additions & 3 deletions modules/perception/common/perception_gflags.cc
@@ -39,10 +39,10 @@ DEFINE_string(work_root, "", "Project work root direcotry.");

// lidar_point_pillars
DEFINE_int32(gpu_id, 0, "The id of gpu used for inference.");
- DEFINE_string(pfe_onnx_file,
+ DEFINE_string(pfe_torch_file,
"/apollo/modules/perception/production/data/perception/lidar/"
- "models/detection/point_pillars/pfe.onnx",
- "The path of pillars feature extractor onnx file.");
+ "models/detection/point_pillars/pfe.pt",
+ "The path of pillars feature extractor torch file.");
DEFINE_string(rpn_onnx_file,
"/apollo/modules/perception/production/data/perception/lidar/"
"models/detection/point_pillars/rpn.onnx",
2 changes: 1 addition & 1 deletion modules/perception/common/perception_gflags.h
@@ -33,7 +33,7 @@ DECLARE_string(work_root);

// lidar_point_pillars
DECLARE_int32(gpu_id);
- DECLARE_string(pfe_onnx_file);
+ DECLARE_string(pfe_torch_file);
DECLARE_string(rpn_onnx_file);
DECLARE_double(normalizing_factor);
DECLARE_int32(num_point_feature);
modules/perception/lidar/lib/detection/lidar_point_pillars/BUILD
@@ -26,6 +26,7 @@ cc_library(
":preprocess_points",
":scatter_cuda",
"//modules/perception/common:perception_gflags",
"//third_party:libtorch",
"@local_config_cuda//cuda:cudart",
"@local_config_tensorrt//:tensorrt",
],
34 changes: 15 additions & 19 deletions modules/perception/lidar/lib/detection/lidar_point_pillars/params.h
@@ -27,68 +27,64 @@ class Params {
static constexpr float kPillarXSize = 0.25f;
static constexpr float kPillarYSize = 0.25f;
static constexpr float kPillarZSize = 8.0f;
- static constexpr float kMinXRange = -50.0f;
- static constexpr float kMinYRange = -50.0f;
+ static constexpr float kMinXRange = -100.0f;
+ static constexpr float kMinYRange = -70.0f;
static constexpr float kMinZRange = -5.0f;
- static constexpr float kMaxXRange = 50.0f;
- static constexpr float kMaxYRange = 50.0f;
+ static constexpr float kMaxXRange = 100.0f;
+ static constexpr float kMaxYRange = 70.0f;
static constexpr float kMaxZRange = 3.0f;
- static constexpr int kNumClass = 10;
+ static constexpr int kNumClass = 9;
static constexpr int kMaxNumPillars = 30000;
static constexpr int kMaxNumPointsPerPillar = 60;
static constexpr int kNumPointFeature = 5; // x, y, z, i, delta of time
- static constexpr int kNumAnchor = 100 * 100 * 12 + 160 * 160 * 8;
+ static constexpr int kNumAnchor = 200 * 140 * 8 + 220 * 140 * 8;
static constexpr int kNumOutputBoxFeature = 7;
static constexpr int kBatchSize = 1;
- static constexpr int kNumIndsForScan = 512;
+ static constexpr int kNumIndsForScan = 1024;
static constexpr int kNumThreads = 64;
static constexpr int kNumBoxCorners = 4;

static std::vector<int> AnchorStrides() { return std::vector<int>{4, 2}; }

- static std::vector<int> NumAnchorSets() { return std::vector<int>{12, 8}; }
+ static std::vector<int> NumAnchorSets() { return std::vector<int>{8, 8}; }

static std::vector<std::vector<float>> AnchorDxSizes() {
return std::vector<std::vector<float>>{
- std::vector<float>{2.94046906f, 1.95017717f, 2.73050468f, 3.0f, 2.0f,
- 2.4560939f},
+ std::vector<float>{2.94046906f, 1.95017717f, 2.73050468f, 2.4560939f},
std::vector<float>{2.49008838f, 0.60058911f, 0.76279481f, 0.66344886f,
0.39694519f}};
}

static std::vector<std::vector<float>> AnchorDySizes() {
return std::vector<std::vector<float>>{
- std::vector<float>{11.1885991, 4.60718145f, 6.38352896f, 15.0f, 3.0f,
- 6.73778078f},
+ std::vector<float>{11.1885991, 4.60718145f, 6.38352896f, 6.73778078f},
std::vector<float>{0.48578221f, 1.68452161f, 2.09973778f, 0.7256437f,
0.40359262f}};
}

static std::vector<std::vector<float>> AnchorDzSizes() {
return std::vector<std::vector<float>>{
- std::vector<float>{3.47030982f, 1.72270761f, 3.13312415f, 3.8f, 3.8f,
- 2.73004906f},
+ std::vector<float>{3.47030982f, 1.72270761f, 3.13312415f, 2.73004906f},
std::vector<float>{0.98297065f, 1.27192197f, 1.44403034f, 1.75748069f,
1.06232151f}};
}

static std::vector<std::vector<float>> AnchorZCoors() {
return std::vector<std::vector<float>>{
- std::vector<float>{-0.0715754f, -0.93897414f, -0.08168083f, 0.22228277f,
- 0.22228277f, -0.37937912f},
+ std::vector<float>{-0.0715754f, -0.93897414f, -0.08168083f,
+ -0.37937912f},
std::vector<float>{-1.27247968f, -1.03743013f, -0.99194854f,
-0.73911038f, -1.27868911f}};
}

static std::vector<std::vector<int>> NumAnchorRo() {
- return std::vector<std::vector<int>>{std::vector<int>{2, 2, 2, 2, 2, 2},
+ return std::vector<std::vector<int>>{std::vector<int>{2, 2, 2, 2},
std::vector<int>{2, 2, 2, 1, 1}};
}

static std::vector<std::vector<float>> AnchorRo() {
return std::vector<std::vector<float>>{
- std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2,
- 0, M_PI / 2, 0, M_PI / 2},
+ std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2},
std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, 0}};
}

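For reference, the new ranges grow the detection area from 100 m x 100 m to 200 m x 140 m at the same 0.25 m pillar resolution, which is what drives the other constants in this hunk. A minimal sketch of the arithmetic, using the same grid formulas that appear in point_pillars.cc below (the remark about kNumIndsForScan is an inference, not something stated in the commit):

    #include <cstdio>

    int main() {
      // New geometry from params.h above.
      constexpr float kPillarXSize = 0.25f, kPillarYSize = 0.25f;
      constexpr float kMinXRange = -100.0f, kMaxXRange = 100.0f;
      constexpr float kMinYRange = -70.0f, kMaxYRange = 70.0f;

      // Same derivation PointPillars uses for its grid.
      constexpr int grid_x =
          static_cast<int>((kMaxXRange - kMinXRange) / kPillarXSize);  // 800 (was 400)
      constexpr int grid_y =
          static_cast<int>((kMaxYRange - kMinYRange) / kPillarYSize);  // 560 (was 400)

      // First anchor set: stride 4 over the full grid with 8 anchors per location,
      // which reproduces the first term of the new kNumAnchor.
      static_assert((grid_x / 4) * (grid_y / 4) * 8 == 200 * 140 * 8, "first kNumAnchor term");

      // A 512-wide scan no longer covers the 800-cell x dimension, which is
      // presumably why kNumIndsForScan was doubled to 1024.
      std::printf("grid: %d x %d cells\n", grid_x, grid_y);
      return 0;
    }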
modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc
@@ -54,7 +54,6 @@ const int PointPillars::kNumClass = Params::kNumClass;
const int PointPillars::kMaxNumPillars = Params::kMaxNumPillars;
const int PointPillars::kMaxNumPointsPerPillar = Params::kMaxNumPointsPerPillar;
const int PointPillars::kNumPointFeature = Params::kNumPointFeature;
- const int PointPillars::kPfeOutputSize = kMaxNumPillars * 64;
const int PointPillars::kGridXSize =
static_cast<int>((kMaxXRange - kMinXRange) / kPillarXSize);
const int PointPillars::kGridYSize =
@@ -79,10 +78,10 @@ const std::vector<int> PointPillars::kAnchorRanges{
kGridXSize,
0,
kGridYSize,
- static_cast<int>(kGridXSize * 0.1),
- static_cast<int>(kGridXSize * 0.9),
- static_cast<int>(kGridYSize * 0.1),
- static_cast<int>(kGridYSize * 0.9)};
+ static_cast<int>(kGridXSize * 0.25),
+ static_cast<int>(kGridXSize * 0.75),
+ static_cast<int>(kGridYSize * 0.25),
+ static_cast<int>(kGridYSize * 0.75)};
const std::vector<int> PointPillars::kNumAnchorSets = Params::NumAnchorSets();
const std::vector<std::vector<float>> PointPillars::kAnchorDxSizes =
Params::AnchorDxSizes();
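On the new 800 x 560 grid these bounds place the second (small-object) anchor set over the central half of each axis rather than the former 10 %-90 % band; a quick illustration of the cell and metre values (not code from the commit):

    constexpr int kGridXSize = 800;  // (100 - (-100)) / 0.25, see params.h above
    constexpr int kGridYSize = 560;  // (70 - (-70)) / 0.25
    constexpr int x_begin = static_cast<int>(kGridXSize * 0.25);  // cell 200, i.e. -50 m
    constexpr int x_end   = static_cast<int>(kGridXSize * 0.75);  // cell 600, i.e. +50 m
    constexpr int y_begin = static_cast<int>(kGridYSize * 0.25);  // cell 140, i.e. -35 m
    constexpr int y_end   = static_cast<int>(kGridYSize * 0.75);  // cell 420, i.e. +35 m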
@@ -100,12 +99,12 @@ const std::vector<std::vector<float>> PointPillars::kAnchorRo =
PointPillars::PointPillars(const bool reproduce_result_mode,
const float score_threshold,
const float nms_overlap_threshold,
- const std::string pfe_onnx_file,
+ const std::string pfe_torch_file,
const std::string rpn_onnx_file)
: reproduce_result_mode_(reproduce_result_mode),
score_threshold_(score_threshold),
nms_overlap_threshold_(nms_overlap_threshold),
- pfe_onnx_file_(pfe_onnx_file),
+ pfe_torch_file_(pfe_torch_file),
rpn_onnx_file_(rpn_onnx_file) {
if (reproduce_result_mode_) {
preprocess_points_ptr_.reset(new PreprocessPoints(
@@ -134,6 +133,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode,
kNumBoxCorners, kNumOutputBoxFeature));

DeviceMemoryMalloc();
+ InitTorch();
InitTRT();
InitAnchors();
}
@@ -170,7 +170,6 @@ PointPillars::~PointPillars() {
GPU_CHECK(cudaFree(pfe_buffers_[0]));
GPU_CHECK(cudaFree(pfe_buffers_[1]));
GPU_CHECK(cudaFree(pfe_buffers_[2]));
- GPU_CHECK(cudaFree(pfe_buffers_[3]));

GPU_CHECK(cudaFree(rpn_buffers_[0]));
GPU_CHECK(cudaFree(rpn_buffers_[1]));
@@ -193,12 +192,8 @@ GPU_CHECK(cudaFree(dev_box_for_nms_));
GPU_CHECK(cudaFree(dev_box_for_nms_));
GPU_CHECK(cudaFree(dev_filter_count_));

- pfe_context_->destroy();
rpn_context_->destroy();

- pfe_runtime_->destroy();
rpn_runtime_->destroy();
- pfe_engine_->destroy();
rpn_engine_->destroy();
}

@@ -242,7 +237,6 @@ void PointPillars::DeviceMemoryMalloc() {
kNumPointFeature * sizeof(float)));
GPU_CHECK(cudaMalloc(&pfe_buffers_[1], kMaxNumPillars * sizeof(float)));
GPU_CHECK(cudaMalloc(&pfe_buffers_[2], kMaxNumPillars * 4 * sizeof(float)));
- GPU_CHECK(cudaMalloc(&pfe_buffers_[3], kPfeOutputSize * sizeof(float)));

GPU_CHECK(cudaMalloc(&rpn_buffers_[0], kRpnInputSize * sizeof(float)));
GPU_CHECK(cudaMalloc(&rpn_buffers_[1], kRpnBoxOutputSize * sizeof(float)));
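With the pillar feature extractor now executed through LibTorch, only its three input buffers remain; the former fourth buffer (the ONNX PFE output of kPfeOutputSize = kMaxNumPillars * 64 floats) is no longer allocated because the TorchScript output tensor is handed to the scatter step directly. For scale, the sizes of the remaining allocations (a side calculation, not code from the commit):

    #include <cstddef>

    constexpr std::size_t kMaxNumPillars = 30000;
    constexpr std::size_t kMaxNumPointsPerPillar = 60;
    constexpr std::size_t kNumPointFeature = 5;

    // pfe_buffers_[0]: per-point features of every pillar, ~36 MB.
    constexpr std::size_t pillar_points_bytes =
        kMaxNumPillars * kMaxNumPointsPerPillar * kNumPointFeature * sizeof(float);  // 36,000,000
    // pfe_buffers_[1]: point count per pillar.
    constexpr std::size_t point_count_bytes = kMaxNumPillars * sizeof(float);        //    120,000
    // pfe_buffers_[2]: pillar coordinates (4 values per pillar).
    constexpr std::size_t pillar_coors_bytes = kMaxNumPillars * 4 * sizeof(float);   //    480,000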
@@ -449,34 +443,40 @@ void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px,
}
}

+ void PointPillars::InitTorch() {
+ if (gpu_id_ >= 0) {
+ device_type_ = torch::kCUDA;
+ device_id_ = gpu_id_;
+ } else {
+ device_type_ = torch::kCPU;
+ }
+
+ // Init torch net
+ torch::Device device(device_type_, device_id_);
+ pfe_net_ = torch::jit::load(pfe_torch_file_, device);
+ }

void PointPillars::InitTRT() {
// create a TensorRT model from the onnx model and serialize it to a stream
- nvinfer1::IHostMemory* pfe_trt_model_stream{nullptr};
nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr};
- OnnxToTRTModel(pfe_onnx_file_, &pfe_trt_model_stream);
OnnxToTRTModel(rpn_onnx_file_, &rpn_trt_model_stream);
- if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) {
+ if (rpn_trt_model_stream == nullptr) {
std::cerr << "Failed to load ONNX file " << std::endl;
}

// deserialize the engine
- pfe_runtime_ = nvinfer1::createInferRuntime(g_logger_);
rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
- if (pfe_runtime_ == nullptr || rpn_runtime_ == nullptr) {
+ if (rpn_runtime_ == nullptr) {
std::cerr << "Failed to create TensorRT Runtime object." << std::endl;
}
- pfe_engine_ = pfe_runtime_->deserializeCudaEngine(
- pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr);
rpn_engine_ = rpn_runtime_->deserializeCudaEngine(
rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
- if (pfe_engine_ == nullptr || rpn_engine_ == nullptr) {
+ if (rpn_engine_ == nullptr) {
std::cerr << "Failed to create TensorRT Engine." << std::endl;
}
- pfe_trt_model_stream->destroy();
rpn_trt_model_stream->destroy();
- pfe_context_ = pfe_engine_->createExecutionContext();
rpn_context_ = rpn_engine_->createExecutionContext();
- if (pfe_context_ == nullptr || rpn_context_ == nullptr) {
+ if (rpn_context_ == nullptr) {
std::cerr << "Failed to create TensorRT Execution Context." << std::endl;
}
std::cerr << "Failed to create TensorRT Execution Context." << std::endl;
}
}
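For readers new to the LibTorch calls used here: torch::jit::load deserializes a TorchScript module straight onto the chosen device, and Module::forward takes a vector of IValues. A self-contained sketch with dummy inputs of the shapes this pipeline uses (only the file path and tensor shapes come from the diff; the rest is illustrative):

    #include <torch/script.h>
    #include <torch/torch.h>

    int main() {
      torch::Device device(torch::kCUDA, 0);

      // Load the serialized TorchScript PFE model directly onto the GPU.
      torch::jit::script::Module pfe_net = torch::jit::load(
          "/apollo/modules/perception/production/data/perception/lidar/"
          "models/detection/point_pillars/pfe.pt",
          device);

      // Dummy inputs shaped like the real buffers:
      // [kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature],
      // [kMaxNumPillars], [kMaxNumPillars, 4].
      auto pillar_point_feature  = torch::zeros({30000, 60, 5}, torch::TensorOptions().device(device));
      auto num_points_per_pillar = torch::zeros({30000}, torch::TensorOptions().device(device));
      auto pillar_coors          = torch::zeros({30000, 4}, torch::TensorOptions().device(device));

      torch::NoGradGuard no_grad;
      torch::Tensor pfe_output =
          pfe_net.forward({pillar_point_feature, num_points_per_pillar, pillar_coors}).toTensor();
      // pfe_output.data_ptr<float>() is what DoInference() hands to the scatter kernel.
      return 0;
    }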
@@ -634,13 +634,32 @@ void PointPillars::DoInference(const float* in_points_array,
kMaxNumPillars * 4 * sizeof(float),
cudaMemcpyDeviceToDevice, stream));

- pfe_context_->enqueueV2(pfe_buffers_, stream, nullptr);
+ torch::Tensor tensor_pillar_point_feature = torch::from_blob(pfe_buffers_[0],
+ {kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature},
+ torch::kCUDA);
+ torch::Tensor tensor_num_points_per_pillar = torch::from_blob(
+ pfe_buffers_[1], {kMaxNumPillars}, torch::kCUDA);
+ torch::Tensor tensor_pillar_coors = torch::from_blob(pfe_buffers_[2],
+ {kMaxNumPillars, 4}, torch::kCUDA);
+
+ torch::Device device(device_type_, device_id_);
+ if (device_id_ >= 0) {
+ tensor_pillar_point_feature.to(device);
+ tensor_num_points_per_pillar.to(device);
+ tensor_pillar_coors.to(device);
+ } else {
+ return;
+ }
+
+ auto pfe_output = pfe_net_.forward({tensor_pillar_point_feature,
+ tensor_num_points_per_pillar,
+ tensor_pillar_coors}).toTensor();

GPU_CHECK(
cudaMemset(dev_scattered_feature_, 0, kRpnInputSize * sizeof(float)));
scatter_cuda_ptr_->DoScatterCuda(
host_pillar_count_[0], dev_x_coors_, dev_y_coors_,
- reinterpret_cast<float*>(pfe_buffers_[3]), dev_scattered_feature_);
+ pfe_output.data_ptr<float>(), dev_scattered_feature_);

GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_,
kBatchSize * kRpnInputSize * sizeof(float),
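Two LibTorch details are worth noting in the DoInference changes above: torch::from_blob wraps the existing device pointers without copying (the options argument is what marks them as CUDA memory), and Tensor::to returns a new tensor rather than converting in place, so a caller that actually needs the data on another device has to keep the returned value. A small standalone sketch of the pattern (buffer name and shape are illustrative):

    #include <cuda_runtime.h>
    #include <torch/torch.h>

    void WrapDeviceBuffer() {
      // Illustrative stand-in for one of the pre-filled PFE device buffers.
      float* dev_buffer = nullptr;
      cudaMalloc(&dev_buffer, 30000 * 60 * 5 * sizeof(float));

      // No copy and no ownership transfer; the options tell LibTorch that the
      // pointer is already CUDA memory (dtype defaults to float32 anyway).
      torch::Tensor t = torch::from_blob(
          dev_buffer, {30000, 60, 5},
          torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0));

      // Tensor::to is not in-place; keep the result if a transfer is required.
      torch::Tensor t_cuda = t.to(torch::Device(torch::kCUDA, 0));
      (void)t_cuda;

      cudaFree(dev_buffer);  // 't' must not be used after the buffer is freed
    }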
modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h
@@ -53,6 +53,9 @@
#include "NvInfer.h"
#include "NvOnnxParser.h"

+ #include "torch/script.h"
+ #include "torch/torch.h"

// headers in local files
#include "modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.h"
#include "modules/perception/lidar/lib/detection/lidar_point_pillars/common.h"
@@ -115,7 +118,6 @@ class PointPillars {
static const int kMaxNumPillars;
static const int kMaxNumPointsPerPillar;
static const int kNumPointFeature;
- static const int kPfeOutputSize;
static const int kGridXSize;
static const int kGridYSize;
static const int kGridZSize;
@@ -145,7 +147,7 @@ class PointPillars {
const bool reproduce_result_mode_;
const float score_threshold_;
const float nms_overlap_threshold_;
- const std::string pfe_onnx_file_;
+ const std::string pfe_torch_file_;
const std::string rpn_onnx_file_;
// end initializer list

@@ -180,7 +182,7 @@
float* dev_box_anchors_max_y_;
int* dev_anchor_mask_;

- void* pfe_buffers_[4];
+ void* pfe_buffers_[3];
void* rpn_buffers_[4];

float* dev_scattered_feature_;
Expand All @@ -206,11 +208,12 @@ class PointPillars {
std::unique_ptr<PostprocessCuda> postprocess_cuda_ptr_;

Logger g_logger_;
- nvinfer1::IExecutionContext* pfe_context_;
+ int device_id_ = -1;
+ int gpu_id_ = 0;
+ torch::DeviceType device_type_;
+ torch::jit::script::Module pfe_net_;
nvinfer1::IExecutionContext* rpn_context_;
- nvinfer1::IRuntime* pfe_runtime_;
nvinfer1::IRuntime* rpn_runtime_;
- nvinfer1::ICudaEngine* pfe_engine_;
nvinfer1::ICudaEngine* rpn_engine_;

/**
@@ -225,6 +228,12 @@
*/
void InitAnchors();

+ /**
+ * @brief Initializing LibTorch net
+ * @details Called in the constructor
+ */
+ void InitTorch();

/**
* @brief Initializing TensorRT instances
* @details Called in the constructor
@@ -323,13 +332,13 @@
* reproducible for the same input
* @param[in] score_threshold Score threshold for filtering output
* @param[in] nms_overlap_threshold IOU threshold for NMS
- * @param[in] pfe_onnx_file Pillar Feature Extractor ONNX file path
+ * @param[in] pfe_torch_file Pillar Feature Extractor Torch file path
* @param[in] rpn_onnx_file Region Proposal Network ONNX file path
* @details Variables could be changed through point_pillars_detection
*/
PointPillars(const bool reproduce_result_mode, const float score_threshold,
const float nms_overlap_threshold,
- const std::string pfe_onnx_file,
+ const std::string pfe_torch_file,
const std::string rpn_onnx_file);
~PointPillars();

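For context, a caller now passes the TorchScript path where the ONNX path used to go, while the RPN still runs through TensorRT. A minimal construction sketch using the gflags updated earlier in this commit (the namespace, the header include path, and the threshold values are assumptions, not taken from the diff):

    #include <memory>

    #include "modules/perception/common/perception_gflags.h"
    #include "modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h"

    // Illustrative wiring only; thresholds are placeholders.
    std::unique_ptr<apollo::perception::lidar::PointPillars> MakeDetector() {
      return std::make_unique<apollo::perception::lidar::PointPillars>(
          /*reproduce_result_mode=*/false,
          /*score_threshold=*/0.5f,
          /*nms_overlap_threshold=*/0.5f,
          FLAGS_pfe_torch_file,  // TorchScript pillar feature extractor (new)
          FLAGS_rpn_onnx_file);  // region proposal network, still ONNX + TensorRT
    }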
(Diffs for the remaining changed files were not loaded.)
