Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perception: upgrade PointPillars model & reduce time delay #12561

Merged
merged 6 commits into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions modules/perception/base/object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Object::Object() {
type_probs.resize(static_cast<int>(ObjectType::MAX_OBJECT_TYPE), 0);
sub_type_probs.resize(static_cast<int>(ObjectSubType::MAX_OBJECT_TYPE), 0.0f);
b_cipv = false;
feature.reset();
// feature.reset();
}

void Object::Reset() {
Expand Down Expand Up @@ -68,7 +68,7 @@ void Object::Reset() {
radar_supplement.Reset();
camera_supplement.Reset();
fusion_supplement.Reset();
feature.reset();
// feature.reset();
}

std::string Object::ToString() const {
Expand Down
4 changes: 2 additions & 2 deletions modules/perception/base/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "modules/perception/base/object_types.h"
#include "modules/perception/base/point_cloud.h"
#include "modules/perception/base/vehicle_struct.h"
#include "modules/prediction/proto/feature.pb.h"
// #include "modules/prediction/proto/feature.pb.h"

namespace apollo {
namespace perception {
Expand Down Expand Up @@ -117,7 +117,7 @@ struct alignas(16) Object {
FusionObjectSupplement fusion_supplement;

// @debug feature to be used for semantic mapping
std::shared_ptr<apollo::prediction::Feature> feature;
// std::shared_ptr<apollo::prediction::Feature> feature;
};

using ObjectPtr = std::shared_ptr<Object>;
Expand Down
6 changes: 3 additions & 3 deletions modules/perception/common/perception_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ DEFINE_string(work_root, "", "Project work root direcotry.");

// lidar_point_pillars
DEFINE_int32(gpu_id, 0, "The id of gpu used for inference.");
DEFINE_string(pfe_onnx_file,
DEFINE_string(pfe_torch_file,
"/apollo/modules/perception/production/data/perception/lidar/"
"models/detection/point_pillars/pfe.onnx",
"The path of pillars feature extractor onnx file.");
"models/detection/point_pillars/pfe.pt",
"The path of pillars feature extractor torch file.");
DEFINE_string(rpn_onnx_file,
"/apollo/modules/perception/production/data/perception/lidar/"
"models/detection/point_pillars/rpn.onnx",
Expand Down
2 changes: 1 addition & 1 deletion modules/perception/common/perception_gflags.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ DECLARE_string(work_root);

// lidar_point_pillars
DECLARE_int32(gpu_id);
DECLARE_string(pfe_onnx_file);
DECLARE_string(pfe_torch_file);
DECLARE_string(rpn_onnx_file);
DECLARE_double(normalizing_factor);
DECLARE_int32(num_point_feature);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ cc_library(
":preprocess_points",
":scatter_cuda",
"//modules/perception/common:perception_gflags",
"//third_party:libtorch",
"@local_config_cuda//cuda:cudart",
"@local_config_tensorrt//:tensorrt",
],
Expand Down
34 changes: 15 additions & 19 deletions modules/perception/lidar/lib/detection/lidar_point_pillars/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,68 +27,64 @@ class Params {
static constexpr float kPillarXSize = 0.25f;
static constexpr float kPillarYSize = 0.25f;
static constexpr float kPillarZSize = 8.0f;
static constexpr float kMinXRange = -50.0f;
static constexpr float kMinYRange = -50.0f;
static constexpr float kMinXRange = -100.0f;
static constexpr float kMinYRange = -70.0f;
static constexpr float kMinZRange = -5.0f;
static constexpr float kMaxXRange = 50.0f;
static constexpr float kMaxYRange = 50.0f;
static constexpr float kMaxXRange = 100.0f;
static constexpr float kMaxYRange = 70.0f;
static constexpr float kMaxZRange = 3.0f;
static constexpr int kNumClass = 10;
static constexpr int kNumClass = 9;
static constexpr int kMaxNumPillars = 30000;
static constexpr int kMaxNumPointsPerPillar = 60;
static constexpr int kNumPointFeature = 5; // x, y, z, i, delta of time
static constexpr int kNumAnchor = 100 * 100 * 12 + 160 * 160 * 8;
static constexpr int kNumAnchor = 200 * 140 * 8 + 220 * 140 * 8;
static constexpr int kNumOutputBoxFeature = 7;
static constexpr int kBatchSize = 1;
static constexpr int kNumIndsForScan = 512;
static constexpr int kNumIndsForScan = 1024;
static constexpr int kNumThreads = 64;
static constexpr int kNumBoxCorners = 4;

static std::vector<int> AnchorStrides() { return std::vector<int>{4, 2}; }

static std::vector<int> NumAnchorSets() { return std::vector<int>{12, 8}; }
static std::vector<int> NumAnchorSets() { return std::vector<int>{8, 8}; }

static std::vector<std::vector<float>> AnchorDxSizes() {
return std::vector<std::vector<float>>{
std::vector<float>{2.94046906f, 1.95017717f, 2.73050468f, 3.0f, 2.0f,
2.4560939f},
std::vector<float>{2.94046906f, 1.95017717f, 2.73050468f, 2.4560939f},
std::vector<float>{2.49008838f, 0.60058911f, 0.76279481f, 0.66344886f,
0.39694519f}};
}

static std::vector<std::vector<float>> AnchorDySizes() {
return std::vector<std::vector<float>>{
std::vector<float>{11.1885991, 4.60718145f, 6.38352896f, 15.0f, 3.0f,
6.73778078f},
std::vector<float>{11.1885991, 4.60718145f, 6.38352896f, 6.73778078f},
std::vector<float>{0.48578221f, 1.68452161f, 2.09973778f, 0.7256437f,
0.40359262f}};
}

static std::vector<std::vector<float>> AnchorDzSizes() {
return std::vector<std::vector<float>>{
std::vector<float>{3.47030982f, 1.72270761f, 3.13312415f, 3.8f, 3.8f,
2.73004906f},
std::vector<float>{3.47030982f, 1.72270761f, 3.13312415f, 2.73004906f},
std::vector<float>{0.98297065f, 1.27192197f, 1.44403034f, 1.75748069f,
1.06232151f}};
}

static std::vector<std::vector<float>> AnchorZCoors() {
return std::vector<std::vector<float>>{
std::vector<float>{-0.0715754f, -0.93897414f, -0.08168083f, 0.22228277f,
0.22228277f, -0.37937912f},
std::vector<float>{-0.0715754f, -0.93897414f, -0.08168083f,
-0.37937912f},
std::vector<float>{-1.27247968f, -1.03743013f, -0.99194854f,
-0.73911038f, -1.27868911f}};
}

static std::vector<std::vector<int>> NumAnchorRo() {
return std::vector<std::vector<int>>{std::vector<int>{2, 2, 2, 2, 2, 2},
return std::vector<std::vector<int>>{std::vector<int>{2, 2, 2, 2},
std::vector<int>{2, 2, 2, 1, 1}};
}

static std::vector<std::vector<float>> AnchorRo() {
return std::vector<std::vector<float>>{
std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2,
0, M_PI / 2, 0, M_PI / 2},
std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2},
std::vector<float>{0, M_PI / 2, 0, M_PI / 2, 0, M_PI / 2, 0, 0}};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ const int PointPillars::kNumClass = Params::kNumClass;
const int PointPillars::kMaxNumPillars = Params::kMaxNumPillars;
const int PointPillars::kMaxNumPointsPerPillar = Params::kMaxNumPointsPerPillar;
const int PointPillars::kNumPointFeature = Params::kNumPointFeature;
const int PointPillars::kPfeOutputSize = kMaxNumPillars * 64;
const int PointPillars::kGridXSize =
static_cast<int>((kMaxXRange - kMinXRange) / kPillarXSize);
const int PointPillars::kGridYSize =
Expand All @@ -79,10 +78,10 @@ const std::vector<int> PointPillars::kAnchorRanges{
kGridXSize,
0,
kGridYSize,
static_cast<int>(kGridXSize * 0.1),
static_cast<int>(kGridXSize * 0.9),
static_cast<int>(kGridYSize * 0.1),
static_cast<int>(kGridYSize * 0.9)};
static_cast<int>(kGridXSize * 0.25),
static_cast<int>(kGridXSize * 0.75),
static_cast<int>(kGridYSize * 0.25),
static_cast<int>(kGridYSize * 0.75)};
const std::vector<int> PointPillars::kNumAnchorSets = Params::NumAnchorSets();
const std::vector<std::vector<float>> PointPillars::kAnchorDxSizes =
Params::AnchorDxSizes();
Expand All @@ -100,12 +99,12 @@ const std::vector<std::vector<float>> PointPillars::kAnchorRo =
PointPillars::PointPillars(const bool reproduce_result_mode,
const float score_threshold,
const float nms_overlap_threshold,
const std::string pfe_onnx_file,
const std::string pfe_torch_file,
const std::string rpn_onnx_file)
: reproduce_result_mode_(reproduce_result_mode),
score_threshold_(score_threshold),
nms_overlap_threshold_(nms_overlap_threshold),
pfe_onnx_file_(pfe_onnx_file),
pfe_torch_file_(pfe_torch_file),
rpn_onnx_file_(rpn_onnx_file) {
if (reproduce_result_mode_) {
preprocess_points_ptr_.reset(new PreprocessPoints(
Expand Down Expand Up @@ -134,6 +133,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode,
kNumBoxCorners, kNumOutputBoxFeature));

DeviceMemoryMalloc();
InitTorch();
InitTRT();
InitAnchors();
}
Expand Down Expand Up @@ -170,7 +170,6 @@ PointPillars::~PointPillars() {
GPU_CHECK(cudaFree(pfe_buffers_[0]));
GPU_CHECK(cudaFree(pfe_buffers_[1]));
GPU_CHECK(cudaFree(pfe_buffers_[2]));
GPU_CHECK(cudaFree(pfe_buffers_[3]));

GPU_CHECK(cudaFree(rpn_buffers_[0]));
GPU_CHECK(cudaFree(rpn_buffers_[1]));
Expand All @@ -193,12 +192,8 @@ PointPillars::~PointPillars() {
GPU_CHECK(cudaFree(dev_box_for_nms_));
GPU_CHECK(cudaFree(dev_filter_count_));

pfe_context_->destroy();
rpn_context_->destroy();

pfe_runtime_->destroy();
rpn_runtime_->destroy();
pfe_engine_->destroy();
rpn_engine_->destroy();
}

Expand Down Expand Up @@ -242,7 +237,6 @@ void PointPillars::DeviceMemoryMalloc() {
kNumPointFeature * sizeof(float)));
GPU_CHECK(cudaMalloc(&pfe_buffers_[1], kMaxNumPillars * sizeof(float)));
GPU_CHECK(cudaMalloc(&pfe_buffers_[2], kMaxNumPillars * 4 * sizeof(float)));
GPU_CHECK(cudaMalloc(&pfe_buffers_[3], kPfeOutputSize * sizeof(float)));

GPU_CHECK(cudaMalloc(&rpn_buffers_[0], kRpnInputSize * sizeof(float)));
GPU_CHECK(cudaMalloc(&rpn_buffers_[1], kRpnBoxOutputSize * sizeof(float)));
Expand Down Expand Up @@ -449,34 +443,40 @@ void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px,
}
}

void PointPillars::InitTorch() {
if (gpu_id_ >= 0) {
device_type_ = torch::kCUDA;
device_id_ = gpu_id_;
} else {
device_type_ = torch::kCPU;
}

// Init torch net
torch::Device device(device_type_, device_id_);
pfe_net_ = torch::jit::load(pfe_torch_file_, device);
}

void PointPillars::InitTRT() {
  // Build a TensorRT engine for the RPN only — the PFE now runs through
  // LibTorch (see InitTorch). Create the TensorRT model from the ONNX model
  // and serialize it to an in-memory stream.
  nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr};
  OnnxToTRTModel(rpn_onnx_file_, &rpn_trt_model_stream);
  if (rpn_trt_model_stream == nullptr) {
    std::cerr << "Failed to load ONNX file " << std::endl;
    // Without a serialized model, deserializeCudaEngine below would
    // dereference a null stream; bail out instead of crashing.
    return;
  }

  // Deserialize the engine.
  rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
  if (rpn_runtime_ == nullptr) {
    std::cerr << "Failed to create TensorRT Runtime object." << std::endl;
    rpn_trt_model_stream->destroy();
    return;
  }
  rpn_engine_ = rpn_runtime_->deserializeCudaEngine(
      rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
  // The serialized stream is no longer needed once the engine exists (or
  // failed to build); release it on every path to avoid a leak.
  rpn_trt_model_stream->destroy();
  if (rpn_engine_ == nullptr) {
    std::cerr << "Failed to create TensorRT Engine." << std::endl;
    return;
  }
  rpn_context_ = rpn_engine_->createExecutionContext();
  if (rpn_context_ == nullptr) {
    std::cerr << "Failed to create TensorRT Execution Context." << std::endl;
  }
}
Expand Down Expand Up @@ -634,13 +634,32 @@ void PointPillars::DoInference(const float* in_points_array,
kMaxNumPillars * 4 * sizeof(float),
cudaMemcpyDeviceToDevice, stream));

pfe_context_->enqueueV2(pfe_buffers_, stream, nullptr);
torch::Tensor tensor_pillar_point_feature = torch::from_blob(pfe_buffers_[0],
{kMaxNumPillars, kMaxNumPointsPerPillar, kNumPointFeature},
torch::kCUDA);
torch::Tensor tensor_num_points_per_pillar = torch::from_blob(
pfe_buffers_[1], {kMaxNumPillars}, torch::kCUDA);
torch::Tensor tensor_pillar_coors = torch::from_blob(pfe_buffers_[2],
{kMaxNumPillars, 4}, torch::kCUDA);

torch::Device device(device_type_, device_id_);
if (device_id_ >= 0) {
tensor_pillar_point_feature.to(device);
tensor_num_points_per_pillar.to(device);
tensor_pillar_coors.to(device);
} else {
return;
}

auto pfe_output = pfe_net_.forward({tensor_pillar_point_feature,
tensor_num_points_per_pillar,
tensor_pillar_coors}).toTensor();

GPU_CHECK(
cudaMemset(dev_scattered_feature_, 0, kRpnInputSize * sizeof(float)));
scatter_cuda_ptr_->DoScatterCuda(
host_pillar_count_[0], dev_x_coors_, dev_y_coors_,
reinterpret_cast<float*>(pfe_buffers_[3]), dev_scattered_feature_);
pfe_output.data_ptr<float>(), dev_scattered_feature_);

GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_,
kBatchSize * kRpnInputSize * sizeof(float),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@
#include "NvInfer.h"
#include "NvOnnxParser.h"

#include "torch/script.h"
#include "torch/torch.h"

// headers in local files
#include "modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.h"
#include "modules/perception/lidar/lib/detection/lidar_point_pillars/common.h"
Expand Down Expand Up @@ -115,7 +118,6 @@ class PointPillars {
static const int kMaxNumPillars;
static const int kMaxNumPointsPerPillar;
static const int kNumPointFeature;
static const int kPfeOutputSize;
static const int kGridXSize;
static const int kGridYSize;
static const int kGridZSize;
Expand Down Expand Up @@ -145,7 +147,7 @@ class PointPillars {
const bool reproduce_result_mode_;
const float score_threshold_;
const float nms_overlap_threshold_;
const std::string pfe_onnx_file_;
const std::string pfe_torch_file_;
const std::string rpn_onnx_file_;
// end initializer list

Expand Down Expand Up @@ -180,7 +182,7 @@ class PointPillars {
float* dev_box_anchors_max_y_;
int* dev_anchor_mask_;

void* pfe_buffers_[4];
void* pfe_buffers_[3];
void* rpn_buffers_[4];

float* dev_scattered_feature_;
Expand All @@ -206,11 +208,12 @@ class PointPillars {
std::unique_ptr<PostprocessCuda> postprocess_cuda_ptr_;

Logger g_logger_;
nvinfer1::IExecutionContext* pfe_context_;
int device_id_ = -1;
int gpu_id_ = 0;
torch::DeviceType device_type_;
torch::jit::script::Module pfe_net_;
nvinfer1::IExecutionContext* rpn_context_;
nvinfer1::IRuntime* pfe_runtime_;
nvinfer1::IRuntime* rpn_runtime_;
nvinfer1::ICudaEngine* pfe_engine_;
nvinfer1::ICudaEngine* rpn_engine_;

/**
Expand All @@ -225,6 +228,12 @@ class PointPillars {
*/
void InitAnchors();

/**
* @brief Initializing LibTorch net
* @details Called in the constructor
*/
void InitTorch();

/**
* @brief Initializing TensorRT instances
* @details Called in the constructor
Expand Down Expand Up @@ -323,13 +332,13 @@ class PointPillars {
* reproducible for the same input
* @param[in] score_threshold Score threshold for filtering output
* @param[in] nms_overlap_threshold IOU threshold for NMS
* @param[in] pfe_onnx_file Pillar Feature Extractor ONNX file path
* @param[in] pfe_torch_file Pillar Feature Extractor Torch file path
* @param[in] rpn_onnx_file Region Proposal Network ONNX file path
* @details Variables could be changed through point_pillars_detection
*/
PointPillars(const bool reproduce_result_mode, const float score_threshold,
const float nms_overlap_threshold,
const std::string pfe_onnx_file,
const std::string pfe_torch_file,
const std::string rpn_onnx_file);
~PointPillars();

Expand Down
Loading