Skip to content

Add RCNNLossLayer, RCNNDetectionLayer for Faster(er) R-CNN #3496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/api/v2/config/layer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,11 @@ multibox_loss
.. autoclass:: paddle.v2.layer.multibox_loss
:noindex:

rcnn_loss
--------------
.. autoclass:: paddle.v2.layer.rcnn_loss
:noindex:

Check Layer
============

Expand Down Expand Up @@ -510,3 +515,8 @@ detection_output
----------------
.. autoclass:: paddle.v2.layer.detection_output
:noindex:

rcnn_detection
----------------
.. autoclass:: paddle.v2.layer.rcnn_detection
:noindex:
143 changes: 143 additions & 0 deletions paddle/gserver/layers/DetectionUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,4 +573,147 @@ NormalizedBBox clipBBox(const NormalizedBBox& bbox) {
return clippedBBox;
}

void applyNMSFast(const vector<pair<real, NormalizedBBox>>& bboxes,
size_t topK,
real confThreshold,
real nmsThreshold,
vector<size_t>* indices) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see applyNMSFast is similar with SSD:

void applyNMSFast(const vector<NormalizedBBox>& bboxes,
                  const real* confScoreData,
                  size_t classIdx,
                  size_t topK,
                  real confThreshold,
                  real nmsThreshold,
                  size_t numPriorBBoxes,
                  size_t numClasses,
                  vector<size_t>* indices) {
  vector<pair<real, size_t>> scores;
  for (size_t i = 0; i < numPriorBBoxes; ++i) {
    size_t confOffset = i * numClasses + classIdx;
    if (confScoreData[confOffset] > confThreshold)
      scores.push_back(std::make_pair(confScoreData[confOffset], i));
  }
  // ...
}

觉得可以写成下面,依据confScoreData来判断:

void applyNMSFast(const vector<NormalizedBBox>& bboxes,
                  size_t topK,
                  real confThreshold,
                  real nmsThreshold,
                  size_t numClasses,
                  vector<size_t>* indices,
                   const real* confScoreData,
                  size_t classIdx,
                  size_t numPriorBBoxes) {
  vector<pair<real, size_t>> scores;
  if (confScoreData) {
    for (size_t i = 0; i < numPriorBBoxes; ++i) {
      size_t confOffset = i * numClasses + classIdx;
      if (confScoreData[confOffset] > confThreshold)
        scores.push_back(std::make_pair(confScoreData[confOffset], i));
    }
  } else {
    for (size_t i = 0; i < bboxes.size(); ++i) {
      scores.push_back(std::make_pair(bboxes[i].first, i));
    }
  }
 // ...
}

vector<pair<real, size_t>> scores;
for (size_t i = 0; i < bboxes.size(); ++i) {
scores.push_back(std::make_pair(bboxes[i].first, i));
}
std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend<size_t>);
if (topK > 0 && topK < scores.size()) scores.resize(topK);
while (scores.size() > 0) {
const size_t idx = scores.front().second;
bool keep = true;
for (size_t i = 0; i < indices->size(); ++i) {
if (keep) {
const size_t savedIdx = (*indices)[i];
real overlap =
jaccardOverlap(bboxes[idx].second, bboxes[savedIdx].second);
keep = overlap <= nmsThreshold;
} else {
break;
}
}
if (keep) indices->push_back(idx);
scores.erase(scores.begin());
}
}

// Runs per-class NMS for every image in allDecodedBBoxes, then caps the
// number of detections per image at keepTopK (when keepTopK > 0) by keeping
// the highest-scoring ones. Kept bbox indices are stored in
// (*allDetectionIndices)[image][class]; the total kept count is returned.
// NOTE(review): confThreshold is a size_t here, so a fractional threshold is
// truncated before it is forwarded to applyNMSFast - confirm the intent.
size_t getDetectionIndices(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

目测和applyNMSFast类似,和SSD里的代码可复用~

    const size_t backgroundId,
    const size_t confThreshold,
    const size_t nmsTopK,
    const real nmsThreshold,
    const size_t keepTopK,
    const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>&
        allDecodedBBoxes,
    map<size_t, map<size_t, vector<size_t>>>* allDetectionIndices) {
  size_t keptOverall = 0;
  for (const auto& imageEntry : allDecodedBBoxes) {
    const size_t imageIdx = imageEntry.first;
    const auto& bboxesByClass = imageEntry.second;
    auto& keptByClass = (*allDetectionIndices)[imageIdx];

    // Stage 1: NMS within each foreground class.
    size_t keptInImage = 0;
    for (const auto& classEntry : bboxesByClass) {
      const size_t classId = classEntry.first;
      if (classId == backgroundId) continue;
      auto& kept = keptByClass[classId];
      applyNMSFast(
          classEntry.second, nmsTopK, confThreshold, nmsThreshold, &kept);
      keptInImage += kept.size();
    }

    // Stage 2: if the image is over budget, rank all survivors by score
    // across classes and keep only the best keepTopK of them.
    const bool overBudget = keepTopK > 0 && keptInImage > keepTopK;
    if (overBudget) {
      vector<pair<real, pair<size_t, size_t>>> ranked;
      for (const auto& classEntry : bboxesByClass) {
        const size_t classId = classEntry.first;
        const vector<size_t>& kept = keptByClass[classId];
        for (const size_t bboxIdx : kept) {
          ranked.push_back(std::make_pair(classEntry.second[bboxIdx].first,
                                          std::make_pair(classId, bboxIdx)));
        }
      }
      std::sort(ranked.begin(),
                ranked.end(),
                sortScorePairDescend<pair<size_t, size_t>>);
      ranked.resize(keepTopK);

      // Rebuild the per-class index lists from the survivors only.
      keptByClass.clear();
      for (const auto& entry : ranked) {
        keptByClass[entry.second.first].push_back(entry.second.second);
      }
      keptInImage = keepTopK;
    }
    keptOverall += keptInImage;
  }
  return keptOverall;
}

void getDetectionOutput(
const size_t numKept,
const map<size_t, map<size_t, vector<size_t>>>& allIndices,
const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>&
allDecodedBBoxes,
Matrix& out) {
MatrixPtr outBuffer;
Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false);
real* bufferData = outBuffer->getData();
size_t count = 0;
for (const auto& batchIdxIndicesPair : allIndices) {
size_t batchIdx = batchIdxIndicesPair.first;
for (const auto& classIndicesPair : batchIdxIndicesPair.second) {
size_t classId = classIndicesPair.first;
const vector<size_t>& indices = classIndicesPair.second;
const vector<pair<real, NormalizedBBox>>& scoreBBoxes =
allDecodedBBoxes.at(batchIdx).at(classId);
for (size_t i = 0; i < indices.size(); ++i) {
size_t idx = indices[i];
bufferData[count * 7] = batchIdx;
bufferData[count * 7 + 1] = classId;
bufferData[count * 7 + 2] = scoreBBoxes[idx].first;
bufferData[count * 7 + 3] = scoreBBoxes[idx].second.xMin;
bufferData[count * 7 + 4] = scoreBBoxes[idx].second.yMin;
bufferData[count * 7 + 5] = scoreBBoxes[idx].second.xMax;
bufferData[count * 7 + 6] = scoreBBoxes[idx].second.yMax;
++count;
}
}
}
out.copyFrom(bufferData, numKept * 7);
}

// Applies the predicted offsets (dx, dy, dw, dh) in locPredData to the prior
// box (xMin, yMin, xMax, yMax) in priorBBoxData and returns the decoded box.
// The center is shifted by dx/dy scaled with the prior size, and the size is
// scaled by exp(dw)/exp(dh).
NormalizedBBox decodeBBox(const vector<real>& priorBBoxData,
                          const vector<real>& locPredData) {
  // Prior box in (center, size) form. The extra 1 presumably treats the
  // corners as inclusive pixel coordinates - TODO confirm.
  const real priorW = priorBBoxData[2] - priorBBoxData[0] + 1;
  const real priorH = priorBBoxData[3] - priorBBoxData[1] + 1;
  const real priorCx = priorBBoxData[0] + priorW / 2;
  const real priorCy = priorBBoxData[1] + priorH / 2;

  // Decoded box in (center, size) form.
  const real centerX = locPredData[0] * priorW + priorCx;
  const real centerY = locPredData[1] * priorH + priorCy;
  const real width = std::exp(locPredData[2]) * priorW;
  const real height = std::exp(locPredData[3]) * priorH;

  // Back to corner form.
  const real halfWidth = width / 2;
  const real halfHeight = height / 2;
  NormalizedBBox decoded;
  decoded.xMin = centerX - halfWidth;
  decoded.yMin = centerY - halfHeight;
  decoded.xMax = centerX + halfWidth;
  decoded.yMax = centerY + halfHeight;
  return decoded;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

上面代码也类似,不过为了方便调试收敛效果,代码优化也行。


} // namespace paddle
62 changes: 62 additions & 0 deletions paddle/gserver/layers/DetectionUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,4 +304,66 @@ void getDetectionOutput(const real* confData,

NormalizedBBox clipBBox(const NormalizedBBox& bbox);

/**
 * @brief Do NMS for bboxes to remove duplicated bboxes
 * @param bboxes (score, bbox) pairs to apply NMS on
 * @param topK If > 0, number of highest-scoring candidates considered
 * @param confThreshold Low boundary of confidence score
 *        NOTE(review): not consulted by the definition in DetectionUtil.cpp
 * @param nmsThreshold Threshold of overlap
 * @param indices Indices of high quality bboxes
 */
void applyNMSFast(const vector<pair<real, NormalizedBBox>>& bboxes,
                  size_t topK,
                  real confThreshold,
                  real nmsThreshold,
                  vector<size_t>* indices);

/**
* @brief Get detection results which satisfy requirements
* @param backgroundId Background class, skipped when running NMS
* @param confThreshold Threshold of class confidence
* NOTE(review): declared as size_t, so a fractional threshold is truncated
* before it is forwarded to applyNMSFast - confirm whether real was intended
* @param nmsTopK Used in NMS operation to keep top k bbox
* @param nmsThreshold Used in NMS, threshold of overlap
* @param keepTopK How many bboxes are kept per image (0 disables the cap)
* @param allDecodedBBoxes Decoded bboxes for all images
* @param allDetectionIndices Save detection bbox indices
* @return Total number of kept bboxes over all images
*/
size_t getDetectionIndices(
const size_t backgroundId,
const size_t confThreshold,
const size_t nmsTopK,
const real nmsThreshold,
const size_t keepTopK,
const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>&
allDecodedBBoxes,
map<size_t, map<size_t, vector<size_t>>>* allDetectionIndices);

/**
* @brief Get detection results
* @param numKept Total number of kept bboxes, i.e. rows written to out
* @param allIndices Indices of predicted bboxes, keyed by image then class
* @param allDecodedBBoxes Decoded (score, bbox) pairs, keyed by image then
* class
* @param out Output matrix, one row per kept bbox:
* image number | label | confidence score | xMin | yMin | xMax | yMax
*/
void getDetectionOutput(
const size_t numKept,
const map<size_t, map<size_t, vector<size_t>>>& allIndices,
const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>&
allDecodedBBoxes,
Matrix& out);

/**
* @brief Decode prior bbox with offset parameters
* (no variance parameters are taken by this signature)
* @param priorBBoxData Prior bbox to be decoded: xMin, yMin, xMax, yMax
* @param locPredData Offset parameters: dx, dy, dw, dh
* @return Decoded bbox
*/
NormalizedBBox decodeBBox(const vector<real>& priorBBoxData,
const vector<real>& locPredData);

} // namespace paddle
123 changes: 123 additions & 0 deletions paddle/gserver/layers/RCNNDetectionLayer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "RCNNDetectionLayer.h"

namespace paddle {

REGISTER_LAYER(rcnn_detection, RCNNDetectionLayer);

/**
 * Initializes the base layer and caches the detection settings found in the
 * rcnn_detection_conf of the first input.
 *
 * @param layerMap     Forwarded to Layer::init
 * @param parameterMap Forwarded to Layer::init
 * @return false if the base-class initialization fails, true otherwise
 */
bool RCNNDetectionLayer::init(const LayerMap& layerMap,
                              const ParameterMap& parameterMap) {
  // Propagate a failed base-class initialization instead of discarding it.
  if (!Layer::init(layerMap, parameterMap)) return false;

  const auto& layerConf = config_.inputs(0).rcnn_detection_conf();
  nmsThreshold_ = layerConf.nms_threshold();
  confidenceThreshold_ = layerConf.confidence_threshold();
  nmsTopK_ = layerConf.nms_top_k();
  keepTopK_ = layerConf.keep_top_k();
  numClasses_ = layerConf.num_classes();
  backgroundId_ = layerConf.background_id();
  return true;
}

/**
 * Forward pass. Takes RoIs (input 0), per-class location predictions
 * (input 1) and per-class confidence predictions (input 2), decodes one bbox
 * per RoI and foreground class, selects detections with
 * getDetectionIndices, and writes them to the output as 7-column rows.
 */
void RCNNDetectionLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr roiValue = getInputValue(0);
  MatrixPtr locPredValue = getInputValue(1);
  MatrixPtr confPredValue = getInputValue(2);

  // Softmax is applied to a private copy so the input value is not modified.
  {
    MatrixPtr normalized;
    Matrix::resizeOrCreate(normalized,
                           confPredValue->getHeight(),
                           confPredValue->getWidth(),
                           false,
                           useGpu_);
    normalized->copyFrom(*confPredValue);
    normalized->softmax(*normalized);
    confPredValue = normalized;
  }

  if (useGpu_) {
    // The decoding below reads raw data pointers, so mirror every input
    // into a CPU-side matrix first.
    auto toCpu = [](const MatrixPtr& src) -> MatrixPtr {
      MatrixPtr dst;
      Matrix::resizeOrCreate(
          dst, src->getHeight(), src->getWidth(), false, false);
      dst->copyFrom(*src);
      return dst;
    };
    roiValue = toCpu(roiValue);
    locPredValue = toCpu(locPredValue);
    confPredValue = toCpu(confPredValue);
  }

  // Each RoI row is laid out as:
  // | batch_idx | xmin | ymin | xmax | ymax |
  const size_t roiNum = roiValue->getHeight();
  const size_t roiDim = roiValue->getWidth();
  const real* roiRows = roiValue->getData();
  const real* locRows = locPredValue->getData();
  const real* confRows = confPredValue->getData();

  // <batchIdx, <classIdx, <(score, box)>>>
  std::map<size_t,
           std::map<size_t, std::vector<std::pair<real, NormalizedBBox>>>>
      allDecodedBBoxes;
  for (size_t n = 0; n < roiNum; ++n) {
    const real* roi = roiRows + n * roiDim;
    const int batchIdx = roi[0];
    const std::vector<real> roiLocData(roi + 1, roi + 5);  // RoI location
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

int batchIdx = *(roisData + n * roiDim);
std::vector<real> roiLocData(4);  // RoI location
for (size_t j = 0; j < 4; ++j) {
  roiLocData[j] = *(roisData + n * roiDim + 1 + j);
}

==>

roisData += roiDim;
int batchIdx = *roisData;
std::vector<real> roiLocData(roisData+ 1, roisData+ 5);

std::vector的初始化:http://www.cplusplus.com/reference/vector/vector/vector/

    // Per-class predictions of this RoI: 4 offsets and 1 score per class.
    const real* locRow = locRows + n * numClasses_ * 4;
    const real* confRow = confRows + n * numClasses_;
    for (size_t c = 0; c < numClasses_; ++c) {
      if (c == backgroundId_) continue;
      const std::vector<real> predLocData(locRow + c * 4, locRow + c * 4 + 4);
      const real predConfData = confRow[c];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

同样代码可以短一些:

locPredData += numClasses_ * 4;
for (size_t c = 0; c < numClasses_; ++c) {
  if (c == backgroundId_) continue;
  std::vector<real> predLocData(locPredData + c * 4, locPredData + c * 4 + 4);
  real predConfData = *(confPredData + c);
  // ...

      allDecodedBBoxes[batchIdx][c].push_back(
          std::make_pair(predConfData, decodeBBox(roiLocData, predLocData)));
    }
  }

  // <batchIdx, <classIdx, <bboxIdxes>>
  std::map<size_t, std::map<size_t, std::vector<size_t>>> allIndices;
  const size_t numKept = getDetectionIndices(backgroundId_,
                                             confidenceThreshold_,
                                             nmsTopK_,
                                             nmsThreshold_,
                                             keepTopK_,
                                             allDecodedBBoxes,
                                             &allIndices);

  // One output row per kept detection, 7 values each.
  resetOutput(numKept, 7);
  getDetectionOutput(numKept, allIndices, allDecodedBBoxes, *getOutputValue());
}

} // namespace paddle
Loading