-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Add RCNNLossLayer, RCNNDetectionLayer for Faster(er) R-CNN #3496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -573,4 +573,147 @@ NormalizedBBox clipBBox(const NormalizedBBox& bbox) { | |
return clippedBBox; | ||
} | ||
|
||
void applyNMSFast(const vector<pair<real, NormalizedBBox>>& bboxes, | ||
size_t topK, | ||
real confThreshold, | ||
real nmsThreshold, | ||
vector<size_t>* indices) { | ||
vector<pair<real, size_t>> scores; | ||
for (size_t i = 0; i < bboxes.size(); ++i) { | ||
scores.push_back(std::make_pair(bboxes[i].first, i)); | ||
} | ||
std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend<size_t>); | ||
if (topK > 0 && topK < scores.size()) scores.resize(topK); | ||
while (scores.size() > 0) { | ||
const size_t idx = scores.front().second; | ||
bool keep = true; | ||
for (size_t i = 0; i < indices->size(); ++i) { | ||
if (keep) { | ||
const size_t savedIdx = (*indices)[i]; | ||
real overlap = | ||
jaccardOverlap(bboxes[idx].second, bboxes[savedIdx].second); | ||
keep = overlap <= nmsThreshold; | ||
} else { | ||
break; | ||
} | ||
} | ||
if (keep) indices->push_back(idx); | ||
scores.erase(scores.begin()); | ||
} | ||
} | ||
|
||
size_t getDetectionIndices( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 目测和 |
||
const size_t backgroundId, | ||
const size_t confThreshold, | ||
const size_t nmsTopK, | ||
const real nmsThreshold, | ||
const size_t keepTopK, | ||
const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>& | ||
allDecodedBBoxes, | ||
map<size_t, map<size_t, vector<size_t>>>* allDetectionIndices) { | ||
size_t totalKeepNum = 0; | ||
for (const auto& batchIdxBBoxesPair : allDecodedBBoxes) { | ||
size_t batchIdx = batchIdxBBoxesPair.first; | ||
std::map<size_t, std::vector<size_t>>& indices = | ||
(*allDetectionIndices)[batchIdx]; | ||
size_t numDetected = 0; | ||
for (const auto& classDecodedBBoxesPair : batchIdxBBoxesPair.second) { | ||
size_t classId = classDecodedBBoxesPair.first; | ||
if (classId == backgroundId) { | ||
continue; | ||
} else { | ||
applyNMSFast(classDecodedBBoxesPair.second, | ||
nmsTopK, | ||
confThreshold, | ||
nmsThreshold, | ||
&(indices[classId])); | ||
numDetected += indices[classId].size(); | ||
} | ||
} | ||
if (keepTopK > 0 && numDetected > keepTopK) { | ||
vector<pair<real, pair<size_t, size_t>>> scoreIndexPairs; | ||
for (const auto& classDecodedBBoxesPair : batchIdxBBoxesPair.second) { | ||
size_t classId = classDecodedBBoxesPair.first; | ||
const vector<size_t>& labelIndices = indices[classId]; | ||
for (size_t i = 0; i < labelIndices.size(); ++i) { | ||
real score = classDecodedBBoxesPair.second[labelIndices[i]].first; | ||
scoreIndexPairs.push_back( | ||
std::make_pair(score, std::make_pair(classId, labelIndices[i]))); | ||
} | ||
} | ||
std::sort(scoreIndexPairs.begin(), | ||
scoreIndexPairs.end(), | ||
sortScorePairDescend<pair<size_t, size_t>>); | ||
scoreIndexPairs.resize(keepTopK); | ||
indices.clear(); | ||
for (size_t i = 0; i < scoreIndexPairs.size(); ++i) { | ||
size_t label = scoreIndexPairs[i].second.first; | ||
size_t idx = scoreIndexPairs[i].second.second; | ||
indices[label].push_back(idx); | ||
} | ||
numDetected = keepTopK; | ||
} | ||
totalKeepNum += numDetected; | ||
} | ||
return totalKeepNum; | ||
} | ||
|
||
void getDetectionOutput( | ||
const size_t numKept, | ||
const map<size_t, map<size_t, vector<size_t>>>& allIndices, | ||
const map<size_t, map<size_t, vector<pair<real, NormalizedBBox>>>>& | ||
allDecodedBBoxes, | ||
Matrix& out) { | ||
MatrixPtr outBuffer; | ||
Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false); | ||
real* bufferData = outBuffer->getData(); | ||
size_t count = 0; | ||
for (const auto& batchIdxIndicesPair : allIndices) { | ||
size_t batchIdx = batchIdxIndicesPair.first; | ||
for (const auto& classIndicesPair : batchIdxIndicesPair.second) { | ||
size_t classId = classIndicesPair.first; | ||
const vector<size_t>& indices = classIndicesPair.second; | ||
const vector<pair<real, NormalizedBBox>>& scoreBBoxes = | ||
allDecodedBBoxes.at(batchIdx).at(classId); | ||
for (size_t i = 0; i < indices.size(); ++i) { | ||
size_t idx = indices[i]; | ||
bufferData[count * 7] = batchIdx; | ||
bufferData[count * 7 + 1] = classId; | ||
bufferData[count * 7 + 2] = scoreBBoxes[idx].first; | ||
bufferData[count * 7 + 3] = scoreBBoxes[idx].second.xMin; | ||
bufferData[count * 7 + 4] = scoreBBoxes[idx].second.yMin; | ||
bufferData[count * 7 + 5] = scoreBBoxes[idx].second.xMax; | ||
bufferData[count * 7 + 6] = scoreBBoxes[idx].second.yMax; | ||
++count; | ||
} | ||
} | ||
} | ||
out.copyFrom(bufferData, numKept * 7); | ||
} | ||
|
||
/**
 * @brief Apply predicted offsets to a prior box and return the decoded box.
 *
 * The prior is converted to centre/size form (width and height are computed
 * as max - min + 1), the centre is shifted by (dx, dy) scaled by the prior
 * size, the size is scaled by exp(dw) and exp(dh), and the result is
 * converted back to corner form.
 *
 * @param priorBBoxData Prior box as {xMin, yMin, xMax, yMax}; at least four
 *                      elements are read.
 * @param locPredData   Predicted offsets as {dx, dy, dw, dh}; at least four
 *                      elements are read.
 * @return The decoded box. No clipping is applied here.
 */
NormalizedBBox decodeBBox(const vector<real>& priorBBoxData,
                          const vector<real>& locPredData) {
  // Prior box in centre/size form.
  const real priorW = priorBBoxData[2] - priorBBoxData[0] + 1;
  const real priorH = priorBBoxData[3] - priorBBoxData[1] + 1;
  const real priorCx = priorBBoxData[0] + priorW / 2;
  const real priorCy = priorBBoxData[1] + priorH / 2;

  // Shift the centre and rescale the extent with the predicted offsets.
  const real centerX = locPredData[0] * priorW + priorCx;
  const real centerY = locPredData[1] * priorH + priorCy;
  const real width = std::exp(locPredData[2]) * priorW;
  const real height = std::exp(locPredData[3]) * priorH;

  // Back to corner form.
  const real halfW = width / 2;
  const real halfH = height / 2;
  NormalizedBBox decoded;
  decoded.xMin = centerX - halfW;
  decoded.yMin = centerY - halfH;
  decoded.xMax = centerX + halfW;
  decoded.yMax = centerY + halfH;
  return decoded;
}
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 上面代码也类似,不过为了方便调试收敛效果,代码优化也行。 |
||
|
||
} // namespace paddle |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include "RCNNDetectionLayer.h" | ||
|
||
namespace paddle { | ||
|
||
REGISTER_LAYER(rcnn_detection, RCNNDetectionLayer); | ||
|
||
bool RCNNDetectionLayer::init(const LayerMap& layerMap, | ||
const ParameterMap& parameterMap) { | ||
Layer::init(layerMap, parameterMap); | ||
auto& layerConf = config_.inputs(0).rcnn_detection_conf(); | ||
nmsThreshold_ = layerConf.nms_threshold(); | ||
confidenceThreshold_ = layerConf.confidence_threshold(); | ||
nmsTopK_ = layerConf.nms_top_k(); | ||
keepTopK_ = layerConf.keep_top_k(); | ||
numClasses_ = layerConf.num_classes(); | ||
backgroundId_ = layerConf.background_id(); | ||
return true; | ||
} | ||
|
||
/**
 * @brief Forward pass: decode per-class boxes for every RoI, run NMS and
 *        write the kept detections to the layer output.
 *
 * Inputs: 0 = RoIs, 1 = location predictions (4 values per class per RoI),
 * 2 = class confidence predictions (one value per class per RoI). The
 * confidences are passed through softmax on a private copy, so the input
 * matrix itself is not modified. The output has 7 columns per detection and
 * is filled by getDetectionOutput.
 *
 * @param passType Passed through to Layer::forward.
 */
void RCNNDetectionLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr roiValue = getInputValue(0);
  MatrixPtr locPredValue = getInputValue(1);
  MatrixPtr rawConfValue = getInputValue(2);

  // Softmax over a copy of the confidence predictions.
  MatrixPtr confPredValue;
  Matrix::resizeOrCreate(confPredValue,
                         rawConfValue->getHeight(),
                         rawConfValue->getWidth(),
                         false,
                         useGpu_);
  confPredValue->copyFrom(*rawConfValue);
  confPredValue->softmax(*confPredValue);

  if (useGpu_) {
    // The decoding below reads raw pointers, so bring all three inputs over
    // from the GPU into freshly sized buffers first.
    auto copyFromGpu = [](const MatrixPtr& src) -> MatrixPtr {
      MatrixPtr dst;
      Matrix::resizeOrCreate(
          dst, src->getHeight(), src->getWidth(), false, false);
      dst->copyFrom(*src);
      return dst;
    };
    roiValue = copyFromGpu(roiValue);
    locPredValue = copyFromGpu(locPredValue);
    confPredValue = copyFromGpu(confPredValue);
  }

  // Each RoI row is laid out as:
  // | batch_idx | xmin | ymin | xmax | ymax |
  const real* rois = roiValue->getData();
  const size_t roiDim = roiValue->getWidth();
  const size_t roiNum = roiValue->getHeight();
  const real* locPred = locPredValue->getData();
  const real* confPred = confPredValue->getData();

  // <batchIdx, <classIdx, <(score, box)>>>
  std::map<size_t,
           std::map<size_t, std::vector<std::pair<real, NormalizedBBox>>>>
      allDecodedBBoxes;
  for (size_t n = 0; n < roiNum; ++n) {
    const real* roiRow = rois + n * roiDim;
    const real* locRow = locPred + n * numClasses_ * 4;
    const real* confRow = confPred + n * numClasses_;

    const int batchIdx = roiRow[0];
    // RoI location. Built with the iterator-range constructor, as suggested
    // in the PR review, instead of an element-by-element copy.
    const std::vector<real> roiLocData(roiRow + 1, roiRow + 5);

    // Decode one box per non-background class. The per-row pointers follow
    // the shorter form proposed in the PR review.
    for (size_t c = 0; c < numClasses_; ++c) {
      if (c == backgroundId_) continue;
      const std::vector<real> predLocData(locRow + c * 4, locRow + c * 4 + 4);
      allDecodedBBoxes[batchIdx][c].push_back(
          std::make_pair(confRow[c], decodeBBox(roiLocData, predLocData)));
    }
  }

  // <batchIdx, <classIdx, <bboxIdxes>>>
  std::map<size_t, std::map<size_t, std::vector<size_t>>> allIndices;
  const size_t numKept = getDetectionIndices(backgroundId_,
                                             confidenceThreshold_,
                                             nmsTopK_,
                                             nmsThreshold_,
                                             keepTopK_,
                                             allDecodedBBoxes,
                                             &allIndices);

  // One 7-value row per kept detection.
  resetOutput(numKept, 7);
  MatrixPtr outV = getOutputValue();
  getDetectionOutput(numKept, allIndices, allDecodedBBoxes, *outV);
}
|
||
} // namespace paddle |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see
applyNMSFast
is similar with SSD:觉得可以写成下面,依据confScoreData来判断: