Skip to content

Commit

Permalink
cleanup and include ILSVRC mean image
Browse files Browse the repository at this point in the history
  • Loading branch information
rbgirshick committed Nov 22, 2013
1 parent 89dedba commit f785ff2
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 122 deletions.
Binary file added matlab/caffe/ilsvrc_2012_mean.mat
Binary file not shown.
210 changes: 108 additions & 102 deletions matlab/caffe/matcaffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,99 +11,78 @@

using namespace caffe;

// A simple wrapper over CaffeNet that runs the forward process.
struct CaffeNet
{
// The pointer to the internal caffe::Net instance
shared_ptr<Net<float> > net_;

CaffeNet() {}

void init(string param_file, string pretrained_param_file) {
net_.reset(new Net<float>(param_file));
net_->CopyTrainedLayersFrom(pretrained_param_file);
}

virtual ~CaffeNet() {}

/*
inline void check_array_against_blob(
PyArrayObject* arr, Blob<float>* blob) {
CHECK(PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS);
CHECK_EQ(PyArray_NDIM(arr), 4);
CHECK_EQ(PyArray_ITEMSIZE(arr), 4);
npy_intp* dims = PyArray_DIMS(arr);
CHECK_EQ(dims[0], blob->num());
CHECK_EQ(dims[1], blob->channels());
CHECK_EQ(dims[2], blob->height());
CHECK_EQ(dims[3], blob->width());
// The pointer to the internal caffe::Net instance
static shared_ptr<Net<float> > net_;

// Five things to be aware of:
// caffe uses row-major order
// matlab uses column-major order
// caffe uses BGR color channel order
// matlab uses RGB color channel order
// images need to have the data mean subtracted
//
// Data coming in from matlab needs to be in the order
// [batch_images, channels, height, width]
// where width is the fastest dimension.
// Here is the rough matlab for putting image data into the correct
// format:
// % convert from uint8 to single
// im = single(im);
// % reshape to a fixed size (e.g., 227x227)
// im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');
// % permute from RGB to BGR and subtract the data mean (already in BGR)
// im = im(:,:,[3 2 1]) - data_mean;
// % flip width and height to make width the fastest dimension
// im = permute(im, [2 1 3]);
//
// If you have multiple images, cat them with cat(4, ...)
//
// The actual forward function. It takes in a cell array of 4-D arrays as
// input and outputs a cell array.
static mxArray* do_forward(const mxArray* const bottom) {
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(bottom)[0]),
input_blobs.size());
for (unsigned int i = 0; i < input_blobs.size(); ++i) {
const mxArray* const elem = mxGetCell(bottom, i);
const float* const data_ptr =
reinterpret_cast<const float* const>(mxGetPr(elem));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}
*/

// Data needs to be [images, channels, height, width] where width is the fastest dimension
//
// In matlab, reading an image gives [height, width, channels] where height is the fastest dimension
// - want to have the order as [width, height, channels, images]
// (channels in BGR order)
// -
//
// The matlab model is:
// - bottom is a cell array of 4D tensors in the correct format
// - top is allocated in here as a cell array of outputs
//
// The actual forward function. It takes in a matlab cell array of arrays as
// input and returns a matlab cell array of arrays as output. The inputs
// should all have the correct shapes and be single-precision and C-contiguous.
//
//
mxArray* Forward(const mxArray* const bottom) {
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(bottom)[0]),
input_blobs.size());
for (unsigned int i = 0; i < input_blobs.size(); ++i) {
const mxArray* const elem = mxGetCell(bottom, i);
const float* const data_ptr =
reinterpret_cast<const float* const>(mxGetPr(elem));
//check_array_against_blob(arr, input_blobs[i]);
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr,
sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}
const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled();
mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1);
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(),
1, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
//check_array_against_blob(arr, output_blobs[i]);
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, output_blobs[i]->cpu_data(),
sizeof(float) * output_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(),
sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}

return mx_out;
const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled();
mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1);
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(),
1, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, output_blobs[i]->cpu_data(),
sizeof(float) * output_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(),
sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}

};
return mx_out;
}

// The caffe::Caffe utility functions.
static void set_mode_cpu(MEX_ARGS) {
Expand All @@ -123,19 +102,38 @@ static void set_phase_test(MEX_ARGS) {
}

static void set_device(MEX_ARGS) {
  // Exactly one right-hand-side argument is accepted: the device id.
  const bool arg_count_ok = (nrhs == 1);
  if (!arg_count_ok) {
    LOG(ERROR) << "Only given " << nrhs << " arguments";
    mexErrMsgTxt("Wrong number of arguments");
  }

  // mxGetScalar yields a double; narrow it to the integer id and hand it
  // straight to Caffe.
  Caffe::SetDevice(static_cast<int>(mxGetScalar(prhs[0])));
}

static CaffeNet net;
static void init(MEX_ARGS) {
if (nrhs != 2) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
mexErrMsgTxt("Wrong number of arguments");
}

char* param_file = mxArrayToString(prhs[0]);
char* model_file = mxArrayToString(prhs[1]);

static void net_init(MEX_ARGS) {
net.init("/home/rbg/working/caffe/examples/imagenet_deploy.prototxt",
"/home/rbg/working/caffe/examples/alexnet_train_iter_470000");
net_.reset(new Net<float>(string(param_file)));
net_->CopyTrainedLayersFrom(string(model_file));

mxFree(param_file);
mxFree(model_file);
}

static void net_forward(MEX_ARGS) {
plhs[0] = net.Forward(prhs[0]);
static void forward(MEX_ARGS) {
if (nrhs != 1) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
mexErrMsgTxt("Wrong number of arguments");
}

plhs[0] = do_forward(prhs[0]);
}

/** -----------------------------------------------------------------
Expand All @@ -148,8 +146,8 @@ struct handler_registry {

static handler_registry handlers[] = {
// Public API functions
{ "forward", net_forward },
{ "init", net_init },
{ "forward", forward },
{ "init", init },
{ "set_mode_cpu", set_mode_cpu },
{ "set_mode_gpu", set_mode_gpu },
{ "set_phase_train", set_phase_train },
Expand All @@ -164,19 +162,27 @@ static handler_registry handlers[] = {
** matlab entry point: caffe(api_command, arg1, arg2, ...)
**/
void mexFunction(MEX_ARGS) {
  // The first right-hand-side argument selects the API command; without it
  // there is nothing to dispatch.
  if (nrhs == 0) {
    LOG(ERROR) << "No API command given";
    mexErrMsgTxt("An API command is required");
    return;
  }

  // mxArrayToString returns NULL when prhs[0] is not a char array. Reject
  // that here: comparing a std::string against NULL, or streaming a NULL
  // char*, is undefined behaviour.
  char* cmd = mxArrayToString(prhs[0]);
  if (cmd == NULL) {
    LOG(ERROR) << "API command is not a string";
    mexErrMsgTxt("The API command must be a string");
    return;
  }

  // Walk the handler table (terminated by an entry whose func is NULL) and
  // invoke the first handler whose name matches. The command name itself is
  // stripped from the arguments passed on to the handler.
  bool dispatched = false;
  for (int i = 0; handlers[i].func != NULL; i++) {
    if (handlers[i].cmd.compare(cmd) == 0) {
      handlers[i].func(nlhs, plhs, nrhs - 1, prhs + 1);
      dispatched = true;
      break;
    }
  }

  // Log while cmd is still valid, then release it BEFORE raising the MATLAB
  // error so the free is not skipped on the failure path.
  if (!dispatched) {
    LOG(ERROR) << "Unknown command `" << cmd << "'";
  }
  mxFree(cmd);
  if (!dispatched) {
    mexErrMsgTxt("API command not recognized");
  }
}
56 changes: 36 additions & 20 deletions matlab/caffe/matcaffe_demo.m
Original file line number Diff line number Diff line change
@@ -1,46 +1,62 @@
function res = matcaffe_demo(im, gpu)
function scores = matcaffe_demo(im, use_gpu)
% scores = matcaffe_demo(im, use_gpu)
%
% Demo of the matlab wrapper using the ILSVRC network.
%
% input
% im color image as uint8 HxWx3
% use_gpu 1 to use the GPU, 0 to use the CPU
%
% output
% scores 1000-dimensional ILSVRC score vector

% load image net mean
% // In matlab, reading an image gives [height, width, channels] where height is the fastest dimension
% // - want to have the order as [width, height, channels, images]
% // (channels in BGR order)
% // -
model_def_file = '../../examples/imagenet_deploy.prototxt';
% NOTE: you'll have to get the pre-trained ILSVRC network
model_file = '../../examples/alexnet_train_iter_470000';

% 1: swap channel order to BGR
% 2: extract 5 crops and their flips
% 3: swap rows and columns and concat along 4th dim
% 4: wrap in cell array
% init caffe network (spews logging info)
caffe('init', model_def_file, model_file);

caffe('init');
if gpu
% set to use GPU or CPU
if exist('use_gpu', 'var') && use_gpu
caffe('set_mode_gpu');
else
caffe('set_mode_cpu');
end

% put into test mode
caffe('set_phase_test');

% prepare oversampled input
tic;
blob = {prepare_image(im)};
input_data = {prepare_image(im)};
toc;

% do forward pass to get scores
tic;
res = caffe('forward', blob);
scores = caffe('forward', input_data);
toc;
res = reshape(res{1}, [1000 10]);
res = mean(res, 2);

% average output scores
scores = reshape(scores{1}, [1000 10]);
scores = mean(scores, 2);


% ------------------------------------------------------------------------
function images = prepare_image(im)
% ------------------------------------------------------------------------
d = load('ilsvrc_2012_mean');
image_mean = d.image_mean;
IMAGE_MEAN = d.image_mean;
IMAGE_DIM = 256;
CROPPED_DIM = 227;

% resize to fixed input size
im = single(im);
im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear');
% permute from RGB to BGR
im = im(:,:,[3 2 1]) - image_mean;
% permute from RGB to BGR (IMAGE_MEAN is already BGR)
im = im(:,:,[3 2 1]) - IMAGE_MEAN;

% oversample
% oversample (4 corners, center, and their x-axis flips)
images = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single');
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1;
curr = 1;
Expand Down

0 comments on commit f785ff2

Please sign in to comment.