diff --git a/matlab/caffe/ilsvrc_2012_mean.mat b/matlab/caffe/ilsvrc_2012_mean.mat new file mode 100644 index 00000000000..f1da25c84a1 Binary files /dev/null and b/matlab/caffe/ilsvrc_2012_mean.mat differ diff --git a/matlab/caffe/matcaffe.cpp b/matlab/caffe/matcaffe.cpp index 1f11a2bc66a..d137b31e812 100644 --- a/matlab/caffe/matcaffe.cpp +++ b/matlab/caffe/matcaffe.cpp @@ -11,99 +11,78 @@ using namespace caffe; -// A simple wrapper over CaffeNet that runs the forward process. -struct CaffeNet -{ - // The pointer to the internal caffe::Net instance - shared_ptr > net_; - - CaffeNet() {} - - void init(string param_file, string pretrained_param_file) { - net_.reset(new Net(param_file)); - net_->CopyTrainedLayersFrom(pretrained_param_file); - } - - virtual ~CaffeNet() {} - - /* - inline void check_array_against_blob( - PyArrayObject* arr, Blob* blob) { - CHECK(PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS); - CHECK_EQ(PyArray_NDIM(arr), 4); - CHECK_EQ(PyArray_ITEMSIZE(arr), 4); - npy_intp* dims = PyArray_DIMS(arr); - CHECK_EQ(dims[0], blob->num()); - CHECK_EQ(dims[1], blob->channels()); - CHECK_EQ(dims[2], blob->height()); - CHECK_EQ(dims[3], blob->width()); +// The pointer to the internal caffe::Net instance +static shared_ptr > net_; + +// Five things to be aware of: +// caffe uses row-major order +// matlab uses column-major order +// caffe uses BGR color channel order +// matlab uses RGB color channel order +// images need to have the data mean subtracted +// +// Data coming in from matlab needs to be in the order +// [batch_images, channels, height, width] +// where width is the fastest dimension. 
+// Here is the rough matlab for putting image data into the correct +// format: +// % convert from uint8 to single +// im = single(im); +// % reshape to a fixed size (e.g., 227x227) +// im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); +// % permute from RGB to BGR and subtract the data mean (already in BGR) +// im = im(:,:,[3 2 1]) - data_mean; +// % flip width and height to make width the fastest dimension +// im = permute(im, [2 1 3]); +// +// If you have multiple images, cat them with cat(4, ...) +// +// The actual forward function. It takes in a cell array of 4-D arrays as +// input and outputs a cell array. +static mxArray* do_forward(const mxArray* const bottom) { + vector*>& input_blobs = net_->input_blobs(); + CHECK_EQ(static_cast(mxGetDimensions(bottom)[0]), + input_blobs.size()); + for (unsigned int i = 0; i < input_blobs.size(); ++i) { + const mxArray* const elem = mxGetCell(bottom, i); + const float* const data_ptr = + reinterpret_cast(mxGetPr(elem)); + switch (Caffe::mode()) { + case Caffe::CPU: + memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr, + sizeof(float) * input_blobs[i]->count()); + break; + case Caffe::GPU: + cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr, + sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice); + break; + default: + LOG(FATAL) << "Unknown Caffe mode."; + } // switch (Caffe::mode()) } - */ - - // Data needs to be [images, channels, height, width] where width is the fastest dimension - // - // In matlab, reading an image gives [height, width, channels] where height is the fastest dimension - // - want to have the order as [width, height, channels, images] - // (channels in BGR order) - // - - // - // The matlab model is: - // - bottom is a cell array of 4D tensors in the correct format - // - top is allocated in here as a cell array of outputs - // - // The actual forward function. It takes in a python list of numpy arrays as - // input and a python list of numpy arrays as output. 
The input and output - // should all have correct shapes, are single-precisionabcdnt- and c contiguous. - // - // - mxArray* Forward(const mxArray* const bottom) { - vector*>& input_blobs = net_->input_blobs(); - CHECK_EQ(static_cast(mxGetDimensions(bottom)[0]), - input_blobs.size()); - for (unsigned int i = 0; i < input_blobs.size(); ++i) { - const mxArray* const elem = mxGetCell(bottom, i); - const float* const data_ptr = - reinterpret_cast(mxGetPr(elem)); - //check_array_against_blob(arr, input_blobs[i]); - switch (Caffe::mode()) { - case Caffe::CPU: - memcpy(input_blobs[i]->mutable_cpu_data(), data_ptr, - sizeof(float) * input_blobs[i]->count()); - break; - case Caffe::GPU: - cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr, - sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice); - break; - default: - LOG(FATAL) << "Unknown Caffe mode."; - } // switch (Caffe::mode()) - } - const vector*>& output_blobs = net_->ForwardPrefilled(); - mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1); - for (unsigned int i = 0; i < output_blobs.size(); ++i) { - mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(), - 1, mxSINGLE_CLASS, mxREAL); - mxSetCell(mx_out, i, mx_blob); - float* data_ptr = reinterpret_cast(mxGetPr(mx_blob)); - //check_array_against_blob(arr, output_blobs[i]); - switch (Caffe::mode()) { - case Caffe::CPU: - memcpy(data_ptr, output_blobs[i]->cpu_data(), - sizeof(float) * output_blobs[i]->count()); - break; - case Caffe::GPU: - cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(), - sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost); - break; - default: - LOG(FATAL) << "Unknown Caffe mode."; - } // switch (Caffe::mode()) - } - - return mx_out; + const vector*>& output_blobs = net_->ForwardPrefilled(); + mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1); + for (unsigned int i = 0; i < output_blobs.size(); ++i) { + mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(), + 1, 
mxSINGLE_CLASS, mxREAL); + mxSetCell(mx_out, i, mx_blob); + float* data_ptr = reinterpret_cast(mxGetPr(mx_blob)); + switch (Caffe::mode()) { + case Caffe::CPU: + memcpy(data_ptr, output_blobs[i]->cpu_data(), + sizeof(float) * output_blobs[i]->count()); + break; + case Caffe::GPU: + cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(), + sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost); + break; + default: + LOG(FATAL) << "Unknown Caffe mode."; + } // switch (Caffe::mode()) } -}; + return mx_out; +} // The caffe::Caffe utility functions. static void set_mode_cpu(MEX_ARGS) { @@ -123,19 +102,38 @@ static void set_phase_test(MEX_ARGS) { } static void set_device(MEX_ARGS) { + if (nrhs != 1) { + LOG(ERROR) << "Only given " << nrhs << " arguments"; + mexErrMsgTxt("Wrong number of arguments"); + } + int device_id = static_cast(mxGetScalar(prhs[0])); Caffe::SetDevice(device_id); } -static CaffeNet net; +static void init(MEX_ARGS) { + if (nrhs != 2) { + LOG(ERROR) << "Only given " << nrhs << " arguments"; + mexErrMsgTxt("Wrong number of arguments"); + } + + char* param_file = mxArrayToString(prhs[0]); + char* model_file = mxArrayToString(prhs[1]); -static void net_init(MEX_ARGS) { - net.init("/home/rbg/working/caffe/examples/imagenet_deploy.prototxt", - "/home/rbg/working/caffe/examples/alexnet_train_iter_470000"); + net_.reset(new Net(string(param_file))); + net_->CopyTrainedLayersFrom(string(model_file)); + + mxFree(param_file); + mxFree(model_file); } -static void net_forward(MEX_ARGS) { - plhs[0] = net.Forward(prhs[0]); +static void forward(MEX_ARGS) { + if (nrhs != 1) { + LOG(ERROR) << "Only given " << nrhs << " arguments"; + mexErrMsgTxt("Wrong number of arguments"); + } + + plhs[0] = do_forward(prhs[0]); } /** ----------------------------------------------------------------- @@ -148,8 +146,8 @@ struct handler_registry { static handler_registry handlers[] = { // Public API functions - { "forward", net_forward }, - { "init", net_init }, + { "forward", 
forward }, + { "init", init }, { "set_mode_cpu", set_mode_cpu }, { "set_mode_gpu", set_mode_gpu }, { "set_phase_train", set_phase_train }, @@ -164,19 +162,30 @@ static handler_registry handlers[] = { ** matlab entry point: caffe(api_command, arg1, arg2, ...) **/ void mexFunction(MEX_ARGS) { - // TODO: check args + if (nrhs == 0) { + LOG(ERROR) << "No API command given"; + mexErrMsgTxt("An API command is required"); + return; + } + { // Handle input command char *cmd = mxArrayToString(prhs[0]); - //bool dispatched = false; + if (cmd == NULL) { + mexErrMsgTxt("API command must be a string"); + } + bool dispatched = false; // Dispatch to cmd handler for (int i = 0; handlers[i].func != NULL; i++) { if (handlers[i].cmd.compare(cmd) == 0) { handlers[i].func(nlhs, plhs, nrhs-1, prhs+1); - //dispatched = true; + dispatched = true; break; } } + if (!dispatched) { + LOG(ERROR) << "Unknown command `" << cmd << "'"; + mexErrMsgTxt("API command not recognized"); + } mxFree(cmd); - //checkM(dispatched, "Command not found!"); } } diff --git a/matlab/caffe/matcaffe_demo.m b/matlab/caffe/matcaffe_demo.m index 6b4ca2f79e1..d070268412c 100644 --- a/matlab/caffe/matcaffe_demo.m +++ b/matlab/caffe/matcaffe_demo.m @@ -1,46 +1,62 @@ -function res = matcaffe_demo(im, gpu) +function scores = matcaffe_demo(im, use_gpu) +% scores = matcaffe_demo(im, use_gpu) +% +% Demo of the matlab wrapper using the ILSVRC network. 
+% +% input +% im color image as uint8 HxWx3 +% use_gpu 1 to use the GPU, 0 to use the CPU +% +% output +% scores 1000-dimensional ILSVRC score vector -% load image net mean -% // In matlab, reading an image gives [height, width, channels] where height is the fastest dimension -% // - want to have the order as [width, height, channels, images] -% // (channels in BGR order) -% // - +model_def_file = '../../examples/imagenet_deploy.prototxt'; +% NOTE: you'll have to get the pre-trained ILSVRC network +model_file = '../../examples/alexnet_train_iter_470000'; -% 1: swap channel order to BGR -% 2: extract 5 crops and their flips -% 3: swap rows and columns and concat along 4th dim -% 4: wrap in cell aray +% init caffe network (spews logging info) +caffe('init', model_def_file, model_file); -caffe('init'); -if gpu +% set to use GPU or CPU +if exist('use_gpu', 'var') && use_gpu caffe('set_mode_gpu'); else caffe('set_mode_cpu'); end + +% put into test mode caffe('set_phase_test'); + +% prepare oversampled input tic; -blob = {prepare_image(im)}; +input_data = {prepare_image(im)}; toc; + +% do forward pass to get scores tic; -res = caffe('forward', blob); +scores = caffe('forward', input_data); toc; -res = reshape(res{1}, [1000 10]); -res = mean(res, 2); + +% average output scores +scores = reshape(scores{1}, [1000 10]); +scores = mean(scores, 2); +% ------------------------------------------------------------------------ function images = prepare_image(im) +% ------------------------------------------------------------------------ d = load('ilsvrc_2012_mean'); -image_mean = d.image_mean; +IMAGE_MEAN = d.image_mean; IMAGE_DIM = 256; CROPPED_DIM = 227; % resize to fixed input size im = single(im); im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); -% permute from RGB to BGR -im = im(:,:,[3 2 1]) - image_mean; +% permute from RGB to BGR (IMAGE_MEAN is already BGR) +im = im(:,:,[3 2 1]) - IMAGE_MEAN; -% oversample +% oversample (4 corners, center, and their x-axis flips) 
images = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single'); indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; curr = 1;