Skip to content

Commit

Permalink
Merge pull request #469 from weinman/grayscale-io-convert
Browse files Browse the repository at this point in the history
Add grayscale input processing for intensity images in tools and
pycaffe.
  • Loading branch information
shelhamer committed Jun 11, 2014
2 parents 300f770 + 8dc270e commit 63c7429
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 25 deletions.
8 changes: 7 additions & 1 deletion include/caffe/util/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,19 @@ inline void WriteProtoToBinaryFile(
}

// Reads the image stored at filename into datum and records label on it.
// When both height and width are positive the image is resized before it is
// stored; otherwise it is kept at its loaded size. is_color selects a
// 3-channel color read (true) or a 1-channel grayscale read (false).
// Returns false, after logging an error, when the loaded image has no data;
// returns true otherwise.
bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, Datum* datum);
const int height, const int width, const bool is_color, Datum* datum);

inline bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, Datum* datum) {
return ReadImageToDatum(filename, label, height, width, true, datum);
}

inline bool ReadImageToDatum(const string& filename, const int label,
Datum* datum) {
return ReadImageToDatum(filename, label, 0, 0, datum);
}


template <typename Dtype>
void hdf5_load_nd_dataset_helper(
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
Expand Down
11 changes: 8 additions & 3 deletions python/caffe/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,24 @@
from caffe.proto import caffe_pb2


def load_image(filename):
def load_image(filename, color=True):
"""
Load an image converting from grayscale or alpha as needed.
Take
filename: string
color: flag for color format. True (default) loads as RGB while False
loads as intensity (if image is already grayscale).
Give
image: an image of size (H x W x 3) with RGB channels of type uint8.
image: an image with type np.float32 of size (H x W x 3) in RGB or
of size (H x W x 1) in grayscale.
"""
# Read the file, convert to floating point, then cast to float32.
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
# A 2-D array has no channel axis: add one, and replicate it to three
# channels only when color output was requested.
if img.ndim == 2:
img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
img = img[:, :, np.newaxis]
if color:
img = np.tile(img, (1, 1, 3))
# A 4-channel image keeps only its first three channels.
# NOTE(review): the color flag is consulted only in the 2-D branch above, so
# an image that already has 3 or 4 channels is returned with 3 channels even
# when color=False.
elif img.shape[2] == 4:
img = img[:, :, :3]
return img
Expand Down
26 changes: 19 additions & 7 deletions src/caffe/util/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,32 +72,44 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
}

// Loads the image at filename with OpenCV and stores it in datum.
//   filename: path handed to cv::imread.
//   label:    value stored via datum->set_label().
//   height, width: when both are > 0 the loaded image is resized first.
//   is_color: true reads with CV_LOAD_IMAGE_COLOR and stores 3 channels;
//             false reads with CV_LOAD_IMAGE_GRAYSCALE and stores 1 channel.
//   datum:    output; channels, height, width, label and data are set, and
//             any previous data / float_data is cleared.
// Returns false (after logging an error) if cv_img has no data, else true.
bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, Datum* datum) {
const int height, const int width, const bool is_color, Datum* datum) {
cv::Mat cv_img;
int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
CV_LOAD_IMAGE_GRAYSCALE);
if (height > 0 && width > 0) {
cv::Mat cv_img_origin = cv::imread(filename, CV_LOAD_IMAGE_COLOR);
cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag);
// NOTE(review): cv_img_origin is not checked before the resize; the
// !cv_img.data test below only runs afterwards -- confirm how cv::resize
// behaves when the read failed.
// NOTE(review): OpenCV documents cv::Size as (width, height); the arguments
// here are passed as (height, width) -- confirm the intended orientation.
cv::resize(cv_img_origin, cv_img, cv::Size(height, width));
} else {
cv_img = cv::imread(filename, CV_LOAD_IMAGE_COLOR);
cv_img = cv::imread(filename, cv_read_flag);
}
if (!cv_img.data) {
LOG(ERROR) << "Could not open or find file " << filename;
return false;
}
datum->set_channels(3);
int num_channels = (is_color ? 3 : 1);
datum->set_channels(num_channels);
datum->set_height(cv_img.rows);
datum->set_width(cv_img.cols);
datum->set_label(label);
datum->clear_data();
datum->clear_float_data();
string* datum_string = datum->mutable_data();
for (int c = 0; c < 3; ++c) {
if (is_color) {
// Color path: bytes are appended channel by channel, and within each
// channel row by row (loop order c, h, w).
for (int c = 0; c < num_channels; ++c) {
for (int h = 0; h < cv_img.rows; ++h) {
for (int w = 0; w < cv_img.cols; ++w) {
datum_string->push_back(
static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
}
}
}
} else { // Faster than repeatedly testing is_color for each pixel w/i loop
// Grayscale path: one byte per pixel, appended row by row.
for (int h = 0; h < cv_img.rows; ++h) {
for (int w = 0; w < cv_img.cols; ++w) {
datum_string->push_back(
static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
static_cast<char>(cv_img.at<uchar>(h, w)));
}
}
}
}
return true;
}
Expand Down
35 changes: 21 additions & 14 deletions tools/convert_imageset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
// This program converts a set of images to a leveldb by storing them as Datum
// proto buffers.
// Usage:
// convert_imageset ROOTFOLDER/ LISTFILE DB_NAME RANDOM_SHUFFLE[0 or 1] \
// convert_imageset [-g] ROOTFOLDER/ LISTFILE DB_NAME RANDOM_SHUFFLE[0 or 1] \
// [resize_height] [resize_width]
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
// subfolder1/file1.JPEG 7
// ....
// if RANDOM_SHUFFLE is 1, a random shuffle will be carried out before we
// process the file lines.
// Optional flag -g indicates the images should be read as
// single-channel grayscale. If omitted, grayscale images will be
// converted to color.

#include <glog/logging.h>
#include <leveldb/db.h>
Expand All @@ -30,49 +33,53 @@ using std::string;

int main(int argc, char** argv) {
::google::InitGoogleLogging(argv[0]);
if (argc < 4 || argc > 7) {
if (argc < 4 || argc > 8) {
printf("Convert a set of images to the leveldb format used\n"
"as input for Caffe.\n"
"Usage:\n"
" convert_imageset ROOTFOLDER/ LISTFILE DB_NAME"
" convert_imageset [-g] ROOTFOLDER/ LISTFILE DB_NAME"
" RANDOM_SHUFFLE_DATA[0 or 1] [resize_height] [resize_width]\n"
"The ImageNet dataset for the training demo is at\n"
" http://www.image-net.org/download-images\n");
return 1;
}
std::ifstream infile(argv[2]);

// Test whether argv[1] == "-g"
bool is_color= !(string("-g") == string(argv[1]));
int arg_offset = (is_color ? 0 : 1);
std::ifstream infile(argv[arg_offset+2]);
std::vector<std::pair<string, int> > lines;
string filename;
int label;
while (infile >> filename >> label) {
lines.push_back(std::make_pair(filename, label));
}
if (argc >= 5 && argv[4][0] == '1') {
if (argc >= (arg_offset+5) && argv[arg_offset+4][0] == '1') {
// randomly shuffle data
LOG(INFO) << "Shuffling data";
std::random_shuffle(lines.begin(), lines.end());
}
LOG(INFO) << "A total of " << lines.size() << " images.";
int resize_height = 0;
int resize_width = 0;
if (argc >= 6) {
resize_height = atoi(argv[5]);
if (argc >= (arg_offset+6)) {
resize_height = atoi(argv[arg_offset+5]);
}
if (argc >= 7) {
resize_width = atoi(argv[6]);
if (argc >= (arg_offset+7)) {
resize_width = atoi(argv[arg_offset+6]);
}

leveldb::DB* db;
leveldb::Options options;
options.error_if_exists = true;
options.create_if_missing = true;
options.write_buffer_size = 268435456;
LOG(INFO) << "Opening leveldb " << argv[3];
LOG(INFO) << "Opening leveldb " << argv[arg_offset+3];
leveldb::Status status = leveldb::DB::Open(
options, argv[3], &db);
CHECK(status.ok()) << "Failed to open leveldb " << argv[3];
options, argv[arg_offset+3], &db);
CHECK(status.ok()) << "Failed to open leveldb " << argv[arg_offset+3];

string root_folder(argv[1]);
string root_folder(argv[arg_offset+1]);
Datum datum;
int count = 0;
const int kMaxKeyLength = 256;
Expand All @@ -82,7 +89,7 @@ int main(int argc, char** argv) {
bool data_size_initialized = false;
for (int line_id = 0; line_id < lines.size(); ++line_id) {
if (!ReadImageToDatum(root_folder + lines[line_id].first,
lines[line_id].second, resize_height, resize_width, &datum)) {
lines[line_id].second, resize_height, resize_width, is_color, &datum)) {
continue;
}
if (!data_size_initialized) {
Expand Down

0 comments on commit 63c7429

Please sign in to comment.