From d21da02474a0f5be28ce62517de017be761ac8e0 Mon Sep 17 00:00:00 2001 From: Jerod Weinman Date: Mon, 2 Jun 2014 15:54:25 -0500 Subject: [PATCH 1/3] Added an iscolor flag to io.cpp method ReadImageToDatum to handle grayscale images and a corresponding commandline flag [-g] to convert_imageset.cpp. --- include/caffe/util/io.hpp | 8 +++++++- src/caffe/util/io.cpp | 25 ++++++++++++++++++------- tools/convert_imageset.cpp | 35 +++++++++++++++++++++-------------- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 056b573db4c..829293ecaef 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -61,13 +61,19 @@ inline void WriteProtoToBinaryFile( } bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, Datum* datum); + const int height, const int width, const bool iscolor, Datum* datum); + +inline bool ReadImageToDatum(const string& filename, const int label, + const int height, const int width, Datum* datum) { + return ReadImageToDatum(filename, label, height, width, true, datum); +} inline bool ReadImageToDatum(const string& filename, const int label, Datum* datum) { return ReadImageToDatum(filename, label, 0, 0, datum); } + template void hdf5_load_nd_dataset_helper( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index 44858f48a36..f2650e9a22a 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -72,32 +72,43 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) { } bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, Datum* datum) { + const int height, const int width, const bool iscolor, Datum* datum) { cv::Mat cv_img; + int cv_read_flag = (iscolor ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); if (height > 0 && width > 0) { - cv::Mat cv_img_origin = cv::imread(filename, CV_LOAD_IMAGE_COLOR); + cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag); cv::resize(cv_img_origin, cv_img, cv::Size(height, width)); } else { - cv_img = cv::imread(filename, CV_LOAD_IMAGE_COLOR); + cv_img = cv::imread(filename, cv_read_flag); } if (!cv_img.data) { LOG(ERROR) << "Could not open or find file " << filename; return false; } - datum->set_channels(3); + int num_channels = (iscolor ? 3 : 1); + datum->set_channels(num_channels); datum->set_height(cv_img.rows); datum->set_width(cv_img.cols); datum->set_label(label); datum->clear_data(); datum->clear_float_data(); string* datum_string = datum->mutable_data(); - for (int c = 0; c < 3; ++c) { + if (iscolor) { + for (int c = 0; c < num_channels; ++c) { + for (int h = 0; h < cv_img.rows; ++h) { + for (int w = 0; w < cv_img.cols; ++w) { + datum_string->push_back( + static_cast(cv_img.at(h, w)[c])); + } + } + } + } else { // Faster than repeatedly testing iscolor for each pixel w/i loop for (int h = 0; h < cv_img.rows; ++h) { for (int w = 0; w < cv_img.cols; ++w) { datum_string->push_back( - static_cast(cv_img.at(h, w)[c])); + static_cast(cv_img.at(h, w))); + } } - } } return true; } diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index 2420f9538fb..bbf848adc8a 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -2,7 +2,7 @@ // This program converts a set of images to a leveldb by storing them as Datum // proto buffers. // Usage: -// convert_imageset ROOTFOLDER/ LISTFILE DB_NAME RANDOM_SHUFFLE[0 or 1] \ +// convert_imageset [-g] ROOTFOLDER/ LISTFILE DB_NAME RANDOM_SHUFFLE[0 or 1] \ // [resize_height] [resize_width] // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE // should be a list of files as well as their labels, in the format as @@ -10,6 +10,9 @@ // .... // if RANDOM_SHUFFLE is 1, a random shuffle will be carried out before we // process the file lines. +// Optional flag -g indicates the images should be read as +// single-channel grayscale. If omitted, grayscale images will be +// converted to color. #include #include @@ -30,24 +33,28 @@ using std::string; int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); - if (argc < 4 || argc > 7) { + if (argc < 4 || argc > 8) { printf("Convert a set of images to the leveldb format used\n" "as input for Caffe.\n" "Usage:\n" - " convert_imageset ROOTFOLDER/ LISTFILE DB_NAME" + " convert_imageset [-g] ROOTFOLDER/ LISTFILE DB_NAME" " RANDOM_SHUFFLE_DATA[0 or 1] [resize_height] [resize_width]\n" "The ImageNet dataset for the training demo is at\n" " http://www.image-net.org/download-images\n"); return 1; } - std::ifstream infile(argv[2]); + + // Test whether argv[1] == "-g" + bool iscolor= !(string("-g") == string(argv[1])); + int arg_offset = (iscolor ? 0 : 1); + std::ifstream infile(argv[arg_offset+2]); std::vector > lines; string filename; int label; while (infile >> filename >> label) { lines.push_back(std::make_pair(filename, label)); } - if (argc >= 5 && argv[4][0] == '1') { + if (argc >= (arg_offset+5) && argv[arg_offset+4][0] == '1') { // randomly shuffle data LOG(INFO) << "Shuffling data"; std::random_shuffle(lines.begin(), lines.end()); @@ -55,11 +62,11 @@ int main(int argc, char** argv) { LOG(INFO) << "A total of " << lines.size() << " images."; int resize_height = 0; int resize_width = 0; - if (argc >= 6) { - resize_height = atoi(argv[5]); + if (argc >= (arg_offset+6)) { + resize_height = atoi(argv[arg_offset+5]); } - if (argc >= 7) { - resize_width = atoi(argv[6]); + if (argc >= (arg_offset+7)) { + resize_width = atoi(argv[arg_offset+6]); } leveldb::DB* db; @@ -67,12 +74,12 @@ int main(int argc, char** argv) { options.error_if_exists = true; options.create_if_missing = true; options.write_buffer_size = 268435456; - LOG(INFO) << "Opening leveldb " << argv[3]; + LOG(INFO) << "Opening leveldb " << argv[arg_offset+3]; leveldb::Status status = leveldb::DB::Open( - options, argv[3], &db); - CHECK(status.ok()) << "Failed to open leveldb " << argv[3]; + options, argv[arg_offset+3], &db); + CHECK(status.ok()) << "Failed to open leveldb " << argv[arg_offset+3]; - string root_folder(argv[1]); + string root_folder(argv[arg_offset+1]); Datum datum; int count = 0; const int kMaxKeyLength = 256; @@ -82,7 +89,7 @@ int main(int argc, char** argv) { bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { if (!ReadImageToDatum(root_folder + lines[line_id].first, - lines[line_id].second, resize_height, resize_width, &datum)) { + lines[line_id].second, resize_height, resize_width, iscolor, &datum)) { continue; } if (!data_size_initialized) { From 781761eb74744bb00f6b9579eecbe868e180c570 Mon Sep 17 00:00:00 2001 From: Jerod Weinman Date: Sun, 8 Jun 2014 15:46:23 -0500 Subject: [PATCH 2/3] Changed variable name: iscolor to is_color. --- include/caffe/util/io.hpp | 2 +- src/caffe/util/io.cpp | 11 ++++++----- tools/convert_imageset.cpp | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 829293ecaef..4458096ef78 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -61,7 +61,7 @@ inline void WriteProtoToBinaryFile( } bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool iscolor, Datum* datum); + const int height, const int width, const bool is_color, Datum* datum); inline bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, Datum* datum) { diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index f2650e9a22a..65f82547071 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -72,9 +72,10 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) { } bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool iscolor, Datum* datum) { + const int height, const int width, const bool is_color, Datum* datum) { cv::Mat cv_img; - int cv_read_flag = (iscolor ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); + int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : + CV_LOAD_IMAGE_GRAYSCALE); if (height > 0 && width > 0) { cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag); cv::resize(cv_img_origin, cv_img, cv::Size(height, width)); @@ -85,7 +86,7 @@ bool ReadImageToDatum(const string& filename, const int label, LOG(ERROR) << "Could not open or find file " << filename; return false; } - int num_channels = (iscolor ? 3 : 1); + int num_channels = (is_color ? 3 : 1); datum->set_channels(num_channels); datum->set_height(cv_img.rows); datum->set_width(cv_img.cols); @@ -93,7 +94,7 @@ bool ReadImageToDatum(const string& filename, const int label, datum->clear_data(); datum->clear_float_data(); string* datum_string = datum->mutable_data(); - if (iscolor) { + if (is_color) { for (int c = 0; c < num_channels; ++c) { for (int h = 0; h < cv_img.rows; ++h) { for (int w = 0; w < cv_img.cols; ++w) { @@ -102,7 +103,7 @@ bool ReadImageToDatum(const string& filename, const int label, } } } - } else { // Faster than repeatedly testing iscolor for each pixel w/i loop + } else { // Faster than repeatedly testing is_color for each pixel w/i loop for (int h = 0; h < cv_img.rows; ++h) { for (int w = 0; w < cv_img.cols; ++w) { datum_string->push_back( diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index bbf848adc8a..524d1950cb0 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -45,8 +45,8 @@ int main(int argc, char** argv) { } // Test whether argv[1] == "-g" - bool iscolor= !(string("-g") == string(argv[1])); - int arg_offset = (iscolor ? 0 : 1); + bool is_color= !(string("-g") == string(argv[1])); + int arg_offset = (is_color ? 0 : 1); std::ifstream infile(argv[arg_offset+2]); std::vector > lines; string filename; @@ -89,7 +89,7 @@ int main(int argc, char** argv) { bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { if (!ReadImageToDatum(root_folder + lines[line_id].first, - lines[line_id].second, resize_height, resize_width, iscolor, &datum)) { + lines[line_id].second, resize_height, resize_width, is_color, &datum)) { continue; } if (!data_size_initialized) { From 8dc270e691cb15e540c1b5ac074774163bc6faf1 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Tue, 10 Jun 2014 15:01:41 -0700 Subject: [PATCH 3/3] pycaffe: leave grayscale images gray according to arg --- python/caffe/io.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index 0bd2f812bec..1fc97231ca6 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -5,19 +5,24 @@ from caffe.proto import caffe_pb2 -def load_image(filename): +def load_image(filename, color=True): """ Load an image converting from grayscale or alpha as needed. Take filename: string + color: flag for color format. True (default) loads as RGB while False + loads as intensity (if image is already grayscale). Give - image: an image of size (H x W x 3) with RGB channels of type uint8. + image: an image with type np.float32 of size (H x W x 3) in RGB or + of size (H x W x 1) in grayscale. """ img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32) if img.ndim == 2: - img = np.tile(img[:, :, np.newaxis], (1, 1, 3)) + img = img[:, :, np.newaxis] + if color: + img = np.tile(img, (1, 1, 3)) elif img.shape[2] == 4: img = img[:, :, :3] return img