From 602c3cd6aca49b52255bf19b211161b02ba826a9 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Mon, 6 Oct 2014 18:05:11 -0700
Subject: [PATCH 01/11] Added encoded option and check_size to convert_imageset

Conflicts:
	tools/convert_imageset.cpp
---
 tools/convert_imageset.cpp | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp
index 37efa5c13f7..c554ed389ab 100644
--- a/tools/convert_imageset.cpp
+++ b/tools/convert_imageset.cpp
@@ -34,6 +34,8 @@ DEFINE_int32(resize_width, 0, "Width images are resized to");
 DEFINE_int32(resize_height, 0, "Height images are resized to");
 DEFINE_bool(check_size, false,
     "When this option is on, check that all the datum have the same size");
+DEFINE_bool(encoded, false,
+    "When this option is on, the encoded image will be save in datum");
 
 int main(int argc, char** argv) {
   ::google::InitGoogleLogging(argv[0]);
@@ -55,8 +57,10 @@ int main(int argc, char** argv) {
     return 1;
   }
 
-  bool is_color = !FLAGS_gray;
-  bool check_size = FLAGS_check_size;
+  const bool is_color = !FLAGS_gray;
+  const bool check_size = FLAGS_check_size;
+  const bool encoded = FLAGS_encoded;
+
   std::ifstream infile(argv[2]);
   std::vector<std::pair<std::string, int> > lines;
   std::string filename;
@@ -74,6 +78,12 @@ int main(int argc, char** argv) {
   const std::string& db_backend = FLAGS_backend;
   const char* db_path = argv[3];
 
+  if (encoded) {
+    CHECK_EQ(FLAGS_resize_height, 0) << "With encoded don't resize images";
+    CHECK_EQ(FLAGS_resize_width, 0) << "With encoded don't resize images";
+    CHECK(!check_size) << "With encoded cannot check_size";
+  }
+
   int resize_height = std::max<int>(0, FLAGS_resize_height);
   int resize_width = std::max<int>(0, FLAGS_resize_width);
 
@@ -94,10 +104,15 @@ int main(int argc, char** argv) {
   bool data_size_initialized = false;
 
   for (int line_id = 0; line_id < lines.size(); ++line_id) {
-    if (!ReadImageToDatum(root_folder + lines[line_id].first,
-        lines[line_id].second, resize_height, resize_width, is_color, &datum)) {
-      continue;
+    bool status;
+    if (encoded) {
+      status = ReadFileToDatum(root_folder + lines[line_id].first,
+        lines[line_id].second, &datum);
+    } else {
+      status = ReadImageToDatum(root_folder + lines[line_id].first,
+          lines[line_id].second, resize_height, resize_width, is_color, &datum);
     }
+    if (status == false) continue;
     if (check_size) {
       if (!data_size_initialized) {
         data_size = datum.channels() * datum.height() * datum.width();

From aa0bfbca55a8fa53fc8d4343257f689a69544e35 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Mon, 6 Oct 2014 18:25:28 -0700
Subject: [PATCH 02/11] Added encoded datum to io

---
 include/caffe/util/io.hpp   | 38 +++++++++++++++++++++++
 src/caffe/proto/caffe.proto |  2 ++
 src/caffe/util/io.cpp       | 61 +++++++++++++++++++++++++++++++++++--
 3 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
index b64c821a86e..63afe6298ff 100644
--- a/include/caffe/util/io.hpp
+++ b/include/caffe/util/io.hpp
@@ -88,6 +88,12 @@ inline void WriteProtoToBinaryFile(
   WriteProtoToBinaryFile(proto, filename.c_str());
 }
 
+bool ReadFileToDatum(const string& filename, const int label, Datum* datum);
+
+inline bool ReadFileToDatum(const string& filename, Datum* datum) {
+  return ReadFileToDatum(filename, -1, datum);
+}
+
 bool ReadImageToDatum(const string& filename, const int label,
     const int height, const int width, const bool is_color, Datum* datum);
 
@@ -106,6 +112,21 @@ inline bool ReadImageToDatum(const string& filename, const int label,
   return ReadImageToDatum(filename, label, 0, 0, true, datum);
 }
 
+bool DecodeDatum(const int height, const int width, const bool is_color,
+  Datum& datum);
+
+inline bool DecodeDatum(const int height, const int width, Datum& datum) {
+  return DecodeDatum(height, width, true, datum);
+}
+
+inline bool DecodeDatum(const bool is_color, Datum& datum) {
+  return DecodeDatum(0, 0, is_color, datum);
+}
+
+inline bool DecodeDatum(Datum& datum) {
+  return DecodeDatum(0, 0, true, datum);
+}
+
 #ifndef OSX
 cv::Mat ReadImageToCVMat(const string& filename,
     const int height, const int width, const bool is_color);
@@ -124,6 +145,23 @@ inline cv::Mat ReadImageToCVMat(const string& filename) {
   return ReadImageToCVMat(filename, 0, 0, true);
 }
 
+cv::Mat DecodeDatumToCVMat(const Datum& datum,
+    const int height, const int width, const bool is_color);
+
+inline cv::Mat DecodeDatumToCVMat(const Datum& datum,
+    const int height, const int width) {
+  return DecodeDatumToCVMat(datum, height, width, true);
+}
+
+inline cv::Mat DecodeDatumToCVMat(const Datum& datum,
+    const bool is_color) {
+  return DecodeDatumToCVMat(datum, 0, 0, is_color);
+}
+
+inline cv::Mat DecodeDatumToCVMat(const Datum& datum) {
+  return DecodeDatumToCVMat(datum, 0, 0, true);
+}
+
 void CVMatToDatum(const cv::Mat& cv_img, Datum* datum);
 #endif
 
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index f0dba097d05..53db6f76945 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -26,6 +26,8 @@ message Datum {
   optional int32 label = 5;
   // Optionally, the datum could also hold float data.
   repeated float float_data = 6;
+  // If true, data contains an encoded image that needs to be decoded
+  optional bool encoded = 7 [default = false];
 }
 
 message FillerParameter {
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index a4a6627b277..6aa410a0feb 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -95,21 +95,76 @@ bool ReadImageToDatum(const string& filename, const int label,
   }
 }
 
+bool ReadFileToDatum(const string& filename, const int label,
+    Datum* datum) {
+  std::streampos size;
+
+  fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
+  if (file.is_open()) {
+    size = file.tellg();
+    std::string buffer(size, ' ');
+    file.seekg(0, ios::beg);
+    file.read(&buffer[0], size);
+    file.close();
+    datum->set_data(buffer);
+    datum->set_label(label);
+    datum->set_encoded(true);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+cv::Mat DecodeDatumToCVMat(const Datum& datum,
+    const int height, const int width, const bool is_color) {
+  cv::Mat cv_img;
+  CHECK(datum.encoded()) << "Datum not encoded";
+  int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
+    CV_LOAD_IMAGE_GRAYSCALE);
+  const string& data = datum.data();
+  std::vector<char> vec_data(data.c_str(), data.c_str() + data.size());
+  if (height > 0 && width > 0) {
+    cv::Mat cv_img_origin = cv::imdecode(cv::Mat(vec_data), cv_read_flag);
+    cv::resize(cv_img_origin, cv_img, cv::Size(width, height));
+  } else {
+    cv_img = cv::imdecode(vec_data, cv_read_flag);
+  }
+  if (!cv_img.data) {
+    LOG(ERROR) << "Could not decode datum ";
+  }
+  return cv_img;
+}
+
+// If Datum is encoded will be decoded using DecodeDatumToCVMat and CVMatToDatum
+// if height and width are set it will resize it
+// If Datum is not encoded will do nothing
+bool DecodeDatum(const int height, const int width, const bool is_color,
+                Datum& datum) {
+  if (datum.encoded()) {
+    cv::Mat cv_img = DecodeDatumToCVMat(datum, height, width, is_color);
+    CVMatToDatum(cv_img, &datum);
+    return true;
+  } else {
+    return false;
+  }
+}
+
 void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
-  CHECK(cv_img.depth() == CV_8U) <<
-      "Image data type must be unsigned byte";
+  CHECK(cv_img.depth() == CV_8U || cv_img.depth() == CV_8S) <<
+      "Image data type must be unsigned or signed byte";
   datum->set_channels(cv_img.channels());
   datum->set_height(cv_img.rows);
   datum->set_width(cv_img.cols);
   datum->clear_data();
   datum->clear_float_data();
+  datum->set_encoded(false);
   int datum_channels = datum->channels();
   int datum_height = datum->height();
   int datum_width = datum->width();
   int datum_size = datum_channels * datum_height * datum_width;
   std::string buffer(datum_size, ' ');
   for (int h = 0; h < datum_height; ++h) {
-    const uchar* ptr = cv_img.ptr<uchar>(h);
+    const char* ptr = cv_img.ptr<char>(h);
     int img_index = 0;
     for (int w = 0; w < datum_width; ++w) {
       for (int c = 0; c < datum_channels; ++c) {

From df656c57abe66ab373f3f867de9958302fccec0f Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Mon, 6 Oct 2014 23:27:40 -0700
Subject: [PATCH 03/11] Added test for encoded Datum to test_io.cpp

---
 include/caffe/util/io.hpp  |  8 ++--
 src/caffe/test/test_io.cpp | 80 ++++++++++++++++++++++++++++++++++++++
 src/caffe/util/io.cpp      |  8 ++--
 3 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
index 63afe6298ff..64df0155780 100644
--- a/include/caffe/util/io.hpp
+++ b/include/caffe/util/io.hpp
@@ -113,17 +113,17 @@ inline bool ReadImageToDatum(const string& filename, const int label,
 }
 
 bool DecodeDatum(const int height, const int width, const bool is_color,
-  Datum& datum);
+  Datum* datum);
 
-inline bool DecodeDatum(const int height, const int width, Datum& datum) {
+inline bool DecodeDatum(const int height, const int width, Datum* datum) {
   return DecodeDatum(height, width, true, datum);
 }
 
-inline bool DecodeDatum(const bool is_color, Datum& datum) {
+inline bool DecodeDatum(const bool is_color, Datum* datum) {
   return DecodeDatum(0, 0, is_color, datum);
 }
 
-inline bool DecodeDatum(Datum& datum) {
+inline bool DecodeDatum(Datum* datum) {
   return DecodeDatum(0, 0, true, datum);
 }
 
diff --git a/src/caffe/test/test_io.cpp b/src/caffe/test/test_io.cpp
index 6994afda0e3..4d941fa8683 100644
--- a/src/caffe/test/test_io.cpp
+++ b/src/caffe/test/test_io.cpp
@@ -276,4 +276,84 @@ TEST_F(IOTest, TestCVMatToDatumReference) {
   }
 }
 
+TEST_F(IOTest, TestReadFileToDatum) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  EXPECT_TRUE(datum.encoded());
+  EXPECT_EQ(datum.label(), -1);
+  EXPECT_EQ(datum.data().size(), 140391);
+}
+
+TEST_F(IOTest, TestDecodeDatum) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  EXPECT_TRUE(DecodeDatum(&datum));
+  EXPECT_FALSE(DecodeDatum(&datum));
+  Datum datum_ref;
+  ReadImageToDatumReference(filename, 0, 0, 0, true, &datum_ref);
+  EXPECT_EQ(datum.channels(), datum_ref.channels());
+  EXPECT_EQ(datum.height(), datum_ref.height());
+  EXPECT_EQ(datum.width(), datum_ref.width());
+  EXPECT_EQ(datum.data().size(), datum_ref.data().size());
+
+  const string& data = datum.data();
+  const string& data_ref = datum_ref.data();
+  for (int i = 0; i < datum.data().size(); ++i) {
+    EXPECT_TRUE(data[i] == data_ref[i]);
+  }
+}
+
+TEST_F(IOTest, TestDecodeDatumToCVMat) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  cv::Mat cv_img = DecodeDatumToCVMat(datum);
+  EXPECT_EQ(cv_img.channels(), 3);
+  EXPECT_EQ(cv_img.rows, 360);
+  EXPECT_EQ(cv_img.cols, 480);
+}
+
+TEST_F(IOTest, TestDecodeDatumToCVMatResized) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  cv::Mat cv_img = DecodeDatumToCVMat(datum, 100, 200);
+  EXPECT_EQ(cv_img.channels(), 3);
+  EXPECT_EQ(cv_img.rows, 100);
+  EXPECT_EQ(cv_img.cols, 200);
+}
+
+TEST_F(IOTest, TestDecodeDatumToCVMatResizedGray) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  const bool is_color = false;
+  cv::Mat cv_img = DecodeDatumToCVMat(datum, 200, 100, is_color);
+  EXPECT_EQ(cv_img.channels(), 1);
+  EXPECT_EQ(cv_img.rows, 200);
+  EXPECT_EQ(cv_img.cols, 100);
+}
+
+TEST_F(IOTest, TestDecodeDatumToCVMatContent) {
+  string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg";
+  Datum datum;
+  EXPECT_TRUE(ReadFileToDatum(filename, &datum));
+  cv::Mat cv_img = DecodeDatumToCVMat(datum);
+  cv::Mat cv_img_ref = ReadImageToCVMat(filename);
+  EXPECT_EQ(cv_img_ref.channels(), cv_img.channels());
+  EXPECT_EQ(cv_img_ref.rows, cv_img.rows);
+  EXPECT_EQ(cv_img_ref.cols, cv_img.cols);
+
+  for (int c = 0; c < datum.channels(); ++c) {
+    for (int h = 0; h < datum.height(); ++h) {
+      for (int w = 0; w < datum.width(); ++w) {
+        EXPECT_TRUE(cv_img.at<cv::Vec3b>(h, w)[c]==
+          cv_img_ref.at<cv::Vec3b>(h, w)[c]);
+      }
+    }
+  }
+}
+
 }  // namespace caffe
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 6aa410a0feb..06b9deb078a 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -139,10 +139,10 @@ cv::Mat DecodeDatumToCVMat(const Datum& datum,
 // if height and width are set it will resize it
 // If Datum is not encoded will do nothing
 bool DecodeDatum(const int height, const int width, const bool is_color,
-                Datum& datum) {
-  if (datum.encoded()) {
-    cv::Mat cv_img = DecodeDatumToCVMat(datum, height, width, is_color);
-    CVMatToDatum(cv_img, &datum);
+                Datum* datum) {
+  if (datum->encoded()) {
+    cv::Mat cv_img = DecodeDatumToCVMat((*datum), height, width, is_color);
+    CVMatToDatum(cv_img, datum);
     return true;
   } else {
     return false;

From 468d95591f97a4ee7d466b981e98dac180a5aa83 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Wed, 15 Oct 2014 15:35:01 -0700
Subject: [PATCH 04/11] Added timers for benchmarking

---
 src/caffe/layers/data_layer.cpp        | 22 +++++++++++++++++++---
 src/caffe/layers/image_data_layer.cpp  | 13 +++++++++++++
 src/caffe/layers/window_data_layer.cpp | 20 ++++++++++++++------
 src/caffe/net.cpp                      |  7 +++++++
 4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index fcf9bb20a57..330381d2771 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -8,6 +8,7 @@
 #include "caffe/dataset_factory.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -47,6 +48,9 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
   CHECK(iter_ != dataset_->end());
   const Datum& datum = iter_->value;
 
+  if (DecodeDatum(datum)) {
+    LOG(INFO) << "Decoding Datum";
+  }
   // image
   int crop_size = this->layer_param_.transform_param().crop_size();
   if (crop_size > 0) {
@@ -78,6 +82,8 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void DataLayer<Dtype>::InternalThreadEntry() {
+  Timer batch_timer;
+  batch_timer.Start();
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -87,25 +93,35 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     top_label = this->prefetch_label_.mutable_cpu_data();
   }
   const int batch_size = this->layer_param_.data_param().batch_size();
-
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
   for (int item_id = 0; item_id < batch_size; ++item_id) {
+    timer.Start();
+    // get a blob
     CHECK(iter_ != dataset_->end());
     const Datum& datum = iter_->value;
+    cv::Mat cv_img = DecodeDatumToCVMat(datum);
+    read_time += timer.MilliSeconds();
+    timer.Start();
 
     // Apply data transformations (mirror, scale, crop...)
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
-    this->data_transformer_.Transform(datum, &(this->transformed_data_));
+    this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }
-
+    trans_time += timer.MilliSeconds();
     // go to the next iter
     ++iter_;
     if (iter_ == dataset_->end()) {
       iter_ = dataset_->begin();
     }
   }
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(DataLayer);
diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp
index 2085df2760a..67493117b88 100644
--- a/src/caffe/layers/image_data_layer.cpp
+++ b/src/caffe/layers/image_data_layer.cpp
@@ -6,6 +6,7 @@
 
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
+#include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -91,6 +92,8 @@ void ImageDataLayer<Dtype>::ShuffleImages() {
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void ImageDataLayer<Dtype>::InternalThreadEntry() {
+  Timer batch_timer;
+  batch_timer.Start();
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -103,18 +106,25 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
 
   // datum scales
   const int lines_size = lines_.size();
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     // get a blob
+    timer.Start();
     CHECK_GT(lines_size, lines_id_);
     cv::Mat cv_img = ReadImageToCVMat(lines_[lines_id_].first,
                                     new_height, new_width, is_color);
     if (!cv_img.data) {
       continue;
     }
+    read_time += timer.MilliSeconds();
+    timer.Start();
     // Apply transformations (mirror, crop...) to the image
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
+    trans_time += timer.MilliSeconds();
 
     top_label[item_id] = lines_[lines_id_].second;
     // go to the next iter
@@ -128,6 +138,9 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
       }
     }
   }
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(ImageDataLayer);
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 529c3b47bd9..47e0fb2a123 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -13,6 +13,7 @@
 #include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
+#include "caffe/util/benchmark.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -192,7 +193,8 @@ template <typename Dtype>
 void WindowDataLayer<Dtype>::InternalThreadEntry() {
   // At each iteration, sample N windows where N*p are foreground (object)
   // windows and N*(1-p) are background (non-object) windows
-
+  Timer batch_timer;
+  batch_timer.Start();
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
   Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
   const Dtype scale = this->layer_param_.window_data_param().scale();
@@ -219,19 +221,20 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   const int num_samples[2] = { batch_size - num_fg, num_fg };
 
   int item_id = 0;
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
   // sample from bg set then fg set
   for (int is_fg = 0; is_fg < 2; ++is_fg) {
     for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
       // sample a window
+      timer.Start();
       const unsigned int rand_index = PrefetchRand();
       vector<float> window = (is_fg) ?
           fg_windows_[rand_index % fg_windows_.size()] :
           bg_windows_[rand_index % bg_windows_.size()];
 
-      bool do_mirror = false;
-      if (mirror && PrefetchRand() % 2) {
-        do_mirror = true;
-      }
+      bool do_mirror = mirror && PrefetchRand() % 2;
 
       // load the image containing the window
       pair<std::string, vector<int> > image =
@@ -242,6 +245,8 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
         LOG(ERROR) << "Could not open or find file " << image.first;
         return;
       }
+      read_time += timer.MilliSeconds();
+      timer.Start();
       const int channels = cv_img.channels();
 
       // crop window out of image and warp it
@@ -359,7 +364,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           }
         }
       }
-
+      trans_time += timer.MilliSeconds();
       // get window label
       top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];
 
@@ -399,6 +404,9 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       item_id++;
     }
   }
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(WindowDataLayer);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 21ab15fd31b..5b7152e6993 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -9,6 +9,7 @@
 #include "caffe/layer.hpp"
 #include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include "caffe/util/benchmark.hpp"
 #include "caffe/util/insert_splits.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -499,6 +500,8 @@ void Net<Dtype>::GetLearningRateAndWeightDecay() {
 
 template <typename Dtype>
 Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
+  Timer timer;
+  timer.Start();
   CHECK_GE(start, 0);
   CHECK_LT(end, layers_.size());
   Dtype loss = 0;
@@ -509,6 +512,7 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
     loss += layer_loss;
     if (debug_info_) { ForwardDebugInfo(i); }
   }
+  LOG(INFO) << "Forward time: " << timer.MilliSeconds() << "ms.";
   return loss;
 }
 
@@ -565,6 +569,8 @@ string Net<Dtype>::Forward(const string& input_blob_protos, Dtype* loss) {
 
 template <typename Dtype>
 void Net<Dtype>::BackwardFromTo(int start, int end) {
+  Timer timer;
+  timer.Start();
   CHECK_GE(end, 0);
   CHECK_LT(start, layers_.size());
   for (int i = start; i >= end; --i) {
@@ -574,6 +580,7 @@ void Net<Dtype>::BackwardFromTo(int start, int end) {
       if (debug_info_) { BackwardDebugInfo(i); }
     }
   }
+  LOG(INFO) << "Backward time: " << timer.MilliSeconds() << "ms.";
 }
 
 template <typename Dtype>

From f9e56a619d004671f818521c85f95ba9eb3b0cf9 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Wed, 15 Oct 2014 15:35:26 -0700
Subject: [PATCH 05/11] Add fast code for transform(cv::Mat,Blob)

---
 src/caffe/data_transformer.cpp  | 103 +++++++++++++++++++++++++++++++-
 src/caffe/layers/data_layer.cpp |  12 +++-
 2 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp
index dffaba545b5..8f145994562 100644
--- a/src/caffe/data_transformer.cpp
+++ b/src/caffe/data_transformer.cpp
@@ -179,9 +179,106 @@ void DataTransformer<Dtype>::Transform(const vector<Datum> & datum_vector,
 template<typename Dtype>
 void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
                                        Blob<Dtype>* transformed_blob) {
-  Datum datum;
-  CVMatToDatum(cv_img, &datum);
-  Transform(datum, transformed_blob);
+  const int img_channels = cv_img.channels();
+  const int img_height = cv_img.rows;
+  const int img_width = cv_img.cols;
+
+  const int channels = transformed_blob->channels();
+  const int height = transformed_blob->height();
+  const int width = transformed_blob->width();
+  const int num = transformed_blob->num();
+
+  CHECK_EQ(channels, img_channels);
+  CHECK_LE(height, img_height);
+  CHECK_LE(width, img_width);
+  CHECK_GE(num, 1);
+
+  CHECK(cv_img.depth() == CV_8U || cv_img.depth() == CV_8S) <<
+      "Image data type must be unsigned or signed byte";
+
+  const int crop_size = param_.crop_size();
+  const Dtype scale = param_.scale();
+  const bool do_mirror = param_.mirror() && Rand(2);
+  const bool has_mean_file = param_.has_mean_file();
+  const bool has_mean_values = mean_values_.size() > 0;
+
+  CHECK_GT(img_channels, 0);
+  CHECK_GE(img_height, crop_size);
+  CHECK_GE(img_width, crop_size);
+
+  Dtype* mean = NULL;
+  if (has_mean_file) {
+    CHECK_EQ(img_channels, data_mean_.channels());
+    CHECK_EQ(img_height, data_mean_.height());
+    CHECK_EQ(img_width, data_mean_.width());
+    mean = data_mean_.mutable_cpu_data();
+  }
+  if (has_mean_values) {
+    CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
+     "Specify either 1 mean_value or as many as channels: " << img_channels;
+    if (img_channels > 1 && mean_values_.size() == 1) {
+      // Replicate the mean_value for simplicity
+      for (int c = 1; c < img_channels; ++c) {
+        mean_values_.push_back(mean_values_[0]);
+      }
+    }
+  }
+
+  int h_off = 0;
+  int w_off = 0;
+  cv::Mat cv_cropped_img = cv_img;
+  if (crop_size) {
+    CHECK_EQ(crop_size, height);
+    CHECK_EQ(crop_size, width);
+    // We only do random crop when we do training.
+    if (phase_ == Caffe::TRAIN) {
+      h_off = Rand(img_height - crop_size + 1);
+      w_off = Rand(img_width - crop_size + 1);
+    } else {
+      h_off = (img_height - crop_size) / 2;
+      w_off = (img_width - crop_size) / 2;
+    }
+    cv::Rect roi(h_off, w_off, crop_size, crop_size);
+    cv_cropped_img = cv_img(roi);
+  } else {
+    CHECK_EQ(img_height, height);
+    CHECK_EQ(img_width, width);
+  }
+  
+  // if (do_mirror) {
+  //   cv::flip(cv_cropped_img, cv_cropped_img, 1);
+  // }
+  CHECK(cv_cropped_img.data);
+
+  Dtype* transformed_data = transformed_blob->mutable_cpu_data();
+  int top_index;
+  for (int h = 0; h < height; ++h) {
+    const char* ptr = cv_cropped_img.ptr<char>(h);
+    int img_index = 0;
+    for (int w = 0; w < width; ++w) {
+      for (int c = 0; c < img_channels; ++c) {
+        if (do_mirror) {
+          top_index = (c * height + h) * width + (width - 1 - w);
+        } else {
+          top_index = (c * height + h) * width + w;
+        }
+        // int top_index = (c * height + h) * width + w;
+        Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
+        if (has_mean_file) {
+          int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
+          transformed_data[top_index] =
+            (pixel - mean[mean_index]) * scale;
+        } else {
+          if (has_mean_values) {
+            transformed_data[top_index] =
+              (pixel - mean_values_[c]) * scale;
+          } else {
+            transformed_data[top_index] = pixel * scale;
+          }
+        }
+      }
+    }
+  }
 }
 #endif
 
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index 330381d2771..95c54279661 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -101,14 +101,22 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     // get a blob
     CHECK(iter_ != dataset_->end());
     const Datum& datum = iter_->value;
-    cv::Mat cv_img = DecodeDatumToCVMat(datum);
+    cv::Mat cv_img;
+    if (datum.encoded()) {
+       cv_img = DecodeDatumToCVMat(datum);
+    }
     read_time += timer.MilliSeconds();
     timer.Start();
 
     // Apply data transformations (mirror, scale, crop...)
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
-    this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
+    if (datum.encoded()) {
+      this->data_transformer_.Transform(cv_img, &(this->transformed_data_));  
+    } else {
+      this->data_transformer_.Transform(datum, &(this->transformed_data_));
+    }
+    
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }

From 779f9da7f6004009e212ca3569ed5a5e66232066 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Tue, 7 Oct 2014 11:19:16 -0700
Subject: [PATCH 06/11] Add root_folder to ImageDataLayer

---
 Makefile                               |  6 ++++++
 include/caffe/net.hpp                  | 10 +++++++++
 src/caffe/data_transformer.cpp         | 12 ++++-------
 src/caffe/layers/data_layer.cpp        | 25 ++++++++++++++-------
 src/caffe/layers/image_data_layer.cpp  | 30 +++++++++++++++++++-------
 src/caffe/layers/window_data_layer.cpp | 24 +++++++++++++++------
 src/caffe/net.cpp                      | 10 +++++++++
 src/caffe/proto/caffe.proto            |  1 +
 src/caffe/util/io.cpp                  |  5 ++---
 9 files changed, 90 insertions(+), 33 deletions(-)

diff --git a/Makefile b/Makefile
index 26d5964cacc..393fe18a70d 100644
--- a/Makefile
+++ b/Makefile
@@ -281,6 +281,12 @@ ifeq ($(USE_CUDNN), 1)
 	COMMON_FLAGS += -DUSE_CUDNN
 endif
 
+TIMING ?= 0
+# Timing Flag
+ifneq ($(TIMING), 0)
+	COMMON_FLAGS += -DTIMING
+endif
+
 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 1d06dc45533..879f474674d 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -11,6 +11,9 @@
 #include "caffe/common.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
+#ifdef TIMING
+#include "caffe/util/benchmark.hpp"
+#endif
 
 namespace caffe {
 
@@ -76,9 +79,16 @@ class Net {
   void Reshape();
 
   Dtype ForwardBackward(const vector<Blob<Dtype>* > & bottom) {
+    #ifdef TIMING
+    Timer timer;
+    timer.Start();
+    #endif
     Dtype loss;
     Forward(bottom, &loss);
     Backward();
+    #ifdef TIMING
+    LOG(INFO) << "ForwardBackward Time: " << timer.MilliSeconds() << "ms.";
+    #endif
     return loss;
   }
 
diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp
index 8f145994562..023396ce5e6 100644
--- a/src/caffe/data_transformer.cpp
+++ b/src/caffe/data_transformer.cpp
@@ -193,8 +193,7 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
   CHECK_LE(width, img_width);
   CHECK_GE(num, 1);
 
-  CHECK(cv_img.depth() == CV_8U || cv_img.depth() == CV_8S) <<
-      "Image data type must be unsigned or signed byte";
+  CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
 
   const int crop_size = param_.crop_size();
   const Dtype scale = param_.scale();
@@ -238,22 +237,19 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
       h_off = (img_height - crop_size) / 2;
       w_off = (img_width - crop_size) / 2;
     }
-    cv::Rect roi(h_off, w_off, crop_size, crop_size);
+    cv::Rect roi(w_off, h_off, crop_size, crop_size);
     cv_cropped_img = cv_img(roi);
   } else {
     CHECK_EQ(img_height, height);
     CHECK_EQ(img_width, width);
   }
-  
-  // if (do_mirror) {
-  //   cv::flip(cv_cropped_img, cv_cropped_img, 1);
-  // }
+
   CHECK(cv_cropped_img.data);
 
   Dtype* transformed_data = transformed_blob->mutable_cpu_data();
   int top_index;
   for (int h = 0; h < height; ++h) {
-    const char* ptr = cv_cropped_img.ptr<char>(h);
+    const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
     int img_index = 0;
     for (int w = 0; w < width; ++w) {
       for (int c = 0; c < img_channels; ++c) {
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index 95c54279661..95604e5a1ac 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -8,7 +8,9 @@
 #include "caffe/dataset_factory.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
+#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
+#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -82,8 +84,13 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void DataLayer<Dtype>::InternalThreadEntry() {
+  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
+  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -93,9 +100,6 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     top_label = this->prefetch_label_.mutable_cpu_data();
   }
   const int batch_size = this->layer_param_.data_param().batch_size();
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     timer.Start();
     // get a blob
@@ -105,31 +109,36 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     if (datum.encoded()) {
        cv_img = DecodeDatumToCVMat(datum);
     }
+    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
+    #endif
 
     // Apply data transformations (mirror, scale, crop...)
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     if (datum.encoded()) {
-      this->data_transformer_.Transform(cv_img, &(this->transformed_data_));  
+      this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
     } else {
       this->data_transformer_.Transform(datum, &(this->transformed_data_));
     }
-    
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }
+    #ifdef TIMING
     trans_time += timer.MilliSeconds();
+    #endif
     // go to the next iter
     ++iter_;
     if (iter_ == dataset_->end()) {
       iter_ = dataset_->begin();
     }
   }
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #ifdef TIMING
+  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  LOG(INFO) << "Read time: " << read_time << "ms.";
+  LOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #endif
 }
 
 INSTANTIATE_CLASS(DataLayer);
diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp
index 67493117b88..0abcd888b67 100644
--- a/src/caffe/layers/image_data_layer.cpp
+++ b/src/caffe/layers/image_data_layer.cpp
@@ -6,7 +6,9 @@
 
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
+#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
+#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -24,6 +26,7 @@ void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
   const int new_height = this->layer_param_.image_data_param().new_height();
   const int new_width  = this->layer_param_.image_data_param().new_width();
   const bool is_color  = this->layer_param_.image_data_param().is_color();
+  string root_folder = this->layer_param_.image_data_param().root_folder();
 
   CHECK((new_height == 0 && new_width == 0) ||
       (new_height > 0 && new_width > 0)) << "Current implementation requires "
@@ -57,7 +60,7 @@ void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
     lines_id_ = skip;
   }
   // Read an image, and use it to initialize the top blob.
-  cv::Mat cv_img = ReadImageToCVMat(lines_[lines_id_].first,
+  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                     new_height, new_width, is_color);
   const int channels = cv_img.channels();
   const int height = cv_img.rows;
@@ -92,8 +95,13 @@ void ImageDataLayer<Dtype>::ShuffleImages() {
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void ImageDataLayer<Dtype>::InternalThreadEntry() {
+  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
+  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -103,28 +111,32 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
   const int new_height = image_data_param.new_height();
   const int new_width = image_data_param.new_width();
   const bool is_color = image_data_param.is_color();
+  string root_folder = image_data_param.root_folder();
 
   // datum scales
   const int lines_size = lines_.size();
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     // get a blob
+    #ifdef TIMING
     timer.Start();
+    #endif
     CHECK_GT(lines_size, lines_id_);
-    cv::Mat cv_img = ReadImageToCVMat(lines_[lines_id_].first,
+    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                     new_height, new_width, is_color);
     if (!cv_img.data) {
       continue;
     }
+    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
+    #endif
     // Apply transformations (mirror, crop...) to the image
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
+    #ifdef TIMING
     trans_time += timer.MilliSeconds();
+    #endif
 
     top_label[item_id] = lines_[lines_id_].second;
     // go to the next iter
@@ -138,9 +150,11 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
       }
     }
   }
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #ifdef TIMING
+  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  LOG(INFO) << "Read time: " << read_time << "ms.";
+  LOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #endif
 }
 
 INSTANTIATE_CLASS(ImageDataLayer);
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 47e0fb2a123..8e65615507b 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -13,7 +13,9 @@
 #include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
+#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
+#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -193,8 +195,13 @@ template <typename Dtype>
 void WindowDataLayer<Dtype>::InternalThreadEntry() {
   // At each iteration, sample N windows where N*p are foreground (object)
   // windows and N*(1-p) are background (non-object) windows
+  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
+  float read_time = 0;
+  float trans_time = 0;
+  Timer timer;
+  #endif
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
   Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
   const Dtype scale = this->layer_param_.window_data_param().scale();
@@ -221,14 +228,13 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   const int num_samples[2] = { batch_size - num_fg, num_fg };
 
   int item_id = 0;
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
   // sample from bg set then fg set
   for (int is_fg = 0; is_fg < 2; ++is_fg) {
     for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
       // sample a window
+      #ifdef TIMING
       timer.Start();
+      #endif
       const unsigned int rand_index = PrefetchRand();
       vector<float> window = (is_fg) ?
           fg_windows_[rand_index % fg_windows_.size()] :
@@ -245,8 +251,10 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
         LOG(ERROR) << "Could not open or find file " << image.first;
         return;
       }
+      #ifdef TIMING
       read_time += timer.MilliSeconds();
       timer.Start();
+      #endif
       const int channels = cv_img.channels();
 
       // crop window out of image and warp it
@@ -364,7 +372,9 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           }
         }
       }
+      #ifdef TIMING
       trans_time += timer.MilliSeconds();
+      #endif
       // get window label
       top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];
 
@@ -404,9 +414,11 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       item_id++;
     }
   }
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #ifdef TIMING
+  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  LOG(INFO) << "Read time: " << read_time << "ms.";
+  LOG(INFO) << "Transform time: " << trans_time << "ms.";
+  #endif
 }
 
 INSTANTIATE_CLASS(WindowDataLayer);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 5b7152e6993..c0e56920c83 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -9,7 +9,9 @@
 #include "caffe/layer.hpp"
 #include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
+#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
+#endif
 #include "caffe/util/insert_splits.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -500,8 +502,10 @@ void Net<Dtype>::GetLearningRateAndWeightDecay() {
 
 template <typename Dtype>
 Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
+  #ifdef TIMING
   Timer timer;
   timer.Start();
+  #endif
   CHECK_GE(start, 0);
   CHECK_LT(end, layers_.size());
   Dtype loss = 0;
@@ -512,7 +516,9 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
     loss += layer_loss;
     if (debug_info_) { ForwardDebugInfo(i); }
   }
+  #ifdef TIMING
   LOG(INFO) << "Forward time: " << timer.MilliSeconds() << "ms.";
+  #endif
   return loss;
 }
 
@@ -569,8 +575,10 @@ string Net<Dtype>::Forward(const string& input_blob_protos, Dtype* loss) {
 
 template <typename Dtype>
 void Net<Dtype>::BackwardFromTo(int start, int end) {
+  #ifdef TIMING
   Timer timer;
   timer.Start();
+  #endif
   CHECK_GE(end, 0);
   CHECK_LT(start, layers_.size());
   for (int i = start; i >= end; --i) {
@@ -580,7 +588,9 @@ void Net<Dtype>::BackwardFromTo(int start, int end) {
       if (debug_info_) { BackwardDebugInfo(i); }
     }
   }
+  #ifdef TIMING
   LOG(INFO) << "Backward time: " << timer.MilliSeconds() << "ms.";
+  #endif
 }
 
 template <typename Dtype>
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 53db6f76945..b602d0e4c7b 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -540,6 +540,7 @@ message ImageDataParameter {
   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
   // data.
   optional bool mirror = 6 [default = false];
+  optional string root_folder = 12 [default = ""];
 }
 
 // Message that stores parameters InfogainLossLayer
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 06b9deb078a..b136bc8a120 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -150,8 +150,7 @@ bool DecodeDatum(const int height, const int width, const bool is_color,
 }
 
 void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
-  CHECK(cv_img.depth() == CV_8U || cv_img.depth() == CV_8S) <<
-      "Image data type must be unsigned or signed byte";
+  CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
   datum->set_channels(cv_img.channels());
   datum->set_height(cv_img.rows);
   datum->set_width(cv_img.cols);
@@ -164,7 +163,7 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
   int datum_size = datum_channels * datum_height * datum_width;
   std::string buffer(datum_size, ' ');
   for (int h = 0; h < datum_height; ++h) {
-    const char* ptr = cv_img.ptr<char>(h);
+    const uchar* ptr = cv_img.ptr<uchar>(h);
     int img_index = 0;
     for (int w = 0; w < datum_width; ++w) {
       for (int c = 0; c < datum_channels; ++c) {

From 1e0bfd30949c61104c805fe6e0372d06cce41806 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Tue, 7 Oct 2014 14:14:50 -0700
Subject: [PATCH 07/11] Speed up WindowDataLayer and add mean_values

---
 include/caffe/data_layers.hpp          |  3 ++
 src/caffe/layers/window_data_layer.cpp | 65 ++++++++++++++++++--------
 2 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index a2ea854e5ff..c4903ce0891 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -325,6 +325,9 @@ class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
   vector<vector<float> > fg_windows_;
   vector<vector<float> > bg_windows_;
   Blob<Dtype> data_mean_;
+  vector<Dtype> mean_values_;
+  bool has_mean_file_;
+  bool has_mean_values_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 8e65615507b..fc0ffc886bb 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -170,15 +170,30 @@ void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
   this->prefetch_label_.Reshape(batch_size, 1, 1, 1);
 
   // data mean
-  if (this->transform_param_.has_mean_file()) {
+  has_mean_file_ = this->transform_param_.has_mean_file();
+  has_mean_values_ = this->transform_param_.mean_value_size() > 0;
+  if (has_mean_file_) {
     const string& mean_file =
           this->transform_param_.mean_file();
     LOG(INFO) << "Loading mean file from" << mean_file;
     BlobProto blob_proto;
     ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
     data_mean_.FromProto(blob_proto);
-  } else {
-    data_mean_.Reshape(1, channels, crop_size, crop_size);
+  }
+  if (has_mean_values_) {
+    CHECK(has_mean_file_ == false) <<
+      "Cannot specify mean_file and mean_value at the same time";
+    for (int c = 0; c < this->transform_param_.mean_value_size(); ++c) {
+      mean_values_.push_back(this->transform_param_.mean_value(c));
+    }
+    CHECK(mean_values_.size() == 1 || mean_values_.size() == channels) <<
+     "Specify either 1 mean_value or as many as channels: " << channels;
+    if (channels > 1 && mean_values_.size() == 1) {
+      // Replicate the mean_value for simplicity
+      for (int c = 1; c < channels; ++c) {
+        mean_values_.push_back(mean_values_[0]);
+      }
+    }
   }
 }
 
@@ -211,10 +226,14 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   const bool mirror = this->transform_param_.mirror();
   const float fg_fraction =
       this->layer_param_.window_data_param().fg_fraction();
-  const Dtype* mean = this->data_mean_.cpu_data();
-  const int mean_off = (this->data_mean_.width() - crop_size) / 2;
-  const int mean_width = this->data_mean_.width();
-  const int mean_height = this->data_mean_.height();
+  Dtype* mean = NULL;
+  int mean_off, mean_width, mean_height;
+  if (this->has_mean_file_) {
+    mean = this->data_mean_.mutable_cpu_data();
+    mean_off = (this->data_mean_.width() - crop_size) / 2;
+    mean_width = this->data_mean_.width();
+    mean_height = this->data_mean_.height();
+  }
   cv::Size cv_crop_size(crop_size, crop_size);
   const string& crop_mode = this->layer_param_.window_data_param().crop_mode();
 
@@ -357,18 +376,26 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       }
 
       // copy the warped window into top_data
-      for (int c = 0; c < channels; ++c) {
-        for (int h = 0; h < cv_cropped_img.rows; ++h) {
-          for (int w = 0; w < cv_cropped_img.cols; ++w) {
-            Dtype pixel =
-                static_cast<Dtype>(cv_cropped_img.at<cv::Vec3b>(h, w)[c]);
-
-            top_data[((item_id * channels + c) * crop_size + h + pad_h)
-                     * crop_size + w + pad_w]
-                = (pixel
-                    - mean[(c * mean_height + h + mean_off + pad_h)
-                           * mean_width + w + mean_off + pad_w])
-                  * scale;
+      for (int h = 0; h < cv_cropped_img.rows; ++h) {
+        const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
+        int img_index = 0;
+        for (int w = 0; w < cv_cropped_img.cols; ++w) {
+          for (int c = 0; c < channels; ++c) {
+            int top_index = ((item_id * channels + c) * crop_size + h + pad_h)
+                     * crop_size + w + pad_w;
+            // int top_index = (c * height + h) * width + w;
+            Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
+            if (this->has_mean_file_) {
+              int mean_index = (c * mean_height + h + mean_off + pad_h)
+                           * mean_width + w + mean_off + pad_w;
+              top_data[top_index] = (pixel - mean[mean_index]) * scale;
+            } else {
+              if (this->has_mean_values_) {
+                top_data[top_index] = (pixel - this->mean_values_[c]) * scale;
+              } else {
+                top_data[top_index] = pixel * scale;
+              }
+            }
           }
         }
       }

From 526960a9bc6c44b01a721bb62943f43826f3bcdf Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Tue, 7 Oct 2014 17:19:15 -0700
Subject: [PATCH 08/11] Added cache_images to WindowDataLayer; added root_folder
 to WindowDataLayer to locate images

---
 include/caffe/data_layers.hpp          |  2 ++
 src/caffe/layers/window_data_layer.cpp | 38 ++++++++++++++++++++++----
 src/caffe/proto/caffe.proto            |  4 +++
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index c4903ce0891..34b9b30aa3e 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -328,6 +328,8 @@ class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
   vector<Dtype> mean_values_;
   bool has_mean_file_;
   bool has_mean_values_;
+  bool cache_images_;
+  vector<std::pair<std::string, Datum > > image_database_cache_;
 };
 
 }  // namespace caffe
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index fc0ffc886bb..8f75557d694 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -59,7 +59,14 @@ void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
       << "  background (non-object) overlap threshold: "
       << this->layer_param_.window_data_param().bg_threshold() << std::endl
       << "  foreground sampling fraction: "
-      << this->layer_param_.window_data_param().fg_fraction();
+      << this->layer_param_.window_data_param().fg_fraction() << std::endl
+      << "  cache_images: "
+      << this->layer_param_.window_data_param().cache_images() << std::endl
+      << "  root_folder: "
+      << this->layer_param_.window_data_param().root_folder();
+
+  cache_images_ = this->layer_param_.window_data_param().cache_images();
+  string root_folder = this->layer_param_.window_data_param().root_folder();
 
   const bool prefetch_needs_rand =
       this->transform_param_.mirror() ||
@@ -88,12 +95,21 @@ void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
     // read image path
     string image_path;
     infile >> image_path;
+    image_path = root_folder + image_path;
     // read image dimensions
     vector<int> image_size(3);
     infile >> image_size[0] >> image_size[1] >> image_size[2];
     channels = image_size[0];
     image_database_.push_back(std::make_pair(image_path, image_size));
 
+    if (cache_images_) {
+      Datum datum;
+      if (!ReadFileToDatum(image_path, &datum)) {
+        LOG(ERROR) << "Could not open or find file " << image_path;
+        return;
+      }
+      image_database_cache_.push_back(std::make_pair(image_path, datum));
+    }
     // read each box
     int num_windows;
     infile >> num_windows;
@@ -227,7 +243,9 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   const float fg_fraction =
       this->layer_param_.window_data_param().fg_fraction();
   Dtype* mean = NULL;
-  int mean_off, mean_width, mean_height;
+  int mean_off = 0;
+  int mean_width = 0;
+  int mean_height = 0;
   if (this->has_mean_file_) {
     mean = this->data_mean_.mutable_cpu_data();
     mean_off = (this->data_mean_.width() - crop_size) / 2;
@@ -265,10 +283,17 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       pair<std::string, vector<int> > image =
           image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
 
-      cv::Mat cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
-      if (!cv_img.data) {
-        LOG(ERROR) << "Could not open or find file " << image.first;
-        return;
+      cv::Mat cv_img;
+      if (this->cache_images_) {
+        pair<std::string, Datum> image_cached =
+          image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
+        cv_img = DecodeDatumToCVMat(image_cached.second);
+      } else {
+        cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
+        if (!cv_img.data) {
+          LOG(ERROR) << "Could not open or find file " << image.first;
+          return;
+        }
       }
       #ifdef TIMING
       read_time += timer.MilliSeconds();
@@ -442,6 +467,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
     }
   }
   #ifdef TIMING
+  batch_timer.Stop();
   LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
   LOG(INFO) << "Read time: " << read_time << "ms.";
   LOG(INFO) << "Transform time: " << trans_time << "ms.";
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index b602d0e4c7b..03d955fa7ac 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -714,6 +714,10 @@ message WindowDataParameter {
   // warp: cropped window is warped to a fixed size and aspect ratio
   // square: the tightest square around the window is cropped
   optional string crop_mode = 11 [default = "warp"];
+  // cache_images: will load all images in memory for faster access
+  optional bool cache_images = 12 [default = false];
+  // root_folder: prepended to each image path to locate the images
+  optional string root_folder = 13 [default = ""];
 }
 
 // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters

From 87f65ae5d5193ba66f8e598a6c7d8d7cfa368ba3 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Wed, 15 Oct 2014 15:36:16 -0700
Subject: [PATCH 09/11] Change caffe time to do forward/backward and accumulate
 time per layer

---
 Makefile                               |  6 ----
 src/caffe/layers/data_layer.cpp        | 18 +++-------
 src/caffe/layers/image_data_layer.cpp  | 19 +++-------
 src/caffe/layers/window_data_layer.cpp | 18 ++--------
 src/caffe/net.cpp                      | 17 ---------
 tools/caffe.cpp                        | 49 ++++++++++++++++----------
 6 files changed, 42 insertions(+), 85 deletions(-)

diff --git a/Makefile b/Makefile
index 393fe18a70d..26d5964cacc 100644
--- a/Makefile
+++ b/Makefile
@@ -281,12 +281,6 @@ ifeq ($(USE_CUDNN), 1)
 	COMMON_FLAGS += -DUSE_CUDNN
 endif
 
-TIMING ?= 0
-# Timing Flag
-ifneq ($(TIMING), 0)
-	COMMON_FLAGS += -DTIMING
-endif
-
 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index 95604e5a1ac..7ddd4662e73 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -8,9 +8,7 @@
 #include "caffe/dataset_factory.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -84,13 +82,11 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void DataLayer<Dtype>::InternalThreadEntry() {
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -105,14 +101,13 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     // get a blob
     CHECK(iter_ != dataset_->end());
     const Datum& datum = iter_->value;
+
     cv::Mat cv_img;
     if (datum.encoded()) {
        cv_img = DecodeDatumToCVMat(datum);
     }
-    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
-    #endif
 
     // Apply data transformations (mirror, scale, crop...)
     int offset = this->prefetch_data_.offset(item_id);
@@ -125,20 +120,17 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }
-    #ifdef TIMING
     trans_time += timer.MilliSeconds();
-    #endif
     // go to the next iter
     ++iter_;
     if (iter_ == dataset_->end()) {
       iter_ = dataset_->begin();
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  batch_timer.Stop();
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(DataLayer);
diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp
index 0abcd888b67..e0778e5989f 100644
--- a/src/caffe/layers/image_data_layer.cpp
+++ b/src/caffe/layers/image_data_layer.cpp
@@ -6,9 +6,7 @@
 
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -95,13 +93,11 @@ void ImageDataLayer<Dtype>::ShuffleImages() {
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void ImageDataLayer<Dtype>::InternalThreadEntry() {
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -117,26 +113,20 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
   const int lines_size = lines_.size();
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     // get a blob
-    #ifdef TIMING
     timer.Start();
-    #endif
     CHECK_GT(lines_size, lines_id_);
     cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                     new_height, new_width, is_color);
     if (!cv_img.data) {
       continue;
     }
-    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
-    #endif
     // Apply transformations (mirror, crop...) to the image
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
-    #ifdef TIMING
     trans_time += timer.MilliSeconds();
-    #endif
 
     top_label[item_id] = lines_[lines_id_].second;
     // go to the next iter
@@ -150,11 +140,10 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
       }
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  batch_timer.Stop();
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(ImageDataLayer);
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 8f75557d694..14e8caad6c9 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -13,9 +13,7 @@
 #include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -226,13 +224,11 @@ template <typename Dtype>
 void WindowDataLayer<Dtype>::InternalThreadEntry() {
   // At each iteration, sample N windows where N*p are foreground (object)
   // windows and N*(1-p) are background (non-object) windows
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
   Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
   const Dtype scale = this->layer_param_.window_data_param().scale();
@@ -269,9 +265,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   for (int is_fg = 0; is_fg < 2; ++is_fg) {
     for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
       // sample a window
-      #ifdef TIMING
       timer.Start();
-      #endif
       const unsigned int rand_index = PrefetchRand();
       vector<float> window = (is_fg) ?
           fg_windows_[rand_index % fg_windows_.size()] :
@@ -295,10 +289,8 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           return;
         }
       }
-      #ifdef TIMING
       read_time += timer.MilliSeconds();
       timer.Start();
-      #endif
       const int channels = cv_img.channels();
 
       // crop window out of image and warp it
@@ -424,9 +416,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           }
         }
       }
-      #ifdef TIMING
       trans_time += timer.MilliSeconds();
-      #endif
       // get window label
       top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];
 
@@ -466,12 +456,10 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       item_id++;
     }
   }
-  #ifdef TIMING
   batch_timer.Stop();
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(WindowDataLayer);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index c0e56920c83..21ab15fd31b 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -9,9 +9,6 @@
 #include "caffe/layer.hpp"
 #include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
-#ifdef TIMING
-#include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/insert_splits.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -502,10 +499,6 @@ void Net<Dtype>::GetLearningRateAndWeightDecay() {
 
 template <typename Dtype>
 Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
-  #ifdef TIMING
-  Timer timer;
-  timer.Start();
-  #endif
   CHECK_GE(start, 0);
   CHECK_LT(end, layers_.size());
   Dtype loss = 0;
@@ -516,9 +509,6 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
     loss += layer_loss;
     if (debug_info_) { ForwardDebugInfo(i); }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Forward time: " << timer.MilliSeconds() << "ms.";
-  #endif
   return loss;
 }
 
@@ -575,10 +565,6 @@ string Net<Dtype>::Forward(const string& input_blob_protos, Dtype* loss) {
 
 template <typename Dtype>
 void Net<Dtype>::BackwardFromTo(int start, int end) {
-  #ifdef TIMING
-  Timer timer;
-  timer.Start();
-  #endif
   CHECK_GE(end, 0);
   CHECK_LT(start, layers_.size());
   for (int i = start; i >= end; --i) {
@@ -588,9 +574,6 @@ void Net<Dtype>::BackwardFromTo(int start, int end) {
       if (debug_info_) { BackwardDebugInfo(i); }
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Backward time: " << timer.MilliSeconds() << "ms.";
-  #endif
 }
 
 template <typename Dtype>
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index bfcd9f19cec..1c842a078c2 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -229,35 +229,46 @@ int time() {
   Timer total_timer;
   total_timer.Start();
   Timer forward_timer;
-  forward_timer.Start();
+  Timer backward_timer;
   Timer timer;
-  for (int i = 0; i < layers.size(); ++i) {
-    const caffe::string& layername = layers[i]->layer_param().name();
-    timer.Start();
-    for (int j = 0; j < FLAGS_iterations; ++j) {
+  std::vector<float> forward_time_per_layer(layers.size(), 0.0);
+  std::vector<float> backward_time_per_layer(layers.size(), 0.0);
+  float forward_time = 0.0;
+  float backward_time = 0.0;
+  for (int j = 0; j < FLAGS_iterations; ++j) {
+    Timer iter_timer;
+    iter_timer.Start();
+    forward_timer.Start();
+    for (int i = 0; i < layers.size(); ++i) {
+      timer.Start();
       // Although Reshape should be essentially free, we include it here
       // so that we will notice Reshape performance bugs.
       layers[i]->Reshape(bottom_vecs[i], top_vecs[i]);
       layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
+      forward_time_per_layer[i] += timer.MilliSeconds();
     }
-    LOG(INFO) << layername << "\tforward: " << timer.MilliSeconds() <<
-        " milliseconds.";
-  }
-  LOG(INFO) << "Forward pass: " << forward_timer.MilliSeconds() <<
-      " milliseconds.";
-  Timer backward_timer;
-  backward_timer.Start();
-  for (int i = layers.size() - 1; i >= 0; --i) {
-    const caffe::string& layername = layers[i]->layer_param().name();
-    timer.Start();
-    for (int j = 0; j < FLAGS_iterations; ++j) {
+    forward_time += forward_timer.MilliSeconds();
+    backward_timer.Start();
+    for (int i = layers.size() - 1; i >= 0; --i) {
+      timer.Start();
       layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
                           bottom_vecs[i]);
+      backward_time_per_layer[i] += timer.MilliSeconds();
     }
-    LOG(INFO) << layername << "\tbackward: "
-        << timer.MilliSeconds() << " milliseconds.";
+    backward_time += backward_timer.MilliSeconds();
+    LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
+      << iter_timer.MilliSeconds() << " milliseconds.";
   }
-  LOG(INFO) << "Backward pass: " << backward_timer.MilliSeconds() <<
+  for (int i = 0; i < layers.size(); ++i) {
+    const caffe::string& layername = layers[i]->layer_param().name();
+    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
+      "\tforward: " << forward_time_per_layer[i] << " milliseconds.";
+    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername  <<
+      "\tbackward: " << backward_time_per_layer[i] << " milliseconds.";
+  }
+  LOG(INFO) << "Forward pass: " << forward_time <<
+      " milliseconds.";
+  LOG(INFO) << "Backward pass: " << backward_time <<
       " milliseconds.";
   LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() <<
       " milliseconds.";

From b6e9116ee379afa3d616f9890a07ee29b6e87319 Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Wed, 15 Oct 2014 15:36:42 -0700
Subject: [PATCH 10/11] Upgrade compute_image_mean to use gflags, accept
 list_of_images, and print mean_values

---
 examples/cifar10/create_cifar10.sh |  4 +-
 src/caffe/layers/data_layer.cpp    |  4 +-
 tools/compute_image_mean.cpp       | 59 ++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/examples/cifar10/create_cifar10.sh b/examples/cifar10/create_cifar10.sh
index ebfca46de87..a42725cb610 100755
--- a/examples/cifar10/create_cifar10.sh
+++ b/examples/cifar10/create_cifar10.sh
@@ -13,7 +13,7 @@ rm -rf $EXAMPLE/cifar10_train_$DBTYPE $EXAMPLE/cifar10_test_$DBTYPE
 
 echo "Computing image mean..."
 
-./build/tools/compute_image_mean $EXAMPLE/cifar10_train_$DBTYPE \
-  $EXAMPLE/mean.binaryproto $DBTYPE
+./build/tools/compute_image_mean -backend=$DBTYPE \
+  $EXAMPLE/cifar10_train_$DBTYPE $EXAMPLE/mean.binaryproto
 
 echo "Done."
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index 7ddd4662e73..05f2e298ee7 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -46,9 +46,9 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
   }
   // Read a data point, and use it to initialize the top blob.
   CHECK(iter_ != dataset_->end());
-  const Datum& datum = iter_->value;
+  Datum datum = iter_->value;
 
-  if (DecodeDatum(datum)) {
+  if (DecodeDatum(&datum)) {
     LOG(INFO) << "Decoding Datum";
   }
   // image
diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp
index a720f162aa2..4c2c658bbb0 100644
--- a/tools/compute_image_mean.cpp
+++ b/tools/compute_image_mean.cpp
@@ -1,3 +1,4 @@
+#include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <stdint.h>
 
@@ -12,19 +13,31 @@ using caffe::Dataset;
 using caffe::Datum;
 using caffe::BlobProto;
 using std::max;
+using std::pair;
+
+
+DEFINE_string(backend, "lmdb", "The backend for containing the images");
 
 int main(int argc, char** argv) {
   ::google::InitGoogleLogging(argv[0]);
-  if (argc < 3 || argc > 4) {
-    LOG(ERROR) << "Usage: compute_image_mean input_db output_file"
-               << " db_backend[leveldb or lmdb]";
+
+#ifndef GFLAGS_GFLAGS_H_
+  namespace gflags = google;
+#endif
+
+  gflags::SetUsageMessage("Compute the mean_image of a set of images given by"
+        " a leveldb/lmdb or a list of images\n"
+        "Usage:\n"
+        "    compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n");
+
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (argc < 2 || argc > 3) {
+    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean");
     return 1;
   }
 
-  std::string db_backend = "lmdb";
-  if (argc == 4) {
-    db_backend = std::string(argv[3]);
-  }
+  std::string db_backend = FLAGS_backend;
 
   caffe::shared_ptr<Dataset<std::string, Datum> > dataset =
       caffe::DatasetFactory<std::string, Datum>(db_backend);
@@ -36,7 +49,11 @@ int main(int argc, char** argv) {
   int count = 0;
   // load first datum
   Dataset<std::string, Datum>::const_iterator iter = dataset->begin();
-  const Datum& datum = iter->value;
+  Datum datum = iter->value;
+
+  if (DecodeDatum(&datum)) {
+    LOG(INFO) << "Decoding Datum";
+  }
 
   sum_blob.set_num(1);
   sum_blob.set_channels(datum.channels());
@@ -51,8 +68,9 @@ int main(int argc, char** argv) {
   LOG(INFO) << "Starting Iteration";
   for (Dataset<std::string, Datum>::const_iterator iter = dataset->begin();
       iter != dataset->end(); ++iter) {
-    // just a dummy operation
-    const Datum& datum = iter->value;
+    Datum datum = iter->value;
+    DecodeDatum(&datum);
+
     const std::string& data = datum.data();
     size_in_datum = std::max<int>(datum.data().size(),
         datum.float_data_size());
@@ -72,20 +90,31 @@ int main(int argc, char** argv) {
     }
     ++count;
     if (count % 10000 == 0) {
-      LOG(ERROR) << "Processed " << count << " files.";
+      LOG(INFO) << "Processed " << count << " files.";
     }
   }
 
   if (count % 10000 != 0) {
-    LOG(ERROR) << "Processed " << count << " files.";
+    LOG(INFO) << "Processed " << count << " files.";
   }
   for (int i = 0; i < sum_blob.data_size(); ++i) {
     sum_blob.set_data(i, sum_blob.data(i) / count);
   }
   // Write to disk
-  LOG(INFO) << "Write to " << argv[2];
-  WriteProtoToBinaryFile(sum_blob, argv[2]);
-
+  if (argc == 3) {
+    LOG(INFO) << "Write to " << argv[2];
+    WriteProtoToBinaryFile(sum_blob, argv[2]);
+  }
+  const int channels = sum_blob.channels();
+  const int dim = sum_blob.height() * sum_blob.width();
+  std::vector<float> mean_values(channels, 0.0);
+  LOG(INFO) << "Number of channels: " << channels;
+  for (int c = 0; c < channels; ++c) {
+    for (int i = 0; i < dim; ++i) {
+      mean_values[c] += sum_blob.data(dim * c + i);
+    }
+    LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim;
+  }
   // Clean up
   dataset->close();
   return 0;

From f17cd3ee6b944e7b0fedcd47d85d4eff4a94a76a Mon Sep 17 00:00:00 2001
From: Sergio <sguada@gmail.com>
Date: Sun, 12 Oct 2014 23:16:59 -0700
Subject: [PATCH 11/11] Added CPUTimer; make timing more precise using double
 and microseconds

---
 include/caffe/util/benchmark.hpp       | 20 ++++++++---
 src/caffe/layers/data_layer.cpp        | 18 +++++-----
 src/caffe/layers/image_data_layer.cpp  | 18 +++++-----
 src/caffe/layers/window_data_layer.cpp | 18 +++++-----
 src/caffe/util/benchmark.cpp           | 49 +++++++++++++++++++++++++-
 tools/caffe.cpp                        | 39 +++++++++++---------
 tools/compute_image_mean.cpp           |  2 ++
 7 files changed, 114 insertions(+), 50 deletions(-)

diff --git a/include/caffe/util/benchmark.hpp b/include/caffe/util/benchmark.hpp
index 6c01ac4ee18..d63582776ee 100644
--- a/include/caffe/util/benchmark.hpp
+++ b/include/caffe/util/benchmark.hpp
@@ -11,11 +11,11 @@ class Timer {
  public:
   Timer();
   virtual ~Timer();
-  void Start();
-  void Stop();
-  float MilliSeconds();
-  float MicroSeconds();
-  float Seconds();
+  virtual void Start();
+  virtual void Stop();
+  virtual float MilliSeconds();
+  virtual float MicroSeconds();
+  virtual float Seconds();
 
   inline bool initted() { return initted_; }
   inline bool running() { return running_; }
@@ -37,6 +37,16 @@ class Timer {
   float elapsed_microseconds_;
 };
 
+class CPUTimer : public Timer {
+ public:
+  explicit CPUTimer();
+  virtual ~CPUTimer() {}
+  virtual void Start();
+  virtual void Stop();
+  virtual float MilliSeconds();
+  virtual float MicroSeconds();
+};
+
 }  // namespace caffe
 
 #endif   // CAFFE_UTIL_BENCHMARK_H_
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index 05f2e298ee7..5d6f05802ce 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -82,11 +82,11 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void DataLayer<Dtype>::InternalThreadEntry() {
-  Timer batch_timer;
+  CPUTimer batch_timer;
   batch_timer.Start();
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
+  double read_time = 0;
+  double trans_time = 0;
+  CPUTimer timer;
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -106,7 +106,7 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     if (datum.encoded()) {
        cv_img = DecodeDatumToCVMat(datum);
     }
-    read_time += timer.MilliSeconds();
+    read_time += timer.MicroSeconds();
     timer.Start();
 
     // Apply data transformations (mirror, scale, crop...)
@@ -120,7 +120,7 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }
-    trans_time += timer.MilliSeconds();
+    trans_time += timer.MicroSeconds();
     // go to the next iter
     ++iter_;
     if (iter_ == dataset_->end()) {
@@ -128,9 +128,9 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     }
   }
   batch_timer.Stop();
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
+  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
+  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
 }
 
 INSTANTIATE_CLASS(DataLayer);
diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp
index e0778e5989f..50997a23bf9 100644
--- a/src/caffe/layers/image_data_layer.cpp
+++ b/src/caffe/layers/image_data_layer.cpp
@@ -93,11 +93,11 @@ void ImageDataLayer<Dtype>::ShuffleImages() {
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void ImageDataLayer<Dtype>::InternalThreadEntry() {
-  Timer batch_timer;
+  CPUTimer batch_timer;
   batch_timer.Start();
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
+  double read_time = 0;
+  double trans_time = 0;
+  CPUTimer timer;
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -120,13 +120,13 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
     if (!cv_img.data) {
       continue;
     }
-    read_time += timer.MilliSeconds();
+    read_time += timer.MicroSeconds();
     timer.Start();
     // Apply transformations (mirror, crop...) to the image
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
-    trans_time += timer.MilliSeconds();
+    trans_time += timer.MicroSeconds();
 
     top_label[item_id] = lines_[lines_id_].second;
     // go to the next iter
@@ -141,9 +141,9 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
     }
   }
   batch_timer.Stop();
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
+  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
+  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
 }
 
 INSTANTIATE_CLASS(ImageDataLayer);
diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp
index 14e8caad6c9..6287b385dc5 100644
--- a/src/caffe/layers/window_data_layer.cpp
+++ b/src/caffe/layers/window_data_layer.cpp
@@ -224,11 +224,11 @@ template <typename Dtype>
 void WindowDataLayer<Dtype>::InternalThreadEntry() {
   // At each iteration, sample N windows where N*p are foreground (object)
   // windows and N*(1-p) are background (non-object) windows
-  Timer batch_timer;
+  CPUTimer batch_timer;
   batch_timer.Start();
-  float read_time = 0;
-  float trans_time = 0;
-  Timer timer;
+  double read_time = 0;
+  double trans_time = 0;
+  CPUTimer timer;
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
   Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
   const Dtype scale = this->layer_param_.window_data_param().scale();
@@ -289,7 +289,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           return;
         }
       }
-      read_time += timer.MilliSeconds();
+      read_time += timer.MicroSeconds();
       timer.Start();
       const int channels = cv_img.channels();
 
@@ -416,7 +416,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           }
         }
       }
-      trans_time += timer.MilliSeconds();
+      trans_time += timer.MicroSeconds();
       // get window label
       top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];
 
@@ -457,9 +457,9 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
     }
   }
   batch_timer.Stop();
-  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  DLOG(INFO) << "Read time: " << read_time << "ms.";
-  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
+  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
+  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
 }
 
 INSTANTIATE_CLASS(WindowDataLayer);
diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp
index 2edc9a53ef0..1d269c351c1 100644
--- a/src/caffe/util/benchmark.cpp
+++ b/src/caffe/util/benchmark.cpp
@@ -69,7 +69,7 @@ float Timer::MicroSeconds() {
     CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
                                     stop_gpu_));
     // Cuda only measure milliseconds
-    elapsed_microseconds_ = elapsed_milliseconds_ / 1000;
+    elapsed_microseconds_ = elapsed_milliseconds_ * 1000;
 #else
       NO_GPU;
 #endif
@@ -118,4 +118,51 @@ void Timer::Init() {
   }
 }
 
+CPUTimer::CPUTimer() {
+  this->initted_ = true;
+  this->running_ = false;
+  this->has_run_at_least_once_ = false;
+}
+
+void CPUTimer::Start() {
+  if (!running()) {
+    this->start_cpu_ = boost::posix_time::microsec_clock::local_time();
+    this->running_ = true;
+    this->has_run_at_least_once_ = true;
+  }
+}
+
+void CPUTimer::Stop() {
+  if (running()) {
+    this->stop_cpu_ = boost::posix_time::microsec_clock::local_time();
+    this->running_ = false;
+  }
+}
+
+float CPUTimer::MilliSeconds() {
+  if (!has_run_at_least_once()) {
+    LOG(WARNING) << "Timer has never been run before reading time.";
+    return 0;
+  }
+  if (running()) {
+    Stop();
+  }
+  this->elapsed_milliseconds_ = (this->stop_cpu_ -
+                                this->start_cpu_).total_milliseconds();
+  return this->elapsed_milliseconds_;
+}
+
+float CPUTimer::MicroSeconds() {
+  if (!has_run_at_least_once()) {
+    LOG(WARNING) << "Timer has never been run before reading time.";
+    return 0;
+  }
+  if (running()) {
+    Stop();
+  }
+  this->elapsed_microseconds_ = (this->stop_cpu_ -
+                                this->start_cpu_).total_microseconds();
+  return this->elapsed_microseconds_;
+}
+
 }  // namespace caffe
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index 1c842a078c2..9f9d975dfba 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -231,10 +231,10 @@ int time() {
   Timer forward_timer;
   Timer backward_timer;
   Timer timer;
-  std::vector<float> forward_time_per_layer(layers.size(), 0.0);
-  std::vector<float> backward_time_per_layer(layers.size(), 0.0);
-  float forward_time = 0.0;
-  float backward_time = 0.0;
+  std::vector<double> forward_time_per_layer(layers.size(), 0.0);
+  std::vector<double> backward_time_per_layer(layers.size(), 0.0);
+  double forward_time = 0.0;
+  double backward_time = 0.0;
   for (int j = 0; j < FLAGS_iterations; ++j) {
     Timer iter_timer;
     iter_timer.Start();
@@ -245,33 +245,38 @@ int time() {
       // so that we will notice Reshape performance bugs.
       layers[i]->Reshape(bottom_vecs[i], top_vecs[i]);
       layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
-      forward_time_per_layer[i] += timer.MilliSeconds();
+      forward_time_per_layer[i] += timer.MicroSeconds();
     }
-    forward_time += forward_timer.MilliSeconds();
+    forward_time += forward_timer.MicroSeconds();
     backward_timer.Start();
     for (int i = layers.size() - 1; i >= 0; --i) {
       timer.Start();
       layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
                           bottom_vecs[i]);
-      backward_time_per_layer[i] += timer.MilliSeconds();
+      backward_time_per_layer[i] += timer.MicroSeconds();
     }
-    backward_time += backward_timer.MilliSeconds();
+    backward_time += backward_timer.MicroSeconds();
     LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
-      << iter_timer.MilliSeconds() << " milliseconds.";
+      << iter_timer.MilliSeconds() << " ms.";
   }
+  LOG(INFO) << "Average time per layer: ";
   for (int i = 0; i < layers.size(); ++i) {
     const caffe::string& layername = layers[i]->layer_param().name();
     LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
-      "\tforward: " << forward_time_per_layer[i] << " milliseconds.";
+      "\tforward: " << forward_time_per_layer[i] / 1000 /
+      FLAGS_iterations << " ms.";
     LOG(INFO) << std::setfill(' ') << std::setw(10) << layername  <<
-      "\tbackward: " << backward_time_per_layer[i] << " milliseconds.";
+      "\tbackward: " << backward_time_per_layer[i] / 1000 /
+      FLAGS_iterations << " ms.";
   }
-  LOG(INFO) << "Forward pass: " << forward_time <<
-      " milliseconds.";
-  LOG(INFO) << "Backward pass: " << backward_time <<
-      " milliseconds.";
-  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() <<
-      " milliseconds.";
+  total_timer.Stop();
+  LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
+    FLAGS_iterations << " ms.";
+  LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
+    FLAGS_iterations << " ms.";
+  LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
+    FLAGS_iterations << " ms.";
+  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
   LOG(INFO) << "*** Benchmark ends ***";
   return 0;
 }
diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp
index 4c2c658bbb0..358f57e38d6 100644
--- a/tools/compute_image_mean.cpp
+++ b/tools/compute_image_mean.cpp
@@ -4,6 +4,8 @@
 
 #include <algorithm>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include "caffe/dataset_factory.hpp"
 #include "caffe/proto/caffe.pb.h"