From 40ca4e68097589a6143a6754328a1f94786c118a Mon Sep 17 00:00:00 2001 From: tishibas67 Date: Wed, 18 Mar 2015 00:11:01 +0900 Subject: [PATCH 001/223] improved to load RGB image as grayscale image --- python/caffe/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index 6ae2cf13..acd8a142 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -285,7 +285,7 @@ def load_image(filename, color=True): of size (H x W x 3) in RGB or of size (H x W x 1) in grayscale. """ - img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32) + img = skimage.img_as_float(skimage.io.imread(filename, as_grey=not color)).astype(np.float32) if img.ndim == 2: img = img[:, :, np.newaxis] if color: From 90750550d5f6ee7dd1937777b522189b9266634f Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Thu, 2 Jul 2015 11:42:37 -0700 Subject: [PATCH 002/223] Optimize inner product layer for special case M == 1 Using gemm with one of the operand being a vector or a scalar is suboptimal. Functions gemv and axpy should be prefered in these cases. --- src/caffe/layers/inner_product_layer.cu | 19 ++++++--- src/caffe/test/test_inner_product_layer.cpp | 43 ++++++++++++++++++++- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index dd90cac1..c0ebd2c4 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ b/src/caffe/layers/inner_product_layer.cu @@ -15,12 +15,19 @@ void InnerProductLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); const Dtype* weight = this->blobs_[0]->gpu_data(); - caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., - bottom_data, weight, (Dtype)0., top_data); - if (bias_term_) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., - bias_multiplier_.gpu_data(), - this->blobs_[1]->gpu_data(), (Dtype)1., top_data); + if (M_ == 1) { + caffe_gpu_gemv(CblasNoTrans, N_, K_, (Dtype)1., + weight, bottom_data, (Dtype)0., top_data); + if (bias_term_) + caffe_gpu_axpy(N_, bias_multiplier_.cpu_data()[0], + this->blobs_[1]->gpu_data(), top_data); + } else { + caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., + bottom_data, weight, (Dtype)0., top_data); + if (bias_term_) + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., + bias_multiplier_.gpu_data(), + this->blobs_[1]->gpu_data(), (Dtype)1., top_data); } } diff --git a/src/caffe/test/test_inner_product_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp index c03df173..fbf0c851 100644 --- a/src/caffe/test/test_inner_product_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -23,16 +23,21 @@ class InnerProductLayerTest : public MultiDeviceTest { protected: InnerProductLayerTest() : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_bottom_nobatch_(new Blob(1, 2, 3, 4)), blob_top_(new Blob()) { // fill the values FillerParameter filler_param; UniformFiller filler(filler_param); filler.Fill(this->blob_bottom_); - blob_bottom_vec_.push_back(blob_bottom_); blob_top_vec_.push_back(blob_top_); } - virtual ~InnerProductLayerTest() { delete blob_bottom_; delete blob_top_; } + virtual ~InnerProductLayerTest() { + delete blob_bottom_; + delete blob_bottom_nobatch_; + delete blob_top_; + } Blob* const blob_bottom_; + Blob* const blob_bottom_nobatch_; Blob* const blob_top_; vector*> blob_bottom_vec_; vector*> blob_top_vec_; @@ -42,6 +47,7 @@ 
TYPED_TEST_CASE(InnerProductLayerTest, TestDtypesAndDevices); TYPED_TEST(InnerProductLayerTest, TestSetUp) { typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_); LayerParameter layer_param; InnerProductParameter* inner_product_param = layer_param.mutable_inner_product_param(); @@ -57,6 +63,38 @@ TYPED_TEST(InnerProductLayerTest, TestSetUp) { TYPED_TEST(InnerProductLayerTest, TestForward) { typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_); + bool IS_VALID_CUDA = false; +#ifndef CPU_ONLY + IS_VALID_CUDA = CAFFE_TEST_CUDA_PROP.major >= 2; +#endif + if (Caffe::mode() == Caffe::CPU || + sizeof(Dtype) == 4 || IS_VALID_CUDA) { + LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_min(1); + inner_product_param->mutable_bias_filler()->set_max(2); + shared_ptr > layer( + new InnerProductLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + for (int i = 0; i < count; ++i) { + EXPECT_GE(data[i], 1.); + } + } else { + LOG(ERROR) << "Skipping test due to old architecture."; + } +} + +TYPED_TEST(InnerProductLayerTest, TestForwardNoBatch) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_nobatch_); bool IS_VALID_CUDA = false; #ifndef CPU_ONLY IS_VALID_CUDA = CAFFE_TEST_CUDA_PROP.major >= 2; @@ -87,6 +125,7 @@ TYPED_TEST(InnerProductLayerTest, TestForward) { TYPED_TEST(InnerProductLayerTest, TestGradient) { typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_); bool IS_VALID_CUDA = false; #ifndef CPU_ONLY IS_VALID_CUDA = CAFFE_TEST_CUDA_PROP.major >= 2; From 3fa0de93b059753ed378b474e1568980ed131a10 Mon Sep 17 00:00:00 2001 From: AdamStelmaszczyk Date: Sat, 4 Jul 2015 20:57:17 +0100 Subject: [PATCH 003/223] Deprecated OpenCV consts --- examples/cpp_classification/classification.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index 1c6371e3..0683a497 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -186,13 +186,13 @@ void Classifier::Preprocess(const cv::Mat& img, /* Convert the input image to the input image format of the network. 
*/ cv::Mat sample; if (img.channels() == 3 && num_channels_ == 1) - cv::cvtColor(img, sample, CV_BGR2GRAY); + cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY); else if (img.channels() == 4 && num_channels_ == 1) - cv::cvtColor(img, sample, CV_BGRA2GRAY); + cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY); else if (img.channels() == 4 && num_channels_ == 3) - cv::cvtColor(img, sample, CV_BGRA2BGR); + cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR); else if (img.channels() == 1 && num_channels_ == 3) - cv::cvtColor(img, sample, CV_GRAY2BGR); + cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR); else sample = img; From 4c23e93a95db0c6c6c5d4239070966e3bdc24fdc Mon Sep 17 00:00:00 2001 From: Gustavo Serra Scalet Date: Thu, 6 Aug 2015 14:20:55 -0300 Subject: [PATCH 004/223] Fix download model script to use zip archive Currently GitHub is not using tarballs as archive for downloading gists therefore the script was broken as actually a zip archive was being downloaded. --- scripts/download_model_from_gist.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/download_model_from_gist.sh b/scripts/download_model_from_gist.sh index a1dccf78..89527b75 100755 --- a/scripts/download_model_from_gist.sh +++ b/scripts/download_model_from_gist.sh @@ -18,7 +18,7 @@ fi echo "Downloading Caffe model info to $MODEL_DIR ..." mkdir -p $MODEL_DIR -wget https://gist.github.com/$GIST/download -O $MODEL_DIR/gist.tar.gz -tar xzf $MODEL_DIR/gist.tar.gz --directory=$MODEL_DIR --strip-components=1 -rm $MODEL_DIR/gist.tar.gz +wget https://gist.github.com/$GIST/download -O $MODEL_DIR/gist.zip +unzip -j $MODEL_DIR/gist.zip -d $MODEL_DIR +rm $MODEL_DIR/gist.zip echo "Done" From fda9229a426d9c0e4496c700099a7126da72ba64 Mon Sep 17 00:00:00 2001 From: Gustavo Serra Scalet Date: Thu, 6 Aug 2015 14:37:20 -0300 Subject: [PATCH 005/223] Fix download model binary script to get correct lines on parsing table The base reference of "bottom" variable was relative to the "top+1" and not to the whole readlines output. It ended up without all the lines. That could work for some gists however for the model I was looking for (see below) the sha1 key was not being parsed, as it was missing the last line. 
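For illustration, with made-up front-matter lines (not the actual gist contents):

    lines = ['---', 'name: My Model', 'sha1: abc123', '---', '# Description']
    top = lines.index('---')               # 0
    bottom = lines[top + 1:].index('---')  # old code: 2, relative to the slice
    lines[top + 1:bottom]                  # ['name: My Model'] -- sha1 dropped
    bottom = lines.index('---', top + 1)   # new code: 3, relative to the whole list
    lines[top + 1:bottom]                  # ['name: My Model', 'sha1: abc123']
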
tested with the following gist: longjon/1bf3aa1e0b8e788d7e1d --- scripts/download_model_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_model_binary.py b/scripts/download_model_binary.py index 48e9015f..03a50f67 100755 --- a/scripts/download_model_binary.py +++ b/scripts/download_model_binary.py @@ -32,7 +32,7 @@ def parse_readme_frontmatter(dirname): with open(readme_filename) as f: lines = [line.strip() for line in f.readlines()] top = lines.index('---') - bottom = lines[top + 1:].index('---') + bottom = lines.index('---', top + 1) frontmatter = yaml.load('\n'.join(lines[top + 1:bottom])) assert all(key in frontmatter for key in required_keys) return dirname, frontmatter From 443b16f84fb8948dbddecaf775c7da44dac6c6b1 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 21 Jan 2015 22:21:13 -0800 Subject: [PATCH 006/223] Add gpu_util.cuh, with caffe_gpu_atomic_add (double impl from NVIDIA dev docs; float impl included in CUDA as "atomicAdd") --- include/caffe/util/gpu_util.cuh | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/caffe/util/gpu_util.cuh diff --git a/include/caffe/util/gpu_util.cuh b/include/caffe/util/gpu_util.cuh new file mode 100644 index 00000000..994202f2 --- /dev/null +++ b/include/caffe/util/gpu_util.cuh @@ -0,0 +1,35 @@ +#ifndef CAFFE_UTIL_GPU_UTIL_H_ +#define CAFFE_UTIL_GPU_UTIL_H_ + +namespace caffe { + +template +inline __device__ Dtype caffe_gpu_atomic_add(const Dtype val, Dtype* address); + +template <> +inline __device__ +float caffe_gpu_atomic_add(const float val, float* address) { + return atomicAdd(address, val); +} + +// double atomicAdd implementation taken from: +// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#axzz3PVCpVsEG +template <> +inline __device__ +double caffe_gpu_atomic_add(const double val, double* address) { + unsigned long long int* address_as_ull = // NOLINT(runtime/int) + // NOLINT_NEXT_LINE(runtime/int) + reinterpret_cast(address); + unsigned long long int old = *address_as_ull; // NOLINT(runtime/int) + unsigned long long int assumed; // NOLINT(runtime/int) + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + +} // namespace caffe + +#endif // CAFFE_UTIL_GPU_UTIL_H_ From 6067869f8339344ecd68c486a6e47d07e8997b6f Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 21 Jan 2015 14:23:34 -0800 Subject: [PATCH 007/223] test_gradient_check_util: check_bottom < -1 only checks params --- include/caffe/test/test_gradient_check_util.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp index cc5dcbad..25f35d15 100644 --- a/include/caffe/test/test_gradient_check_util.hpp +++ b/include/caffe/test/test_gradient_check_util.hpp @@ -45,6 +45,10 @@ class GradientChecker { void CheckGradientEltwise(Layer* layer, const vector*>& bottom, const vector*>& top); + // Checks the gradient of a single output with respect to particular input + // blob(s). If check_bottom = i >= 0, check only the ith bottom Blob. + // If check_bottom == -1, check everything -- all bottom Blobs and all + // param Blobs. Otherwise (if check_bottom < -1), check only param Blobs. 
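+ // For example, passing check_bottom = -2 (e.g. via CheckGradientExhaustive,
+ // as the EmbedLayer tests added later in this series do) checks only the
+ // parameter blobs, which suits layers whose bottom blobs hold integer
+ // indices and take no gradient.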
void CheckGradientSingle(Layer* layer, const vector*>& bottom, const vector*>& top, int check_bottom, int top_id, int top_data_id, bool element_wise = false); @@ -83,21 +87,22 @@ void GradientChecker::CheckGradientSingle(Layer* layer, // First, figure out what blobs we need to check against, and zero init // parameter blobs. vector*> blobs_to_check; - vector propagate_down(bottom.size(), check_bottom < 0); + vector propagate_down(bottom.size(), check_bottom == -1); for (int i = 0; i < layer->blobs().size(); ++i) { Blob* blob = layer->blobs()[i].get(); caffe_set(blob->count(), static_cast(0), blob->mutable_cpu_diff()); blobs_to_check.push_back(blob); } - if (check_bottom < 0) { + if (check_bottom == -1) { for (int i = 0; i < bottom.size(); ++i) { blobs_to_check.push_back(bottom[i]); } - } else { + } else if (check_bottom >= 0) { CHECK_LT(check_bottom, bottom.size()); blobs_to_check.push_back(bottom[check_bottom]); propagate_down[check_bottom] = true; } + CHECK_GT(blobs_to_check.size(), 0) << "No blobs to check."; // Compute the gradient analytically using Backward Caffe::set_random_seed(seed_); // Ignore the loss from the layer (it's just the weighted sum of the losses From 4d299c3071039e7c49c01b2435e11549f764df88 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sun, 15 Feb 2015 16:00:04 -0800 Subject: [PATCH 008/223] Add EmbedLayer for inner products with sparse input (one-hot vectors), with unit tests --- include/caffe/common_layers.hpp | 38 ++++++ src/caffe/layers/embed_layer.cpp | 122 +++++++++++++++++++ src/caffe/layers/embed_layer.cu | 80 ++++++++++++ src/caffe/proto/caffe.proto | 18 ++- src/caffe/test/test_embed_layer.cpp | 183 ++++++++++++++++++++++++++++ 5 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/embed_layer.cpp create mode 100644 src/caffe/layers/embed_layer.cu create mode 100644 src/caffe/test/test_embed_layer.cpp diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index d2c0ce6d..691e755f 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -180,6 +180,44 @@ class EltwiseLayer : public Layer { bool stable_prod_grad_; }; +/** + * @brief A layer for learning "embeddings" of one-hot vector input. + * Equivalent to an InnerProductLayer with one-hot vectors as input, but + * for efficiency the input is the "hot" index of each column itself. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
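+ *
+ * For example, with input_dim = K and num_output = N, a bottom value of 2
+ * selects row 2 of the K x N weight matrix -- the same result as multiplying
+ * the one-hot row vector (0, 0, 1, 0, ...) by that matrix.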
+ */ +template +class EmbedLayer : public Layer { + public: + explicit EmbedLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Embed"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int M_; + int K_; + int N_; + bool bias_term_; + Blob bias_multiplier_; +}; + /** * @brief Takes two+ Blobs, interprets last Blob as a selector and * filter remaining Blobs accordingly with selector data (0 means that diff --git a/src/caffe/layers/embed_layer.cpp b/src/caffe/layers/embed_layer.cpp new file mode 100644 index 00000000..be6b2cd2 --- /dev/null +++ b/src/caffe/layers/embed_layer.cpp @@ -0,0 +1,122 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void EmbedLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + N_ = this->layer_param_.embed_param().num_output(); + CHECK_GT(N_, 0) << "EmbedLayer num_output must be positive."; + K_ = this->layer_param_.embed_param().input_dim(); + CHECK_GT(K_, 0) << "EmbedLayer input_dim must be positive."; + bias_term_ = this->layer_param_.embed_param().bias_term(); + // Check if we need to set up the weights + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + if (bias_term_) { + this->blobs_.resize(2); + } else { + this->blobs_.resize(1); + } + // Initialize the weights -- + // transposed from InnerProductLayer for spatial locality. 
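+  // The weight blob is thus K_ x N_: one N_-dimensional embedding vector per
+  // possible input index.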
+ vector weight_shape(2); + weight_shape[0] = K_; + weight_shape[1] = N_; + this->blobs_[0].reset(new Blob(weight_shape)); + // fill the weights + shared_ptr > weight_filler(GetFiller( + this->layer_param_.embed_param().weight_filler())); + weight_filler->Fill(this->blobs_[0].get()); + // If necessary, initialize and fill the bias term + if (bias_term_) { + vector bias_shape(1, N_); + this->blobs_[1].reset(new Blob(bias_shape)); + shared_ptr > bias_filler(GetFiller( + this->layer_param_.embed_param().bias_filler())); + bias_filler->Fill(this->blobs_[1].get()); + } + } // parameter initialization + this->param_propagate_down_.resize(this->blobs_.size(), true); +} + +template +void EmbedLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + // Figure out the dimensions + M_ = bottom[0]->count(); + vector top_shape = bottom[0]->shape(); + top_shape.push_back(N_); + top[0]->Reshape(top_shape); + // Set up the bias multiplier + if (bias_term_) { + vector bias_shape(1, M_); + bias_multiplier_.Reshape(bias_shape); + caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); + } +} + +template +void EmbedLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* weight = this->blobs_[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + int index; + for (int n = 0; n < M_; ++n) { + index = static_cast(bottom_data[n]); + DCHECK_GE(index, 0); + DCHECK_LT(index, K_); + DCHECK_EQ(static_cast(index), bottom_data[n]) << "non-integer input"; + caffe_copy(N_, weight + index * N_, top_data + n * N_); + } + if (bias_term_) { + const Dtype* bias = this->blobs_[1]->cpu_data(); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1), + bias_multiplier_.cpu_data(), bias, Dtype(1), top_data); + } +} + +template +void EmbedLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; + if (this->param_propagate_down_[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + // Gradient with respect to weight + Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); + int index; + for (int n = 0; n < M_; ++n) { + index = static_cast(bottom_data[n]); + DCHECK_GE(index, 0); + DCHECK_LT(index, K_); + DCHECK_EQ(static_cast(index), bottom_data[n]) + << "non-integer input"; + caffe_axpy(N_, Dtype(1), top_diff + n * N_, weight_diff + index * N_); + } + } + if (bias_term_ && this->param_propagate_down_[1]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); + caffe_cpu_gemv(CblasTrans, M_, N_, Dtype(1), top_diff, + bias_multiplier_.cpu_data(), Dtype(1), bias_diff); + } +} + +#ifdef CPU_ONLY +STUB_GPU(EmbedLayer); +#endif + +INSTANTIATE_CLASS(EmbedLayer); +REGISTER_LAYER_CLASS(Embed); + +} // namespace caffe diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu new file mode 100644 index 00000000..37a4f7e3 --- /dev/null +++ b/src/caffe/layers/embed_layer.cu @@ -0,0 +1,80 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +__global__ void EmbedForward(const int nthreads, const Dtype* bottom_data, + const Dtype* weight, const int M, const int N, const int K, + Dtype* top_data) { + 
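+  // One thread per output element: n = top_index / N selects the sample,
+  // d = top_index % N selects the embedding dimension, and the thread copies
+  // weight[index * N + d], where index is the integer value in bottom_data[n].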
CUDA_KERNEL_LOOP(top_index, nthreads) { + const int n = top_index / N; + const int d = top_index % N; + const int index = static_cast(bottom_data[n]); + const int weight_index = index * N + d; + top_data[top_index] = weight[weight_index]; + } +} + +template +__global__ void EmbedBackward(const int nthreads, const Dtype* bottom_data, + const Dtype* top_diff, const int M, const int N, const int K, + Dtype* weight_diff) { + CUDA_KERNEL_LOOP(weight_index, nthreads) { + const int index = weight_index / N; + const int output_index = weight_index % N; + for (int n = 0; n < M; ++n) { + if (static_cast(bottom_data[n]) == index) { + weight_diff[weight_index] += top_diff[n * N + output_index]; + } + } + } +} + +template +void EmbedLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + const int count = top[0]->count(); + EmbedForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, weight, M_, N_, K_, top_data); + if (bias_term_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1), + bias_multiplier_.gpu_data(), + this->blobs_[1]->gpu_data(), Dtype(1), top_data); + } +} + +template +void EmbedLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; + if (this->param_propagate_down_[0]) { + const int count = this->blobs_[0]->count(); + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); + EmbedBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, top_diff, M_, N_, K_, weight_diff); + } + if (bias_term_ && this->param_propagate_down_[1]) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); + caffe_gpu_gemv(CblasTrans, M_, N_, Dtype(1), top_diff, + bias_multiplier_.gpu_data(), Dtype(1), bias_diff); + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(EmbedLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index a13c0e79..705cceb0 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -276,7 +276,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 137 (last added: reduction_param) +// LayerParameter next available layer-specific ID: 138 (last added: embed_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -332,6 +332,7 @@ message LayerParameter { optional DropoutParameter dropout_param = 108; optional DummyDataParameter dummy_data_param = 109; optional EltwiseParameter eltwise_param = 110; + optional EmbedParameter embed_param = 137; optional ExpParameter exp_param = 111; optional FlattenParameter flatten_param = 135; optional HDF5DataParameter hdf5_data_param = 112; @@ -533,6 +534,21 @@ message EltwiseParameter { optional bool stable_prod_grad = 3 [default = true]; } +// Message that stores parameters used by EmbedLayer +message EmbedParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + // The input is given as integers to be interpreted as one-hot + // vector indices with dimension num_input. 
Hence num_input should be + // 1 greater than the maximum possible input value. + optional uint32 input_dim = 2; + + optional bool bias_term = 3 [default = true]; // Whether to use a bias term + optional FillerParameter weight_filler = 4; // The filler for the weight + optional FillerParameter bias_filler = 5; // The filler for the bias + +} + +// Message that stores parameters used by ExpLayer message ExpParameter { // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. // Or if base is set to the default (-1), base is set to e, diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp new file mode 100644 index 00000000..7a4fb980 --- /dev/null +++ b/src/caffe/test/test_embed_layer.cpp @@ -0,0 +1,183 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +#ifndef CPU_ONLY +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; +#endif + +template +class EmbedLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + EmbedLayerTest() + : blob_bottom_(new Blob(4, 1, 1, 1)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~EmbedLayerTest() { delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(EmbedLayerTest, TestDtypesAndDevices); + +TYPED_TEST(EmbedLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_top_->num_axes(), 5); + EXPECT_EQ(this->blob_top_->shape(0), 4); + EXPECT_EQ(this->blob_top_->shape(1), 1); + EXPECT_EQ(this->blob_top_->shape(2), 1); + EXPECT_EQ(this->blob_top_->shape(3), 1); + EXPECT_EQ(this->blob_top_->shape(4), 10); +} + +TYPED_TEST(EmbedLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + const int kNumOutput = 10; + const int kInputDim = 5; + embed_param->set_num_output(kNumOutput); + embed_param->set_input_dim(kInputDim); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->set_bias_term(false); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer->blobs().size()); + vector weight_shape(2); + weight_shape[0] = kInputDim; + weight_shape[1] = kNumOutput; + ASSERT_TRUE(weight_shape == layer->blobs()[0]->shape()); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + this->blob_bottom_->mutable_cpu_data()[i] = caffe_rng_rand() % kInputDim; + } + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector weight_offset(2, 0); + vector top_offset(5, 0); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + 
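+    // Row i of the output must match the weight row selected by the i-th
+    // bottom index.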
weight_offset[0] = static_cast(this->blob_bottom_->cpu_data()[i]); + weight_offset[1] = 0; + top_offset[0] = i; + top_offset[4] = 0; + for (int j = 0; j < kNumOutput; ++j) { + EXPECT_EQ(layer->blobs()[0]->data_at(weight_offset), + this->blob_top_->data_at(top_offset)); + ++top_offset[4]; + ++weight_offset[1]; + } + } +} + +TYPED_TEST(EmbedLayerTest, TestForwardWithBias) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + const int kNumOutput = 10; + const int kInputDim = 5; + embed_param->set_num_output(kNumOutput); + embed_param->set_input_dim(kInputDim); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->mutable_bias_filler()->CopyFrom(embed_param->weight_filler()); + embed_param->set_bias_term(true); + shared_ptr > layer(new EmbedLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(2, layer->blobs().size()); + vector weight_shape(2); + weight_shape[0] = kInputDim; + weight_shape[1] = kNumOutput; + ASSERT_TRUE(weight_shape == layer->blobs()[0]->shape()); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + this->blob_bottom_->mutable_cpu_data()[i] = caffe_rng_rand() % kInputDim; + } + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector bias_offset(1, 0); + vector weight_offset(2, 0); + vector top_offset(5, 0); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + weight_offset[0] = static_cast(this->blob_bottom_->cpu_data()[i]); + weight_offset[1] = 0; + top_offset[0] = i; + top_offset[4] = 0; + bias_offset[0] = 0; + for (int j = 0; j < kNumOutput; ++j) { + EXPECT_EQ(layer->blobs()[0]->data_at(weight_offset) + + layer->blobs()[1]->data_at(bias_offset), + this->blob_top_->data_at(top_offset)); + ++top_offset[4]; + ++weight_offset[1]; + ++bias_offset[0]; + } + } +} + +TYPED_TEST(EmbedLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + embed_param->set_bias_term(false); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + EmbedLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + this->blob_bottom_->mutable_cpu_data()[0] = 4; + this->blob_bottom_->mutable_cpu_data()[1] = 2; + this->blob_bottom_->mutable_cpu_data()[2] = 2; + this->blob_bottom_->mutable_cpu_data()[3] = 3; + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, -2); +} + +TYPED_TEST(EmbedLayerTest, TestGradientWithBias) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + EmbedParameter* embed_param = layer_param.mutable_embed_param(); + embed_param->set_num_output(10); + embed_param->set_input_dim(5); + embed_param->set_bias_term(true); + embed_param->mutable_weight_filler()->set_type("uniform"); + embed_param->mutable_weight_filler()->set_min(-10); + embed_param->mutable_weight_filler()->set_max(10); + embed_param->mutable_bias_filler()->CopyFrom(embed_param->weight_filler()); + EmbedLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + this->blob_bottom_->mutable_cpu_data()[0] = 4; + this->blob_bottom_->mutable_cpu_data()[1] = 2; + 
this->blob_bottom_->mutable_cpu_data()[2] = 2; + this->blob_bottom_->mutable_cpu_data()[3] = 3; + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, -2); +} + +} // namespace caffe From ac9e29fd7b90a665a956f460715669bf05445a13 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 21 Jan 2015 16:12:12 -0800 Subject: [PATCH 009/223] EmbedBackward with no loops -- use caffe_gpu_atomic_add instead --- src/caffe/layers/embed_layer.cu | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu index 37a4f7e3..672fb9c6 100644 --- a/src/caffe/layers/embed_layer.cu +++ b/src/caffe/layers/embed_layer.cu @@ -5,6 +5,7 @@ #include "caffe/common_layers.hpp" #include "caffe/filler.hpp" #include "caffe/layer.hpp" +#include "caffe/util/gpu_util.cuh" #include "caffe/util/math_functions.hpp" namespace caffe { @@ -22,18 +23,21 @@ __global__ void EmbedForward(const int nthreads, const Dtype* bottom_data, } } +template +__global__ void EmbedBackward(const int nthreads, const Dtype* bottom_data, + const Dtype* top_diff, const int M, const int N, const int K, + Dtype* weight_diff); + template __global__ void EmbedBackward(const int nthreads, const Dtype* bottom_data, const Dtype* top_diff, const int M, const int N, const int K, Dtype* weight_diff) { - CUDA_KERNEL_LOOP(weight_index, nthreads) { - const int index = weight_index / N; - const int output_index = weight_index % N; - for (int n = 0; n < M; ++n) { - if (static_cast(bottom_data[n]) == index) { - weight_diff[weight_index] += top_diff[n * N + output_index]; - } - } + CUDA_KERNEL_LOOP(top_index, nthreads) { + const int n = top_index / N; + const int d = top_index % N; + const int index = static_cast(bottom_data[n]); + const int weight_index = index * N + d; + caffe_gpu_atomic_add(top_diff[top_index], weight_diff + weight_index); } } @@ -59,13 +63,14 @@ void EmbedLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; if (this->param_propagate_down_[0]) { + const int top_count = top[0]->count(); const int count = this->blobs_[0]->count(); const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); EmbedBackward // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - count, bottom_data, top_diff, M_, N_, K_, weight_diff); + <<>>( + top_count, bottom_data, top_diff, M_, N_, K_, weight_diff); } if (bias_term_ && this->param_propagate_down_[1]) { const Dtype* top_diff = top[0]->gpu_diff(); From 2c356a4ed4c33f662288166dfa74dd5d71d6c194 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 8 Aug 2015 12:57:45 -0700 Subject: [PATCH 010/223] TestGradientBasedSolver: drop doubled seed inititialization --- src/caffe/test/test_gradient_based_solver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 7bb0ec18..30b041fa 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -180,7 +180,6 @@ class GradientBasedSolverTest : public MultiDeviceTest { } Caffe::set_random_seed(this->seed_); this->InitSolverFromProtoString(proto.str()); - Caffe::set_random_seed(this->seed_); if (from_snapshot != NULL) { this->solver_->Restore(from_snapshot); vector*> empty_bottom_vec; From 
6019246930f54b108decc100f9cb801e4aea81a4 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 8 Aug 2015 13:07:13 -0700 Subject: [PATCH 011/223] TestGradientBasedSolver: replace dummy data with hdf5 Rely on fixed hdf5 data for determinism of solver tests. - draw random Gaussian data and targets for test and save to hdf5 - use the same data for all tests without constant / gaussian condition previously needed for accumulation - avoid test artifacts due to order of random draws in dummy data --- .../test/test_data/generate_sample_data.py | 28 ++++++++++++- src/caffe/test/test_data/solver_data.h5 | Bin 0 -> 11776 bytes src/caffe/test/test_data/solver_data_list.txt | 1 + src/caffe/test/test_gradient_based_solver.cpp | 39 ++++++++---------- 4 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 src/caffe/test/test_data/solver_data.h5 create mode 100644 src/caffe/test/test_data/solver_data_list.txt diff --git a/src/caffe/test/test_data/generate_sample_data.py b/src/caffe/test/test_data/generate_sample_data.py index ab557268..3703b418 100644 --- a/src/caffe/test/test_data/generate_sample_data.py +++ b/src/caffe/test/test_data/generate_sample_data.py @@ -1,5 +1,5 @@ """ -Generate data used in the HDF5DataLayer test. +Generate data used in the HDF5DataLayer and GradientBasedSolver tests. """ import os import numpy as np @@ -7,6 +7,8 @@ script_dir = os.path.dirname(os.path.abspath(__file__)) +# Generate HDF5DataLayer sample_data.h5 + num_cols = 8 num_rows = 10 height = 6 @@ -51,3 +53,27 @@ with open(script_dir + '/sample_data_list.txt', 'w') as f: f.write(script_dir + '/sample_data.h5\n') f.write(script_dir + '/sample_data_2_gzip.h5\n') + +# Generate GradientBasedSolver solver_data.h5 + +num_cols = 3 +num_rows = 8 +height = 10 +width = 10 + +data = np.random.randn(num_rows, num_cols, height, width) +data = data.reshape(num_rows, num_cols, height, width) +data = data.astype('float32') + +targets = np.random.randn(num_rows, 1) +targets = targets.astype('float32') + +print data +print targets + +with h5py.File(script_dir + '/solver_data.h5', 'w') as f: + f['data'] = data + f['targets'] = targets + +with open(script_dir + '/solver_data_list.txt', 'w') as f: + f.write(script_dir + '/solver_data.h5\n') diff --git a/src/caffe/test/test_data/solver_data.h5 b/src/caffe/test/test_data/solver_data.h5 new file mode 100644 index 0000000000000000000000000000000000000000..7ee05ea7aac988bee3e769434dbb66eb668d65bc GIT binary patch literal 11776 zcmeHtc~p;G)Myi$XBwnI6VgDGhUe@bilm6lC89w^84`u0IU1!@A|;ucqf$L*|C%Kv zg=7e2iqM1zxA%R$cddKB^{urL|L=0E zR$5x}j7k2h|6ly)G9Ckgf2jX;cK?q4H~xR!16G#v?f$ZH-5*2$ji(!b;_P4hU7UTL z|Dt`Jce}g#?)f|R{_1o7YW@qr%QOBT9e)Cxm&fXFExZE%Fa-bTJNhBVgfARlm zoPR7c{?a6+sxp>)e}O8GoFI?f;lF7fo^d>ae~6!9Vi+yqo{14$}N1`Ztmu zlm83;5`N&*u$t%3eE(+tUuf)?Ew|w1;rnx!vDyA>_`GD<0T7T?7PM zZSWCZLvD{V2R~S2UGsgqw4<8F=bD2{3O{6YujIZF-%eJUUB-)x+kn^RGVWJ(#i`0$ zQFKixI$P(F7EmSAo}@v6LI8aGD8-EwwWYtd#$Z(QPU2$NjdGHq11raeBFIT4LIZ^msO0}v-XgkWBxkWytwKEk8o?uk?6YecG!Oy=h zuoY3MjQi&tI;vPkOb>7%-qVEitBmJLoDt;kRgU9+lrKQ(#v_;?_>=II&B1l*t8sJt zJZz4SrDBdh8NsFwV$Y+4anT|))yWV21NgZ9p`zT&@SSM)vt&VrGyYn;fQnD50Lc{_ zv8{O$yon5jxRhwHZF0ojtx0%II|gQ$2yisajWDj8k253M5!XyRNS-+rLzb5n>{+i2 zZ7T&xzm+9CF>^!fLtOYU=Lo!rxquVJD!}ghQE1)w6ODEiqg!edSy$dcuPiu8_K04` zw@0snbFL1$c}+%U=XN~*wuU;Y3USnCsBlEwOQGFuI~uA#0I`9e^r4eDWZC^jo2}Du zvvnevHyy^D6j|=zg-Q&v>cVGjc35^VmA+FjN1-iu$(mCp%xm9|L`Ufd#E2O~ci|xH z=UqS()M_w=ks;Nk9iY5$JVNHDMqiHTRx0KpHKLQF}?_pc}3cB^zLYR@0p!7c%Dz`q&`_dz_kQM62p7NowqP?m=xqj>4PsRMSxfE)+Zi`&nnP&gU1Iel(1x 
z3dXRFGp^C)>D92mx&T^uE2`2X#kr>a2e7|f9$xO-iaNoPoa0a4GDkZp*&-kTjcd*l zD?R~kllLreIbK2{Jl+x!nMlT~eHz{pF(PSUrEpbR8>b|HrLnT3ctd_6E=@TDFJ9fI z{#&DAj#56XyEsUUHBT`U(@Myfz;T$Ev6H+s5hq97U1`C6f7ts-f;I86rlPs2_{~-v zf5aC-cDo?Gb3}%#cYGB!la0Z_oNf@B>qsLdSHQFLnuN5CfabbXxZyX8{cYxn@5?!0 z9{G{jzIaI9SzUz<(YvtBQkr<(??5kB8t&|wPuES>r0br3g`zZJxX#&z`#)@;+le?= z;Qlq3Q?Uo+_TI(t<;k@9)e)NI^$zULWir{yJn(7c3iM7q3imUjs?42+36E?D`p=bs z+u_1gZb($K|p!)`~^=rsqyawQazPGxsVW}&2e1n%X@fZPeS5PEt6SuHUH`|eHv zrBHQJF&d2Lf@?{?<2klqiV%byF2yC<`jGuh15Vw`WIEHb8R>W**ud{er}z0&(;EeJ z%fn|V#zXMxg2RxzED9U7H)7thXs};&6MSFJprJFvNw?rfEI)V!dK4pZ_RwiKdxsBA z6w2|Me<}SWJfHLqk9K!2QOn(nQQa^QBAVi$2I|b8T+##I`Fm)* zK0@$Ycd}&qG&(_j3ohN2Ma4xWIhpdN$Z=VYQ_N3c!O3YjZr*sVvKaY^a zn%R)wo6O9Njfa-~arhzWE7%r_gH?gjSninOP<;#D5^5)Y>b$t;Sq6P|Eg8*@s&kHL zaZ%~lbUNxqQERsVr{q%(NzQ!BmXGMt=b?b3M$#Z9cniPp`$o5Y5k%$K7|hI9M~xyw zj{Gx00OQTfKAi{fRWwvYzsY%#w?R zlP%jIh_8h_tqOs+Hu2P@ash6C5KWg`$dE?Ml~tm`3G544$er0MM}PcYj{CNzf%gm_ z)H+|stVnqRR@1CWr}!}lpCXLhr*mn;#!NP*VLIBoloRu!)mX0N1Y0gBaLbM-(k#aa zJZd7%@mq47yjGFm^4-3P=WZp_$~V)wKff--19$ciBlAI4ah#gD#1#u1&y=9%;{iBn zkWVu2>%-Ei<8Wx1HO$nkM_CtMP9B?vQ$Pt{H8&xh83|veZ^pG|XXyP47FhDL6Sk8i z^6<`GYIkNP&MxDJFNa0Avaj2T;SBHNS`w}8N5C9D))VaoIhiTKsm&DGm7InY8 zq>0nk)6?@#F$>pfqLu0$bPRW4{Yv^NzF7?uJfLdD*=w}Ks*z~*B;XtO6>QUaAGTRq zmsCeZ;tml5y6~nGyXg``)r^8+`dKYVPdW!5O+5gAhJepN1vD0iVw21OQMDeS4I}xu zqO1&-p8EiamRoT&cEo(~oFLZkRRyD#W|4{OJLtv9M)d`fgXuT7?n6?%Y=ud zD>w=IV*Jr|`V*41wiQROIg6>=s9DnBu=~O&TQ;Z^zr<9kg`P>x6k6cFG#dA5) z_xRvivnM_Db1Ip!il9X54Z0ttQM>Uj-TW;E%?ARhkZ2vwUG@$#eh!j`!4Md#9c6!f zTZW6?BokVG8k;oJsOZo`tTfCaLz$0AziTiQY+Z=U)Q9m~L@(t{LaaLNNu>oi_@MVH z$!?g;iLq#<^`19r?Xgz+%sB_m?yW?59u7WIY9JH+4ifRDIVfFSficlXFkbOKz14md zXXgmR1`SadsnF&eYKVcf?itV~k%WsQmZD#k0ZcwrO!6NO0{J9@8-*v}lT-zBaY=ll?`tc}j)navqE9F4Euy5ZN}O<2_%L@f-y;p&>X=t3x1y4|D`W8`3nwW6Vy zF5pAyKw=e~1=GXCIYIN6p^GpBenVxjO7bVses~x@+OG!x6(0Cx$8;KLD@vcAJde6c zUi5A^5BHYI2)5SUMu$!>1iu61eX13ec`gP0B8ebj5X%JQ)#77^D14E*iWJ%C0P8eX z7G~7Zy`OowKewdG+BZi-{0&`4qip831RKIzjPHIXV57gHyh3hQcYrTqS`>d}DGH z4@}9ztF*c#|Ub8TGUx7`U) zc|6P%uARxu%P_`#jRczWYX^t~AIGX+$I#PRugd*OZY6Vq56yWW;{?Ks$-^T!zW6m< zUfc(bi-w8iXd#sEsAqP5j>cfx$(T9S(DrlRndY)&(CsuK6;bK5AaD!Rr`IrI*6U%1 zm<=-SZ!m*%mn56Nr-SRI$c5czR5!;J)2A7L+9xgUfv3lyt2_bMOHCj{AB*XMuVFMk zRhk|YF-5B&D}22%3Lf)2BHiAFw*uyps{2#WCR~OqJwuNBF;9gS+!P|q4(v8>yBmt} z3jM5aWRJPR>NC*uJ_K70vVhi$Fqc0ZAzfSJabDy~l(9EN!$)0;a|zRBt{dht~wta%Pl~P0gn3oD^`VVk%1UYSRm9 z3sFWi8&`}AqtkWLP*mg!i3yO0kgPZCaQ}UnH|Y_MelmlhqpIlT5r_`))7e#*2Z--L z6|AaxftJ^|kcAhbXtJd&T-Hma&oA`@#(yxcv=HRFOIE^k;fpBmR>+1#`Cx2b9yGM_ zQTg`g7-uAni^FWWBQNdHwD|_vkpF=;t;_+>hA=pt8V3P05^%oqZ441Mz~eQ8)LCUd zWYk%rhL0{LjH)50Cm4)I0^|4_4L4u0I8HzhBwL+TfOI4C^81b%YE*S{`TXD^FNeRiqgkvu$`xvK`c!8s0D+gSBTjM8Bol2gBSXu=+oQ^+7r1%Fwd1T`JMEL(FwGS(m=zF zr$OY|D-d~+fa4~W!DrQ<^uS^vqWY_sU*l3JufGP<>!sjFi#fBw zI+tcVS&!TCG|2gKY4M~(@G^vTqDz3%c1s!l-Mui&m9V5}NW#LQt z9b7$S9waC(CUXy7CyMh9fqU*z47rws>dVYyM@9o-5(gEEx#gvL4;mO#zP{V^m7C zM+e*ac);lln69?1Ty`XWKfav6fbf&K|SvLBMy7m-MnWQ*!s!0(P6PMxH z&%c>l36{VZ9KaSIE}b`}3`VSlP?PP$Ihm{Auw8jog{LIv!S5YJxN98O{gg69zih;Y zx~bSV%!7w}4xqrUOt2h?r}tAA($9|vh&R77H9BSkk7KVhA9hM}n-k=@Yt#+FbC(@y ztQCU1s1K+h^d5L~qTzVrTfANq4<(ig)XGL07EPZG-reFHZtySC_qCplahpK=bH>5Q zmSOWH{*Fv(S6Hc6-g*Q6wDSp=jNHPI`n`qp$c{$AKjs#DubksHtA=@7KLC*}za(Nv| z=lqS($YVf0tqsDSN7mpx`5tsn8>M!sMoeI)Jl5tfg1P7iU(p|oX0q6B=R=;pm4jby zZ$Yd7H*=_um;2(TDK;7mk<@B^+7=$b=on8z-|9GU_N&4CPy-?{DsaR zeg%0AV{4AlW_UC3nJ(EBL0)Z7hMHqX=)nM%7A-0S?cyx7-M9EWbSJ`|Ka!PbXHQGt08U|%6j z4AZvYjTa6uT$F(+H`b%wte0fT4$6E22{8NB!;Ut{!}%wU;C}on78SXJf#m@Gd_IP(9w zWVF`6oN`I=YZJ)7 zmOJ)-OQo%Ynp{q-2D~f|Lnbr=9-8Q4_p6wy+=EBRms%a}rSq$xQ*vy5a|pt`_ap49 
zwO$xmR!k*AHZuw8IiUD89y<7BA*RulZe4x}45}D(th1*or{WRTCy*UAT{KVA5lQ%U zjD9$ozI!GI&AY~NC6{cX*mRZzIOOA`ZH@H1loN4P|ILIdEXTz{%iz*mS9tqHj+5;2 z1HS98z;!YMw7({Zg!F!_Dk+?XYQt0LT9}E44ClkO#GQD=MvcBUlcF67(Tu0aafVNO zF7652M9w6hAktPRvEI^@=(S(+6gl6=2lk4X}<( z#h&g~vhDe9w7%+yp*a)0<8890djnT`_Y)EoX$(1|D%{jVMli=T zos@nGV=L>HKsIw7B(+YGw+}_QXT^nJeEe_r#-R=H?dfXpEgvM?JMIxBNqfA1UJI;L zRzjHY7jT$-33L`LL$m6|WQkKRoNXyV{eW6LR# z0}Kwb=_?uNI$nsf-C;Onx(9M|AF`7Y)Zu0KTl22Iix48s(%_?;vG?5-%oZ+YxtLB%_O?N(q9nH1Jx2FgCo#N8fNajshGNxKD3hzqeqJ{Xk2!v! zv)%8~Agh)3PV9(dj{42cAmPHZ^!GOH=x#cNsfef1l{_gi>!}0gXd1g!Z}gE z>PThCG3H{n_Z8^0jHFY?YOn|=JF+Iyh`K2+1^u>#q~?(Y$@F=teyAxDi`#^P$kX(-`Kr8lFl;!B*boU~d>hv(8#TS;tYZH;ci6&|30sg&2v~ z(ZrQ9+$yuZD`3M+1*+@(9abh@#p%l*z?#R;K%0aDxo=7@c-azXaKx^&pTMjp3zz?V z&i))eMK2qg2*q<$VRnffKIDdC;?ZL;X}LeN zwW`zam3vsOlQ5o>tc4{L?I2lVA=Tf14A-mtAha8y%i-I0p;1e5E~dykOoQ0OIa|DZnLuGi*WT zcq|+bPbai_l!z=kK|Do$@z#ptuus8@G;Hf6qk8eA`W-L2TF%8{L2Kl)`=B&nA{_Md zBwZHoaGK69qQ7Y=)M)kLz7ZR2(z=N3@3E#pP-d7S`k&heu>+cN1=MOBcy6HD!ECUVA`Jt50}P34A% z%q5Y0gR~9QVD5&OL}P+F9$8~SJs#d51D$VX$$X9ifAql;?~h;oJolyO+^ zDEK!Bl8nN|q-=^SR!rWDGq;(*%gtv%{%sh}YR_cWz4w9AtPi+m#YN!FO=B8!E5Wh* zD+GOZ!8lh9k{7B6Pg4*IY$D*vk>$i8a6diZV1ULk**J4t2O8#mH!uEb1&0*$*aO@k zD!nd;E>0K6l`m#OWJwzC)HqLWFX6$38Nw)nX>jcBB&KZ90(|JH$lWbIo@+Hag1-&@ z!K_k-_Qr_eqMys?sjSYDN$<_y>#e~rn~>Zy`cxl0T% zpO_VmlID-rINiStULB8waWy@#MDHAQxXd8F_G=)KPaXram!Xo&8JzUBiVSwg<4utV zG#VqxH}`$CL?{>T^S<&I_i-8yq|8IFI9-rBbPG(^@4~CYCsDEb0HpNvR)&`dg6V;E(E4Q}-Ebuc zirQr0{*Q0))uSJtmPb^Ua5+f%6Y%8(`?0z(5W`lzp#&acP5=SL&{AyETmu)6pN7NQ zt&n!s0q>ql!i(G}W-w_9?y}1!tNfNj#L_}mMyHY8>C;R4S_;|M2g^v$J0s{_yBu{_ z#$fU}Rpz#tAr2||z>c*lX!>jlXC!|;>~<0122N|Iy?&wi?ZOD_a8wbiP1DBSr(tm7 zQZ2^Kzl!B&^ie$P28rLo!3iEN?nP#;U~KB*b%Tb zEMTax+Y(aV}{@fJ(-cK(Izo64Yc0x6sV1yq-tx$xOoC+>5HKtSQX=qxygCBBVPa;Vq+mK>0-$A{;iktc=yM0jR8^;G*tqE801S#NkD2v5@AUgL?b)Lydu zZa#gxB?K)#XOXI+0rnqffG++09WQ3}GUAU@D6>2p2R#_vn=*&fvBnbn zOPjD*Y7_W9%OH&8X(BKu1NZbBqsQT5sBnG&!!fe-x%pbmG~JI2$M!yZjQP2@2X)9K zHU#5OM8N$Ub?EB;0Wge@nuc4@W^Ex*Rj9y(^2;zM_avPcVM~9AP9wE%7NC5_Qq+rO zsNQ%&3KWCjrPW!?bQ?jBO+U~t#{)i8@N@2`%W^+nuLk+ocKF?UD|!$+{NZ;FQ_o9q zo@QClj5`xh2fIn$@dW7jnN+2B;uOv4_Q9#eH;ABCB?;-dh+e@p%%hDlEa6l^$jlP9 zZjw7Yny>;amq}5DyN0BoSq6D+&qH3E1i2h_4U@l)!=qaZh|rV_*jTs&EtQ-I-^LTv z*J2$hp7E1;+Qd@)p^vjFp3&S}g50GRD~R=#E~0wJ8{%3HV5;vsT9@U)=Q4a!1SP$|c1I%i)oJnK6M7cU$o zZ=W6l?YE~Pd*wy)6dX`QxC`EEd6Ppi$7%IJB%`sWIUHL;0P0lr6~AR((q=Xa=+pJQiqnsHVr+026uuJP=+JCWGX zEDf^1Hb9*BH(0-_1P2@&P@^q?He8*`wb3slZmDCRoB5k#>>N>Ue4`s~YrO!*+;qY? 
zNkh-RlaO;x07Lg(ByX4SahsnUgWV6r=_dXaRR3Nn%;-oli#ODxhp0I@zxz13I#nI+ z$(_Wrf-^XNLB{ANRYr!j`Jpag4~&YWP?zjkFwmMnZ<+1Hm>Dm~Lhc>77jq12rnQ5u zmNw2%O`+Ou*^neNf_~+8@OkYHRP|3JUi>;(l0Ajw+suMR;o`W+H3h@N)G_jFG;|y{ z!Y|GaRcQ`&RC2%rgJmOOm(5t6SjRGQwU!{WM;+UGQgC(T4YDii9$B4}0i5fCIFrgj zj`KNi;}^qnn|u_k4X5g97U+IMhI2V8uF5jwEEw;PMhEr^sZNRqRo_E&kNpYA|MirJ zez*ubcb1c$v_1i{$ zRF<-Bjcqhw+5`OYb3PPamg33`%W@Rk_7VvdDI%pkf%7~f3dJonpw(58<2~Pt-Zba| zr_3U{Ql^ME`aKs(H1bVZ(3p3dPybI}!*y;(2&wb~gLU zhKGB|=ryx9;sD_}x*A<#Cvo4(DRXUIkHXc;9@7484M|bffh376!hbG_9JM}xRvsqA z(6o)1s4gLEww)!L(!ZftL?D)G_7gMdYBKC+1dBJSlg&$vLENFvY?<>lG*_DruR`lE z`dSVue@@58OH$~^+;q~TJN946okY@6ma*YaqN~J-yifm<=&Wf#!HsEU;@Kkwd(kwM#C* wmt0A1SGqmY2a`AkdQad!N^-6$dxNjH3+}iP15b7OnFcK$%vdiE`fvsQ2apE6>i_@% literal 0 HcmV?d00001 diff --git a/src/caffe/test/test_data/solver_data_list.txt b/src/caffe/test/test_data/solver_data_list.txt new file mode 100644 index 00000000..a6552f50 --- /dev/null +++ b/src/caffe/test/test_data/solver_data_list.txt @@ -0,0 +1 @@ +src/caffe/test/test_data/solver_data.h5 diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 30b041fa..a8b211b7 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -25,15 +25,26 @@ class GradientBasedSolverTest : public MultiDeviceTest { protected: GradientBasedSolverTest() : seed_(1701), num_(4), channels_(3), height_(10), width_(10), - constant_data_(false), share_(false) {} + share_(false) { + input_file_ = new string( + CMAKE_SOURCE_DIR "caffe/test/test_data/solver_data_list.txt" CMAKE_EXT); + } + ~GradientBasedSolverTest() { + delete input_file_; + } string snapshot_prefix_; shared_ptr > solver_; int seed_; + // Dimensions are determined by generate_sample_data.py + // TODO this is brittle and the hdf5 file should be checked instead. int num_, channels_, height_, width_; - bool constant_data_, share_; + bool share_; Dtype delta_; // Stability constant for AdaGrad. + // Test data: check out generate_sample_data.py in the same directory. + string* input_file_; + virtual SolverParameter_SolverType solver_type() = 0; virtual void InitSolver(const SolverParameter& param) = 0; @@ -71,25 +82,10 @@ class GradientBasedSolverTest : public MultiDeviceTest { " name: 'TestNetwork' " " layer { " " name: 'data' " - " type: 'DummyData' " - " dummy_data_param { " - " num: " << num_ / iter_size << " " - " channels: " << channels_ << " " - " height: " << height_ << " " - " width: " << width_ << " " - " channels: 1 " - " height: 1 " - " width: 1 " - " data_filler { " - " type: '" << string(constant_data_ ? "constant" : "gaussian") - << "' " - " std: 1.0 " - " value: 1.0 " - " } " - " data_filler { " - " type: 'gaussian' " - " std: 1.0 " - " } " + " type: 'HDF5Data' " + " hdf5_data_param { " + " source: '" << *(this->input_file_) << "' " + " batch_size: " << num_ / iter_size << " " " } " " top: 'data' " " top: 'targets' " @@ -354,7 +350,6 @@ class GradientBasedSolverTest : public MultiDeviceTest { const Dtype kMomentum, const int kNumIters, const int kIterSize) { const double kPrecision = 1e-2; const double kMinPrecision = 1e-7; - constant_data_ = true; // Solve without accumulation and save parameters. 
this->RunLeastSquaresSolver(kLearningRate, kWeightDecay, kMomentum, kNumIters); From b97ca6d4d17b4a661d553c233c2dfe03b88033c5 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sun, 9 Aug 2015 11:03:23 -0700 Subject: [PATCH 012/223] Use net_->learnable_params() instead of net_->params() in RMSprop In RMSProp solver, use const vector*>& net_params = this->net_->learnable_params(); instead of const vector > >& net_params = this->net_->params(); --- src/caffe/solver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 43834c0c..54e085a6 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -861,7 +861,7 @@ void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { template void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { - const vector > >& net_params = this->net_->params(); + const vector*>& net_params = this->net_->learnable_params(); const vector& net_params_lr = this->net_->params_lr(); // get the learning rate From 9b71fd038018d7c910ed0b75003b76edce1c5af9 Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sun, 9 Aug 2015 14:22:04 -0700 Subject: [PATCH 013/223] from __future__ imports must occur at the beginning of the file --- examples/pycaffe/caffenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pycaffe/caffenet.py b/examples/pycaffe/caffenet.py index 06c5a02d..82af2294 100644 --- a/examples/pycaffe/caffenet.py +++ b/examples/pycaffe/caffenet.py @@ -1,6 +1,6 @@ +from __future__ import print_function from caffe import layers as L, params as P, to_proto from caffe.proto import caffe_pb2 -from __future__ import print_function # helper function for common structures From d94ca3f4b15d1e02bdf520733b04999f1134d078 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 28 Apr 2015 14:28:04 -0700 Subject: [PATCH 014/223] Add BlockingQueue for inter-thread communication --- include/caffe/util/blocking_queue.hpp | 47 +++++++++++++++ src/caffe/util/blocking_queue.cpp | 86 +++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 include/caffe/util/blocking_queue.hpp create mode 100644 src/caffe/util/blocking_queue.cpp diff --git a/include/caffe/util/blocking_queue.hpp b/include/caffe/util/blocking_queue.hpp new file mode 100644 index 00000000..955e12cc --- /dev/null +++ b/include/caffe/util/blocking_queue.hpp @@ -0,0 +1,47 @@ +#ifndef CAFFE_UTIL_BLOCKING_QUEUE_HPP_ +#define CAFFE_UTIL_BLOCKING_QUEUE_HPP_ + +#include +#include + +#include "caffe/common.hpp" + +namespace caffe { + +template +class BlockingQueue { + public: + explicit BlockingQueue(); + + void push(const T& t); + + bool try_pop(T* t); + + // This logs a message if the threads needs to be blocked + // useful for detecting e.g. when data feeding is too slow + T pop(const string& log_on_wait = ""); + + bool try_peek(T* t); + + // Return element without removing it + T peek(); + + size_t size() const; + + protected: + /** + Move synchronization fields out instead of including boost/thread.hpp + to avoid a boost/NVCC issues (#1009, #1010) on OSX. Also fails on + Linux CUDA 7.0.18. 
+ */ + class sync; + + std::queue queue_; + shared_ptr sync_; + +DISABLE_COPY_AND_ASSIGN(BlockingQueue); +}; + +} // namespace caffe + +#endif diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp new file mode 100644 index 00000000..73c9564c --- /dev/null +++ b/src/caffe/util/blocking_queue.cpp @@ -0,0 +1,86 @@ +#include +#include + +#include "caffe/util/blocking_queue.hpp" + +namespace caffe { + +template +class BlockingQueue::sync { + public: + mutable boost::mutex mutex_; + boost::condition_variable condition_; +}; + +template +BlockingQueue::BlockingQueue() + : sync_(new sync()) { +} + +template +void BlockingQueue::push(const T& t) { + boost::mutex::scoped_lock lock(sync_->mutex_); + queue_.push(t); + lock.unlock(); + sync_->condition_.notify_one(); +} + +template +bool BlockingQueue::try_pop(T* t) { + boost::mutex::scoped_lock lock(sync_->mutex_); + + if (queue_.empty()) { + return false; + } + + *t = queue_.front(); + queue_.pop(); + return true; +} + +template +T BlockingQueue::pop(const string& log_on_wait) { + boost::mutex::scoped_lock lock(sync_->mutex_); + + while (queue_.empty()) { + if (!log_on_wait.empty()) { + LOG_EVERY_N(INFO, 1000)<< log_on_wait; + } + sync_->condition_.wait(lock); + } + + T t = queue_.front(); + queue_.pop(); + return t; +} + +template +bool BlockingQueue::try_peek(T* t) { + boost::mutex::scoped_lock lock(sync_->mutex_); + + if (queue_.empty()) { + return false; + } + + *t = queue_.front(); + return true; +} + +template +T BlockingQueue::peek() { + boost::mutex::scoped_lock lock(sync_->mutex_); + + while (queue_.empty()) { + sync_->condition_.wait(lock); + } + + return queue_.front(); +} + +template +size_t BlockingQueue::size() const { + boost::mutex::scoped_lock lock(sync_->mutex_); + return queue_.size(); +} + +} // namespace caffe From 45d792e8b1e44acb467ab9be3debdd9e819c11d1 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 27 Apr 2015 19:48:10 -0700 Subject: [PATCH 015/223] Thread-local Caffe --- include/caffe/common.hpp | 13 +++++------ include/caffe/internal_thread.hpp | 12 ++++++++-- src/caffe/common.cpp | 11 ++++++++- src/caffe/internal_thread.cpp | 29 +++++++++++++++++++++--- src/caffe/test/test_internal_thread.cpp | 30 +++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 13 deletions(-) diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 5f86bc26..3fa81431 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -98,12 +98,12 @@ void GlobalInit(int* pargc, char*** pargv); class Caffe { public: ~Caffe(); - inline static Caffe& Get() { - if (!singleton_.get()) { - singleton_.reset(new Caffe()); - } - return *singleton_; - } + + // Thread local context for Caffe. Moved to common.cpp instead of + // including boost/thread.hpp to avoid a boost/NVCC issues (#1009, #1010) + // on OSX. Also fails on Linux with CUDA 7.0.18. + static Caffe& Get(); + enum Brew { CPU, GPU }; // This random number generator facade hides boost and CUDA rng @@ -158,7 +158,6 @@ class Caffe { shared_ptr random_generator_; Brew mode_; - static shared_ptr singleton_; private: // The private constructor to avoid duplicate instantiation. 
diff --git a/include/caffe/internal_thread.hpp b/include/caffe/internal_thread.hpp index 815ca546..bcff318e 100644 --- a/include/caffe/internal_thread.hpp +++ b/include/caffe/internal_thread.hpp @@ -14,14 +14,19 @@ namespace caffe { /** * Virtual class encapsulate boost::thread for use in base class * The child class will acquire the ability to run a single thread, - * by reimplementing the virutal function InternalThreadEntry. + * by reimplementing the virtual function InternalThreadEntry. */ class InternalThread { public: InternalThread() : thread_() {} virtual ~InternalThread(); - /** Returns true if the thread was successfully started. **/ + /** + * Caffe's thread local state will be initialized using the current + * thread values, e.g. device id, solver index etc. The random seed + * is initialized using caffe_rng_rand. + * Will not return until the internal thread has exited. + */ bool StartInternalThread(); /** Will not return until the internal thread has exited. */ @@ -34,6 +39,9 @@ class InternalThread { with the code you want your thread to run. */ virtual void InternalThreadEntry() {} + private: + void entry(int device, Caffe::Brew mode, int rand_seed); + shared_ptr thread_; }; diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index af96cac4..0215c76e 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,7 +8,15 @@ namespace caffe { -shared_ptr Caffe::singleton_; +// Make sure each thread can have different values. +static boost::thread_specific_ptr thread_instance_; + +Caffe& Caffe::Get() { + if (!thread_instance_.get()) { + thread_instance_.reset(new Caffe()); + } + return *(thread_instance_.get()); +} // random seeding int64_t cluster_seedgen(void) { diff --git a/src/caffe/internal_thread.cpp b/src/caffe/internal_thread.cpp index c2d19d43..2be88b31 100644 --- a/src/caffe/internal_thread.cpp +++ b/src/caffe/internal_thread.cpp @@ -1,8 +1,14 @@ #include + #include "caffe/internal_thread.hpp" +#include "caffe/util/math_functions.hpp" namespace caffe { +InternalThread::~InternalThread() { + StopInternalThread(); +} + InternalThread::~InternalThread() { WaitForInternalThreadToExit(); } @@ -11,20 +17,37 @@ bool InternalThread::is_started() const { return thread_.get() != NULL && thread_->joinable(); } - bool InternalThread::StartInternalThread() { if (!WaitForInternalThreadToExit()) { return false; } + + int device = 0; +#ifndef CPU_ONLY + CUDA_CHECK(cudaGetDevice(&device)); +#endif + Caffe::Brew mode = Caffe::mode(); + int rand_seed = caffe_rng_rand(); + try { - thread_.reset( - new boost::thread(&InternalThread::InternalThreadEntry, this)); + thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode, + rand_seed)); } catch (...) { return false; } return true; } +void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed) { +#ifndef CPU_ONLY + CUDA_CHECK(cudaSetDevice(device)); +#endif + Caffe::set_mode(mode); + Caffe::set_random_seed(rand_seed); + + InternalThreadEntry(); +} + /** Will not return until the internal thread has exited. 
*/ bool InternalThread::WaitForInternalThreadToExit() { if (is_started()) { diff --git a/src/caffe/test/test_internal_thread.cpp b/src/caffe/test/test_internal_thread.cpp index 31882b6d..390c8eda 100644 --- a/src/caffe/test/test_internal_thread.cpp +++ b/src/caffe/test/test_internal_thread.cpp @@ -2,6 +2,7 @@ #include "gtest/gtest.h" #include "caffe/internal_thread.hpp" +#include "caffe/util/math_functions.hpp" #include "caffe/test/test_caffe_main.hpp" @@ -19,5 +20,34 @@ TEST_F(InternalThreadTest, TestStartAndExit) { EXPECT_FALSE(thread.is_started()); } +class TestThreadA : public InternalThread { + void InternalThreadEntry() { + EXPECT_EQ(4244559767, caffe_rng_rand()); + } +}; + +class TestThreadB : public InternalThread { + void InternalThreadEntry() { + EXPECT_EQ(1726478280, caffe_rng_rand()); + } +}; + +TEST_F(InternalThreadTest, TestRandomSeed) { + TestThreadA t1; + Caffe::set_random_seed(9658361); + EXPECT_TRUE(t1.StartInternalThread()); + EXPECT_TRUE(t1.WaitForInternalThreadToExit()); + + TestThreadA t2; + Caffe::set_random_seed(9658361); + EXPECT_TRUE(t2.StartInternalThread()); + EXPECT_TRUE(t2.WaitForInternalThreadToExit()); + + TestThreadB t3; + Caffe::set_random_seed(3435563); + EXPECT_TRUE(t3.StartInternalThread()); + EXPECT_TRUE(t3.WaitForInternalThreadToExit()); +} + } // namespace caffe From 73b3d13b68bedad9d19f70755b0ee4ef376e2a30 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 28 Apr 2015 14:46:20 -0700 Subject: [PATCH 016/223] Change the way threads are started and stopped - Interrupt the thread before waiting on join - Provide a method for looping threads to exit on demand - CHECK if start and stop succeed instead of returning an error --- include/caffe/internal_thread.hpp | 8 +++--- src/caffe/internal_thread.cpp | 33 +++++++++++++------------ src/caffe/layers/base_data_layer.cpp | 4 +-- src/caffe/test/test_internal_thread.cpp | 16 ++++++------ 4 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/caffe/internal_thread.hpp b/include/caffe/internal_thread.hpp index bcff318e..be6ff7fb 100644 --- a/include/caffe/internal_thread.hpp +++ b/include/caffe/internal_thread.hpp @@ -25,12 +25,11 @@ class InternalThread { * Caffe's thread local state will be initialized using the current * thread values, e.g. device id, solver index etc. The random seed * is initialized using caffe_rng_rand. - * Will not return until the internal thread has exited. */ - bool StartInternalThread(); + void StartInternalThread(); /** Will not return until the internal thread has exited. */ - bool WaitForInternalThreadToExit(); + void StopInternalThread(); bool is_started() const; @@ -39,6 +38,9 @@ class InternalThread { with the code you want your thread to run. */ virtual void InternalThreadEntry() {} + /* Should be tested when running loops to exit when requested. 
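+     For example, a looping InternalThreadEntry() can poll it:
+       while (!must_stop()) { ... }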
*/ + bool must_stop(); + private: void entry(int device, Caffe::Brew mode, int rand_seed); diff --git a/src/caffe/internal_thread.cpp b/src/caffe/internal_thread.cpp index 2be88b31..d6c26559 100644 --- a/src/caffe/internal_thread.cpp +++ b/src/caffe/internal_thread.cpp @@ -1,4 +1,5 @@ #include +#include #include "caffe/internal_thread.hpp" #include "caffe/util/math_functions.hpp" @@ -9,18 +10,19 @@ InternalThread::~InternalThread() { StopInternalThread(); } -InternalThread::~InternalThread() { - WaitForInternalThreadToExit(); +bool InternalThread::is_started() const { + return thread_ && thread_->joinable(); } -bool InternalThread::is_started() const { - return thread_.get() != NULL && thread_->joinable(); +bool InternalThread::must_stop() { + return thread_ && thread_->interruption_requested(); } -bool InternalThread::StartInternalThread() { - if (!WaitForInternalThreadToExit()) { - return false; - } +void InternalThread::StartInternalThread() { + // TODO switch to failing once Caffe prefetch thread is persistent. + // Threads should not be started and stopped repeatedly. + // CHECK(!is_started()); + StopInternalThread(); int device = 0; #ifndef CPU_ONLY @@ -32,10 +34,9 @@ bool InternalThread::StartInternalThread() { try { thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode, rand_seed)); - } catch (...) { - return false; + } catch (std::exception& e) { + LOG(FATAL) << "Thread exception: " << e.what(); } - return true; } void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed) { @@ -48,16 +49,16 @@ void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed) { InternalThreadEntry(); } -/** Will not return until the internal thread has exited. */ -bool InternalThread::WaitForInternalThreadToExit() { +void InternalThread::StopInternalThread() { if (is_started()) { + thread_->interrupt(); try { thread_->join(); - } catch (...) 
{ - return false; + } catch (boost::thread_interrupted&) { + } catch (std::exception& e) { + LOG(FATAL) << "Thread exception: " << e.what(); } } - return true; } } // namespace caffe diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 26a11182..facaed7f 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -47,12 +47,12 @@ void BasePrefetchingDataLayer::LayerSetUp( template void BasePrefetchingDataLayer::CreatePrefetchThread() { this->data_transformer_->InitRand(); - CHECK(StartInternalThread()) << "Thread execution failed"; + StartInternalThread(); } template void BasePrefetchingDataLayer::JoinPrefetchThread() { - CHECK(WaitForInternalThreadToExit()) << "Thread joining failed"; + StopInternalThread(); } template diff --git a/src/caffe/test/test_internal_thread.cpp b/src/caffe/test/test_internal_thread.cpp index 390c8eda..93f1cc54 100644 --- a/src/caffe/test/test_internal_thread.cpp +++ b/src/caffe/test/test_internal_thread.cpp @@ -14,9 +14,9 @@ class InternalThreadTest : public ::testing::Test {}; TEST_F(InternalThreadTest, TestStartAndExit) { InternalThread thread; EXPECT_FALSE(thread.is_started()); - EXPECT_TRUE(thread.StartInternalThread()); + thread.StartInternalThread(); EXPECT_TRUE(thread.is_started()); - EXPECT_TRUE(thread.WaitForInternalThreadToExit()); + thread.StopInternalThread(); EXPECT_FALSE(thread.is_started()); } @@ -35,18 +35,18 @@ class TestThreadB : public InternalThread { TEST_F(InternalThreadTest, TestRandomSeed) { TestThreadA t1; Caffe::set_random_seed(9658361); - EXPECT_TRUE(t1.StartInternalThread()); - EXPECT_TRUE(t1.WaitForInternalThreadToExit()); + t1.StartInternalThread(); + t1.StopInternalThread(); TestThreadA t2; Caffe::set_random_seed(9658361); - EXPECT_TRUE(t2.StartInternalThread()); - EXPECT_TRUE(t2.WaitForInternalThreadToExit()); + t2.StartInternalThread(); + t2.StopInternalThread(); TestThreadB t3; Caffe::set_random_seed(3435563); - EXPECT_TRUE(t3.StartInternalThread()); - EXPECT_TRUE(t3.WaitForInternalThreadToExit()); + t3.StartInternalThread(); + t3.StopInternalThread(); } } // namespace caffe From ddcdc9d711e81312caf127e8aa512c3298101297 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 18 May 2015 17:45:20 -0700 Subject: [PATCH 017/223] Persistent prefetch thread --- include/caffe/data_layers.hpp | 31 +++++---- include/caffe/syncedmem.hpp | 4 ++ src/caffe/internal_thread.cpp | 5 +- src/caffe/layers/base_data_layer.cpp | 88 ++++++++++++++++++-------- src/caffe/layers/base_data_layer.cu | 15 ++--- src/caffe/layers/data_layer.cpp | 26 ++++---- src/caffe/layers/image_data_layer.cpp | 28 ++++---- src/caffe/layers/window_data_layer.cpp | 20 +++--- src/caffe/syncedmem.cpp | 12 ++++ src/caffe/util/blocking_queue.cpp | 4 ++ 10 files changed, 153 insertions(+), 80 deletions(-) diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 3958cb7e..f57ab6b0 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -15,6 +15,7 @@ #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/blocking_queue.hpp" #include "caffe/util/db.hpp" namespace caffe { @@ -50,12 +51,17 @@ class BaseDataLayer : public Layer { bool output_labels_; }; +template +class Batch { + public: + Blob data_, label_; +}; + template class BasePrefetchingDataLayer : public BaseDataLayer, public InternalThread { public: - explicit BasePrefetchingDataLayer(const LayerParameter& param) - : 
BaseDataLayer(param) {} + explicit BasePrefetchingDataLayer(const LayerParameter& param); // LayerSetUp: implements common data layer setup functionality, and calls // DataLayerSetUp to do special data layer setup for individual layer types. // This method may not be overridden. @@ -67,14 +73,17 @@ class BasePrefetchingDataLayer : virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - virtual void CreatePrefetchThread(); - virtual void JoinPrefetchThread(); - // The thread's function - virtual void InternalThreadEntry() {} + // Prefetches batches (asynchronously if to GPU memory) + static const int PREFETCH_COUNT = 3; protected: - Blob prefetch_data_; - Blob prefetch_label_; + virtual void InternalThreadEntry(); + virtual void load_batch(Batch* batch) = 0; + + Batch prefetch_[PREFETCH_COUNT]; + BlockingQueue*> prefetch_free_; + BlockingQueue*> prefetch_full_; + Blob transformed_data_; }; @@ -93,7 +102,7 @@ class DataLayer : public BasePrefetchingDataLayer { virtual inline int MaxTopBlobs() const { return 2; } protected: - virtual void InternalThreadEntry(); + virtual void load_batch(Batch* batch); shared_ptr db_; shared_ptr cursor_; @@ -235,7 +244,7 @@ class ImageDataLayer : public BasePrefetchingDataLayer { protected: shared_ptr prefetch_rng_; virtual void ShuffleImages(); - virtual void InternalThreadEntry(); + virtual void load_batch(Batch* batch); vector > lines_; int lines_id_; @@ -307,7 +316,7 @@ class WindowDataLayer : public BasePrefetchingDataLayer { protected: virtual unsigned int PrefetchRand(); - virtual void InternalThreadEntry(); + virtual void load_batch(Batch* batch); shared_ptr prefetch_rng_; vector > > image_database_; diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 1b726de9..4d339bf4 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -56,6 +56,10 @@ class SyncedMemory { SyncedHead head() { return head_; } size_t size() { return size_; } +#ifndef CPU_ONLY + void async_gpu_push(const cudaStream_t& stream); +#endif + private: void to_cpu(); void to_gpu(); diff --git a/src/caffe/internal_thread.cpp b/src/caffe/internal_thread.cpp index d6c26559..b193826c 100644 --- a/src/caffe/internal_thread.cpp +++ b/src/caffe/internal_thread.cpp @@ -19,10 +19,7 @@ bool InternalThread::must_stop() { } void InternalThread::StartInternalThread() { - // TODO switch to failing once Caffe prefetch thread is persistent. - // Threads should not be started and stopped repeatedly. - // CHECK(!is_started()); - StopInternalThread(); + CHECK(!is_started()) << "Threads should persist and not be restarted."; int device = 0; #ifndef CPU_ONLY diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index facaed7f..9288d913 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -1,7 +1,9 @@ +#include #include #include #include "caffe/data_layers.hpp" +#include "caffe/net.hpp" #include "caffe/util/io.hpp" namespace caffe { @@ -27,56 +29,92 @@ void BaseDataLayer::LayerSetUp(const vector*>& bottom, DataLayerSetUp(bottom, top); } +template +BasePrefetchingDataLayer::BasePrefetchingDataLayer( + const LayerParameter& param) + : BaseDataLayer(param), + prefetch_free_(), prefetch_full_() { + for (int i = 0; i < PREFETCH_COUNT; ++i) { + prefetch_free_.push(&prefetch_[i]); + } +} + template void BasePrefetchingDataLayer::LayerSetUp( const vector*>& bottom, const vector*>& top) { BaseDataLayer::LayerSetUp(bottom, top); - // Now, start the prefetch thread. 
Before calling prefetch, we make two - // cpu_data calls so that the prefetch thread does not accidentally make - // simultaneous cudaMalloc calls when the main thread is running. In some - // GPUs this seems to cause failures if we do not so. - this->prefetch_data_.mutable_cpu_data(); - if (this->output_labels_) { - this->prefetch_label_.mutable_cpu_data(); + // Before starting the prefetch thread, we make cpu_data and gpu_data + // calls so that the prefetch thread does not accidentally make simultaneous + // cudaMalloc calls when the main thread is running. In some GPUs this + // seems to cause failures if we do not so. + for (int i = 0; i < PREFETCH_COUNT; ++i) { + prefetch_[i].data_.mutable_cpu_data(); + if (this->output_labels_) { + prefetch_[i].label_.mutable_cpu_data(); + } } +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + for (int i = 0; i < PREFETCH_COUNT; ++i) { + prefetch_[i].data_.mutable_gpu_data(); + if (this->output_labels_) { + prefetch_[i].label_.mutable_gpu_data(); + } + } + } +#endif DLOG(INFO) << "Initializing prefetch"; - this->CreatePrefetchThread(); - DLOG(INFO) << "Prefetch initialized."; -} - -template -void BasePrefetchingDataLayer::CreatePrefetchThread() { this->data_transformer_->InitRand(); StartInternalThread(); + DLOG(INFO) << "Prefetch initialized."; } template -void BasePrefetchingDataLayer::JoinPrefetchThread() { - StopInternalThread(); +void BasePrefetchingDataLayer::InternalThreadEntry() { +#ifndef CPU_ONLY + cudaStream_t stream; + if (Caffe::mode() == Caffe::GPU) { + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + } +#endif + + try { + while (!must_stop()) { + Batch* batch = prefetch_free_.pop(); + load_batch(batch); +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + batch->data_.data().get()->async_gpu_push(stream); + cudaStreamSynchronize(stream); + } +#endif + prefetch_full_.push(batch); + } + } catch (boost::thread_interrupted&) { + // Interrupted exception is expected on shutdown + } } template void BasePrefetchingDataLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { - // First, join the thread - JoinPrefetchThread(); - DLOG(INFO) << "Thread joined"; + Batch* batch = prefetch_full_.pop("Data layer prefetch queue empty"); // Reshape to loaded data. - top[0]->ReshapeLike(prefetch_data_); + top[0]->Reshape(batch->data_.num(), batch->data_.channels(), + batch->data_.height(), batch->data_.width()); // Copy the data - caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(), + caffe_copy(batch->data_.count(), batch->data_.cpu_data(), top[0]->mutable_cpu_data()); DLOG(INFO) << "Prefetch copied"; if (this->output_labels_) { // Reshape to loaded labels. top[1]->ReshapeLike(prefetch_label_); // Copy the labels. 
- caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(), - top[1]->mutable_cpu_data()); + caffe_copy(batch->label_.count(), batch->label_.cpu_data(), + top[1]->mutable_cpu_data()); } - // Start a new prefetch thread - DLOG(INFO) << "CreatePrefetchThread"; - CreatePrefetchThread(); + + prefetch_free_.push(batch); } #ifdef CPU_ONLY diff --git a/src/caffe/layers/base_data_layer.cu b/src/caffe/layers/base_data_layer.cu index 9335a5bc..56439bc5 100644 --- a/src/caffe/layers/base_data_layer.cu +++ b/src/caffe/layers/base_data_layer.cu @@ -7,22 +7,21 @@ namespace caffe { template void BasePrefetchingDataLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { - // First, join the thread - JoinPrefetchThread(); + Batch* batch = prefetch_full_.pop("Data layer prefetch queue empty"); // Reshape to loaded data. - top[0]->ReshapeLike(this->prefetch_data_); + top[0]->ReshapeLike(batch->data_); // Copy the data - caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(), + caffe_copy(batch->data_.count(), batch->data_.gpu_data(), top[0]->mutable_gpu_data()); if (this->output_labels_) { // Reshape to loaded labels. - top[1]->ReshapeLike(prefetch_label_); + top[1]->ReshapeLike(batch->label_); // Copy the labels. - caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(), + caffe_copy(batch->label_.count(), batch->label_.gpu_data(), top[1]->mutable_gpu_data()); } - // Start a new prefetch thread - CreatePrefetchThread(); + + prefetch_free_.push(batch); } INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer); diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 161a75e0..22d9f436 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -17,8 +17,8 @@ namespace caffe { template -DataLayer::~DataLayer() { - this->JoinPrefetchThread(); +DataLayer::~DataLayer() { + this->StopInternalThread(); } template @@ -54,21 +54,23 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->width(); // label if (this->output_labels_) { - vector label_shape(1, this->layer_param_.data_param().batch_size()); + vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - this->prefetch_label_.Reshape(label_shape); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) { + this->prefetch_[i].label_.Reshape(label_shape); + } } } -// This function is used to create a thread that prefetches the data. -template -void DataLayer::InternalThreadEntry() { +// This function is called on prefetch thread +template +void DataLayer::load_batch(Batch* batch) { CPUTimer batch_timer; batch_timer.Start(); double read_time = 0; double trans_time = 0; CPUTimer timer; - CHECK(this->prefetch_data_.count()); + CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); // Reshape according to the first datum of each batch @@ -81,13 +83,13 @@ void DataLayer::InternalThreadEntry() { this->transformed_data_.Reshape(top_shape); // Reshape prefetch_data according to the batch_size. 
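The pattern running through these layer diffs is a fixed pool of Batch buffers cycling between two blocking queues: the prefetch thread pops an empty buffer from the free queue, fills it in load_batch(), and pushes it onto the full queue, while Forward_cpu/Forward_gpu pop a ready batch, copy it into the top blobs, and return the buffer to the free queue. The standalone sketch below shows only that recycling pattern; the minimal queue and the names are illustrative and are not the caffe::BlockingQueue or Batch classes themselves.

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Minimal blocking queue: pop() waits until an element is available.
template <typename T>
class SimpleBlockingQueue {
 public:
  void push(const T& t) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.push(t);
    }
    cond_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = queue_.front();
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

struct FakeBatch { int id; };

int main() {
  const int kPrefetchCount = 3;  // plays the role of PREFETCH_COUNT
  std::vector<FakeBatch> pool(kPrefetchCount);
  SimpleBlockingQueue<FakeBatch*> free_queue, full_queue;
  for (int i = 0; i < kPrefetchCount; ++i) free_queue.push(&pool[i]);

  // Producer: the "prefetch thread" fills free buffers and hands them over.
  std::thread producer([&] {
    for (int i = 0; i < 10; ++i) {
      FakeBatch* batch = free_queue.pop();  // blocks if the consumer is behind
      batch->id = i;                        // stands in for load_batch(batch)
      full_queue.push(batch);
    }
  });

  // Consumer: the "forward pass" takes a ready batch and recycles the buffer.
  for (int i = 0; i < 10; ++i) {
    FakeBatch* batch = full_queue.pop();    // blocks if prefetch is behind
    std::printf("consumed batch %d\n", batch->id);
    free_queue.push(batch);                 // buffer goes back to the producer
  }
  producer.join();
  return 0;
}

With PREFETCH_COUNT buffers in flight, the producer can run that many batches ahead of the consumer before blocking, which is what allows the prefetch thread to stay persistent instead of being restarted every iteration.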
top_shape[0] = batch_size; - this->prefetch_data_.Reshape(top_shape); + batch->data_.Reshape(top_shape); - Dtype* top_data = this->prefetch_data_.mutable_cpu_data(); + Dtype* top_data = batch->data_.mutable_cpu_data(); Dtype* top_label = NULL; // suppress warnings about uninitialized variables if (this->output_labels_) { - top_label = this->prefetch_label_.mutable_cpu_data(); + top_label = batch->label_.mutable_cpu_data(); } timer.Start(); for (int item_id = 0; item_id < batch_size; ++item_id) { @@ -97,7 +99,7 @@ void DataLayer::InternalThreadEntry() { read_time += timer.MicroSeconds(); timer.Start(); // Apply data transformations (mirror, scale, crop...) - int offset = this->prefetch_data_.offset(item_id); + int offset = batch->data_.offset(item_id); this->transformed_data_.set_cpu_data(top_data + offset); this->data_transformer_->Transform(datum, &(this->transformed_data_)); // Copy label. diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index dcc53348..223ba3a7 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -17,7 +17,7 @@ namespace caffe { template ImageDataLayer::~ImageDataLayer() { - this->JoinPrefetchThread(); + this->StopInternalThread(); } template @@ -70,8 +70,10 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, const int batch_size = this->layer_param_.image_data_param().batch_size(); CHECK_GT(batch_size, 0) << "Positive batch size required"; top_shape[0] = batch_size; - this->prefetch_data_.Reshape(top_shape); - top[0]->ReshapeLike(this->prefetch_data_); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) { + this->prefetch_[i].data_.Reshape(top_shape); + } + top[0]->Reshape(top_shape); LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," @@ -79,7 +81,9 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, // label vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - this->prefetch_label_.Reshape(label_shape); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) { + this->prefetch_[i].label_.Reshape(label_shape); + } } template @@ -89,15 +93,15 @@ void ImageDataLayer::ShuffleImages() { shuffle(lines_.begin(), lines_.end(), prefetch_rng); } -// This function is used to create a thread that prefetches the data. +// This function is called on prefetch thread template -void ImageDataLayer::InternalThreadEntry() { +void ImageDataLayer::load_batch(Batch* batch) { CPUTimer batch_timer; batch_timer.Start(); double read_time = 0; double trans_time = 0; CPUTimer timer; - CHECK(this->prefetch_data_.count()); + CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); ImageDataParameter image_data_param = this->layer_param_.image_data_param(); const int batch_size = image_data_param.batch_size(); @@ -114,12 +118,12 @@ void ImageDataLayer::InternalThreadEntry() { // Use data_transformer to infer the expected blob shape from a cv_img. vector top_shape = this->data_transformer_->InferBlobShape(cv_img); this->transformed_data_.Reshape(top_shape); - // Reshape prefetch_data according to the batch_size. + // Reshape batch according to the batch_size. 
top_shape[0] = batch_size; - this->prefetch_data_.Reshape(top_shape); + batch->data_.Reshape(top_shape); - Dtype* prefetch_data = this->prefetch_data_.mutable_cpu_data(); - Dtype* prefetch_label = this->prefetch_label_.mutable_cpu_data(); + Dtype* prefetch_data = batch->data_.mutable_cpu_data(); + Dtype* prefetch_label = batch->label_.mutable_cpu_data(); // datum scales const int lines_size = lines_.size(); @@ -133,7 +137,7 @@ void ImageDataLayer::InternalThreadEntry() { read_time += timer.MicroSeconds(); timer.Start(); // Apply transformations (mirror, crop...) to the image - int offset = this->prefetch_data_.offset(item_id); + int offset = batch->data_.offset(item_id); this->transformed_data_.set_cpu_data(prefetch_data + offset); this->data_transformer_->Transform(cv_img, &(this->transformed_data_)); trans_time += timer.MicroSeconds(); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index c127d56b..f637f2ec 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -27,7 +27,7 @@ namespace caffe { template WindowDataLayer::~WindowDataLayer() { - this->JoinPrefetchThread(); + this->StopInternalThread(); } template @@ -171,7 +171,9 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, CHECK_GT(crop_size, 0); const int batch_size = this->layer_param_.window_data_param().batch_size(); top[0]->Reshape(batch_size, channels, crop_size, crop_size); - this->prefetch_data_.Reshape(batch_size, channels, crop_size, crop_size); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) + this->prefetch_[i].data_.Reshape( + batch_size, channels, crop_size, crop_size); LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," @@ -179,7 +181,9 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, // label vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - this->prefetch_label_.Reshape(label_shape); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) { + this->prefetch_[i].label_.Reshape(label_shape); + } // data mean has_mean_file_ = this->transform_param_.has_mean_file(); @@ -217,9 +221,9 @@ unsigned int WindowDataLayer::PrefetchRand() { return (*prefetch_rng)(); } -// Thread fetching the data +// This function is called on prefetch thread template -void WindowDataLayer::InternalThreadEntry() { +void WindowDataLayer::load_batch(Batch* batch) { // At each iteration, sample N windows where N*p are foreground (object) // windows and N*(1-p) are background (non-object) windows CPUTimer batch_timer; @@ -227,8 +231,8 @@ void WindowDataLayer::InternalThreadEntry() { double read_time = 0; double trans_time = 0; CPUTimer timer; - Dtype* top_data = this->prefetch_data_.mutable_cpu_data(); - Dtype* top_label = this->prefetch_label_.mutable_cpu_data(); + Dtype* top_data = batch->data_.mutable_cpu_data(); + Dtype* top_label = batch->label_.mutable_cpu_data(); const Dtype scale = this->layer_param_.window_data_param().scale(); const int batch_size = this->layer_param_.window_data_param().batch_size(); const int context_pad = this->layer_param_.window_data_param().context_pad(); @@ -252,7 +256,7 @@ void WindowDataLayer::InternalThreadEntry() { bool use_square = (crop_mode == "square") ? 
true : false; // zero out batch - caffe_set(this->prefetch_data_.count(), Dtype(0), top_data); + caffe_set(batch->data_.count(), Dtype(0), top_data); const int num_fg = static_cast(static_cast(batch_size) * fg_fraction); diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 7617ccfb..0da7a3ba 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -108,6 +108,18 @@ void* SyncedMemory::mutable_gpu_data() { #endif } +#ifndef CPU_ONLY +void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { + CHECK(head_ == HEAD_AT_CPU); + if (gpu_ptr_ == NULL) { + CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); + } + const cudaMemcpyKind put = cudaMemcpyHostToDevice; + CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream)); + // Assume caller will synchronize on the stream before use + head_ = SYNCED; +} +#endif } // namespace caffe diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp index 73c9564c..6ab6ba06 100644 --- a/src/caffe/util/blocking_queue.cpp +++ b/src/caffe/util/blocking_queue.cpp @@ -1,6 +1,7 @@ #include #include +#include "caffe/data_layers.hpp" #include "caffe/util/blocking_queue.hpp" namespace caffe { @@ -83,4 +84,7 @@ size_t BlockingQueue::size() const { return queue_.size(); } +template class BlockingQueue*>; +template class BlockingQueue*>; + } // namespace caffe From bcc8f50a95ecad954d1887f3fb273eaa298e2274 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 18 May 2015 18:06:09 -0700 Subject: [PATCH 018/223] Add DataReader for parallel training with one DB session - Make sure each solver accesses a different subset of the data - Sequential reading of DB for performance - Prefetch a configurable amount of data to host memory - Distribute data to solvers in round-robin way for determinism --- include/caffe/data_layers.hpp | 8 +- include/caffe/data_reader.hpp | 82 +++++++++++++++++ src/caffe/data_reader.cpp | 121 ++++++++++++++++++++++++++ src/caffe/layers/base_data_layer.cpp | 2 +- src/caffe/layers/data_layer.cpp | 55 +++++------- src/caffe/proto/caffe.proto | 4 + src/caffe/test/test_layer_factory.cpp | 14 ++- src/caffe/test/test_upgrade_proto.cpp | 12 +++ src/caffe/util/blocking_queue.cpp | 3 + 9 files changed, 259 insertions(+), 42 deletions(-) create mode 100644 include/caffe/data_reader.hpp create mode 100644 src/caffe/data_reader.cpp diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index f57ab6b0..12e6c366 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -5,11 +5,11 @@ #include #include -#include "boost/scoped_ptr.hpp" #include "hdf5.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/data_reader.hpp" #include "caffe/data_transformer.hpp" #include "caffe/filler.hpp" #include "caffe/internal_thread.hpp" @@ -90,8 +90,7 @@ class BasePrefetchingDataLayer : template class DataLayer : public BasePrefetchingDataLayer { public: - explicit DataLayer(const LayerParameter& param) - : BasePrefetchingDataLayer(param) {} + explicit DataLayer(const LayerParameter& param); virtual ~DataLayer(); virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top); @@ -104,8 +103,7 @@ class DataLayer : public BasePrefetchingDataLayer { protected: virtual void load_batch(Batch* batch); - shared_ptr db_; - shared_ptr cursor_; + DataReader reader_; }; /** diff --git a/include/caffe/data_reader.hpp b/include/caffe/data_reader.hpp new file mode 100644 index 00000000..8ed5542c --- /dev/null +++ b/include/caffe/data_reader.hpp @@ -0,0 +1,82 @@ 
+#ifndef CAFFE_DATA_READER_HPP_ +#define CAFFE_DATA_READER_HPP_ + +#include +#include +#include + +#include "caffe/common.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/util/blocking_queue.hpp" +#include "caffe/util/db.hpp" + +namespace caffe { + +/** + * @brief Reads data from a source to queues available to data layers. + * A single reading thread is created per source, even if multiple solvers + * are running in parallel, e.g. for multi-GPU training. This makes sure + * databases are read sequentially, and that each solver accesses a different + * subset of the database. Data is distributed to solvers in a round-robin + * way to keep parallel training deterministic. + */ +class DataReader { + public: + explicit DataReader(const LayerParameter& param); + ~DataReader(); + + inline BlockingQueue& free() const { + return queue_pair_->free_; + } + inline BlockingQueue& full() const { + return queue_pair_->full_; + } + + protected: + // Queue pairs are shared between a body and its readers + class QueuePair { + public: + explicit QueuePair(int size); + ~QueuePair(); + + BlockingQueue free_; + BlockingQueue full_; + + DISABLE_COPY_AND_ASSIGN(QueuePair); + }; + + // A single body is created per source + class Body : public InternalThread { + public: + explicit Body(const LayerParameter& param); + virtual ~Body(); + + protected: + void InternalThreadEntry(); + void read_one(db::Cursor* cursor, QueuePair* qp); + + const LayerParameter param_; + BlockingQueue > new_queue_pairs_; + + friend class DataReader; + + DISABLE_COPY_AND_ASSIGN(Body); + }; + + // A source is uniquely identified by its layer name + path, in case + // the same database is read from two different locations in the net. + static inline string source_key(const LayerParameter& param) { + return param.name() + ":" + param.data_param().source(); + } + + const shared_ptr queue_pair_; + shared_ptr body_; + + static map > bodies_; + +DISABLE_COPY_AND_ASSIGN(DataReader); +}; + +} // namespace caffe + +#endif // CAFFE_DATA_READER_HPP_ diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp new file mode 100644 index 00000000..60606f0d --- /dev/null +++ b/src/caffe/data_reader.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include "caffe/common.hpp" +#include "caffe/data_layers.hpp" +#include "caffe/data_reader.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +using boost::weak_ptr; + +map > DataReader::bodies_; +static boost::mutex bodies_mutex_; + +DataReader::DataReader(const LayerParameter& param) + : queue_pair_(new QueuePair( // + param.data_param().prefetch() * param.data_param().batch_size())) { + // Get or create a body + boost::mutex::scoped_lock lock(bodies_mutex_); + string key = source_key(param); + weak_ptr& weak = bodies_[key]; + body_ = weak.lock(); + if (!body_) { + body_.reset(new Body(param)); + bodies_[key] = weak_ptr(body_); + } + body_->new_queue_pairs_.push(queue_pair_); +} + +DataReader::~DataReader() { + string key = source_key(body_->param_); + body_.reset(); + boost::mutex::scoped_lock lock(bodies_mutex_); + if (bodies_[key].expired()) { + bodies_.erase(key); + } +} + +// + +DataReader::QueuePair::QueuePair(int size) { + // Initialize the free queue with requested number of datums + for (int i = 0; i < size; ++i) { + free_.push(new Datum()); + } +} + +DataReader::QueuePair::~QueuePair() { + Datum* datum; + while (free_.try_pop(&datum)) { + delete datum; + } + while (full_.try_pop(&datum)) { + delete datum; + } +} + +// + +DataReader::Body::Body(const 
LayerParameter& param) + : param_(param), + new_queue_pairs_() { + StartInternalThread(); +} + +DataReader::Body::~Body() { + StopInternalThread(); +} + +void DataReader::Body::InternalThreadEntry() { + shared_ptr db(db::GetDB(param_.data_param().backend())); + db->Open(param_.data_param().source(), db::READ); + shared_ptr cursor(db->NewCursor()); + vector > qps; + try { + // int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1; + // TODO single solver until multi-gpu merge + int solver_count = 1; + + // To ensure deterministic runs, only start running once all solvers + // are ready. But solvers need to peek on one item during initialization, + // so read one item, then wait for the next solver. + for (int i = 0; i < solver_count; ++i) { + shared_ptr qp(new_queue_pairs_.pop()); + read_one(cursor.get(), qp.get()); + qps.push_back(qp); + } + // Main loop + while (!must_stop()) { + for (int i = 0; i < solver_count; ++i) { + read_one(cursor.get(), qps[i].get()); + } + // Check no additional readers have been created. This can happen if + // more than one net is trained at a time per process, whether single + // or multi solver. It might also happen if two data layers have same + // name and same source. + CHECK_EQ(new_queue_pairs_.size(), 0); + } + } catch (boost::thread_interrupted&) { + // Interrupted exception is expected on shutdown + } +} + +void DataReader::Body::read_one(db::Cursor* cursor, QueuePair* qp) { + Datum* datum = qp->free_.pop(); + // TODO deserialize in-place instead of copy? + datum->ParseFromString(cursor->value()); + qp->full_.push(datum); + + // go to the next iter + cursor->Next(); + if (!cursor->valid()) { + DLOG(INFO) << "Restarting data prefetching from start."; + cursor->SeekToFirst(); + } +} + +} // namespace caffe diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 9288d913..20f76f62 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -108,7 +108,7 @@ void BasePrefetchingDataLayer::Forward_cpu( DLOG(INFO) << "Prefetch copied"; if (this->output_labels_) { // Reshape to loaded labels. - top[1]->ReshapeLike(prefetch_label_); + top[1]->ReshapeLike(batch->label_); // Copy the labels. caffe_copy(batch->label_.count(), batch->label_.cpu_data(), top[1]->mutable_cpu_data()); diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 22d9f436..0932d9fe 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -11,11 +11,15 @@ #include "caffe/proto/caffe.pb.h" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/util/rng.hpp" namespace caffe { +template +DataLayer::DataLayer(const LayerParameter& param) + : BasePrefetchingDataLayer(param), + reader_(param) { +} + template DataLayer::~DataLayer() { this->StopInternalThread(); @@ -24,31 +28,19 @@ DataLayer::~DataLayer() { template void DataLayer::DataLayerSetUp(const vector*>& bottom, const vector*>& top) { - // Initialize DB - db_.reset(db::GetDB(this->layer_param_.data_param().backend())); - db_->Open(this->layer_param_.data_param().source(), db::READ); - cursor_.reset(db_->NewCursor()); + const int batch_size = this->layer_param_.data_param().batch_size(); + // Read a data point, and use it to initialize the top blob. 
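The ownership scheme in DataReader above, one Body (one reading thread) per layer-name:source key, created on first use and torn down when the last reader for that key goes away, is a get-or-create registry built on weak_ptr. A standalone sketch of just that pattern follows; the names, the std::mutex, and the example source path are illustrative rather than taken from the patch.

#include <cstdio>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// A shared resource created on first use and destroyed with its last user.
struct SharedBody {
  explicit SharedBody(const std::string& source) : source_(source) {
    std::printf("opening %s\n", source_.c_str());
  }
  ~SharedBody() { std::printf("closing %s\n", source_.c_str()); }
  std::string source_;
};

static std::map<std::string, std::weak_ptr<SharedBody> > registry;
static std::mutex registry_mutex;

// Get-or-create, mirroring how DataReader looks up bodies_ by source key.
std::shared_ptr<SharedBody> GetBody(const std::string& key) {
  std::lock_guard<std::mutex> lock(registry_mutex);
  std::shared_ptr<SharedBody> body = registry[key].lock();
  if (!body) {
    body.reset(new SharedBody(key));
    registry[key] = body;
  }
  return body;
}

int main() {
  std::shared_ptr<SharedBody> a = GetBody("examples/mnist/mnist_train_lmdb");
  std::shared_ptr<SharedBody> b = GetBody("examples/mnist/mnist_train_lmdb");
  std::printf("same body: %s\n", a.get() == b.get() ? "yes" : "no");
  return 0;  // last owner released here, so the body is closed
}

The real DataReader additionally erases the expired map entry when the last reader for a key is destroyed, as ~DataReader() above shows.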
+ Datum& datum = *(reader_.full().peek()); - // Check if we should randomly skip a few data points - if (this->layer_param_.data_param().rand_skip()) { - unsigned int skip = caffe_rng_rand() % - this->layer_param_.data_param().rand_skip(); - LOG(INFO) << "Skipping first " << skip << " data points."; - while (skip-- > 0) { - cursor_->Next(); - } - } - // Read a data point, to initialize the prefetch and top blobs. - Datum datum; - datum.ParseFromString(cursor_->value()); // Use data_transformer to infer the expected blob shape from datum. vector top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape top[0] and prefetch_data according to the batch_size. - top_shape[0] = this->layer_param_.data_param().batch_size(); - this->prefetch_data_.Reshape(top_shape); - top[0]->ReshapeLike(this->prefetch_data_); - + top_shape[0] = batch_size; + top[0]->Reshape(top_shape); + for (int i = 0; i < this->PREFETCH_COUNT; ++i) { + this->prefetch_[i].data_.Reshape(top_shape); + } LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); @@ -76,12 +68,11 @@ void DataLayer::load_batch(Batch* batch) { // Reshape according to the first datum of each batch // on single input batches allows for inputs of varying dimension. const int batch_size = this->layer_param_.data_param().batch_size(); - Datum datum; - datum.ParseFromString(cursor_->value()); + Datum& datum = *(reader_.full().peek()); // Use data_transformer to infer the expected blob shape from datum. vector top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); - // Reshape prefetch_data according to the batch_size. + // Reshape batch according to the batch_size. top_shape[0] = batch_size; batch->data_.Reshape(top_shape); @@ -91,11 +82,10 @@ void DataLayer::load_batch(Batch* batch) { if (this->output_labels_) { top_label = batch->label_.mutable_cpu_data(); } - timer.Start(); for (int item_id = 0; item_id < batch_size; ++item_id) { + timer.Start(); // get a datum - Datum datum; - datum.ParseFromString(cursor_->value()); + Datum& datum = *(reader_.full().pop("Waiting for data")); read_time += timer.MicroSeconds(); timer.Start(); // Apply data transformations (mirror, scale, crop...) @@ -107,13 +97,8 @@ void DataLayer::load_batch(Batch* batch) { top_label[item_id] = datum.label(); } trans_time += timer.MicroSeconds(); - timer.Start(); - // go to the next item. - cursor_->Next(); - if (!cursor_->valid()) { - DLOG(INFO) << "Restarting data prefetching from start."; - cursor_->SeekToFirst(); - } + + reader_.free().push(const_cast(&datum)); } timer.Stop(); batch_timer.Stop(); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 89f14595..41165410 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -500,6 +500,7 @@ message DataParameter { // to avoid all asynchronous sgd clients to start at the same point. The skip // point would be set as rand_skip * rand(0,1). Note that rand_skip should not // be larger than the number of keys in the database. + // DEPRECATED. Each solver accesses a different subset of the database. optional uint32 rand_skip = 7 [default = 0]; optional DB backend = 8 [default = LEVELDB]; // DEPRECATED. See TransformationParameter. 
For data pre-processing, we can do @@ -515,6 +516,9 @@ message DataParameter { optional bool mirror = 6 [default = false]; // Force the encoded image to have 3 color channels optional bool force_encoded_color = 9 [default = false]; + // Prefetch queue (Number of batches to prefetch to host memory, increase if + // data access bandwidth varies). + optional uint32 prefetch = 10 [default = 4]; } message DropoutParameter { diff --git a/src/caffe/test/test_layer_factory.cpp b/src/caffe/test/test_layer_factory.cpp index efb1b37a..c86fafd0 100644 --- a/src/caffe/test/test_layer_factory.cpp +++ b/src/caffe/test/test_layer_factory.cpp @@ -1,11 +1,14 @@ #include #include +#include "boost/scoped_ptr.hpp" #include "gtest/gtest.h" #include "caffe/common.hpp" #include "caffe/layer.hpp" #include "caffe/layer_factory.hpp" +#include "caffe/util/db.hpp" +#include "caffe/util/io.hpp" #include "caffe/test/test_caffe_main.hpp" @@ -21,11 +24,20 @@ TYPED_TEST(LayerFactoryTest, TestCreateLayer) { typename LayerRegistry::CreatorRegistry& registry = LayerRegistry::Registry(); shared_ptr > layer; - LayerParameter layer_param; for (typename LayerRegistry::CreatorRegistry::iterator iter = registry.begin(); iter != registry.end(); ++iter) { // Special case: PythonLayer is checked by pytest if (iter->first == "Python") { continue; } + LayerParameter layer_param; + // Data layers expect a DB + if (iter->first == "Data") { + string tmp; + MakeTempDir(&tmp); + boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); + db->Open(tmp, db::NEW); + db->Close(); + layer_param.mutable_data_param()->set_source(tmp); + } layer_param.set_type(iter->first); layer = LayerRegistry::CreateLayer(layer_param); EXPECT_EQ(iter->first, layer->type()); diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index eec62765..00672023 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -2,12 +2,15 @@ #include #include +#include "boost/scoped_ptr.hpp" #include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/layer.hpp" +#include "caffe/util/db.hpp" +#include "caffe/util/io.hpp" #include "caffe/util/upgrade_proto.hpp" #include "caffe/test/test_caffe_main.hpp" @@ -2901,6 +2904,15 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { continue; // Empty string isn't actually a valid layer type. 
} layer_param.set_type(v2_layer_type); + // Data layers expect a DB + if (v2_layer_type == "Data") { + string tmp; + MakeTempDir(&tmp); + boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); + db->Open(tmp, db::NEW); + db->Close(); + layer_param.mutable_data_param()->set_source(tmp); + } layer = LayerRegistry::CreateLayer(layer_param); EXPECT_EQ(v2_layer_type, layer->type()); } diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp index 6ab6ba06..f7c53f22 100644 --- a/src/caffe/util/blocking_queue.cpp +++ b/src/caffe/util/blocking_queue.cpp @@ -2,6 +2,7 @@ #include #include "caffe/data_layers.hpp" +#include "caffe/data_reader.hpp" #include "caffe/util/blocking_queue.hpp" namespace caffe { @@ -86,5 +87,7 @@ size_t BlockingQueue::size() const { template class BlockingQueue*>; template class BlockingQueue*>; +template class BlockingQueue; +template class BlockingQueue >; } // namespace caffe From d2f045768cba7d494abb4d168fc366d6fce80b85 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 18 May 2015 20:07:36 -0700 Subject: [PATCH 019/223] Allocate host memory through cudaMallocHost thanks to discussion by @thatguymike and @flx42 --- include/caffe/syncedmem.hpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 4d339bf4..4a1a2f3f 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -8,26 +8,29 @@ namespace caffe { -// Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the -// cudaMallocHost and cudaFree functions in order to create pinned memory. -// However, those codes rely on the existence of a cuda GPU (I don't know -// why that is a must since allocating memory should not be accessing the -// GPU resource, but it just creates an error as of Cuda 5.0) and will cause -// problem when running on a machine without GPU. Thus, we simply define -// these two functions for safety and possible future change if the problem -// of calling cuda functions disappears in a future version. -// -// In practice, although we are creating unpinned memory here, as long as we -// are constantly accessing them the memory pages almost always stays in -// the physical memory (assuming we have large enough memory installed), and -// does not seem to create a memory bottleneck here. - +// If CUDA is available and in GPU mode, host memory will be allocated pinned, +// using cudaMallocHost. It avoids dynamic pinning for transfers (DMA). +// The improvement in performance seems negligible in the single GPU case, +// but might be more significant for parallel training. Most importantly, +// it improved stability for large models on many GPUs. 
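The stability and bandwidth argument above comes down to how cudaMemcpyAsync behaves: a host-to-device copy can only proceed asynchronously and overlap other work when the host buffer is pinned, whereas pageable memory forces the driver to stage the transfer and largely serializes it. Below is a standalone sketch of the allocate/copy/synchronize pattern that async_gpu_push() earlier in this series relies on; it is illustrative only and not part of this patch.

#include <cuda_runtime.h>
#include <cstdio>
#include <cstring>

#define CHECK_CUDA(expr) do { cudaError_t e = (expr); \
  if (e != cudaSuccess) { \
    std::printf("CUDA error: %s\n", cudaGetErrorString(e)); \
    return 1; \
  } } while (0)

int main() {
  const size_t size = 1 << 20;  // 1 MB payload
  void* host = NULL;
  void* device = NULL;
  cudaStream_t stream;

  // Pinned host allocation: the precondition for a truly async H2D copy.
  CHECK_CUDA(cudaMallocHost(&host, size));
  CHECK_CUDA(cudaMalloc(&device, size));
  CHECK_CUDA(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  std::memset(host, 0, size);  // dummy payload

  // Enqueue the copy on a side stream and synchronize before the data is
  // used, the same contract async_gpu_push() documents for its caller.
  CHECK_CUDA(cudaMemcpyAsync(device, host, size, cudaMemcpyHostToDevice,
                             stream));
  CHECK_CUDA(cudaStreamSynchronize(stream));

  CHECK_CUDA(cudaStreamDestroy(stream));
  CHECK_CUDA(cudaFree(device));
  CHECK_CUDA(cudaFreeHost(host));
  return 0;
}

Allocation and release have to match (cudaMallocHost pairs with cudaFreeHost, plain malloc with free), which is why the same mode check appears in both CaffeMallocHost and CaffeFreeHost below.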
inline void CaffeMallocHost(void** ptr, size_t size) { +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaMallocHost(ptr, size)); + return; + } +#endif *ptr = malloc(size); CHECK(*ptr) << "host allocation of size " << size << " failed"; } inline void CaffeFreeHost(void* ptr) { +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaFreeHost(ptr)); + return; + } +#endif free(ptr); } From e5575cf17a43a56e4ba9bc5465548ac0512197d8 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 19 May 2015 11:11:05 -0700 Subject: [PATCH 020/223] Multi-GPU - Parallelize batches among GPUs and tree-reduce the gradients - The effective batch size scales with the number of devices - Batch size is multiplied by the number of devices - Split batches between GPUs, and tree-reduce the gradients - Detect machine topology (twin-GPU boards, P2P connectivity) - Track device in syncedmem (thanks @thatguymike) - Insert a callback in the solver for minimal code change - Accept list for gpu flag of caffe tool, e.g. '-gpu 0,1' or '-gpu all'. Run on default GPU if no ID given. - Add multi-GPU solver test - Deterministic architecture for reproducible runs --- include/caffe/caffe.hpp | 1 + include/caffe/common.hpp | 7 + include/caffe/internal_thread.hpp | 3 +- include/caffe/layer_factory.hpp | 4 +- include/caffe/parallel.hpp | 118 +++++ include/caffe/solver.hpp | 38 ++ include/caffe/syncedmem.hpp | 7 +- src/caffe/common.cpp | 5 +- src/caffe/data_reader.cpp | 4 +- src/caffe/data_transformer.cpp | 4 +- src/caffe/internal_thread.cpp | 9 +- src/caffe/net.cpp | 180 +++++--- src/caffe/parallel.cpp | 430 ++++++++++++++++++ src/caffe/solver.cpp | 57 ++- src/caffe/syncedmem.cpp | 34 +- src/caffe/test/test_gradient_based_solver.cpp | 75 ++- src/caffe/util/blocking_queue.cpp | 3 + tools/caffe.cpp | 111 +++-- 18 files changed, 949 insertions(+), 141 deletions(-) create mode 100644 include/caffe/parallel.hpp create mode 100644 src/caffe/parallel.cpp diff --git a/include/caffe/caffe.hpp b/include/caffe/caffe.hpp index 3c829f2f..68a5e1d1 100644 --- a/include/caffe/caffe.hpp +++ b/include/caffe/caffe.hpp @@ -10,6 +10,7 @@ #include "caffe/layer.hpp" #include "caffe/layer_factory.hpp" #include "caffe/net.hpp" +#include "caffe/parallel.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/solver.hpp" #include "caffe/util/benchmark.hpp" diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 3fa81431..1df6b9a1 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -149,6 +149,11 @@ class Caffe { static void SetDevice(const int device_id); // Prints the current GPU status. static void DeviceQuery(); + // Parallel training info + inline static int solver_count() { return Get().solver_count_; } + inline static void set_solver_count(int val) { Get().solver_count_ = val; } + inline static bool root_solver() { return Get().root_solver_; } + inline static void set_root_solver(bool val) { Get().root_solver_ = val; } protected: #ifndef CPU_ONLY @@ -158,6 +163,8 @@ class Caffe { shared_ptr random_generator_; Brew mode_; + int solver_count_; + bool root_solver_; private: // The private constructor to avoid duplicate instantiation. 
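Because the Caffe singleton is thread-local (the boost::thread_specific_ptr change earlier in this series), nothing set in the launching thread, such as device, mode, random seed, solver count or root flag, is visible in a newly spawned thread unless it is forwarded explicitly; that is what the extra entry() arguments in the next diff are for. A minimal standalone illustration of the per-thread behaviour, using plain C++11 thread_local as a stand-in rather than the Caffe implementation:

#include <cstdio>
#include <thread>

// Per-thread state, standing in for the thread-local Caffe instance.
struct ThreadState {
  int device;
  bool root_solver;
  ThreadState() : device(0), root_solver(true) {}
};
thread_local ThreadState state;

// Mirrors InternalThread::entry(): the worker sees only what is passed in.
void Worker(int device, bool root_solver) {
  state.device = device;
  state.root_solver = root_solver;
  std::printf("worker: device=%d root=%d\n", state.device,
              state.root_solver ? 1 : 0);
}

int main() {
  state.device = 2;  // set in the main thread only
  std::thread t(Worker, 2, false);
  t.join();
  // The worker's assignments did not touch the main thread's copy.
  std::printf("main:   device=%d root=%d\n", state.device,
              state.root_solver ? 1 : 0);
  return 0;
}

The same hand-off is why the DataReader and prefetch threads see the correct solver_count() and root_solver() values once they are set per solver thread.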
diff --git a/include/caffe/internal_thread.hpp b/include/caffe/internal_thread.hpp index be6ff7fb..6a8c5a02 100644 --- a/include/caffe/internal_thread.hpp +++ b/include/caffe/internal_thread.hpp @@ -42,7 +42,8 @@ class InternalThread { bool must_stop(); private: - void entry(int device, Caffe::Brew mode, int rand_seed); + void entry(int device, Caffe::Brew mode, int rand_seed, int solver_count, + bool root_solver); shared_ptr thread_; }; diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp index 2fcd9386..32e849de 100644 --- a/include/caffe/layer_factory.hpp +++ b/include/caffe/layer_factory.hpp @@ -71,7 +71,9 @@ class LayerRegistry { // Get a layer using a LayerParameter. static shared_ptr > CreateLayer(const LayerParameter& param) { - LOG(INFO) << "Creating layer " << param.name(); + if (Caffe::root_solver()) { + LOG(INFO) << "Creating layer " << param.name(); + } const string& type = param.type(); CreatorRegistry& registry = Registry(); CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type diff --git a/include/caffe/parallel.hpp b/include/caffe/parallel.hpp new file mode 100644 index 00000000..85fc2b55 --- /dev/null +++ b/include/caffe/parallel.hpp @@ -0,0 +1,118 @@ +#ifndef CAFFE_PARALLEL_HPP_ +#define CAFFE_PARALLEL_HPP_ + +#include + +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/solver.hpp" +#include "caffe/syncedmem.hpp" +#include "caffe/util/blocking_queue.hpp" + +namespace caffe { + +// Represents a net parameters. Once a net is created, its parameter buffers can +// be replaced by ones from Params, to allow parallelization. Params ensures +// parameters are allocated in one consecutive array. +template +class Params { + public: + explicit Params(shared_ptr > root_solver); + virtual ~Params() { + } + + inline size_t size() const { + return size_; + } + inline Dtype* data() const { + return data_; + } + inline Dtype* diff() const { + return diff_; + } + + protected: + const size_t size_; // Size of buffers + Dtype* data_; // Network parameters + Dtype* diff_; // Gradient + +DISABLE_COPY_AND_ASSIGN(Params); +}; + +// Params stored in GPU memory. +template +class GPUParams : public Params { + public: + GPUParams(shared_ptr > root_solver, int device); + virtual ~GPUParams(); + + void configure(Solver* solver) const; + + protected: + using Params::size_; + using Params::data_; + using Params::diff_; +}; + +class DevicePair { + public: + DevicePair(int parent, int device) + : parent_(parent), + device_(device) { + } + inline int parent() { + return parent_; + } + inline int device() { + return device_; + } + + // Group GPUs in pairs, by proximity depending on machine's topology + static void compute(const vector devices, vector* pairs); + + protected: + int parent_; + int device_; +}; + +// Synchronous data parallelism using map-reduce between local GPUs. 
+template +class P2PSync : public GPUParams, public Solver::Callback, + public InternalThread { + public: + explicit P2PSync(shared_ptr > root_solver, + P2PSync* parent, const SolverParameter& param); + virtual ~P2PSync(); + + inline const shared_ptr >& solver() const { + return solver_; + } + + void run(const vector& gpus); + + protected: + void on_start(); + void on_gradients_ready(); + + void InternalThreadEntry(); + + P2PSync* parent_; + vector*> children_; + BlockingQueue*> queue_; + const int initial_iter_; + Dtype* parent_grads_; + shared_ptr > solver_; + + using Params::size_; + using Params::data_; + using Params::diff_; +}; + +} // namespace caffe + +#endif diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index fbade938..89a6c76d 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -32,12 +32,27 @@ class Solver { // methods to restore the state from the appropriate snapshot type. void Restore(const char* resume_file); virtual ~Solver() {} + inline const SolverParameter& param() const { return param_; } inline shared_ptr > net() { return net_; } inline const vector > >& test_nets() { return test_nets_; } int iter() { return iter_; } + // Invoked at specific points during an iteration + class Callback { + protected: + virtual void on_start() = 0; + virtual void on_gradients_ready() = 0; + + template + friend class Solver; + }; + const vector& callbacks() const { return callbacks_; } + void add_callback(Callback* value) { + callbacks_.push_back(value); + } + protected: // Make and apply the update value for the current iteration. virtual void ApplyUpdate() = 0; @@ -62,10 +77,33 @@ class Solver { int current_step_; shared_ptr > net_; vector > > test_nets_; + vector callbacks_; DISABLE_COPY_AND_ASSIGN(Solver); }; +/** + * @brief Solver that only computes gradients, used as worker + * for multi-GPU training. 
+ */ +template +class WorkerSolver : public Solver { + public: + explicit WorkerSolver(const SolverParameter& param) + : Solver(param) {} + + protected: + void ApplyUpdate() {} + void SnapshotSolverState(const string& model_filename) { + LOG(FATAL) << "Should not be called on worker solver."; + } + void RestoreSolverStateFromBinaryProto(const string& state_file) { + LOG(FATAL) << "Should not be called on worker solver."; + } + void RestoreSolverStateFromHDF5(const string& state_file) { + LOG(FATAL) << "Should not be called on worker solver."; + } +}; /** * @brief Optimizes the parameters of a Net using diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 4a1a2f3f..62aadef4 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -45,14 +45,15 @@ class SyncedMemory { public: SyncedMemory() : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), - own_cpu_data_(false) {} + own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} explicit SyncedMemory(size_t size) : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), - own_cpu_data_(false) {} + own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} ~SyncedMemory(); const void* cpu_data(); void set_cpu_data(void* data); const void* gpu_data(); + void set_gpu_data(void* data); void* mutable_cpu_data(); void* mutable_gpu_data(); enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED }; @@ -71,6 +72,8 @@ class SyncedMemory { size_t size_; SyncedHead head_; bool own_cpu_data_; + bool own_gpu_data_; + int gpu_device_; DISABLE_COPY_AND_ASSIGN(SyncedMemory); }; // class SyncedMemory diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index 0215c76e..7077f378 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -51,7 +51,8 @@ void GlobalInit(int* pargc, char*** pargv) { #ifdef CPU_ONLY // CPU-only Caffe. Caffe::Caffe() - : random_generator_(), mode_(Caffe::CPU) { } + : random_generator_(), mode_(Caffe::CPU), + solver_count_(1), root_solver_(true) { } Caffe::~Caffe() { } @@ -95,7 +96,7 @@ void* Caffe::RNG::generator() { Caffe::Caffe() : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(), - mode_(Caffe::CPU) { + mode_(Caffe::CPU), solver_count_(1), root_solver_(true) { // Try to create a cublas handler, and report an error if failed (but we will // keep the program running as one might just want to run CPU code). if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index 60606f0d..16378203 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -76,9 +76,7 @@ void DataReader::Body::InternalThreadEntry() { shared_ptr cursor(db->NewCursor()); vector > qps; try { - // int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1; - // TODO single solver until multi-gpu merge - int solver_count = 1; + int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1; // To ensure deterministic runs, only start running once all solvers // are ready. 
But solvers need to peek on one item during initialization, diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 22633922..4666d9bd 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -19,7 +19,9 @@ DataTransformer::DataTransformer(const TransformationParameter& param, CHECK_EQ(param_.mean_value_size(), 0) << "Cannot specify mean_file and mean_value at the same time"; const string& mean_file = param.mean_file(); - LOG(INFO) << "Loading mean file from: " << mean_file; + if (Caffe::root_solver()) { + LOG(INFO) << "Loading mean file from: " << mean_file; + } BlobProto blob_proto; ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); data_mean_.FromProto(blob_proto); diff --git a/src/caffe/internal_thread.cpp b/src/caffe/internal_thread.cpp index b193826c..104884e0 100644 --- a/src/caffe/internal_thread.cpp +++ b/src/caffe/internal_thread.cpp @@ -27,21 +27,26 @@ void InternalThread::StartInternalThread() { #endif Caffe::Brew mode = Caffe::mode(); int rand_seed = caffe_rng_rand(); + int solver_count = Caffe::solver_count(); + bool root_solver = Caffe::root_solver(); try { thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode, - rand_seed)); + rand_seed, solver_count, root_solver)); } catch (std::exception& e) { LOG(FATAL) << "Thread exception: " << e.what(); } } -void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed) { +void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed, + int solver_count, bool root_solver) { #ifndef CPU_ONLY CUDA_CHECK(cudaSetDevice(device)); #endif Caffe::set_mode(mode); Caffe::set_random_seed(rand_seed); + Caffe::set_solver_count(solver_count); + Caffe::set_root_solver(root_solver); InternalThreadEntry(); } diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 0e5ed804..5d0f4322 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -10,6 +10,7 @@ #include "caffe/common.hpp" #include "caffe/layer.hpp" #include "caffe/net.hpp" +#include "caffe/parallel.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/hdf5.hpp" #include "caffe/util/insert_splits.hpp" @@ -41,8 +42,10 @@ void Net::Init(const NetParameter& in_param) { // the current NetState. NetParameter filtered_param; FilterNet(in_param, &filtered_param); - LOG(INFO) << "Initializing net from parameters: " << std::endl - << filtered_param.DebugString(); + if (Caffe::root_solver()) { + LOG(INFO) << "Initializing net from parameters: " << std::endl + << filtered_param.DebugString(); + } // Create a copy of filtered_param with splits added where necessary. 
NetParameter param; InsertSplits(filtered_param, ¶m); @@ -66,7 +69,8 @@ void Net::Init(const NetParameter& in_param) { const int layer_id = -1; // inputs have fake layer ID -1 AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx); } - DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); + DLOG_IF(INFO, Caffe::root_solver()) + << "Memory required for data: " << memory_used_ * sizeof(Dtype); // For each layer, set up its input and output bottom_vecs_.resize(param.layer_size()); top_vecs_.resize(param.layer_size()); @@ -89,7 +93,9 @@ void Net::Init(const NetParameter& in_param) { } layers_.push_back(LayerRegistry::CreateLayer(layer_param)); layer_names_.push_back(layer_param.name()); - LOG(INFO) << "Creating Layer " << layer_param.name(); + if (Caffe::root_solver()) { + LOG(INFO) << "Creating Layer " << layer_param.name(); + } bool need_backward = false; // Figure out this layer's input and output @@ -119,20 +125,30 @@ void Net::Init(const NetParameter& in_param) { } } // After this layer is connected, set it up. - LOG(INFO) << "Setting up " << layer_names_[layer_id]; + if (Caffe::root_solver()) { + LOG(INFO) << "Setting up " << layer_names_[layer_id]; + } layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); } blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); + if (Caffe::root_solver()) { + LOG(INFO) << "Top shape: " + << top_vecs_[layer_id][top_id]->shape_string(); + } if (layer->loss(top_id)) { - LOG(INFO) << " with loss weight " << layer->loss(top_id); + if (Caffe::root_solver()) { + LOG(INFO) << " with loss weight " << layer->loss(top_id); + } } memory_used_ += top_vecs_[layer_id][top_id]->count(); } - DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); + if (Caffe::root_solver()) { + DLOG(INFO) << "Memory required for data: " + << memory_used_ * sizeof(Dtype); + } const int param_size = layer_param.param_size(); const int num_param_blobs = layers_[layer_id]->blobs().size(); CHECK_LE(param_size, num_param_blobs) @@ -191,10 +207,14 @@ void Net::Init(const NetParameter& in_param) { } if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; } if (layer_need_backward_[layer_id]) { - LOG(INFO) << layer_names_[layer_id] << " needs backward computation."; + if (Caffe::root_solver()) { + LOG(INFO) << layer_names_[layer_id] << " needs backward computation."; + } } else { - LOG(INFO) << layer_names_[layer_id] - << " does not need backward computation."; + if (Caffe::root_solver()) { + LOG(INFO) << layer_names_[layer_id] + << " does not need backward computation."; + } } for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size(); ++bottom_id) { @@ -234,7 +254,9 @@ void Net::Init(const NetParameter& in_param) { // In the end, all remaining blobs are considered output blobs. 
for (set::iterator it = available_blobs.begin(); it != available_blobs.end(); ++it) { - LOG(INFO) << "This network produces output " << *it; + if (Caffe::root_solver()) { + LOG(INFO) << "This network produces output " << *it; + } net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); net_output_blob_indices_.push_back(blob_name_to_idx[*it]); } @@ -246,8 +268,10 @@ void Net::Init(const NetParameter& in_param) { } ShareWeights(); debug_info_ = param.debug_info(); - LOG(INFO) << "Network initialization done."; - LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); + if (Caffe::root_solver()) { + LOG(INFO) << "Network initialization done."; + LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); + } } template @@ -286,27 +310,33 @@ bool Net::StateMeetsRule(const NetState& state, // Check whether the rule is broken due to phase. if (rule.has_phase()) { if (rule.phase() != state.phase()) { - LOG(INFO) << "The NetState phase (" << state.phase() - << ") differed from the phase (" << rule.phase() - << ") specified by a rule in layer " << layer_name; + if (Caffe::root_solver()) { + LOG(INFO) << "The NetState phase (" << state.phase() + << ") differed from the phase (" << rule.phase() + << ") specified by a rule in layer " << layer_name; + } return false; } } // Check whether the rule is broken due to min level. if (rule.has_min_level()) { if (state.level() < rule.min_level()) { - LOG(INFO) << "The NetState level (" << state.level() - << ") is above the min_level (" << rule.min_level() - << ") specified by a rule in layer " << layer_name; + if (Caffe::root_solver()) { + LOG(INFO) << "The NetState level (" << state.level() + << ") is above the min_level (" << rule.min_level() + << ") specified by a rule in layer " << layer_name; + } return false; } } // Check whether the rule is broken due to max level. 
if (rule.has_max_level()) { if (state.level() > rule.max_level()) { - LOG(INFO) << "The NetState level (" << state.level() - << ") is above the max_level (" << rule.max_level() - << ") specified by a rule in layer " << layer_name; + if (Caffe::root_solver()) { + LOG(INFO) << "The NetState level (" << state.level() + << ") is above the max_level (" << rule.max_level() + << ") specified by a rule in layer " << layer_name; + } return false; } } @@ -319,8 +349,10 @@ bool Net::StateMeetsRule(const NetState& state, if (rule.stage(i) == state.stage(j)) { has_stage = true; } } if (!has_stage) { - LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i) - << "' specified by a rule in layer " << layer_name; + if (Caffe::root_solver()) { + LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i) + << "' specified by a rule in layer " << layer_name; + } return false; } } @@ -333,8 +365,10 @@ bool Net::StateMeetsRule(const NetState& state, if (rule.not_stage(i) == state.stage(j)) { has_stage = true; } } if (has_stage) { - LOG(INFO) << "The NetState contained a not_stage '" << rule.not_stage(i) - << "' specified by a rule in layer " << layer_name; + if (Caffe::root_solver()) { + LOG(INFO) << "The NetState contained a not_stage '" << rule.not_stage(i) + << "' specified by a rule in layer " << layer_name; + } return false; } } @@ -356,7 +390,9 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, if (blob_name_to_idx && layer_param && layer_param->bottom_size() > top_id && blob_name == layer_param->bottom(top_id)) { // In-place computation - LOG(INFO) << layer_param->name() << " -> " << blob_name << " (in-place)"; + if (Caffe::root_solver()) { + LOG(INFO) << layer_param->name() << " -> " << blob_name << " (in-place)"; + } top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get()); top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]); } else if (blob_name_to_idx && @@ -366,10 +402,12 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, LOG(FATAL) << "Duplicate blobs produced by multiple sources."; } else { // Normal output. 
- if (layer_param) { - LOG(INFO) << layer_param->name() << " -> " << blob_name; - } else { - LOG(INFO) << "Input " << top_id << " -> " << blob_name; + if (Caffe::root_solver()) { + if (layer_param) { + LOG(INFO) << layer_param->name() << " -> " << blob_name; + } else { + LOG(INFO) << "Input " << top_id << " -> " << blob_name; + } } shared_ptr > blob_pointer(new Blob()); const int blob_id = blobs_.size(); @@ -409,7 +447,9 @@ int Net::AppendBottom(const NetParameter& param, const int layer_id, << " (at index " << bottom_id << ") to layer " << layer_id; } const int blob_id = (*blob_name_to_idx)[blob_name]; - LOG(INFO) << layer_names_[layer_id] << " <- " << blob_name; + if (Caffe::root_solver()) { + LOG(INFO) << layer_names_[layer_id] << " <- " << blob_name; + } bottom_vecs_[layer_id].push_back(blobs_[blob_id].get()); bottom_id_vecs_[layer_id].push_back(blob_id); available_blobs->erase(blob_name); @@ -468,9 +508,10 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, param_layer_indices_[owner_net_param_id]; const int owner_layer_id = owner_index.first; const int owner_param_id = owner_index.second; - LOG(INFO) << "Sharing parameters '" << param_name << "' owned by " - << "layer '" << layer_names_[owner_layer_id] << "', param " - << "index " << owner_param_id; + LOG_IF(INFO, Caffe::root_solver()) << "Sharing parameters '" << param_name + << "' owned by " + << "layer '" << layer_names_[owner_layer_id] << "', param " + << "index " << owner_param_id; Blob* this_blob = layers_[layer_id]->blobs()[param_id].get(); Blob* owner_blob = layers_[owner_layer_id]->blobs()[owner_param_id].get(); @@ -595,8 +636,10 @@ void Net::InputDebugInfo(const int input_id) { const Blob& blob = *net_input_blobs_[input_id]; const string& blob_name = blob_names_[net_input_blob_indices_[input_id]]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG(INFO) << " [Forward] " - << "Input " << blob_name << " data: " << data_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Forward] " + << "Input " << blob_name << " data: " << data_abs_val_mean; + } } template @@ -605,9 +648,12 @@ void Net::ForwardDebugInfo(const int layer_id) { const Blob& blob = *top_vecs_[layer_id][top_id]; const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG(INFO) << " [Forward] " - << "Layer " << layer_names_[layer_id] << ", top blob " << blob_name - << " data: " << data_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Forward] " + << "Layer " << layer_names_[layer_id] + << ", top blob " << blob_name + << " data: " << data_abs_val_mean; + } } for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) { @@ -615,9 +661,12 @@ void Net::ForwardDebugInfo(const int layer_id) { const int net_param_id = param_id_vecs_[layer_id][param_id]; const string& blob_name = param_display_names_[net_param_id]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG(INFO) << " [Forward] " - << "Layer " << layer_names_[layer_id] << ", param blob " << blob_name - << " data: " << data_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Forward] " + << "Layer " << layer_names_[layer_id] + << ", param blob " << blob_name + << " data: " << data_abs_val_mean; + } } } @@ -629,18 +678,24 @@ void Net::BackwardDebugInfo(const int layer_id) { const Blob& blob = *bottom_vec[bottom_id]; const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; const Dtype diff_abs_val_mean = 
blob.asum_diff() / blob.count(); - LOG(INFO) << " [Backward] " - << "Layer " << layer_names_[layer_id] << ", bottom blob " << blob_name - << " diff: " << diff_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Backward] " + << "Layer " << layer_names_[layer_id] + << ", bottom blob " << blob_name + << " diff: " << diff_abs_val_mean; + } } for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) { if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; } const Blob& blob = *layers_[layer_id]->blobs()[param_id]; const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - LOG(INFO) << " [Backward] " - << "Layer " << layer_names_[layer_id] << ", param blob " << param_id - << " diff: " << diff_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Backward] " + << "Layer " << layer_names_[layer_id] + << ", param blob " << param_id + << " diff: " << diff_abs_val_mean; + } } } @@ -653,17 +708,22 @@ void Net::UpdateDebugInfo(const int param_id) { const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); if (param_owner < 0) { const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG(INFO) << " [Update] Layer " << layer_name - << ", param " << param_display_name - << " data: " << data_abs_val_mean << "; diff: " << diff_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Update] Layer " << layer_name + << ", param " << param_display_name + << " data: " << data_abs_val_mean + << "; diff: " << diff_abs_val_mean; + } } else { const string& owner_layer_name = layer_names_[param_layer_indices_[param_owner].first]; - LOG(INFO) << " [Update] Layer " << layer_name - << ", param blob " << param_display_name - << " (owned by layer " << owner_layer_name << ", " - << "param " << param_display_names_[param_owners_[param_id]] << ")" - << " diff: " << diff_abs_val_mean; + if (Caffe::root_solver()) { + LOG(INFO) << " [Update] Layer " << layer_name + << ", param blob " << param_display_name + << " (owned by layer " << owner_layer_name << ", " << "param " + << param_display_names_[param_owners_[param_id]] << ")" + << " diff: " << diff_abs_val_mean; + } } } @@ -720,8 +780,8 @@ void Net::Backward() { const Dtype l2norm_data = std::sqrt(sumsq_data); const Dtype l2norm_diff = std::sqrt(sumsq_diff); LOG(ERROR) << " [Backward] All net params (data, diff): " - << "L1 norm = (" << asum_data << ", " << asum_diff << "); " - << "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")"; + << "L1 norm = (" << asum_data << ", " << asum_diff << "); " + << "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")"; } } diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp new file mode 100644 index 00000000..3fef8cfd --- /dev/null +++ b/src/caffe/parallel.cpp @@ -0,0 +1,430 @@ +#ifndef CPU_ONLY +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "boost/thread.hpp" +#include "caffe/caffe.hpp" +#include "caffe/parallel.hpp" + +namespace caffe { + +enum Op { + copy, + replace_cpu, + replace_gpu, + replace_cpu_diff, + replace_gpu_diff +}; + +template +static void apply_buffers(const vector*>& blobs, + Dtype* buffer, size_t total_size, Op op) { + Dtype* ptr = buffer; + for (int i = 0; i < blobs.size(); ++i) { + int size = blobs[i]->count(); + switch (op) { + case copy: { + // Init buffer to current values of blobs + caffe_copy(size, + reinterpret_cast(blobs[i]->data()->cpu_data()), + ptr); + break; + } + case replace_cpu: + blobs[i]->data()->set_cpu_data(ptr); + break; + 
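      // (Note on the replace_* cases such as replace_gpu below: each one repoints a
      // blob's storage at the matching slice of one shared, contiguous buffer, which
      // is what later allows a whole net's parameters or gradients to be moved
      // between devices with a single copy of size_ elements.)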
case replace_gpu: + blobs[i]->data()->set_gpu_data(ptr); + break; + case replace_cpu_diff: + blobs[i]->diff()->set_cpu_data(ptr); + break; + case replace_gpu_diff: + blobs[i]->diff()->set_gpu_data(ptr); + break; + } + ptr += size; + } + CHECK_EQ(total_size, ptr - buffer); +} + +// Buffer size necessary to store given blobs +template +static size_t total_size(const vector*>& params) { + size_t size = 0; + for (int i = 0; i < params.size(); ++i) + size += params[i]->count(); + return size; +} + +template +Params::Params(shared_ptr > root_solver) + : size_(total_size(root_solver->net()->learnable_params())), + data_(), + diff_() { +} + +template +GPUParams::GPUParams(shared_ptr > root_solver, int device) + : Params(root_solver) { +#ifndef CPU_ONLY + int initial_device; + CUDA_CHECK(cudaGetDevice(&initial_device)); + + // Allocate device buffers + CUDA_CHECK(cudaSetDevice(device)); + CUDA_CHECK(cudaMalloc(&data_, size_ * sizeof(Dtype))); + + // Copy blob values + const vector*>& net = + root_solver->net()->learnable_params(); + apply_buffers(net, data_, size_, copy); + + CUDA_CHECK(cudaMalloc(&diff_, size_ * sizeof(Dtype))); + caffe_gpu_set(size_, Dtype(0), diff_); + + CUDA_CHECK(cudaSetDevice(initial_device)); +#else + NO_GPU; +#endif +} + +template +GPUParams::~GPUParams() { +#ifndef CPU_ONLY + CUDA_CHECK(cudaFree(data_)); + CUDA_CHECK(cudaFree(diff_)); +#endif +} + +template +void GPUParams::configure(Solver* solver) const { + const vector*>& net = + solver->net()->learnable_params(); + apply_buffers(net, data_, size_, replace_gpu); + apply_buffers(net, diff_, size_, replace_gpu_diff); +} + +void DevicePair::compute(const vector devices, vector* pairs) { +#ifndef CPU_ONLY + vector remaining(devices); + + // Group GPUs by board + for (int i = 0; i < remaining.size(); ++i) { + for (int j = i + 1; j < remaining.size(); ++j) { + cudaDeviceProp a, b; + CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i])); + CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j])); + if (a.isMultiGpuBoard && b.isMultiGpuBoard) { + if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) { + pairs->push_back(DevicePair(remaining[i], remaining[j])); + DLOG(INFO) << "GPU board: " << remaining[i] << ":" << remaining[j]; + remaining.erase(remaining.begin() + j); + break; + } + } + } + } + ostringstream s; + for (int i = 0; i < remaining.size(); ++i) { + s << (i ? ", " : "") << remaining[i]; + } + DLOG(INFO) << "GPUs paired by boards, remaining: " << s.str(); + + // Group by P2P accessibility + for (int i = 0; i < remaining.size(); ++i) { + for (int j = i + 1; j < remaining.size(); ++j) { + int access; + CUDA_CHECK(cudaDeviceCanAccessPeer(&access, remaining[i], remaining[j])); + if (access) { + pairs->push_back(DevicePair(remaining[i], remaining[j])); + DLOG(INFO) << "P2P pair: " << remaining[i] << ":" << remaining[j]; + remaining.erase(remaining.begin() + j); + break; + } + } + } + s.str(""); + for (int i = 0; i < remaining.size(); ++i) { + s << (i ? 
", " : "") << remaining[i]; + } + DLOG(INFO) << "GPUs paired by P2P access, remaining: " << s.str(); + + // Group remaining + for (int i = 0; i < remaining.size(); ++i) { + for (int j = i + 1; j < remaining.size(); ++j) { + pairs->push_back(DevicePair(remaining[i], remaining[j])); + DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" << remaining[j]; + remaining.erase(remaining.begin() + j); + break; + } + } + CHECK_EQ(remaining.size(), 1); + pairs->insert(pairs->begin(), DevicePair(-1, remaining[0])); + + CHECK(pairs->size() == devices.size()); + for (int i = 0; i < pairs->size(); ++i) { + CHECK((*pairs)[i].parent() != (*pairs)[i].device()); + for (int j = i + 1; j < pairs->size(); ++j) { + CHECK((*pairs)[i].device() != (*pairs)[j].device()); + } + } +#else + NO_GPU; +#endif +} + +// + +template +P2PSync::P2PSync(shared_ptr > root_solver, + P2PSync* parent, const SolverParameter& param) + : GPUParams(root_solver, param.device_id()), + parent_(parent), + children_(), + queue_(), + initial_iter_(root_solver->iter()), + solver_() { +#ifndef CPU_ONLY + int initial_device; + CUDA_CHECK(cudaGetDevice(&initial_device)); + const int self = param.device_id(); + CUDA_CHECK(cudaSetDevice(self)); + + if (parent == NULL) { + solver_ = root_solver; + } else { + Caffe::set_root_solver(false); + solver_.reset(new WorkerSolver(param)); + Caffe::set_root_solver(true); + } + this->configure(solver_.get()); + solver_->add_callback(this); + + if (parent) { + // Enable p2p access between devices + const int peer = parent->solver_->param().device_id(); + int access; + CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); + if (access) { + CUDA_CHECK(cudaDeviceEnablePeerAccess(peer, 0)); + } else { + LOG(INFO)<< "GPU " << self << " does not have p2p access to GPU " << peer; + } + // Allocate receiving buffer on parent + CUDA_CHECK(cudaSetDevice(peer)); + CUDA_CHECK(cudaMalloc(&parent_grads_, size_ * sizeof(Dtype))); + CUDA_CHECK(cudaSetDevice(self)); + } + + CUDA_CHECK(cudaSetDevice(initial_device)); +#else + NO_GPU; +#endif +} + +template +P2PSync::~P2PSync() { +#ifndef CPU_ONLY + int initial_device; + CUDA_CHECK(cudaGetDevice(&initial_device)); + const int self = solver_->param().device_id(); + CUDA_CHECK(cudaSetDevice(self)); + + if (parent_) { + CUDA_CHECK(cudaFree(parent_grads_)); + const int peer = parent_->solver_->param().device_id(); + int access; + CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); + if (access) { + CUDA_CHECK(cudaDeviceDisablePeerAccess(peer)); + } + } + + CUDA_CHECK(cudaSetDevice(initial_device)); +#endif +} + +template +void P2PSync::InternalThreadEntry() { + Caffe::SetDevice(solver_->param().device_id()); + CHECK(Caffe::root_solver()); + Caffe::set_root_solver(false); + // See if there is a defined seed and reset random state if so + if (solver_->param().random_seed() >= 0) { + // Fetch random seed and modulate by device ID to make sure + // everyone doesn't have the same seed. 
We seem to have some + // solver instability if we have everyone with the same seed + Caffe::set_random_seed( + solver_->param().random_seed() + solver_->param().device_id()); + } + solver_->Step(solver_->param().max_iter() - initial_iter_); +} + +template +void P2PSync::on_start() { +#ifndef CPU_ONLY +#ifdef DEBUG + int device; + CUDA_CHECK(cudaGetDevice(&device)); + CHECK(device == solver_->param().device_id()); +#else +// CHECK(false); +#endif + + // Wait for update from parent + if (parent_) { + P2PSync *parent = queue_.pop(); + CHECK(parent == parent_); + } + + // Update children + for (int i = 0; i < children_.size(); ++i) { + Dtype* src = data_; + Dtype* dst = children_[i]->data_; + +#ifdef DEBUG + cudaPointerAttributes attributes; + CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); + CHECK(attributes.device == device); + CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); + CHECK(attributes.device == children_[i]->solver_->param().device_id()); +#endif + + CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), // + cudaMemcpyDeviceToDevice, cudaStreamDefault)); + } + if (children_.size()) { + CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); + } + for (int i = 0; i < children_.size(); ++i) { + children_[i]->queue_.push(this); + } +#endif +} + +template +void P2PSync::on_gradients_ready() { +#ifndef CPU_ONLY +#ifdef DEBUG + int device; + CUDA_CHECK(cudaGetDevice(&device)); + CHECK(device == solver_->param().device_id()); +#endif + + // Sum children gradients as they appear in the queue + for (int i = 0; i < children_.size(); ++i) { + P2PSync *child = queue_.pop(); + Dtype* src = child->parent_grads_; + Dtype* dst = diff_; + +#ifdef DEBUG + bool ok = false; + for (int j = 0; j < children_.size(); ++j) { + if (child == children_[j]) { + ok = true; + } + } + CHECK(ok); + cudaPointerAttributes attributes; + CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); + CHECK(attributes.device == device); + CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); + CHECK(attributes.device == device); +#endif + + caffe_gpu_add(size_, src, dst, dst); + } + + // Send gradients to parent + if (parent_) { + Dtype* src = diff_; + Dtype* dst = parent_grads_; + +#ifdef DEBUG + cudaPointerAttributes attributes; + CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); + CHECK(attributes.device == device); + CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); + CHECK(attributes.device == parent_->solver_->param().device_id()); +#endif + + CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), // + cudaMemcpyDeviceToDevice, cudaStreamDefault)); + CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); + parent_->queue_.push(this); + } else { + // Loss functions divide gradients by the batch size, so to compensate + // for split batch, the root solver divides by number of solvers. + caffe_gpu_scal(size_, Dtype(1.0 / Caffe::solver_count()), diff_); + } +#endif +} + +template +void P2PSync::run(const vector& gpus) { + // Pair devices for map-reduce synchronization + vector pairs; + DevicePair::compute(gpus, &pairs); + ostringstream s; + for (int i = 1; i < pairs.size(); ++i) { + s << (i == 1 ? 
"" : ", ") << pairs[i].parent() << ":" << pairs[i].device(); + } + LOG(INFO)<< "GPUs pairs " << s.str(); + + SolverParameter param(solver_->param()); + vector > > syncs(gpus.size()); + + // Build the GPU tree by finding the parent for each solver + for (int attempts = 0; attempts < pairs.size(); ++attempts) { + for (int i = 1; i < pairs.size(); ++i) { + if (!syncs[i].get()) { + P2PSync* parent = NULL; + for (int j = 0; j < syncs.size(); ++j) { + P2PSync* sync = j == 0 ? this : syncs[j].get(); + if (sync) { + const SolverParameter& p = sync->solver()->param(); + if (p.device_id() == pairs[i].parent()) { + parent = sync; + } + } + } + if (parent) { + param.set_device_id(pairs[i].device()); + syncs[i].reset(new P2PSync(solver_, parent, param)); + parent->children_.push_back((P2PSync*) syncs[i].get()); + } + } + } + } + + LOG(INFO)<< "Starting Optimization"; + + for (int i = 1; i < syncs.size(); ++i) { + syncs[i]->StartInternalThread(); + } + + // Run root solver on current thread + solver_->Solve(); + + for (int i = 1; i < syncs.size(); ++i) { + syncs[i]->StopInternalThread(); + } +} + +INSTANTIATE_CLASS(Params); +INSTANTIATE_CLASS(GPUParams); +INSTANTIATE_CLASS(P2PSync); + +} // namespace caffe + diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 54e085a6..b6fd6b64 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -19,13 +19,13 @@ namespace caffe { template Solver::Solver(const SolverParameter& param) - : net_() { + : net_(), callbacks_() { Init(param); } template Solver::Solver(const string& param_file) - : net_() { + : net_(), callbacks_() { SolverParameter param; ReadProtoFromTextFileOrDie(param_file, ¶m); Init(param); @@ -33,17 +33,19 @@ Solver::Solver(const string& param_file) template void Solver::Init(const SolverParameter& param) { - LOG(INFO) << "Initializing solver from parameters: " << std::endl - << param.DebugString(); + LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: " + << std::endl << param.DebugString(); param_ = param; CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative."; - if (param_.random_seed() >= 0) { + if (Caffe::root_solver() && param_.random_seed() >= 0) { Caffe::set_random_seed(param_.random_seed()); } // Scaffolding code InitTrainNet(); - InitTestNets(); - LOG(INFO) << "Solver scaffolding done."; + if (Caffe::root_solver()) { + InitTestNets(); + LOG(INFO) << "Solver scaffolding done."; + } iter_ = 0; current_step_ = 0; } @@ -59,19 +61,22 @@ void Solver::InitTrainNet() { << "one of these fields specifying a train_net: " << field_names; NetParameter net_param; if (param_.has_train_net_param()) { - LOG(INFO) << "Creating training net specified in train_net_param."; + LOG_IF(INFO, Caffe::root_solver()) + << "Creating training net specified in train_net_param."; net_param.CopyFrom(param_.train_net_param()); } else if (param_.has_train_net()) { - LOG(INFO) << "Creating training net from train_net file: " - << param_.train_net(); + LOG_IF(INFO, Caffe::root_solver()) + << "Creating training net from train_net file: " << param_.train_net(); ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param); } if (param_.has_net_param()) { - LOG(INFO) << "Creating training net specified in net_param."; + LOG_IF(INFO, Caffe::root_solver()) + << "Creating training net specified in net_param."; net_param.CopyFrom(param_.net_param()); } if (param_.has_net()) { - LOG(INFO) << "Creating training net from net file: " << param_.net(); + LOG_IF(INFO, Caffe::root_solver()) + << "Creating training net from net 
file: " << param_.net(); ReadNetParamsFromTextFileOrDie(param_.net(), &net_param); } // Set the correct NetState. We start with the solver defaults (lowest @@ -88,6 +93,7 @@ void Solver::InitTrainNet() { template void Solver::InitTestNets() { + CHECK(Caffe::root_solver()); const bool has_net_param = param_.has_net_param(); const bool has_net_file = param_.has_net(); const int num_generic_nets = has_net_param + has_net_file; @@ -175,10 +181,14 @@ void Solver::Step(int iters) { // zero-init the params net_->ClearParamDiffs(); if (param_.test_interval() && iter_ % param_.test_interval() == 0 - && (iter_ > 0 || param_.test_initialization())) { + && (iter_ > 0 || param_.test_initialization()) + && Caffe::root_solver()) { TestAll(); } + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_start(); + } const bool display = param_.display() && iter_ % param_.display() == 0; net_->set_debug_info(display && param_.debug_info()); // accumulate the loss and gradient @@ -198,7 +208,8 @@ void Solver::Step(int iters) { losses[idx] = loss; } if (display) { - LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss; + LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_ + << ", loss = " << smoothed_loss; const vector*>& result = net_->output_blobs(); int score_index = 0; for (int j = 0; j < result.size(); ++j) { @@ -213,12 +224,15 @@ void Solver::Step(int iters) { loss_msg_stream << " (* " << loss_weight << " = " << loss_weight * result_vec[k] << " loss)"; } - LOG(INFO) << " Train net output #" + LOG_IF(INFO, Caffe::root_solver()) << " Train net output #" << score_index++ << ": " << output_name << " = " << result_vec[k] << loss_msg_stream.str(); } } } + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_gradients_ready(); + } ApplyUpdate(); // Increment the internal iter_ counter -- its value should always indicate @@ -226,7 +240,9 @@ void Solver::Step(int iters) { ++iter_; // Save a snapshot if needed. 
- if (param_.snapshot() && iter_ % param_.snapshot() == 0) { + if (param_.snapshot() + && iter_ % param_.snapshot() == 0 + && Caffe::root_solver()) { Snapshot(); } } @@ -234,6 +250,7 @@ void Solver::Step(int iters) { template void Solver::Solve(const char* resume_file) { + CHECK(Caffe::root_solver()); LOG(INFO) << "Solving " << net_->name(); LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy(); @@ -278,6 +295,7 @@ void Solver::TestAll() { template void Solver::Test(const int test_net_id) { + CHECK(Caffe::root_solver()); LOG(INFO) << "Iteration " << iter_ << ", Testing net (#" << test_net_id << ")"; CHECK_NOTNULL(test_nets_[test_net_id].get())-> @@ -328,13 +346,14 @@ void Solver::Test(const int test_net_id) { << " = " << loss_weight * mean_score << " loss)"; } LOG(INFO) << " Test net output #" << i << ": " << output_name << " = " - << mean_score << loss_msg_stream.str(); + << mean_score << loss_msg_stream.str(); } } template void Solver::Snapshot() { + CHECK(Caffe::root_solver()); string model_filename; switch (param_.snapshot_format()) { case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: @@ -379,6 +398,7 @@ string Solver::SnapshotToHDF5() { template void Solver::Restore(const char* state_file) { + CHECK(Caffe::root_solver()); string state_filename(state_file); if (state_filename.size() >= 3 && state_filename.compare(state_filename.size() - 3, 3, ".h5") == 0) { @@ -480,6 +500,7 @@ void SGDSolver::ClipGradients() { template void SGDSolver::ApplyUpdate() { + CHECK(Caffe::root_solver()); Dtype rate = GetLearningRate(); if (this->param_.display() && this->iter_ % this->param_.display() == 0) { LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; @@ -723,6 +744,7 @@ void SGDSolver::RestoreSolverStateFromHDF5(const string& state_file) { template void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { + CHECK(Caffe::root_solver()); const vector*>& net_params = this->net_->learnable_params(); const vector& net_params_lr = this->net_->params_lr(); Dtype momentum = this->param_.momentum(); @@ -783,6 +805,7 @@ void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { template void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { + CHECK(Caffe::root_solver()); const vector*>& net_params = this->net_->learnable_params(); const vector& net_params_lr = this->net_->params_lr(); Dtype delta = this->param_.delta(); diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 0da7a3ba..a667a867 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -12,8 +12,14 @@ SyncedMemory::~SyncedMemory() { } #ifndef CPU_ONLY - if (gpu_ptr_) { + if (gpu_ptr_ && own_gpu_data_) { + int initial_device; + cudaGetDevice(&initial_device); + if (gpu_device_ != -1) { + CUDA_CHECK(cudaSetDevice(gpu_device_)); + } CUDA_CHECK(cudaFree(gpu_ptr_)); + cudaSetDevice(initial_device); } #endif // CPU_ONLY } @@ -48,13 +54,17 @@ inline void SyncedMemory::to_gpu() { #ifndef CPU_ONLY switch (head_) { case UNINITIALIZED: + CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); caffe_gpu_memset(size_, 0, gpu_ptr_); head_ = HEAD_AT_GPU; + own_gpu_data_ = true; break; case HEAD_AT_CPU: if (gpu_ptr_ == NULL) { + CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); + own_gpu_data_ = true; } caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_); head_ = SYNCED; @@ -92,6 +102,26 @@ const void* SyncedMemory::gpu_data() { #endif } +void SyncedMemory::set_gpu_data(void* data) { +#ifndef CPU_ONLY + CHECK(data); + if 
(own_gpu_data_) { + int initial_device; + cudaGetDevice(&initial_device); + if (gpu_device_ != -1) { + CUDA_CHECK(cudaSetDevice(gpu_device_)); + } + CUDA_CHECK(cudaFree(gpu_ptr_)); + cudaSetDevice(initial_device); + } + gpu_ptr_ = data; + head_ = HEAD_AT_GPU; + own_gpu_data_ = false; +#else + NO_GPU; +#endif +} + void* SyncedMemory::mutable_cpu_data() { to_cpu(); head_ = HEAD_AT_CPU; @@ -112,7 +142,9 @@ void* SyncedMemory::mutable_gpu_data() { void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { CHECK(head_ == HEAD_AT_CPU); if (gpu_ptr_ == NULL) { + CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); + own_gpu_data_ = true; } const cudaMemcpyKind put = cudaMemcpyHostToDevice; CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream)); diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index eaa7a759..1cede07f 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -8,6 +8,7 @@ #include "gtest/gtest.h" #include "caffe/common.hpp" +#include "caffe/parallel.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/solver.hpp" #include "caffe/util/io.hpp" @@ -35,6 +36,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { string snapshot_prefix_; shared_ptr > solver_; + shared_ptr > sync_; int seed_; // Dimensions are determined by generate_sample_data.py // TODO this is brittle and the hdf5 file should be checked instead. @@ -70,8 +72,8 @@ class GradientBasedSolverTest : public MultiDeviceTest { string RunLeastSquaresSolver(const Dtype learning_rate, const Dtype weight_decay, const Dtype momentum, const int num_iters, - const int iter_size = 1, const bool snapshot = false, - const char* from_snapshot = NULL) { + const int iter_size = 1, const int devices = 1, + const bool snapshot = false, const char* from_snapshot = NULL) { ostringstream proto; proto << "snapshot_after_train: " << snapshot << " " @@ -184,7 +186,20 @@ class GradientBasedSolverTest : public MultiDeviceTest { this->solver_->net()->Forward(empty_bottom_vec); } } - this->solver_->Solve(); + if (devices == 1) { + this->solver_->Solve(); + } else { + LOG(INFO) << "Multi-GPU test on " << devices << " devices"; + vector gpus; + for (int i = 0; i < devices; ++i) { + gpus.push_back(i); + } + Caffe::set_solver_count(gpus.size()); + this->sync_.reset(new P2PSync( + this->solver_, NULL, this->solver_->param())); + this->sync_->run(gpus); + Caffe::set_solver_count(1); + } if (snapshot) { ostringstream resume_file; resume_file << snapshot_prefix_ << "/_iter_" << num_iters @@ -410,20 +425,38 @@ class GradientBasedSolverTest : public MultiDeviceTest { void TestLeastSquaresUpdate(const Dtype learning_rate = 1.0, const Dtype weight_decay = 0.0, const Dtype momentum = 0.0, const int iter_to_check = 0) { - // Initialize the solver and run K (= iter_to_check) solver iterations. - RunLeastSquaresSolver(learning_rate, weight_decay, momentum, iter_to_check); - - // Compute the (K+1)th update using the analytic least squares gradient. - vector > > updated_params; - ComputeLeastSquaresUpdate(learning_rate, weight_decay, momentum, - &updated_params); - - // Reinitialize the solver and run K+1 solver iterations. - RunLeastSquaresSolver(learning_rate, weight_decay, momentum, - iter_to_check + 1); - - // Check that the solver's solution matches ours. - CheckLeastSquaresUpdate(updated_params); + const int kNum = num_; + const int kIterSize = 1; + // Test over all numbers of devices. 
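    // (The idea of the equivalence being tested: the single-device reference run
    // uses a batch of kNum * devices samples, while the multi-device run gives
    // each of the `devices` solvers a batch of kNum and P2PSync averages their
    // gradients, so with constant input data both runs should produce the same
    // parameter update.)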
+ int available_devices = 1; +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaGetDeviceCount(&available_devices)); + } +#endif + for (int devices = 1; devices <= available_devices; ++devices) { + // Configure batch size for single / multi device equivalence. + // Constant data is needed for multi device as for accumulation. + num_ = kNum * devices; + + // Initialize the solver and run K (= iter_to_check) solver iterations + // (on single device). + RunLeastSquaresSolver(learning_rate, weight_decay, momentum, + iter_to_check, kIterSize, 1); + + // Compute the (K+1)th update using the analytic least squares gradient. + vector > > updated_params; + ComputeLeastSquaresUpdate(learning_rate, weight_decay, momentum, + &updated_params); + + // Reinitialize the solver and run K+1 solver iterations. + num_ = kNum; + RunLeastSquaresSolver(learning_rate, weight_decay, momentum, + iter_to_check + 1, kIterSize, devices); + + // Check that the solver's solution matches ours. + CheckLeastSquaresUpdate(updated_params); + } } void TestSnapshot(const Dtype learning_rate = 1.0, @@ -433,8 +466,9 @@ class GradientBasedSolverTest : public MultiDeviceTest { const int total_num_iters = num_iters * 2; bool snapshot = false; const int kIterSize = 1; + const int kDevices = 1; RunLeastSquaresSolver(learning_rate, weight_decay, momentum, - total_num_iters, kIterSize, snapshot); + total_num_iters, kIterSize, kDevices, snapshot); // Save the resulting param values. vector > > param_copies; @@ -464,12 +498,13 @@ class GradientBasedSolverTest : public MultiDeviceTest { // Run the solver for num_iters iterations and snapshot. snapshot = true; string snapshot_name = RunLeastSquaresSolver(learning_rate, weight_decay, - momentum, num_iters, kIterSize, snapshot); + momentum, num_iters, kIterSize, kDevices, snapshot); // Reinitialize the solver and run for num_iters more iterations. snapshot = false; RunLeastSquaresSolver(learning_rate, weight_decay, momentum, - total_num_iters, kIterSize, snapshot, snapshot_name.c_str()); + total_num_iters, kIterSize, kDevices, + snapshot, snapshot_name.c_str()); // Check that params now match. const vector*>& params = solver_->net()->learnable_params(); diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp index f7c53f22..d1d1fa86 100644 --- a/src/caffe/util/blocking_queue.cpp +++ b/src/caffe/util/blocking_queue.cpp @@ -3,6 +3,7 @@ #include "caffe/data_layers.hpp" #include "caffe/data_reader.hpp" +#include "caffe/parallel.hpp" #include "caffe/util/blocking_queue.hpp" namespace caffe { @@ -89,5 +90,7 @@ template class BlockingQueue*>; template class BlockingQueue*>; template class BlockingQueue; template class BlockingQueue >; +template class BlockingQueue*>; +template class BlockingQueue*>; } // namespace caffe diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 46f99594..9f31b37a 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -17,13 +17,17 @@ using caffe::Blob; using caffe::Caffe; using caffe::Net; using caffe::Layer; +using caffe::Solver; using caffe::shared_ptr; +using caffe::string; using caffe::Timer; using caffe::vector; +using std::ostringstream; - -DEFINE_int32(gpu, -1, - "Run in GPU mode on given device ID."); +DEFINE_string(gpu, "", + "Optional; run in GPU mode on given device IDs separated by ','." + "Use '-gpu all' to run on all available GPUs. 
The effective training " + "batch size is multiplied by the number of devices."); DEFINE_string(solver, "", "The solver definition protocol buffer text file."); DEFINE_string(model, "", @@ -31,8 +35,8 @@ DEFINE_string(model, "", DEFINE_string(snapshot, "", "Optional; the snapshot solver state to resume training."); DEFINE_string(weights, "", - "Optional; the pretrained weights to initialize finetuning. " - "Cannot be set simultaneously with snapshot."); + "Optional; the pretrained weights to initialize finetuning, " + "separated by ','. Cannot be set simultaneously with snapshot."); DEFINE_int32(iterations, 50, "The number of iterations to run."); @@ -66,6 +70,29 @@ static BrewFunction GetBrewFunction(const caffe::string& name) { } } +// Parse GPU ids or use all available devices +static void get_gpus(vector* gpus) { + if (FLAGS_gpu == "all") { + int count = 0; +#ifndef CPU_ONLY + CUDA_CHECK(cudaGetDeviceCount(&count)); +#else + NO_GPU; +#endif + for (int i = 0; i < count; ++i) { + gpus->push_back(i); + } + } else if (FLAGS_gpu.size()) { + vector strings; + boost::split(strings, FLAGS_gpu, boost::is_any_of(",")); + for (int i = 0; i < strings.size(); ++i) { + gpus->push_back(boost::lexical_cast(strings[i])); + } + } else { + CHECK_EQ(gpus->size(), 0); + } +} + // caffe commands to call by // caffe // @@ -74,10 +101,13 @@ static BrewFunction GetBrewFunction(const caffe::string& name) { // Device Query: show diagnostic information for a GPU device. int device_query() { - CHECK_GT(FLAGS_gpu, -1) << "Need a device ID to query."; - LOG(INFO) << "Querying device ID = " << FLAGS_gpu; - caffe::Caffe::SetDevice(FLAGS_gpu); - caffe::Caffe::DeviceQuery(); + LOG(INFO) << "Querying GPUs " << FLAGS_gpu; + vector gpus; + get_gpus(&gpus); + for (int i = 0; i < gpus.size(); ++i) { + caffe::Caffe::SetDevice(gpus[i]); + caffe::Caffe::DeviceQuery(); + } return 0; } RegisterBrewFunction(device_query); @@ -106,34 +136,49 @@ int train() { caffe::SolverParameter solver_param; caffe::ReadProtoFromTextFileOrDie(FLAGS_solver, &solver_param); - // If the gpu flag is not provided, allow the mode and device to be set + // If the gpus flag is not provided, allow the mode and device to be set // in the solver prototxt. - if (FLAGS_gpu < 0 + if (FLAGS_gpu.size() == 0 && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) { - FLAGS_gpu = solver_param.device_id(); + if (solver_param.has_device_id()) { + FLAGS_gpu = "" + + boost::lexical_cast(solver_param.device_id()); + } else { // Set default GPU if unspecified + FLAGS_gpu = "" + boost::lexical_cast(0); + } } - // Set device id and mode - if (FLAGS_gpu >= 0) { - LOG(INFO) << "Use GPU with device ID " << FLAGS_gpu; - Caffe::SetDevice(FLAGS_gpu); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(INFO) << "Use CPU."; + vector gpus; + get_gpus(&gpus); + if (gpus.size() == 0) { Caffe::set_mode(Caffe::CPU); + } else { + ostringstream s; + for (int i = 0; i < gpus.size(); ++i) { + s << (i ? 
", " : "") << gpus[i]; + } + LOG(INFO) << "Using GPUs " << s.str(); + + solver_param.set_device_id(gpus[0]); + Caffe::SetDevice(gpus[0]); + Caffe::set_mode(Caffe::GPU); + Caffe::set_solver_count(gpus.size()); } - LOG(INFO) << "Starting Optimization"; - shared_ptr > - solver(caffe::GetSolver(solver_param)); + shared_ptr > solver(caffe::GetSolver(solver_param)); if (FLAGS_snapshot.size()) { LOG(INFO) << "Resuming from " << FLAGS_snapshot; - solver->Solve(FLAGS_snapshot); + solver->Restore(FLAGS_snapshot.c_str()); } else if (FLAGS_weights.size()) { - CopyLayers(&*solver, FLAGS_weights); - solver->Solve(); + CopyLayers(solver.get(), FLAGS_weights); + } + + if (gpus.size() > 1) { + caffe::P2PSync sync(solver, NULL, solver->param()); + sync.run(gpus); } else { + LOG(INFO) << "Starting Optimization"; solver->Solve(); } LOG(INFO) << "Optimization Done."; @@ -148,9 +193,11 @@ int test() { CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score."; // Set device id and mode - if (FLAGS_gpu >= 0) { - LOG(INFO) << "Use GPU with device ID " << FLAGS_gpu; - Caffe::SetDevice(FLAGS_gpu); + vector gpus; + get_gpus(&gpus); + if (gpus.size() != 0) { + LOG(INFO) << "Use GPU with device ID " << gpus[0]; + Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); } else { LOG(INFO) << "Use CPU."; @@ -213,9 +260,11 @@ int time() { CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; // Set device id and mode - if (FLAGS_gpu >= 0) { - LOG(INFO) << "Use GPU with device ID " << FLAGS_gpu; - Caffe::SetDevice(FLAGS_gpu); + vector gpus; + get_gpus(&gpus); + if (gpus.size() != 0) { + LOG(INFO) << "Use GPU with device ID " << gpus[0]; + Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); } else { LOG(INFO) << "Use CPU."; From 335bee737cb2e715abe685e6029afc83ccd8f404 Mon Sep 17 00:00:00 2001 From: mhouston Date: Fri, 10 Jul 2015 16:05:48 -0700 Subject: [PATCH 021/223] Detect topology corner cases and improve broadcast order - Start with distant nodes in broadcast - Fix outside loop to loop for full tree depth --- src/caffe/parallel.cpp | 73 ++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 3fef8cfd..5a08df6c 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -119,18 +119,23 @@ void DevicePair::compute(const vector devices, vector* pairs) { #ifndef CPU_ONLY vector remaining(devices); + // Depth for reduction tree + int remaining_depth = static_cast(ceil(log2(remaining.size()))); + // Group GPUs by board - for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - cudaDeviceProp a, b; - CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i])); - CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j])); - if (a.isMultiGpuBoard && b.isMultiGpuBoard) { - if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); - DLOG(INFO) << "GPU board: " << remaining[i] << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; + for (int d = 0; d < remaining_depth; ++d) { + for (int i = 0; i < remaining.size(); ++i) { + for (int j = i + 1; j < remaining.size(); ++j) { + cudaDeviceProp a, b; + CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i])); + CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j])); + if (a.isMultiGpuBoard && b.isMultiGpuBoard) { + if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) { + pairs->push_back(DevicePair(remaining[i], remaining[j])); + DLOG(INFO) << 
"GPU board: " << remaining[i] << ":" << remaining[j]; + remaining.erase(remaining.begin() + j); + break; + } } } } @@ -142,15 +147,19 @@ void DevicePair::compute(const vector devices, vector* pairs) { DLOG(INFO) << "GPUs paired by boards, remaining: " << s.str(); // Group by P2P accessibility - for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - int access; - CUDA_CHECK(cudaDeviceCanAccessPeer(&access, remaining[i], remaining[j])); - if (access) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); - DLOG(INFO) << "P2P pair: " << remaining[i] << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; + remaining_depth = ceil(log2(remaining.size())); + for (int d = 0; d < remaining_depth; ++d) { + for (int i = 0; i < remaining.size(); ++i) { + for (int j = i + 1; j < remaining.size(); ++j) { + int access; + CUDA_CHECK( + cudaDeviceCanAccessPeer(&access, remaining[i], remaining[j])); + if (access) { + pairs->push_back(DevicePair(remaining[i], remaining[j])); + DLOG(INFO) << "P2P pair: " << remaining[i] << ":" << remaining[j]; + remaining.erase(remaining.begin() + j); + break; + } } } } @@ -161,15 +170,19 @@ void DevicePair::compute(const vector devices, vector* pairs) { DLOG(INFO) << "GPUs paired by P2P access, remaining: " << s.str(); // Group remaining - for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); - DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; + remaining_depth = ceil(log2(remaining.size())); + for (int d = 0; d < remaining_depth; ++d) { + for (int i = 0; i < remaining.size(); ++i) { + pairs->push_back(DevicePair(remaining[i], remaining[i + 1])); + DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" + << remaining[i + 1]; + remaining.erase(remaining.begin() + i + 1); } } + + // Should only be the parent node remaining CHECK_EQ(remaining.size(), 1); + pairs->insert(pairs->begin(), DevicePair(-1, remaining[0])); CHECK(pairs->size() == devices.size()); @@ -289,7 +302,7 @@ void P2PSync::on_start() { } // Update children - for (int i = 0; i < children_.size(); ++i) { + for (int i = children_.size() - 1; i >= 0; i--) { Dtype* src = data_; Dtype* dst = children_[i]->data_; @@ -301,13 +314,9 @@ void P2PSync::on_start() { CHECK(attributes.device == children_[i]->solver_->param().device_id()); #endif - CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), // + CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), cudaMemcpyDeviceToDevice, cudaStreamDefault)); - } - if (children_.size()) { CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); - } - for (int i = 0; i < children_.size(); ++i) { children_[i]->queue_.push(this); } #endif From 8771d0f4317fc0081d86b7637f5f5ceef5b92dfb Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 7 Aug 2015 13:56:49 -0700 Subject: [PATCH 022/223] [docs] add multi-gpu usage note to interfaces --- docs/tutorial/interfaces.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/tutorial/interfaces.md b/docs/tutorial/interfaces.md index 40602948..9006179d 100644 --- a/docs/tutorial/interfaces.md +++ b/docs/tutorial/interfaces.md @@ -50,6 +50,13 @@ For a full example of fine-tuning, see examples/finetuning_on_flickr_style, but # query the first device caffe device_query -gpu 0 +**Parallelism**: the `-gpu` flag to the `caffe` tool can take a comma separated list of IDs to run on multiple 
GPUs. A solver and net will be instantiated for each GPU so the batch size is effectively multiplied by the number of GPUs. To reproduce single GPU training, reduce the batch size in the network definition accordingly. + + # train on GPUs 0 & 1 (doubling the batch size) + caffe train -solver examples/mnist/lenet_solver.prototxt -gpu 0,1 + # train on all GPUs (multiplying batch size by number of devices) + caffe train -solver examples/mnist/lenet_solver.prototxt -gpu all + ## Python The Python interface -- pycaffe -- is the `caffe` module and its scripts in caffe/python. `import caffe` to load models, do forward and backward, handle IO, visualize networks, and even instrument model solving. All model data, derivatives, and parameters are exposed for reading and writing. From 1ce3380f172336cadaa649a6e077a42a246a534d Mon Sep 17 00:00:00 2001 From: Mohamed Omran Date: Sat, 20 Sep 2014 19:01:28 +0200 Subject: [PATCH 023/223] Implement AdaDelta; add test cases; add mnist examples --- examples/mnist/lenet_adadelta_solver.prototxt | 22 ++ ...mnist_autoencoder_solver_adadelta.prototxt | 17 ++ .../mnist/train_mnist_autoencoder_adadelta.sh | 4 + include/caffe/solver.hpp | 23 ++ src/caffe/proto/caffe.proto | 1 + src/caffe/solver.cpp | 199 ++++++++++++++++++ src/caffe/test/test_gradient_based_solver.cpp | 100 ++++++++- 7 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 examples/mnist/lenet_adadelta_solver.prototxt create mode 100644 examples/mnist/mnist_autoencoder_solver_adadelta.prototxt create mode 100755 examples/mnist/train_mnist_autoencoder_adadelta.sh diff --git a/examples/mnist/lenet_adadelta_solver.prototxt b/examples/mnist/lenet_adadelta_solver.prototxt new file mode 100644 index 00000000..b77b451d --- /dev/null +++ b/examples/mnist/lenet_adadelta_solver.prototxt @@ -0,0 +1,22 @@ +# The train/test net protocol buffer definition +net: "examples/mnist/lenet_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. 
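# (A rough sketch of the update this solver type applies, matching the AdaDelta
# code in this patch: with rho = momentum and eps = delta,
#   E[g^2]_t   = rho * E[g^2]_{t-1}  + (1 - rho) * g_t^2
#   step_t     = sqrt(E[dx^2]_{t-1} + eps) / sqrt(E[g^2]_t + eps) * g_t
#   E[dx^2]_t  = rho * E[dx^2]_{t-1} + (1 - rho) * step_t^2
# Because the step size comes from these running averages, no base_lr or
# lr_policy is set in this file; only the decay (momentum) and eps (delta)
# are given.)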
+momentum: 0.95 +weight_decay: 0.0005 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/mnist/lenet_adadelta" +# solver mode: CPU or GPU +solver_mode: GPU +solver_type: ADADELTA +delta: 1e-6 diff --git a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt new file mode 100644 index 00000000..cc4f0bbb --- /dev/null +++ b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt @@ -0,0 +1,17 @@ +net: "examples/mnist/mnist_autoencoder.prototxt" +test_state: { stage: 'test-on-train' } +test_iter: 500 +test_state: { stage: 'test-on-test' } +test_iter: 100 +test_interval: 500 +test_compute_loss: true +momentum: 0.95 +display: 100 +max_iter: 65000 +weight_decay: 0.0005 +snapshot: 10000 +snapshot_prefix: "examples/mnist/mnist_autoencoder_adadelta_train" +# solver mode: CPU or GPU +solver_mode: GPU +solver_type: ADADELTA +delta: 1e-8 diff --git a/examples/mnist/train_mnist_autoencoder_adadelta.sh b/examples/mnist/train_mnist_autoencoder_adadelta.sh new file mode 100755 index 00000000..4be0ebdd --- /dev/null +++ b/examples/mnist/train_mnist_autoencoder_adadelta.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +./build/tools/caffe train \ + --solver=examples/mnist/mnist_autoencoder_solver_adadelta.prototxt diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index fbade938..4b408380 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -158,6 +158,27 @@ class RMSPropSolver : public SGDSolver { DISABLE_COPY_AND_ASSIGN(RMSPropSolver); }; +template +class AdaDeltaSolver : public SGDSolver { + public: + explicit AdaDeltaSolver(const SolverParameter& param) + : SGDSolver(param) { constructor_sanity_check(); } + explicit AdaDeltaSolver(const string& param_file) + : SGDSolver(param_file) { constructor_sanity_check(); } + + protected: + virtual void PreSolve(); + virtual void ComputeUpdateValue(); + void constructor_sanity_check() { + CHECK_EQ(0, this->param_.base_lr()) + << "Learning rate cannot be used with AdaDelta."; + CHECK_EQ("", this->param_.lr_policy()) + << "Learning rate policy cannot be applied to AdaDelta."; + } + + DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); +}; + template Solver* GetSolver(const SolverParameter& param) { SolverParameter_SolverType type = param.solver_type(); @@ -171,6 +192,8 @@ Solver* GetSolver(const SolverParameter& param) { return new AdaGradSolver(param); case SolverParameter_SolverType_RMSPROP: return new RMSPropSolver(param); + case SolverParameter_SolverType_ADADELTA: + return new AdaDeltaSolver(param); default: LOG(FATAL) << "Unknown SolverType: " << type; } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 89f14595..7cfcaa8b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -215,6 +215,7 @@ message SolverParameter { NESTEROV = 1; ADAGRAD = 2; RMSPROP = 3; + ADADELTA = 4; } optional SolverType solver_type = 30 [default = SGD]; // numerical stability for AdaGrad diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 54e085a6..d8749a1b 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -934,10 +934,209 @@ void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { } } +template +void AdaDeltaSolver::PreSolve() { + // Initialize the history + vector > >& net_params = this->net_->params(); + this->history_.clear(); + this->update_.clear(); + this->temp_.clear(); + for (int i = 0; i < 
net_params.size(); ++i) { + const Blob* net_param = net_params[i].get(); + this->history_.push_back(shared_ptr >(new Blob( + net_param->num(), net_param->channels(), net_param->height(), + net_param->width()))); + this->update_.push_back(shared_ptr >(new Blob( + net_param->num(), net_param->channels(), net_param->height(), + net_param->width()))); + this->temp_.push_back(shared_ptr >(new Blob( + net_param->num(), net_param->channels(), net_param->height(), + net_param->width()))); + } + for (int i = 0; i < net_params.size(); ++i) { + const Blob* net_param = net_params[i].get(); + this->history_.push_back(shared_ptr >(new Blob( + net_param->num(), net_param->channels(), net_param->height(), + net_param->width()))); + } +} + +template +void AdaDeltaSolver::ComputeUpdateValue() { + vector > >& net_params = this->net_->params(); + vector& net_params_weight_decay = this->net_->params_weight_decay(); + Dtype delta = this->param_.delta(); + Dtype momentum = this->param_.momentum(); + Dtype weight_decay = this->param_.weight_decay(); + string regularization_type = this->param_.regularization_type(); + size_t update_history_offset = net_params.size(); + switch (Caffe::mode()) { + case Caffe::CPU: + for (int param_id = 0; param_id < net_params.size(); ++param_id) { + Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; + + if (local_decay) { + if (regularization_type == "L2") { + // add weight decay + caffe_axpy(net_params[param_id]->count(), + local_decay, + net_params[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + } else if (regularization_type == "L1") { + caffe_cpu_sign(net_params[param_id]->count(), + net_params[param_id]->cpu_data(), + this->temp_[param_id]->mutable_cpu_data()); + caffe_axpy(net_params[param_id]->count(), + local_decay, + this->temp_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + } else { + LOG(FATAL) << "Unknown regularization type: " << regularization_type; + } + } + + // compute square of gradient in update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history of gradients + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[param_id]->mutable_cpu_data()); + + // add delta to history to guard against dividing by zero later + caffe_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[update_history_offset + param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[param_id]->cpu_data(), + this->temp_[param_id]->mutable_cpu_data()); + + // divide history of updates by history of gradients + caffe_div(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), + this->temp_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_powx(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_cpu_data()); + + // compute the update + caffe_mul(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), + this->update_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + + // compute square of update + 
caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history of updates + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_cpu_data()); + } + break; + case Caffe::GPU: +#ifndef CPU_ONLY + for (int param_id = 0; param_id < net_params.size(); ++param_id) { + Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; + + if (local_decay) { + if (regularization_type == "L2") { + // add weight decay + caffe_gpu_axpy(net_params[param_id]->count(), + local_decay, + net_params[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + } else if (regularization_type == "L1") { + caffe_gpu_sign(net_params[param_id]->count(), + net_params[param_id]->gpu_data(), + this->temp_[param_id]->mutable_gpu_data()); + caffe_gpu_axpy(net_params[param_id]->count(), + local_decay, + this->temp_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + } else { + LOG(FATAL) << "Unknown regularization type: " << regularization_type; + } + } + + // compute square of gradient in update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history of gradients + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[param_id]->mutable_gpu_data()); + + // add delta to history to guard against dividing by zero later + caffe_gpu_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[update_history_offset + param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[param_id]->gpu_data(), + this->temp_[param_id]->mutable_gpu_data()); + + // divide history of updates by history of gradients + caffe_gpu_div(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), + this->temp_[param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_gpu_powx(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_gpu_data()); + + // compute the update and copy to net_diff + caffe_gpu_mul(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), + this->update_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + + // compute square of update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history of updates + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_gpu_data()); + } +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + INSTANTIATE_CLASS(Solver); INSTANTIATE_CLASS(SGDSolver); INSTANTIATE_CLASS(NesterovSolver); INSTANTIATE_CLASS(AdaGradSolver); INSTANTIATE_CLASS(RMSPropSolver); +INSTANTIATE_CLASS(AdaDeltaSolver); } // namespace caffe diff --git 
a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index eaa7a759..db89e285 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -64,7 +64,8 @@ class GradientBasedSolverTest : public MultiDeviceTest { } InitSolver(param); delta_ = (solver_type() == SolverParameter_SolverType_ADAGRAD || - solver_type() == SolverParameter_SolverType_RMSPROP) ? + solver_type() == SolverParameter_SolverType_RMSPROP || + solver_type() == SolverParameter_SolverType_ADADELTA) ? param.delta() : 0; } @@ -164,6 +165,10 @@ class GradientBasedSolverTest : public MultiDeviceTest { " bottom: 'targets' " " } " "} "; + if (learning_rate != 0) { + proto << "base_lr: " << learning_rate << " "; + proto << "lr_policy: 'fixed' "; + } if (weight_decay != 0) { proto << "weight_decay: " << weight_decay << " "; } @@ -266,7 +271,11 @@ class GradientBasedSolverTest : public MultiDeviceTest { ((i == D) ? bias.cpu_data()[0] : weights.cpu_data()[i]); // Finally, compute update. const vector > >& history = solver_->history(); - ASSERT_EQ(2, history.size()); // 1 blob for weights, 1 for bias + if (solver_type() != SolverParameter_SolverType_ADADELTA) { + ASSERT_EQ(2, history.size()); // 1 blob for weights, 1 for bias + } else { + ASSERT_EQ(4, history.size()); // additional blobs for update history + } Dtype update_value = learning_rate * grad; const Dtype history_value = (i == D) ? history[1]->cpu_data()[0] : history[0]->cpu_data()[i]; @@ -289,6 +298,19 @@ class GradientBasedSolverTest : public MultiDeviceTest { + grad * grad * (1 - rms_decay)) + delta_; } break; + case SolverParameter_SolverType_ADADELTA: + { + const Dtype update_history_value = (i == D) ? + history[3]->cpu_data()[0] : history[2]->cpu_data()[i]; + const Dtype weighted_gradient_average = + momentum * history_value + (1 - momentum) * (grad * grad); + update_value = grad * std::sqrt((update_history_value + delta_) / + (weighted_gradient_average + delta_)); + // not actually needed, just here for illustrative purposes + // const Dtype weighted_update_average = + // momentum * update_history_value + (1 - momentum) * (update_value); + break; + } default: LOG(FATAL) << "Unknown solver type: " << solver_type(); } @@ -981,4 +1003,78 @@ TYPED_TEST(RMSPropSolverTest, TestSnapshotShare) { } } +template +class AdaDeltaSolverTest : public GradientBasedSolverTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + virtual void InitSolver(const SolverParameter& param) { + this->solver_.reset(new AdaDeltaSolver(param)); + } + + virtual SolverParameter_SolverType solver_type() { + return SolverParameter_SolverType_ADADELTA; + } +}; + +TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices); + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + this->TestLeastSquaresUpdate(kLearningRate); +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.95; + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.5; + const int kNumIters = 1; + for (int i = 0; i <= kNumIters; ++i) { + 
this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.95; + const int kNumIters = 1; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.95; + const int kNumIters = 500; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 500; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + } // namespace caffe From 4c58741ce2e031b61aef53914128801e6edd673d Mon Sep 17 00:00:00 2001 From: Kevin Bache Date: Thu, 19 Mar 2015 15:56:51 -0700 Subject: [PATCH 024/223] Updated AdaDelta for modern Caffe; reduced iterations on multi-iter tests --- ...mnist_autoencoder_solver_adadelta.prototxt | 2 +- include/caffe/solver.hpp | 6 ++-- src/caffe/solver.cpp | 32 ++++++------------- src/caffe/test/test_gradient_based_solver.cpp | 4 +-- 4 files changed, 15 insertions(+), 29 deletions(-) diff --git a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt index cc4f0bbb..4e43468a 100644 --- a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt +++ b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt @@ -6,6 +6,7 @@ test_iter: 100 test_interval: 500 test_compute_loss: true momentum: 0.95 +delta: 1e-8 display: 100 max_iter: 65000 weight_decay: 0.0005 @@ -14,4 +15,3 @@ snapshot_prefix: "examples/mnist/mnist_autoencoder_adadelta_train" # solver mode: CPU or GPU solver_mode: GPU solver_type: ADADELTA -delta: 1e-8 diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 4b408380..495cd4f1 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -82,12 +82,12 @@ class SGDSolver : public Solver { const vector > >& history() { return history_; } protected: - void PreSolve(); Dtype GetLearningRate(); virtual void ApplyUpdate(); virtual void Normalize(int param_id); virtual void Regularize(int param_id); virtual void ComputeUpdateValue(int param_id, Dtype rate); + virtual void PreSolve(); virtual void ClipGradients(); virtual void SnapshotSolverState(const string& model_filename); virtual void SnapshotSolverStateToBinaryProto(const string& model_filename); @@ -162,9 +162,9 @@ template class AdaDeltaSolver : public SGDSolver { public: explicit AdaDeltaSolver(const SolverParameter& param) - : SGDSolver(param) { constructor_sanity_check(); } + : SGDSolver(param) { PreSolve(); constructor_sanity_check(); } explicit AdaDeltaSolver(const string& param_file) - : SGDSolver(param_file) { constructor_sanity_check(); } + : SGDSolver(param_file) { PreSolve(); constructor_sanity_check(); } protected: virtual void PreSolve(); diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index d8749a1b..34a290ff 100644 --- 
a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -936,35 +936,21 @@ void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { template void AdaDeltaSolver::PreSolve() { - // Initialize the history - vector > >& net_params = this->net_->params(); - this->history_.clear(); - this->update_.clear(); - this->temp_.clear(); - for (int i = 0; i < net_params.size(); ++i) { - const Blob* net_param = net_params[i].get(); - this->history_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - this->update_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - this->temp_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - } + // Add the extra history entries for AdaDelta after those from + // SGDSolver::PreSolve + const vector > >& net_params = this->net_->params(); for (int i = 0; i < net_params.size(); ++i) { - const Blob* net_param = net_params[i].get(); - this->history_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); + const vector& shape = net_params[i]->shape(); + this->history_.push_back( + shared_ptr >(new Blob(shape))); } } template void AdaDeltaSolver::ComputeUpdateValue() { - vector > >& net_params = this->net_->params(); - vector& net_params_weight_decay = this->net_->params_weight_decay(); + const vector > >& net_params = this->net_->params(); + const vector& net_params_weight_decay = + this->net_->params_weight_decay(); Dtype delta = this->param_.delta(); Dtype momentum = this->param_.momentum(); Dtype weight_decay = this->param_.weight_decay(); diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index db89e285..277aa3a5 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -1060,7 +1060,7 @@ TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) { const Dtype kLearningRate = 0.0; const Dtype kWeightDecay = 0.0; const Dtype kMomentum = 0.95; - const int kNumIters = 500; + const int kNumIters = 4; for (int i = 0; i <= kNumIters; ++i) { this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); } @@ -1071,7 +1071,7 @@ TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) { const Dtype kLearningRate = 0.0; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; - const int kNumIters = 500; + const int kNumIters = 4; for (int i = 0; i <= kNumIters; ++i) { this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); } From f2e523e479b89902b644f3a8bb2ac51a6dc28eee Mon Sep 17 00:00:00 2001 From: Matthias Plappert Date: Sat, 18 Jul 2015 18:46:51 +0200 Subject: [PATCH 025/223] Clean up and modernize AdaDelta code; add learning rate support; add additional test cases --- examples/mnist/lenet_adadelta_solver.prototxt | 2 + ...mnist_autoencoder_solver_adadelta.prototxt | 2 + include/caffe/solver.hpp | 16 +- src/caffe/solver.cpp | 274 ++++++++---------- src/caffe/test/test_gradient_based_solver.cpp | 211 +++++++++----- 5 files changed, 260 insertions(+), 245 deletions(-) diff --git a/examples/mnist/lenet_adadelta_solver.prototxt b/examples/mnist/lenet_adadelta_solver.prototxt index b77b451d..776d1e06 100644 --- a/examples/mnist/lenet_adadelta_solver.prototxt +++ b/examples/mnist/lenet_adadelta_solver.prototxt @@ -7,6 
+7,8 @@ test_iter: 100 # Carry out testing every 500 training iterations. test_interval: 500 # The base learning rate, momentum and the weight decay of the network. +base_lr: 1.0 +lr_policy: "fixed" momentum: 0.95 weight_decay: 0.0005 # Display every 100 iterations diff --git a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt index 4e43468a..065647df 100644 --- a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt +++ b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt @@ -5,6 +5,8 @@ test_state: { stage: 'test-on-test' } test_iter: 100 test_interval: 500 test_compute_loss: true +base_lr: 1.0 +lr_policy: "fixed" momentum: 0.95 delta: 1e-8 display: 100 diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 495cd4f1..5fefd01e 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -82,12 +82,12 @@ class SGDSolver : public Solver { const vector > >& history() { return history_; } protected: + void PreSolve(); Dtype GetLearningRate(); virtual void ApplyUpdate(); virtual void Normalize(int param_id); virtual void Regularize(int param_id); virtual void ComputeUpdateValue(int param_id, Dtype rate); - virtual void PreSolve(); virtual void ClipGradients(); virtual void SnapshotSolverState(const string& model_filename); virtual void SnapshotSolverStateToBinaryProto(const string& model_filename); @@ -162,19 +162,13 @@ template class AdaDeltaSolver : public SGDSolver { public: explicit AdaDeltaSolver(const SolverParameter& param) - : SGDSolver(param) { PreSolve(); constructor_sanity_check(); } + : SGDSolver(param) { AdaDeltaPreSolve(); } explicit AdaDeltaSolver(const string& param_file) - : SGDSolver(param_file) { PreSolve(); constructor_sanity_check(); } + : SGDSolver(param_file) { AdaDeltaPreSolve(); } protected: - virtual void PreSolve(); - virtual void ComputeUpdateValue(); - void constructor_sanity_check() { - CHECK_EQ(0, this->param_.base_lr()) - << "Learning rate cannot be used with AdaDelta."; - CHECK_EQ("", this->param_.lr_policy()) - << "Learning rate policy cannot be applied to AdaDelta."; - } + void AdaDeltaPreSolve(); + virtual void ComputeUpdateValue(int param_id, Dtype rate); DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); }; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 34a290ff..78902ca0 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -935,10 +935,10 @@ void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { } template -void AdaDeltaSolver::PreSolve() { +void AdaDeltaSolver::AdaDeltaPreSolve() { // Add the extra history entries for AdaDelta after those from // SGDSolver::PreSolve - const vector > >& net_params = this->net_->params(); + const vector*>& net_params = this->net_->learnable_params(); for (int i = 0; i < net_params.size(); ++i) { const vector& shape = net_params[i]->shape(); this->history_.push_back( @@ -947,172 +947,134 @@ void AdaDeltaSolver::PreSolve() { } template -void AdaDeltaSolver::ComputeUpdateValue() { - const vector > >& net_params = this->net_->params(); - const vector& net_params_weight_decay = - this->net_->params_weight_decay(); +void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); Dtype delta = this->param_.delta(); Dtype momentum = this->param_.momentum(); - Dtype weight_decay = this->param_.weight_decay(); - string regularization_type = this->param_.regularization_type(); + 
Dtype local_rate = rate * net_params_lr[param_id]; size_t update_history_offset = net_params.size(); switch (Caffe::mode()) { - case Caffe::CPU: - for (int param_id = 0; param_id < net_params.size(); ++param_id) { - Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; - - if (local_decay) { - if (regularization_type == "L2") { - // add weight decay - caffe_axpy(net_params[param_id]->count(), - local_decay, - net_params[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - } else if (regularization_type == "L1") { - caffe_cpu_sign(net_params[param_id]->count(), - net_params[param_id]->cpu_data(), - this->temp_[param_id]->mutable_cpu_data()); - caffe_axpy(net_params[param_id]->count(), - local_decay, - this->temp_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - } else { - LOG(FATAL) << "Unknown regularization type: " << regularization_type; - } - } + case Caffe::CPU: { + // compute square of gradient in update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); - // compute square of gradient in update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history of gradients - caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->cpu_data(), momentum, - this->history_[param_id]->mutable_cpu_data()); - - // add delta to history to guard against dividing by zero later - caffe_set(net_params[param_id]->count(), delta, - this->temp_[param_id]->mutable_cpu_data()); - - caffe_add(net_params[param_id]->count(), - this->temp_[param_id]->cpu_data(), - this->history_[update_history_offset + param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - caffe_add(net_params[param_id]->count(), - this->temp_[param_id]->cpu_data(), - this->history_[param_id]->cpu_data(), - this->temp_[param_id]->mutable_cpu_data()); - - // divide history of updates by history of gradients - caffe_div(net_params[param_id]->count(), - this->update_[param_id]->cpu_data(), - this->temp_[param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - // jointly compute the RMS of both for update and gradient history - caffe_powx(net_params[param_id]->count(), - this->update_[param_id]->cpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_cpu_data()); - - // compute the update - caffe_mul(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), - this->update_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - - // compute square of update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history of updates - caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->cpu_data(), momentum, - this->history_[update_history_offset + param_id]->mutable_cpu_data()); - } + // update history of gradients + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[param_id]->mutable_cpu_data()); + + // add delta to history to guard against dividing by zero later + caffe_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[update_history_offset + 
param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[param_id]->cpu_data(), + this->temp_[param_id]->mutable_cpu_data()); + + // divide history of updates by history of gradients + caffe_div(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), + this->temp_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_powx(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_cpu_data()); + + // compute the update + caffe_mul(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), + this->update_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + + // compute square of update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history of updates + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_cpu_data()); + + // apply learning rate + caffe_cpu_scale(net_params[param_id]->count(), local_rate, + net_params[param_id]->cpu_diff(), + net_params[param_id]->mutable_cpu_diff()); break; - case Caffe::GPU: + } + case Caffe::GPU: { #ifndef CPU_ONLY - for (int param_id = 0; param_id < net_params.size(); ++param_id) { - Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; - - if (local_decay) { - if (regularization_type == "L2") { - // add weight decay - caffe_gpu_axpy(net_params[param_id]->count(), - local_decay, - net_params[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - } else if (regularization_type == "L1") { - caffe_gpu_sign(net_params[param_id]->count(), - net_params[param_id]->gpu_data(), - this->temp_[param_id]->mutable_gpu_data()); - caffe_gpu_axpy(net_params[param_id]->count(), - local_decay, - this->temp_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - } else { - LOG(FATAL) << "Unknown regularization type: " << regularization_type; - } - } + // compute square of gradient in update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of gradients - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[param_id]->mutable_gpu_data()); - - // add delta to history to guard against dividing by zero later - caffe_gpu_set(net_params[param_id]->count(), delta, - this->temp_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[update_history_offset + param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[param_id]->gpu_data(), - this->temp_[param_id]->mutable_gpu_data()); - - // divide history of updates by history of gradients - caffe_gpu_div(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - 
this->temp_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // jointly compute the RMS of both for update and gradient history - caffe_gpu_powx(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - // compute the update and copy to net_diff - caffe_gpu_mul(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), - this->update_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - - // compute square of update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of updates - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[update_history_offset + param_id]->mutable_gpu_data()); - } + // update history of gradients + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[param_id]->mutable_gpu_data()); + + // add delta to history to guard against dividing by zero later + caffe_gpu_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[update_history_offset + param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[param_id]->gpu_data(), + this->temp_[param_id]->mutable_gpu_data()); + + // divide history of updates by history of gradients + caffe_gpu_div(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), + this->temp_[param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_gpu_powx(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_gpu_data()); + + // compute the update and copy to net_diff + caffe_gpu_mul(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), + this->update_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + + // compute square of update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history of updates + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_gpu_data()); + + // apply learning rate + caffe_gpu_scale(net_params[param_id]->count(), local_rate, + net_params[param_id]->gpu_diff(), + net_params[param_id]->mutable_gpu_diff()); #else NO_GPU; #endif break; + } default: LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 277aa3a5..c97d4ede 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -165,10 +165,6 @@ class GradientBasedSolverTest : public MultiDeviceTest { " bottom: 'targets' " " } " "} "; - if (learning_rate != 0) { - proto << "base_lr: " << learning_rate << " "; - proto << "lr_policy: 'fixed' "; - } if (weight_decay != 0) { proto << "weight_decay: " << weight_decay << " "; } @@ -897,6 +893,139 @@ 
TYPED_TEST(NesterovSolverTest, TestSnapshotShare) { } } +template +class AdaDeltaSolverTest : public GradientBasedSolverTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + virtual void InitSolver(const SolverParameter& param) { + this->solver_.reset(new AdaDeltaSolver(param)); + } + + virtual SolverParameter_SolverType solver_type() { + return SolverParameter_SolverType_ADADELTA; + } +}; + +TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices); + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + this->TestLeastSquaresUpdate(kLearningRate); +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.95; + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.5; + const int kNumIters = 1; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.95; + const int kNumIters = 1; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.0; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdaDeltaSolverTest, + TestAdaDeltaLeastSquaresUpdateWithEverythingShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + this->share_ = true; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + const int kIterSize = 2; + this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters, + kIterSize); +} + +TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + const int kIterSize = 2; + this->share_ = true; + 
this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters, + kIterSize); +} + +TYPED_TEST(AdaDeltaSolverTest, TestSnapshot) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + for (int i = 1; i <= kNumIters; ++i) { + this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdaDeltaSolverTest, TestSnapshotShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 1.0; + const Dtype kWeightDecay = 0.1; + const Dtype kMomentum = 0.95; + const int kNumIters = 4; + this->share_ = true; + for (int i = 1; i <= kNumIters; ++i) { + this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i); + } +} + template class RMSPropSolverTest : public GradientBasedSolverTest { typedef typename TypeParam::Dtype Dtype; @@ -1003,78 +1132,4 @@ TYPED_TEST(RMSPropSolverTest, TestSnapshotShare) { } } -template -class AdaDeltaSolverTest : public GradientBasedSolverTest { - typedef typename TypeParam::Dtype Dtype; - - protected: - virtual void InitSolver(const SolverParameter& param) { - this->solver_.reset(new AdaDeltaSolver(param)); - } - - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_ADADELTA; - } -}; - -TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices); - -TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - this->TestLeastSquaresUpdate(kLearningRate); -} - -TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - const Dtype kWeightDecay = 0.5; - const Dtype kMomentum = 0.95; - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); -} - -TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - const Dtype kWeightDecay = 0.0; - const Dtype kMomentum = 0.5; - const int kNumIters = 1; - for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); - } -} - -TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - const Dtype kWeightDecay = 0.0; - const Dtype kMomentum = 0.95; - const int kNumIters = 1; - for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); - } -} - -TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - const Dtype kWeightDecay = 0.0; - const Dtype kMomentum = 0.95; - const int kNumIters = 4; - for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); - } -} - -TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) { - typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 0.0; - const Dtype kWeightDecay = 0.1; - const Dtype kMomentum = 0.95; - const int kNumIters = 4; - for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); - } -} - } // namespace caffe From 918c2aca42a0e9f87dab6f7b40d648f2c41321cb Mon Sep 17 00:00:00 2001 From: mfigurnov Date: Mon, 10 Aug 2015 13:59:35 +0300 Subject: [PATCH 
026/223] Fix truncation of value warning --- src/caffe/common.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index af96cac4..b40760ef 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -25,7 +26,7 @@ int64_t cluster_seedgen(void) { pid = getpid(); s = time(NULL); - seed = abs(((s * 181) * ((pid - 83) * 359)) % 104729); + seed = std::abs(((s * 181) * ((pid - 83) * 359)) % 104729); return seed; } From dde32b4ee631a647776e33ea92096558a2b37cbd Mon Sep 17 00:00:00 2001 From: Russell Stewart Date: Mon, 10 Aug 2015 13:50:01 -0700 Subject: [PATCH 027/223] Update net_spec.py --- python/caffe/net_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 31cde7ad..77a0e007 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -1,7 +1,7 @@ """Python net specification. This module provides a way to write nets directly in Python, using a natural, -functional style. See examples/python_nets/caffenet.py for an example. +functional style. See examples/pycaffe/caffenet.py for an example. Currently this works as a thin wrapper around the Python protobuf interface, with layers and parameters automatically generated for the "layers" and From bd2591e31e53e159c84874d26dcfe4a45d3c904e Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 11 Aug 2015 13:18:41 -0700 Subject: [PATCH 028/223] fix for learnable_param_ids_ --- src/caffe/net.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 0e5ed804..91883a10 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -485,6 +485,7 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, CHECK(this_blob->shape() == owner_blob->shape()); } const int learnable_param_id = learnable_param_ids_[owner_net_param_id]; + learnable_param_ids_.push_back(learnable_param_id); if (param_spec->has_lr_mult()) { if (has_params_lr_[learnable_param_id]) { CHECK_EQ(param_spec->lr_mult(), params_lr_[learnable_param_id]) From 0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Tue, 11 Aug 2015 21:38:06 -0700 Subject: [PATCH 029/223] Data Layers Parallel for Multi-GPU Allow data layers (and also PythonLayer when used as data layer) to be shared among worker solver's training net, and also test net for future-proof if one wants to do Multi-GPU testing. Data layers are locked during forward to ensure sequential forward. --- include/caffe/data_layers.hpp | 11 ++++++++++- include/caffe/layer.hpp | 15 +++++++++++++++ include/caffe/net.hpp | 8 +++++--- include/caffe/python_layer.hpp | 4 ++++ include/caffe/solver.hpp | 14 ++++++++++---- src/caffe/net.cpp | 32 ++++++++++++++++++++++++++++---- src/caffe/parallel.cpp | 3 +-- src/caffe/proto/caffe.proto | 4 ++++ src/caffe/solver.cpp | 23 +++++++++++++++++------ 9 files changed, 94 insertions(+), 20 deletions(-) diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 12e6c366..552d8141 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -34,6 +34,8 @@ class BaseDataLayer : public Layer { // This method may not be overridden except by the BasePrefetchingDataLayer. 
virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top) {} // Data layers have no bottoms, so reshaping is trivial. @@ -94,7 +96,8 @@ class DataLayer : public BasePrefetchingDataLayer { virtual ~DataLayer(); virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top); - + // DataLayer uses DataReader instead for sharing for parallelism + virtual inline bool ShareInParallel() const { return false; } virtual inline const char* type() const { return "Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } @@ -118,6 +121,8 @@ class DummyDataLayer : public Layer { : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector*>& bottom, const vector*>& top) {} @@ -151,6 +156,8 @@ class HDF5DataLayer : public Layer { virtual ~HDF5DataLayer(); virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector*>& bottom, const vector*>& top) {} @@ -192,6 +199,8 @@ class HDF5OutputLayer : public Layer { virtual ~HDF5OutputLayer(); virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector*>& bottom, const vector*>& top) {} diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 0771b6a8..d82197a9 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -1,6 +1,7 @@ #ifndef CAFFE_LAYER_H_ #define CAFFE_LAYER_H_ +#include #include #include #include @@ -85,6 +86,14 @@ class Layer { virtual void LayerSetUp(const vector*>& bottom, const vector*>& top) {} + /** + * @brief Whether a layer should be shared by multiple nets during data + * parallelism. By default, all layers except for data layers should + * not be shared. data layers should be shared to ensure each worker + * solver access data sequentially during data parallelism. + */ + virtual inline bool ShareInParallel() const { return false; } + /** * @brief Adjust the shapes of top blobs and internal buffers to accommodate * the shapes of the bottom blobs. 
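As context for the `ShareInParallel()` declarations above: the design in PATCH 029 lets a single data layer instance be held by the root net and by every worker net, and locks its forward pass so that workers consume batches sequentially, as the commit message above describes. The fragment below is only a standalone sketch of that locking pattern, not Caffe's code; it uses `std::mutex` and `std::shared_ptr` where Caffe forward-declares `boost::mutex` (to keep boost/thread.hpp out of headers compiled by NVCC), and the class and method names are illustrative.

    #include <memory>
    #include <mutex>

    // Sketch of the pattern the series converges on (see PATCH 030 below):
    // the lock sits behind a shared_ptr and is only taken when the layer
    // instance really is shared between nets, so unshared layers pay nothing.
    class SketchLayer {
     public:
      virtual ~SketchLayer() {}
      virtual bool ShareInParallel() const { return false; }  // data layers override to return true
      void SetShared(bool shared) { is_shared_ = shared; }    // set by the net during setup
      float Forward() {
        if (is_shared_) forward_mutex_->lock();   // serialize shared forwards
        const float loss = ForwardImpl();         // reshape + compute would go here
        if (is_shared_) forward_mutex_->unlock();
        return loss;
      }
     protected:
      virtual float ForwardImpl() { return 0.0f; }
     private:
      bool is_shared_ = false;
      std::shared_ptr<std::mutex> forward_mutex_{new std::mutex};
    };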
@@ -396,6 +405,10 @@ class Layer { } } + private: + // mutex to lock layer to ensure sequential forward + boost::mutex forward_mutex_; + DISABLE_COPY_AND_ASSIGN(Layer); }; // class Layer @@ -405,6 +418,8 @@ class Layer { template inline Dtype Layer::Forward(const vector*>& bottom, const vector*>& top) { + // Lock during forward to ensure sequential forward + boost::mutex::scoped_lock lock(forward_mutex_); Dtype loss = 0; Reshape(bottom, top); switch (Caffe::mode()) { diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index bf997553..1bf07d28 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -23,8 +23,9 @@ namespace caffe { template class Net { public: - explicit Net(const NetParameter& param); - explicit Net(const string& param_file, Phase phase); + explicit Net(const NetParameter& param, const Net* root_net = NULL); + explicit Net(const string& param_file, Phase phase, + const Net* root_net = NULL); virtual ~Net() {} /// @brief Initialize a network with a NetParameter. @@ -291,7 +292,8 @@ class Net { size_t memory_used_; /// Whether to compute and display debug info for the net. bool debug_info_; - + /// The root net that actually holds the shared layers in data parallelism + const Net* const root_net_; DISABLE_COPY_AND_ASSIGN(Net); }; diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp index 2957e742..c43c1e8a 100644 --- a/include/caffe/python_layer.hpp +++ b/include/caffe/python_layer.hpp @@ -27,6 +27,10 @@ class PythonLayer : public Layer { self_.attr("reshape")(bottom, top); } + virtual inline bool ShareInParallel() const { + return this->layer_param_.python_param().share_in_parallel(); + } + virtual inline const char* type() const { return "Python"; } protected: diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 89a6c76d..f583324a 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -17,8 +17,9 @@ namespace caffe { template class Solver { public: - explicit Solver(const SolverParameter& param); - explicit Solver(const string& param_file); + explicit Solver(const SolverParameter& param, + const Solver* root_solver = NULL); + explicit Solver(const string& param_file, const Solver* root_solver = NULL); void Init(const SolverParameter& param); void InitTrainNet(); void InitTestNets(); @@ -79,6 +80,10 @@ class Solver { vector > > test_nets_; vector callbacks_; + // The root solver that holds root nets (actually containing shared layers) + // in data parallelism + const Solver* const root_solver_; + DISABLE_COPY_AND_ASSIGN(Solver); }; @@ -89,8 +94,9 @@ class Solver { template class WorkerSolver : public Solver { public: - explicit WorkerSolver(const SolverParameter& param) - : Solver(param) {} + explicit WorkerSolver(const SolverParameter& param, + const Solver* root_solver = NULL) + : Solver(param, root_solver) {} protected: void ApplyUpdate() {} diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 5d0f4322..14f8385c 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -22,12 +22,14 @@ namespace caffe { template -Net::Net(const NetParameter& param) { +Net::Net(const NetParameter& param, const Net* root_net) + : root_net_(root_net) { Init(param); } template -Net::Net(const string& param_file, Phase phase) { +Net::Net(const string& param_file, Phase phase, const Net* root_net) + : root_net_(root_net) { NetParameter param; ReadNetParamsFromTextFileOrDie(param_file, ¶m); param.mutable_state()->set_phase(phase); @@ -36,6 +38,8 @@ Net::Net(const string& param_file, Phase phase) { template void 
Net::Init(const NetParameter& in_param) { + CHECK(Caffe::root_solver() || root_net_) + << "root_net_ needs to be set for all non-root solvers"; // Set phase from the state. phase_ = in_param.state().phase(); // Filter layers based on their include/exclude rules and @@ -79,6 +83,9 @@ void Net::Init(const NetParameter& in_param) { top_id_vecs_.resize(param.layer_size()); bottom_need_backward_.resize(param.layer_size()); for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) { + // For non-root solvers, whether this layer is shared from root_net_. + bool is_shared_layer = !Caffe::root_solver() + && root_net_->layers_[layer_id]->ShareInParallel(); // Inherit phase from net if unset. if (!param.layer(layer_id).has_phase()) { param.mutable_layer(layer_id)->set_phase(phase_); @@ -91,7 +98,12 @@ void Net::Init(const NetParameter& in_param) { << "propagate_down param must be specified " << "either 0 or bottom_size times "; } - layers_.push_back(LayerRegistry::CreateLayer(layer_param)); + if (is_shared_layer) { + LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net"; + layers_.push_back(root_net_->layers_[layer_id]); + } else { + layers_.push_back(LayerRegistry::CreateLayer(layer_param)); + } layer_names_.push_back(layer_param.name()); if (Caffe::root_solver()) { LOG(INFO) << "Creating Layer " << layer_param.name(); @@ -125,10 +137,22 @@ void Net::Init(const NetParameter& in_param) { } } // After this layer is connected, set it up. + if (is_shared_layer) { + // Set up size of top blobs using root_net_ + const vector*>& base_top = root_net_->top_vecs_[layer_id]; + const vector*>& this_top = this->top_vecs_[layer_id]; + for (int top_id = 0; top_id < base_top.size(); ++top_id) { + this_top[top_id]->ReshapeLike(*base_top[top_id]); + LOG(INFO) << "Created top blob " << top_id << " (shape: " + << this_top[top_id]->shape_string() << ") for shared layer " + << layer_param.name(); + } + } else { + layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); + } if (Caffe::root_solver()) { LOG(INFO) << "Setting up " << layer_names_[layer_id]; } - layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 5a08df6c..6e7d802b 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -218,7 +218,7 @@ P2PSync::P2PSync(shared_ptr > root_solver, solver_ = root_solver; } else { Caffe::set_root_solver(false); - solver_.reset(new WorkerSolver(param)); + solver_.reset(new WorkerSolver(param, root_solver.get())); Caffe::set_root_solver(true); } this->configure(solver_.get()); @@ -436,4 +436,3 @@ INSTANTIATE_CLASS(GPUParams); INSTANTIATE_CLASS(P2PSync); } // namespace caffe - diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 41165410..e78c6686 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -740,6 +740,10 @@ message PythonParameter { // string, dictionary in Python dict format, JSON, etc. You may parse this // string in `setup` method and use it in `forward` and `backward`. optional string param_str = 3 [default = '']; + // Whether this PythonLayer is shared among worker solvers during data parallelism. + // If true, each worker solver sequentially run forward from this layer. 
+ // This value should be set true if you are using it as a data layer. + optional bool share_in_parallel = 4 [default = false]; } // Message that stores parameters used by ReductionLayer diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index b6fd6b64..a44ff88d 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -18,14 +18,14 @@ namespace caffe { template -Solver::Solver(const SolverParameter& param) - : net_(), callbacks_() { +Solver::Solver(const SolverParameter& param, const Solver* root_solver) + : net_(), callbacks_(), root_solver_(root_solver) { Init(param); } template -Solver::Solver(const string& param_file) - : net_(), callbacks_() { +Solver::Solver(const string& param_file, const Solver* root_solver) + : net_(), callbacks_(), root_solver_(root_solver) { SolverParameter param; ReadProtoFromTextFileOrDie(param_file, ¶m); Init(param); @@ -33,6 +33,8 @@ Solver::Solver(const string& param_file) template void Solver::Init(const SolverParameter& param) { + CHECK(Caffe::root_solver() || root_solver_) + << "root_solver_ needs to be set for all non-root solvers"; LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: " << std::endl << param.DebugString(); param_ = param; @@ -88,7 +90,11 @@ void Solver::InitTrainNet() { net_state.MergeFrom(net_param.state()); net_state.MergeFrom(param_.train_state()); net_param.mutable_state()->CopyFrom(net_state); - net_.reset(new Net(net_param)); + if (Caffe::root_solver()) { + net_.reset(new Net(net_param)); + } else { + net_.reset(new Net(net_param, root_solver_->net_.get())); + } } template @@ -163,7 +169,12 @@ void Solver::InitTestNets() { net_params[i].mutable_state()->CopyFrom(net_state); LOG(INFO) << "Creating test net (#" << i << ") specified by " << sources[i]; - test_nets_[i].reset(new Net(net_params[i])); + if (Caffe::root_solver()) { + test_nets_[i].reset(new Net(net_params[i])); + } else { + test_nets_[i].reset(new Net(net_params[i], + root_solver_->test_nets_[i].get())); + } test_nets_[i]->set_debug_info(param_.debug_info()); } } From 6b50ed6fc1897ce1ccd673cf0287788b38b58a6d Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Wed, 12 Aug 2015 12:05:56 -0700 Subject: [PATCH 030/223] Apply mutex only to shared layers and fix NVCC warning --- include/caffe/layer.hpp | 43 ++++++++++++++++++++++++++++++++++++----- src/caffe/layer.cpp | 27 ++++++++++++++++++++++++++ src/caffe/net.cpp | 7 ++++--- 3 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 src/caffe/layer.cpp diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index d82197a9..a0d1d4ec 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -1,7 +1,6 @@ #ifndef CAFFE_LAYER_H_ #define CAFFE_LAYER_H_ -#include #include #include #include @@ -12,6 +11,12 @@ #include "caffe/proto/caffe.pb.h" #include "caffe/util/device_alternate.hpp" +/** + Forward declare boost::thread instead of including boost/thread.hpp + to avoid a boost/NVCC issues (#1009, #1010) on OSX. + */ +namespace boost { class mutex; } + namespace caffe { /** @@ -33,7 +38,7 @@ class Layer { * layer. */ explicit Layer(const LayerParameter& param) - : layer_param_(param) { + : layer_param_(param), is_shared_(false) { // Set phase and copy blobs (if there are any). 
phase_ = param.phase(); if (layer_param_.blobs_size() > 0) { @@ -61,6 +66,7 @@ class Layer { */ void SetUp(const vector*>& bottom, const vector*>& top) { + InitMutex(); CheckBlobCounts(bottom, top); LayerSetUp(bottom, top); Reshape(bottom, top); @@ -94,6 +100,22 @@ class Layer { */ virtual inline bool ShareInParallel() const { return false; } + /** @brief Return whether this layer is actually shared by other nets. + * If ShareInParallel() is true and using more than one GPU and the + * net has TRAIN phase, then this function is expected return true. + */ + inline bool IsShared() const { return is_shared_; } + + /** @brief Set whether this layer is actually shared by other nets + * If ShareInParallel() is true and using more than one GPU and the + * net has TRAIN phase, then is_shared should be set true. + */ + inline void SetShared(bool is_shared) { + CHECK(ShareInParallel() || !is_shared) + << type() << "Layer does not support sharing."; + is_shared_ = is_shared; + } + /** * @brief Adjust the shapes of top blobs and internal buffers to accommodate * the shapes of the bottom blobs. @@ -406,8 +428,18 @@ class Layer { } private: - // mutex to lock layer to ensure sequential forward - boost::mutex forward_mutex_; + /** Whether this layer is actually shared by other nets*/ + bool is_shared_; + + /** The mutex for sequential forward if this layer is shared */ + shared_ptr forward_mutex_; + + /** Initialize forward_mutex_ */ + void InitMutex(); + /** Lock forward_mutex_ if this layer is shared */ + void Lock(); + /** Unlock forward_mutex_ if this layer is shared */ + void Unlock(); DISABLE_COPY_AND_ASSIGN(Layer); }; // class Layer @@ -419,7 +451,7 @@ template inline Dtype Layer::Forward(const vector*>& bottom, const vector*>& top) { // Lock during forward to ensure sequential forward - boost::mutex::scoped_lock lock(forward_mutex_); + Lock(); Dtype loss = 0; Reshape(bottom, top); switch (Caffe::mode()) { @@ -450,6 +482,7 @@ inline Dtype Layer::Forward(const vector*>& bottom, default: LOG(FATAL) << "Unknown caffe mode."; } + Unlock(); return loss; } diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp new file mode 100644 index 00000000..3b912898 --- /dev/null +++ b/src/caffe/layer.cpp @@ -0,0 +1,27 @@ +#include +#include "caffe/layer.hpp" + +namespace caffe { + +template +void Layer::InitMutex() { + forward_mutex_.reset(new boost::mutex()); +} + +template +void Layer::Lock() { + if (IsShared()) { + forward_mutex_->lock(); + } +} + +template +void Layer::Unlock() { + if (IsShared()) { + forward_mutex_->unlock(); + } +} + +INSTANTIATE_CLASS(Layer); + +} // namespace caffe diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 14f8385c..7f5bdf7e 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -84,7 +84,7 @@ void Net::Init(const NetParameter& in_param) { bottom_need_backward_.resize(param.layer_size()); for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) { // For non-root solvers, whether this layer is shared from root_net_. - bool is_shared_layer = !Caffe::root_solver() + bool share_from_root = !Caffe::root_solver() && root_net_->layers_[layer_id]->ShareInParallel(); // Inherit phase from net if unset. 
if (!param.layer(layer_id).has_phase()) { @@ -98,9 +98,10 @@ void Net::Init(const NetParameter& in_param) { << "propagate_down param must be specified " << "either 0 or bottom_size times "; } - if (is_shared_layer) { + if (share_from_root) { LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net"; layers_.push_back(root_net_->layers_[layer_id]); + layers_[layer_id]->SetShared(true); } else { layers_.push_back(LayerRegistry::CreateLayer(layer_param)); } @@ -137,7 +138,7 @@ void Net::Init(const NetParameter& in_param) { } } // After this layer is connected, set it up. - if (is_shared_layer) { + if (share_from_root) { // Set up size of top blobs using root_net_ const vector*>& base_top = root_net_->top_vecs_[layer_id]; const vector*>& this_top = this->top_vecs_[layer_id]; From 3c6485a95e2c5653c601f07fd7f5875cf956f3e6 Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Thu, 13 Aug 2015 17:10:46 -0700 Subject: [PATCH 031/223] fixing the database param The example talks about LevelDB as the db backend but has lmdb as the param in the execution. --- examples/feature_extraction/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/feature_extraction/readme.md b/examples/feature_extraction/readme.md index a980b8b3..f3ec3609 100644 --- a/examples/feature_extraction/readme.md +++ b/examples/feature_extraction/readme.md @@ -51,7 +51,7 @@ Extract Features Now everything necessary is in place. - ./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 lmdb + ./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 leveldb The name of feature blob that you extract is `fc7`, which represents the highest level feature of the reference model. We can use any other layer, as well, such as `conv5` or `pool3`. From 4c7e58e201ab2a06bb3b08d7c148e3b67988f853 Mon Sep 17 00:00:00 2001 From: PatWie Date: Fri, 14 Aug 2015 13:06:13 +0200 Subject: [PATCH 032/223] information about new implemented solvers --- docs/tutorial/solver.md | 79 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/docs/tutorial/solver.md b/docs/tutorial/solver.md index 17f793ef..b150f648 100644 --- a/docs/tutorial/solver.md +++ b/docs/tutorial/solver.md @@ -6,7 +6,14 @@ title: Solver / Model Optimization The solver orchestrates model optimization by coordinating the network's forward inference and backward gradients to form parameter updates that attempt to improve the loss. The responsibilities of learning are divided between the Solver for overseeing the optimization and generating parameter updates and the Net for yielding loss and gradients. -The Caffe solvers are Stochastic Gradient Descent (SGD), Adaptive Gradient (ADAGRAD), and Nesterov's Accelerated Gradient (NESTEROV). +The Caffe solvers are: + +- Stochastic Gradient Descent (`SGD`), +- AdaDelta (`ADADELTA`), +- Adaptive Gradient (`ADAGRAD`), +- Adam (`ADAM`), +- Nesterov's Accelerated Gradient (`NESTEROV`) and +- RMSprop (`RMSPROP`) The solver @@ -104,6 +111,32 @@ If learning diverges (e.g., you start to see very large or `NaN` or `inf` loss v [ImageNet Classification with Deep Convolutional Neural Networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf). *Advances in Neural Information Processing Systems*, 2012. 
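The sections added below (AdaDelta, Adam, RMSprop) document the update rules that the corresponding solver patches in this series implement. As a reference point for the AdaDelta section that follows, here is a small standalone sketch of one AdaDelta step over a flat parameter array; it is not code from the patch, it assumes the solver's `momentum` parameter plays the role of Zeiler's decay rate (rho) and `delta` the epsilon smoothing term, and it folds in the fixed base learning rate that the later clean-up patch (PATCH 025) applies at the end.

    #include <cmath>
    #include <vector>

    // One AdaDelta step over a flat parameter vector, mirroring the order of
    // operations in AdaDeltaSolver::ComputeUpdateValue(): update E[g^2], form
    // the update from the previous E[dx^2], then update E[dx^2]. Names
    // (rho, eps, base_lr) are illustrative.
    void AdaDeltaStep(std::vector<float>* w, const std::vector<float>& grad,
                      std::vector<float>* sq_grad_avg,    // running E[g^2]
                      std::vector<float>* sq_update_avg,  // running E[dx^2]
                      float rho, float eps, float base_lr) {
      for (size_t i = 0; i < w->size(); ++i) {
        const float g = grad[i];
        // E[g^2] <- rho * E[g^2] + (1 - rho) * g^2
        (*sq_grad_avg)[i] = rho * (*sq_grad_avg)[i] + (1.0f - rho) * g * g;
        // dx = g * RMS(E[dx^2]) / RMS(E[g^2]),  with RMS(x) = sqrt(x + eps)
        const float dx = g * std::sqrt(((*sq_update_avg)[i] + eps) /
                                       ((*sq_grad_avg)[i] + eps));
        // E[dx^2] <- rho * E[dx^2] + (1 - rho) * dx^2
        (*sq_update_avg)[i] = rho * (*sq_update_avg)[i] + (1.0f - rho) * dx * dx;
        // the solver finally scales by the (fixed) learning rate and subtracts
        (*w)[i] -= base_lr * dx;
      }
    }

With `base_lr: 1.0` and `momentum: 0.95`, as in the example prototxts touched by these patches, this reduces to Zeiler's original rule.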
+### AdaDelta + +The **AdaDelta** (`solver_type: ADADELTA`) method (M. Zeiler [1]) is a "robust learning rate method". It is a gradient-based optimization method (like SGD). The update formulas are + +$$ +\begin{align} +(v_t)_i &= \frac{\operatorname{RMS}((v_{t-1})_i)}{\operatorname{RMS}\left( \nabla L(W_t) \right)_{i}} \left( \nabla L(W_{t'}) \right)_i +\\ +\operatorname{RMS}\left( \nabla L(W_t) \right)_{i} &= \sqrt{E[g^2] + \varepsilon} +\\ +E[g^2]_t &= \delta{E[g^2]_{t-1} } + (1-\delta)g_{t}^2 +\end{align} +$$ + +and + +$$ +(W_{t+1})_i = +(W_t)_i - \alpha +(v_t)_i. +$$ + +[1] M. Zeiler + [ADADELTA: AN ADAPTIVE LEARNING RATE METHOD](http://arxiv.org/pdf/1212.5701.pdf). + *arXiv preprint*, 2012. + ### AdaGrad The **adaptive gradient** (`solver_type: ADAGRAD`) method (Duchi et al. [1]) is a gradient-based optimization method (like SGD) that attempts to "find needles in haystacks in the form of very predictive but rarely seen features," in Duchi et al.'s words. @@ -124,6 +157,28 @@ Note that in practice, for weights $$ W \in \mathcal{R}^d $$, AdaGrad implementa [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.magicbroom.info/Papers/DuchiHaSi10.pdf). *The Journal of Machine Learning Research*, 2011. +### Adam + +The **Adam** method (`solver_type: ADAM`), proposed in Kingma et al. [1], is a gradient-based optimization method (like SGD). This includes an "adaptive moment estimation" ($$m_t, v_t$$) and can be regarded as a generalization of AdaGrad. The update formulas are + +$$ +(m_t)_i = \beta_1 (m_{t-1})_i + (1-\beta_1)(\nabla L(W_t))_i,\\ +(v_t)_i = \beta_2 (v_{t-1})_i + (1-\beta_2)(\nabla L(W_t))_i^2 +$$ + +and + +$$ +(W_{t+1})_i = +(W_t)_i - \alpha \frac{\sqrt{1-(\beta_2)_i^t}}{1-(\beta_1)_i^t}\frac{(m_t)_i}{\sqrt{(v_t)_i}+\varepsilon}. +$$ + +Kingma et al. [1] proposed to use $$\beta_1 = 0.9, \beta_2 = 0.999, \varepsilon = 10^{-8}$$ as default values. Caffe uses the values of `momentum, momentum2, delta` for $$\beta_1, \beta_2, \varepsilon$$, respectively. + +[1] D. Kingma, J. Ba. + [Adam: A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980). + *International Conference for Learning Representations*, 2015. + ### NAG **Nesterov's accelerated gradient** (`solver_type: NESTEROV`) was proposed by Nesterov [1] as an "optimal" method of convex optimization, achieving a convergence rate of $$ \mathcal{O}(1/t^2) $$ rather than the $$ \mathcal{O}(1/t) $$. @@ -149,6 +204,28 @@ What distinguishes the method from SGD is the weight setting $$ W $$ on which we [On the Importance of Initialization and Momentum in Deep Learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf). *Proceedings of the 30th International Conference on Machine Learning*, 2013. +### RMSprop + +The **RMSprop** method (`solver_type: RMSPROP`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are + +$$ +(v_t)_i = +\begin{cases} +(v_{t-1})_i + \delta, &(\nabla L(W_t))_i(\nabla L(W_{t-1}))_i > 0\\ +(v_{t-1})_i \cdot (1-\delta), & \text{else} +\end{cases} +$$ + +$$ +(W_{t+1})_i =(W_t)_i - \alpha (v_t)_i, +$$ + +If the gradient updates result in oscillations, the update is scaled down by a factor of $$1-\delta$$; otherwise it is increased by $$\delta$$. The default value of $$\delta$$ (`rms_decay`) is set to $$\delta = 0.02$$. + +[1] T. Tieleman, and G. Hinton. + [RMSProp: Divide the gradient by a running average of its recent magnitude](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf).
+ *COURSERA: Neural Networks for Machine Learning.Technical report*, 2012. + ## Scaffolding The solver scaffolding prepares the optimization method and initializes the model to be learned in `Solver::Presolve()`. From e696f85abe2c50107fbb67b211bf7dad0f87ade0 Mon Sep 17 00:00:00 2001 From: LI Yi Date: Fri, 14 Aug 2015 20:51:45 +0800 Subject: [PATCH 033/223] Destroy CUDA stream when finished --- src/caffe/layers/base_data_layer.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 20f76f62..5303fe9c 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -74,7 +74,7 @@ void BasePrefetchingDataLayer::InternalThreadEntry() { #ifndef CPU_ONLY cudaStream_t stream; if (Caffe::mode() == Caffe::GPU) { - cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); } #endif @@ -85,7 +85,7 @@ void BasePrefetchingDataLayer::InternalThreadEntry() { #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { batch->data_.data().get()->async_gpu_push(stream); - cudaStreamSynchronize(stream); + CUDA_CHECK(cudaStreamSynchronize(stream)); } #endif prefetch_full_.push(batch); @@ -93,6 +93,11 @@ void BasePrefetchingDataLayer::InternalThreadEntry() { } catch (boost::thread_interrupted&) { // Interrupted exception is expected on shutdown } +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaStreamDestroy(stream)); + } +#endif } template From 4e4c89b026cb0b7f296aae4dfbb45e2eb1654f43 Mon Sep 17 00:00:00 2001 From: PatWie Date: Mon, 3 Aug 2015 17:31:14 +0200 Subject: [PATCH 034/223] Adam solver This commit implements the Adam solver by Kingma et. al for CPU and GPU. All solver parameters are defined in the caffe.proto. This also adds an example for the MNIST dataset. --- examples/mnist/lenet_solver_adam.prototxt | 26 +++ examples/mnist/train_lenet_adam.sh | 3 + include/caffe/solver.hpp | 17 ++ src/caffe/proto/caffe.proto | 7 +- src/caffe/solver.cpp | 104 +++++++++++ src/caffe/test/test_gradient_based_solver.cpp | 170 +++++++++++++++--- 6 files changed, 299 insertions(+), 28 deletions(-) create mode 100644 examples/mnist/lenet_solver_adam.prototxt create mode 100755 examples/mnist/train_lenet_adam.sh diff --git a/examples/mnist/lenet_solver_adam.prototxt b/examples/mnist/lenet_solver_adam.prototxt new file mode 100644 index 00000000..d22c5718 --- /dev/null +++ b/examples/mnist/lenet_solver_adam.prototxt @@ -0,0 +1,26 @@ +# The train/test net protocol buffer definition +# this follows "ADAM: A METHOD FOR STOCHASTIC OPTIMIZATION" +net: "examples/mnist/lenet_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +# Carry out testing every 500 training iterations. 
+test_interval: 500 +# All parameters are from the cited paper above +base_lr: 0.001 +momentum: 0.9 +momentum2: 0.999 +# since Adam dynamically changes the learning rate, we set the base learning +# rate to a fixed value +lr_policy: "fixed" +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "examples/mnist/lenet" +# solver mode: CPU or GPU +solver_type: ADAM +solver_mode: GPU diff --git a/examples/mnist/train_lenet_adam.sh b/examples/mnist/train_lenet_adam.sh new file mode 100755 index 00000000..a32ecf2d --- /dev/null +++ b/examples/mnist/train_lenet_adam.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +./build/tools/caffe train --solver=examples/mnist/lenet_solver_adam.prototxt diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index d2b99923..582aa142 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -217,6 +217,21 @@ class AdaDeltaSolver : public SGDSolver { DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); }; +template +class AdamSolver : public SGDSolver { + public: + explicit AdamSolver(const SolverParameter& param) + : SGDSolver(param) { AdamPreSolve();} + explicit AdamSolver(const string& param_file) + : SGDSolver(param_file) { AdamPreSolve(); } + + protected: + void AdamPreSolve(); + virtual void ComputeUpdateValue(int param_id, Dtype rate); + + DISABLE_COPY_AND_ASSIGN(AdamSolver); +}; + template Solver* GetSolver(const SolverParameter& param) { SolverParameter_SolverType type = param.solver_type(); @@ -232,6 +247,8 @@ Solver* GetSolver(const SolverParameter& param) { return new RMSPropSolver(param); case SolverParameter_SolverType_ADADELTA: return new AdaDeltaSolver(param); + case SolverParameter_SolverType_ADAM: + return new AdamSolver(param); default: LOG(FATAL) << "Unknown SolverType: " << type; } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index fc0d961a..d4c97d2b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -98,7 +98,7 @@ message NetParameter { // NOTE // Update the next available ID when you add a new SolverParameter field. 
// -// SolverParameter next available ID: 39 (last added: rms_decay) +// SolverParameter next available ID: 40 (last added: momentum2) message SolverParameter { ////////////////////////////////////////////////////////////////////////////// // Specifying the train and test networks @@ -216,10 +216,13 @@ message SolverParameter { ADAGRAD = 2; RMSPROP = 3; ADADELTA = 4; + ADAM = 5; } optional SolverType solver_type = 30 [default = SGD]; - // numerical stability for AdaGrad + // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam optional float delta = 31 [default = 1e-8]; + // parameters for the Adam solver + optional float momentum2 = 39 [default = 0.999]; // RMSProp decay value // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 248f238e..9348e11c 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -1114,11 +1114,115 @@ void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { } } +template +void AdamSolver::AdamPreSolve() { + // Add the extra history entries for Adam after those from + // SGDSolver::PreSolve + const vector*>& net_params = this->net_->learnable_params(); + for (int i = 0; i < net_params.size(); ++i) { + const vector& shape = net_params[i]->shape(); + this->history_.push_back( + shared_ptr >(new Blob(shape))); + } +} + +template +void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype local_rate = rate * net_params_lr[param_id]; + const Dtype beta1 = this->param_.momentum(); + const Dtype beta2 = this->param_.momentum2(); + + // we create aliases for convenience + size_t update_history_offset = net_params.size(); + Blob* val_m = this->history_[param_id].get(); + Blob* val_v = this->history_[param_id + update_history_offset].get(); + Blob* val_t = this->temp_[param_id].get(); + + const int t = this->iter_ + 1; + const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) / + (Dtype(1.) 
- pow(beta1, t)); + const int N = net_params[param_id]->count(); + const Dtype eps_hat = this->param_.delta(); + + switch (Caffe::mode()) { + case Caffe::CPU: { + // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t + caffe_cpu_axpby(N, Dtype(1)-beta1, + net_params[param_id]->cpu_diff(), beta1, + val_m->mutable_cpu_data()); + + // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 + caffe_mul(N, + net_params[param_id]->cpu_diff(), + net_params[param_id]->cpu_diff(), + val_t->mutable_cpu_data()); + caffe_cpu_axpby(N, Dtype(1)-beta2, + val_t->cpu_data(), beta2, + val_v->mutable_cpu_data()); + + // set update + caffe_powx(N, + val_v->cpu_data(), Dtype(0.5), + val_t->mutable_cpu_data()); + caffe_add_scalar(N, eps_hat, val_t->mutable_cpu_data()); + caffe_div(N, + val_m->cpu_data(), + val_t->cpu_data(), + val_t->mutable_cpu_data()); + + caffe_cpu_scale(N, local_rate*correction, + val_t->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t + caffe_gpu_axpby(N, Dtype(1)-beta1, + net_params[param_id]->gpu_diff(), beta1, + val_m->mutable_gpu_data()); + + // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 + caffe_gpu_mul(N, + net_params[param_id]->gpu_diff(), + net_params[param_id]->gpu_diff(), + val_t->mutable_gpu_data()); + caffe_gpu_axpby(N, Dtype(1)-beta2, + val_t->gpu_data(), beta2, + val_v->mutable_gpu_data()); + + // set update + caffe_gpu_powx(N, + val_v->gpu_data(), Dtype(0.5), + val_t->mutable_gpu_data()); + caffe_gpu_add_scalar(N, eps_hat, + val_t->mutable_gpu_data()); + caffe_gpu_div(N, + val_m->gpu_data(), + val_t->gpu_data(), + val_t->mutable_gpu_data()); + + caffe_gpu_scale(N, local_rate*correction, + val_t->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + INSTANTIATE_CLASS(Solver); INSTANTIATE_CLASS(SGDSolver); INSTANTIATE_CLASS(NesterovSolver); INSTANTIATE_CLASS(AdaGradSolver); INSTANTIATE_CLASS(RMSPropSolver); INSTANTIATE_CLASS(AdaDeltaSolver); +INSTANTIATE_CLASS(AdamSolver); } // namespace caffe diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 1d255a86..dcbfff1c 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -42,7 +42,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { // TODO this is brittle and the hdf5 file should be checked instead. int num_, channels_, height_, width_; bool share_; - Dtype delta_; // Stability constant for AdaGrad. + Dtype delta_; // Stability constant for RMSProp, AdaGrad, AdaDelta and Adam // Test data: check out generate_sample_data.py in the same directory. string* input_file_; @@ -65,10 +65,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { LOG(FATAL) << "Unknown Caffe mode: " << Caffe::mode(); } InitSolver(param); - delta_ = (solver_type() == SolverParameter_SolverType_ADAGRAD || - solver_type() == SolverParameter_SolverType_RMSPROP || - solver_type() == SolverParameter_SolverType_ADADELTA) ? - param.delta() : 0; + delta_ = param.delta(); } string RunLeastSquaresSolver(const Dtype learning_rate, @@ -216,7 +213,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { // updated_params will store the updated weight and bias results, // using the blobs' diffs to hold the update values themselves. 
void ComputeLeastSquaresUpdate(const Dtype learning_rate, - const Dtype weight_decay, const Dtype momentum, + const Dtype weight_decay, const Dtype momentum, const int num_iters, vector > >* updated_params) { const int N = num_; const int D = channels_ * height_ * width_; @@ -282,7 +279,8 @@ class GradientBasedSolverTest : public MultiDeviceTest { ((i == D) ? bias.cpu_data()[0] : weights.cpu_data()[i]); // Finally, compute update. const vector > >& history = solver_->history(); - if (solver_type() != SolverParameter_SolverType_ADADELTA) { + if (solver_type() != SolverParameter_SolverType_ADADELTA + && solver_type() != SolverParameter_SolverType_ADAM) { ASSERT_EQ(2, history.size()); // 1 blob for weights, 1 for bias } else { ASSERT_EQ(4, history.size()); // additional blobs for update history @@ -312,16 +310,31 @@ class GradientBasedSolverTest : public MultiDeviceTest { case SolverParameter_SolverType_ADADELTA: { const Dtype update_history_value = (i == D) ? - history[3]->cpu_data()[0] : history[2]->cpu_data()[i]; + history[1 + num_param_blobs]->cpu_data()[0] : + history[0 + num_param_blobs]->cpu_data()[i]; const Dtype weighted_gradient_average = momentum * history_value + (1 - momentum) * (grad * grad); update_value = grad * std::sqrt((update_history_value + delta_) / - (weighted_gradient_average + delta_)); + (weighted_gradient_average + delta_)) * learning_rate; // not actually needed, just here for illustrative purposes // const Dtype weighted_update_average = // momentum * update_history_value + (1 - momentum) * (update_value); break; } + case SolverParameter_SolverType_ADAM: { + const Dtype momentum2 = 0.999; + const Dtype m = history_value; + const Dtype v = (i == D) ? + history[1 + num_param_blobs]->cpu_data()[0] : + history[0 + num_param_blobs]->cpu_data()[i]; + const Dtype val_m = (1 - momentum) * grad + momentum * m; + const Dtype val_v = (1 - momentum2) * grad * grad + momentum2 * v; + Dtype alpha_t = learning_rate * + std::sqrt(Dtype(1) - pow(momentum2, num_iters)) / + (Dtype(1.) - pow(momentum, num_iters)); + update_value = alpha_t * val_m / (std::sqrt(val_v) + delta_); + break; + } default: LOG(FATAL) << "Unknown solver type: " << solver_type(); } @@ -465,7 +478,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { // Compute the (K+1)th update using the analytic least squares gradient. vector > > updated_params; ComputeLeastSquaresUpdate(learning_rate, weight_decay, momentum, - &updated_params); + iter_to_check + 1, &updated_params); // Reinitialize the solver and run K+1 solver iterations. 
num_ = kNum; @@ -946,13 +959,13 @@ TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices); TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; this->TestLeastSquaresUpdate(kLearningRate); } TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.5; const Dtype kMomentum = 0.95; this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); @@ -960,64 +973,64 @@ TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) { TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.0; const Dtype kMomentum = 0.5; const int kNumIters = 1; for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); } } TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.0; const Dtype kMomentum = 0.95; const int kNumIters = 1; for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); } } TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.0; const Dtype kMomentum = 0.95; const int kNumIters = 4; for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); } } TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); } } TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverythingShare) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; this->share_ = true; for (int i = 0; i <= kNumIters; ++i) { - this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); } } TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccum) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; @@ -1028,7 +1041,7 @@ TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccum) { TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) 
{ typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; @@ -1040,7 +1053,7 @@ TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) { TYPED_TEST(AdaDeltaSolverTest, TestSnapshot) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; @@ -1051,7 +1064,7 @@ TYPED_TEST(AdaDeltaSolverTest, TestSnapshot) { TYPED_TEST(AdaDeltaSolverTest, TestSnapshotShare) { typedef typename TypeParam::Dtype Dtype; - const Dtype kLearningRate = 1.0; + const Dtype kLearningRate = 0.1; const Dtype kWeightDecay = 0.1; const Dtype kMomentum = 0.95; const int kNumIters = 4; @@ -1061,6 +1074,111 @@ TYPED_TEST(AdaDeltaSolverTest, TestSnapshotShare) { } } +template +class AdamSolverTest : public GradientBasedSolverTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + virtual void InitSolver(const SolverParameter& param) { + SolverParameter new_param = param; + const Dtype momentum = 0.9; + new_param.set_momentum(momentum); + const Dtype momentum2 = 0.999; + new_param.set_momentum2(momentum2); + this->solver_.reset(new AdamSolver(new_param)); + } + virtual SolverParameter_SolverType solver_type() { + return SolverParameter_SolverType_ADAM; + } +}; + +TYPED_TEST_CASE(AdamSolverTest, TestDtypesAndDevices); + +TYPED_TEST(AdamSolverTest, TestAdamLeastSquaresUpdate) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0; + const Dtype kMomentum = 0.9; + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); +} + +TYPED_TEST(AdamSolverTest, TestAdamLeastSquaresUpdateWithWeightDecay) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum); +} + +TYPED_TEST(AdamSolverTest, TestAdamLeastSquaresUpdateWithEverything) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdamSolverTest, TestAdamLeastSquaresUpdateWithEverythingShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + this->share_ = true; + for (int i = 0; i <= kNumIters; ++i) { + this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdamSolverTest, TestLeastSquaresUpdateWithEverythingAccum) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + const int kIterSize = 2; + this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters, + kIterSize); +} + +TYPED_TEST(AdamSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + const int kIterSize = 2; + this->share_ = true; + 
this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters, + kIterSize); +} + +TYPED_TEST(AdamSolverTest, TestSnapshot) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + for (int i = 1; i <= kNumIters; ++i) { + this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i); + } +} + +TYPED_TEST(AdamSolverTest, TestSnapshotShare) { + typedef typename TypeParam::Dtype Dtype; + const Dtype kLearningRate = 0.01; + const Dtype kWeightDecay = 0.5; + const Dtype kMomentum = 0.9; + const int kNumIters = 4; + this->share_ = true; + for (int i = 1; i <= kNumIters; ++i) { + this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i); + } +} + template class RMSPropSolverTest : public GradientBasedSolverTest { typedef typename TypeParam::Dtype Dtype; From bf42e6ebf7c56ff2f0d13bdcc7294d357d7592c6 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Thu, 13 Aug 2015 22:41:21 -0700 Subject: [PATCH 035/223] Cite Adam paper in solver.hpp --- include/caffe/solver.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 582aa142..ab12ef1b 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -217,6 +217,14 @@ class AdaDeltaSolver : public SGDSolver { DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); }; +/** + * @brief AdamSolver, an algorithm for first-order gradient-based optimization + * of stochastic objective functions, based on adaptive estimates of + * lower-order moments. Described in [1]. + * + * [1] D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization." + * arXiv preprint arXiv:1412.6980v8 (2014). + */ template class AdamSolver : public SGDSolver { public: From 1d820f7fa81e6b02a64140a93edbfd30dc529e8b Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Fri, 14 Aug 2015 09:19:48 -0700 Subject: [PATCH 036/223] Malloc at least one byte in Parallel --- src/caffe/parallel.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 6e7d802b..d48136c5 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -64,7 +64,9 @@ static size_t total_size(const vector*>& params) { size_t size = 0; for (int i = 0; i < params.size(); ++i) size += params[i]->count(); - return size; + // Size have at least one byte, otherwise cudaMalloc fails if net has no + // learnable parameters. + return (size > 0) ? size : 1; } template From 98593e3aa64d9a8f42723fb03fa46a1343e12c33 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Fri, 14 Aug 2015 11:15:56 -0700 Subject: [PATCH 037/223] Make classification.bin support models with less than 5 classes The example program would crash if the number of classes was less than 5, since it was still attempting to get the top 5 predictions. 
Close #2585 --- examples/cpp_classification/classification.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index 1c6371e3..dc8b863f 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,7 @@ static std::vector Argmax(const std::vector& v, int N) { std::vector Classifier::Classify(const cv::Mat& img, int N) { std::vector output = Predict(img); + N = std::min(labels_.size(), N); std::vector maxN = Argmax(output, N); std::vector predictions; for (int i = 0; i < N; ++i) { From e94be07e2d7e186821698df053ef77a35fe71c26 Mon Sep 17 00:00:00 2001 From: philkr Date: Thu, 23 Jul 2015 08:33:58 -0700 Subject: [PATCH 038/223] Exposing blob loss weight to python --- python/caffe/_caffe.cpp | 4 ++++ python/caffe/pycaffe.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index bb5130fd..e1ae3ec7 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -211,6 +211,8 @@ BOOST_PYTHON_MODULE(_caffe) { .def("copy_from", static_cast::*)(const string)>( &Net::CopyTrainedLayersFrom)) .def("share_with", &Net::ShareTrainedLayersWith) + .add_property("_blob_loss_weights", bp::make_function( + &Net::blob_loss_weights, bp::return_internal_reference<>())) .add_property("_blobs", bp::make_function(&Net::blobs, bp::return_internal_reference<>())) .add_property("layers", bp::make_function(&Net::layers, @@ -293,6 +295,8 @@ BOOST_PYTHON_MODULE(_caffe) { .def(bp::vector_indexing_suite >()); bp::class_ >("IntVec") .def(bp::vector_indexing_suite >()); + bp::class_ >("DtypeVec") + .def(bp::vector_indexing_suite >()); bp::class_ > > >("NetVec") .def(bp::vector_indexing_suite > >, true>()); bp::class_ >("BoolVec") diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index e8a676a2..4f980a92 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -27,6 +27,15 @@ def _Net_blobs(self): return OrderedDict(zip(self._blob_names, self._blobs)) +@property +def _Net_blob_loss_weights(self): + """ + An OrderedDict (bottom to top, i.e., input to output) of network + blob loss weights indexed by name + """ + return OrderedDict(zip(self._blob_names, self._blob_loss_weights)) + + @property def _Net_params(self): """ @@ -270,6 +279,7 @@ def _Net_batch(self, blobs): # Attach methods to Net. Net.blobs = _Net_blobs +Net.blob_loss_weights = _Net_blob_loss_weights Net.params = _Net_params Net.forward = _Net_forward Net.backward = _Net_backward From 7f4ffcd7c4d3896fd2a40cc4dd153ba04b1ba968 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 14 Aug 2015 12:52:39 -0700 Subject: [PATCH 039/223] [net] improve net config and shape mismatch error messages --- src/caffe/net.cpp | 37 ++++++++++++++++++++++++++------ src/caffe/util/insert_splits.cpp | 3 ++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 7875285f..31644422 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -424,7 +424,8 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) { // If we are not doing in-place computation but have duplicated blobs, // raise an error. 
- LOG(FATAL) << "Duplicate blobs produced by multiple sources."; + LOG(FATAL) << "Top blob '" << blob_name + << "' produced by multiple sources."; } else { // Normal output. if (Caffe::root_solver()) { @@ -468,8 +469,8 @@ int Net::AppendBottom(const NetParameter& param, const int layer_id, const LayerParameter& layer_param = param.layer(layer_id); const string& blob_name = layer_param.bottom(bottom_id); if (available_blobs->find(blob_name) == available_blobs->end()) { - LOG(FATAL) << "Unknown blob input " << blob_name - << " (at index " << bottom_id << ") to layer " << layer_id; + LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '" + << layer_param.name() << "', bottom index " << bottom_id << ")"; } const int blob_id = (*blob_name_to_idx)[blob_name]; if (Caffe::root_solver()) { @@ -545,10 +546,19 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, ParamSpec_DimCheckMode_PERMISSIVE)) { // Permissive dimension checking -- only check counts are the same. CHECK_EQ(this_blob->count(), owner_blob->count()) - << "Shared parameter blobs must have the same count."; + << "Cannot share param '" << param_name << "' owned by layer '" + << layer_names_[owner_layer_id] << "' with layer '" + << layer_names_[layer_id] << "'; count mismatch. Owner layer param " + << "shape is " << owner_blob->shape_string() << "; sharing layer " + << "shape is " << this_blob->shape_string(); } else { // Strict dimension checking -- all dims must be the same. - CHECK(this_blob->shape() == owner_blob->shape()); + CHECK(this_blob->shape() == owner_blob->shape()) + << "Cannot share param '" << param_name << "' owned by layer '" + << layer_names_[owner_layer_id] << "' with layer '" + << layer_names_[layer_id] << "'; shape mismatch. Owner layer param " + << "shape is " << owner_blob->shape_string() << "; sharing layer " + << "expects shape " << this_blob->shape_string(); } const int learnable_param_id = learnable_param_ids_[owner_net_param_id]; learnable_param_ids_.push_back(learnable_param_id); @@ -775,7 +785,11 @@ void Net::ShareTrainedLayersWith(const Net* other) { << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { Blob* source_blob = source_layer->blobs()[j].get(); - CHECK(target_blobs[j]->shape() == source_blob->shape()); + CHECK(target_blobs[j]->shape() == source_blob->shape()) + << "Cannot share param " << j << " weights from layer '" + << source_layer_name << "'; shape mismatch. Source param shape is " + << source_blob->shape_string() << "; target param shape is " + << target_blobs[j]->shape_string(); target_blobs[j]->ShareData(*source_blob); } } @@ -839,6 +853,17 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { + if (!target_blobs[j]->ShapeEquals(source_layer.blobs(j))) { + Blob source_blob; + const bool kReshape = true; + source_blob.FromProto(source_layer.blobs(j), kReshape); + LOG(FATAL) << "Cannot copy param " << j << " weights from layer '" + << source_layer_name << "'; shape mismatch. Source param shape is " + << source_blob.shape_string() << "; target param shape is " + << target_blobs[j]->shape_string() << ". 
" + << "To learn this layer's parameters from scratch rather than " + << "copying from a saved net, rename the layer."; + } const bool kReshape = false; target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); } diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp index 416f80ab..475a2a9f 100644 --- a/src/caffe/util/insert_splits.cpp +++ b/src/caffe/util/insert_splits.cpp @@ -32,7 +32,8 @@ void InsertSplits(const NetParameter& param, NetParameter* param_split) { const string& blob_name = layer_param.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { - LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; + LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '" + << layer_param.name() << "', bottom index " << j << ")"; } const pair& bottom_idx = make_pair(i, j); const pair& top_idx = blob_name_to_last_top_idx[blob_name]; From 660cd1239038bac54942c1c4fefe694413e633a0 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 14 Aug 2015 16:22:34 -0700 Subject: [PATCH 040/223] New make target to only build the library. --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 05b783af..80bc3737 100644 --- a/Makefile +++ b/Makefile @@ -386,11 +386,13 @@ endif ############################## # Define build targets ############################## -.PHONY: all test clean docs linecount lint lintclean tools examples $(DIST_ALIASES) \ +.PHONY: all lib test clean docs linecount lint lintclean tools examples $(DIST_ALIASES) \ py mat py$(PROJECT) mat$(PROJECT) proto runtest \ superclean supercleanlist supercleanfiles warn everything -all: $(STATIC_NAME) $(DYNAMIC_NAME) tools examples +all: lib tools examples + +lib: $(STATIC_NAME) $(DYNAMIC_NAME) everything: $(EVERYTHING_TARGETS) From 3778a2a6ca1c5d909f12add066478d04fe44b8cd Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Fri, 14 Aug 2015 13:52:01 -0700 Subject: [PATCH 041/223] Fix MultiGPU solver test with TEST_GPUID != 1 This is a patch for multi-gpu testing issue (#2926). The problem fixed in this commit is that when calling make runtest with TEST_GPUID != 0 on a MultiGPU machine, solver tests will crash because gpu ids in multi-gpu tests doesn't match that of single GPU test. 
--- src/caffe/test/test_gradient_based_solver.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index dcbfff1c..7ad7467f 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -73,12 +73,19 @@ class GradientBasedSolverTest : public MultiDeviceTest { const int iter_size = 1, const int devices = 1, const bool snapshot = false, const char* from_snapshot = NULL) { ostringstream proto; + int device_id = 0; +#ifndef CPU_ONLY + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaGetDevice(&device_id)); + } +#endif proto << "snapshot_after_train: " << snapshot << " " "max_iter: " << num_iters << " " "base_lr: " << learning_rate << " " "lr_policy: 'fixed' " "iter_size: " << iter_size << " " + "device_id: " << device_id << " " "net_param { " " name: 'TestNetwork' " " layer { " @@ -189,8 +196,12 @@ class GradientBasedSolverTest : public MultiDeviceTest { } else { LOG(INFO) << "Multi-GPU test on " << devices << " devices"; vector gpus; - for (int i = 0; i < devices; ++i) { - gpus.push_back(i); + // put current device at the beginning + int device_id = solver_->param().device_id(); + gpus.push_back(device_id); + for (int i = 0; gpus.size() < devices; ++i) { + if (i != device_id) + gpus.push_back(i); } Caffe::set_solver_count(gpus.size()); this->sync_.reset(new P2PSync( From 09868aced72a20bc30198b84a225e58fdd6f13a6 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sat, 15 Aug 2015 19:02:57 -0700 Subject: [PATCH 042/223] Malloc at least 1 byte for MultiGPU P2PSync buffers --- src/caffe/parallel.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index d48136c5..a6d154e1 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -55,7 +55,8 @@ static void apply_buffers(const vector*>& blobs, } ptr += size; } - CHECK_EQ(total_size, ptr - buffer); + // total_size is at least one byte + CHECK_EQ(total_size, (ptr == buffer ? 1 : ptr - buffer)); } // Buffer size necessary to store given blobs From 7453bbf6ea1aeb03330b5892a06276b69434f699 Mon Sep 17 00:00:00 2001 From: mhouston Date: Tue, 18 Aug 2015 13:40:00 -0700 Subject: [PATCH 043/223] Add some documentation on Multi-GPU support --- docs/multigpu.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 docs/multigpu.md diff --git a/docs/multigpu.md b/docs/multigpu.md new file mode 100644 index 00000000..4b202347 --- /dev/null +++ b/docs/multigpu.md @@ -0,0 +1,24 @@ +--- +title: Multi-GPU Usage, Hardware Configuration Assumptions, and Performance +--- + +# Multi-GPU Usage + +Currently Multi-GPU is only supported via the C/C++ paths and only for training. + +The GPUs to be used for training can be set with the "-gpu" flag on the command line to the 'caffe' tool. e.g. "build/tools/caffe train --solver=models/bvlc_alexnet/solver.prototxt --gpu=0,1" will train on GPUs 0 and 1. + +**NOTE**: each GPU runs the batchsize specified in your train_val.prototxt. So if you go from 1 GPU to 2 GPU, your effective batchsize will double. e.g. if your train_val.prototxt specified a batchsize of 256, if you run 2 GPUs your effective batch size is now 512. So you need to adjust the batchsize when running multiple GPUs and/or adjust your solver params, specifically learning rate. + +# Hardware Configuration Assumptions + +The current implementation uses a tree reduction strategy. e.g. 
if there are 4 GPUs in the system, 0:1, 2:3 will exchange gradients, then 0:2 (top of the tree) will exchange gradients, 0 will calculate the
+updated model, 0\-\>2, and then 0\-\>1, 2\-\>3.
+
+For best performance, P2P DMA access between devices is needed. Without P2P access, for example crossing PCIe root complex, data is copied through host and effective exchange bandwidth is greatly reduced.
+
+The current implementation has a "soft" assumption that the devices being used are homogeneous. In practice, any devices of the same general class should work together, but performance and total size are limited by the smallest device being used. e.g. if you combine a TitanX and a GTX980, performance will be limited by the 980. Mixing vastly different levels of boards, e.g. Kepler and Fermi, is not supported.
+
+# Scaling Performance
+
+Performance is **heavily** dependent on the PCIe topology of the system, the configuration of the neural network you are training, and the speed of each of the layers. Systems like the DIGITS DevBox have an optimized PCIe topology (X99-E WS chipset). In general, scaling on 2 GPUs tends to be ~1.8X on average for networks like AlexNet, CaffeNet, VGG, GoogleNet. 4 GPUs begins to have falloff in scaling. Generally with "weak scaling" where the batchsize increases with the number of GPUs you will see 3.5x scaling or so. With "strong scaling", the system can become communication bound, especially with layer performance optimizations like those in [cuDNNv3](http://nvidia.com/cudnn), and you will likely see closer to mid 2.x scaling in performance. Networks that have heavy computation compared to the number of parameters tend to have the best scaling performance. \ No newline at end of file

From 26a9880d72e81d415d1dc3bf449586ce54185ea4 Mon Sep 17 00:00:00 2001
From: mhouston
Date: Tue, 18 Aug 2015 15:29:26 -0700
Subject: [PATCH 044/223] Add information about how to get GPU topology from nvidia-smi

---
 docs/multigpu.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/multigpu.md b/docs/multigpu.md
index 4b202347..01cfb893 100644
--- a/docs/multigpu.md
+++ b/docs/multigpu.md
@@ -19,6 +19,8 @@ For best performance, P2P DMA access between devices is needed. Without P2P acce
 
 The current implementation has a "soft" assumption that the devices being used are homogeneous. In practice, any devices of the same general class should work together, but performance and total size are limited by the smallest device being used. e.g. if you combine a TitanX and a GTX980, performance will be limited by the 980. Mixing vastly different levels of boards, e.g. Kepler and Fermi, is not supported.
 
+"nvidia-smi topo -m" will show you the connectivity matrix. You can do P2P through PCIe bridges, but not across socket level links at this time, e.g. across CPU sockets on a multi-socket motherboard.
+
 # Scaling Performance
 
 Performance is **heavily** dependent on the PCIe topology of the system, the configuration of the neural network you are training, and the speed of each of the layers. Systems like the DIGITS DevBox have an optimized PCIe topology (X99-E WS chipset). In general, scaling on 2 GPUs tends to be ~1.8X on average for networks like AlexNet, CaffeNet, VGG, GoogleNet. 4 GPUs begins to have falloff in scaling. Generally with "weak scaling" where the batchsize increases with the number of GPUs you will see 3.5x scaling or so. 
With "strong scaling", the system can become communication bound, especially with layer performance optimizations like those in [cuDNNv3](http://nvidia.com/cudnn), and you will likely see closer to mid 2.x scaling in performance. Networks that have heavy computation compared to the number of parameters tend to have the best scaling performance. \ No newline at end of file From 3b00ca6d32647e683d9808d13243bd0240550901 Mon Sep 17 00:00:00 2001 From: max argus Date: Thu, 20 Aug 2015 09:01:58 +0000 Subject: [PATCH 045/223] In BasePrefetchingDataLayer::Forward_cpu hanged top[0]->Reshape to top[0]->ReshapeLike, in line with other calls. --- src/caffe/layers/base_data_layer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 5303fe9c..b90bd4e0 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -105,8 +105,7 @@ void BasePrefetchingDataLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { Batch* batch = prefetch_full_.pop("Data layer prefetch queue empty"); // Reshape to loaded data. - top[0]->Reshape(batch->data_.num(), batch->data_.channels(), - batch->data_.height(), batch->data_.width()); + top[0]->ReshapeLike(batch->data_); // Copy the data caffe_copy(batch->data_.count(), batch->data_.cpu_data(), top[0]->mutable_cpu_data()); From 51b172ce2fcd7f63aa7830389af54d353f53a3bc Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 14 Aug 2015 16:53:39 -0700 Subject: [PATCH 046/223] Expose LayerFactory::LayerTypeList in pycaffe Useful for validating NetParameters without crashing on SIGABRT --- include/caffe/layer_factory.hpp | 31 +++++++++++++++-------- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 2 ++ python/caffe/test/test_layer_type_list.py | 10 ++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 python/caffe/test/test_layer_type_list.py diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp index 32e849de..2c2fde4d 100644 --- a/include/caffe/layer_factory.hpp +++ b/include/caffe/layer_factory.hpp @@ -41,6 +41,7 @@ #include #include +#include #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" @@ -77,26 +78,36 @@ class LayerRegistry { const string& type = param.type(); CreatorRegistry& registry = Registry(); CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type - << " (known types: " << LayerTypeList() << ")"; + << " (known types: " << LayerTypeListString() << ")"; return registry[type](param); } + static vector LayerTypeList() { + CreatorRegistry& registry = Registry(); + vector layer_types; + for (typename CreatorRegistry::iterator iter = registry.begin(); + iter != registry.end(); ++iter) { + layer_types.push_back(iter->first); + } + return layer_types; + } + private: // Layer registry should never be instantiated - everything is done with its // static variables. 
LayerRegistry() {} - static string LayerTypeList() { - CreatorRegistry& registry = Registry(); - string layer_types; - for (typename CreatorRegistry::iterator iter = registry.begin(); - iter != registry.end(); ++iter) { - if (iter != registry.begin()) { - layer_types += ", "; + static string LayerTypeListString() { + vector layer_types = LayerTypeList(); + string layer_types_str; + for (vector::iterator iter = layer_types.begin(); + iter != layer_types.end(); ++iter) { + if (iter != layer_types.begin()) { + layer_types_str += ", "; } - layer_types += iter->first; + layer_types_str += *iter; } - return layer_types; + return layer_types_str; } }; diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 1b2da510..6cc44e72 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver +from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier from .detector import Detector diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index bb5130fd..f9b2dba1 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -200,6 +200,8 @@ BOOST_PYTHON_MODULE(_caffe) { bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_device", &Caffe::SetDevice); + bp::def("layer_type_list", &LayerRegistry::LayerTypeList); + bp::class_, shared_ptr >, boost::noncopyable >("Net", bp::no_init) .def("__init__", bp::make_constructor(&Net_Init)) diff --git a/python/caffe/test/test_layer_type_list.py b/python/caffe/test/test_layer_type_list.py new file mode 100644 index 00000000..7edc80df --- /dev/null +++ b/python/caffe/test/test_layer_type_list.py @@ -0,0 +1,10 @@ +import unittest + +import caffe + +class TestLayerTypeList(unittest.TestCase): + + def test_standard_types(self): + for type_name in ['Data', 'Convolution', 'InnerProduct']: + self.assertIn(type_name, caffe.layer_type_list(), + '%s not in layer_type_list()' % type_name) From 35657e31ad82f73a0682deb52cb1606b33a202be Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 20 Aug 2015 11:54:08 -0700 Subject: [PATCH 047/223] DeconvolutionLayer Backward_gpu fix: don't redo im2col --- src/caffe/layers/deconv_layer.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 39bc4de8..8a1eed8a 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -52,7 +52,8 @@ void DeconvolutionLayer::Backward_gpu(const vector*>& top, // gradient w.r.t. bottom data, if necessary. 
if (propagate_down[i]) { this->forward_gpu_gemm(top_diff + top[i]->offset(n), weight, - bottom_diff + bottom[i]->offset(n)); + bottom_diff + bottom[i]->offset(n), + this->param_propagate_down_[0]); } } } From 2f5889cb84b8c7f8f64a6b7eb48e1dc624b18162 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 20 Aug 2015 14:29:02 -0700 Subject: [PATCH 048/223] Use input_shape instead of input_dim in examples --- examples/cifar10/cifar10_full.prototxt | 10 ++++++---- examples/cifar10/cifar10_quick.prototxt | 10 ++++++---- examples/mnist/lenet.prototxt | 10 ++++++---- examples/net_surgery/bvlc_caffenet_full_conv.prototxt | 10 ++++++---- examples/net_surgery/conv.prototxt | 10 ++++++---- examples/siamese/mnist_siamese.prototxt | 10 ++++++---- models/bvlc_alexnet/deploy.prototxt | 10 ++++++---- models/bvlc_googlenet/deploy.prototxt | 10 ++++++---- models/bvlc_reference_caffenet/deploy.prototxt | 10 ++++++---- models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt | 10 ++++++---- models/finetune_flickr_style/deploy.prototxt | 10 ++++++---- 11 files changed, 66 insertions(+), 44 deletions(-) diff --git a/examples/cifar10/cifar10_full.prototxt b/examples/cifar10/cifar10_full.prototxt index c16f7dca..446479da 100644 --- a/examples/cifar10/cifar10_full.prototxt +++ b/examples/cifar10/cifar10_full.prototxt @@ -2,10 +2,12 @@ name: "CIFAR10_full_deploy" # N.B. input image must be in CIFAR-10 format # as described at http://www.cs.toronto.edu/~kriz/cifar.html input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 32 -input_dim: 32 +input_shape { + dim: 1 + dim: 3 + dim: 32 + dim: 32 +} layer { name: "conv1" type: "Convolution" diff --git a/examples/cifar10/cifar10_quick.prototxt b/examples/cifar10/cifar10_quick.prototxt index 1ad190e1..9352fbf6 100644 --- a/examples/cifar10/cifar10_quick.prototxt +++ b/examples/cifar10/cifar10_quick.prototxt @@ -1,9 +1,11 @@ name: "CIFAR10_quick_test" input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 32 -input_dim: 32 +input_shape { + dim: 1 + dim: 3 + dim: 32 + dim: 32 +} layer { name: "conv1" type: "Convolution" diff --git a/examples/mnist/lenet.prototxt b/examples/mnist/lenet.prototxt index cb42610f..dff7123b 100644 --- a/examples/mnist/lenet.prototxt +++ b/examples/mnist/lenet.prototxt @@ -1,9 +1,11 @@ name: "LeNet" input: "data" -input_dim: 64 -input_dim: 1 -input_dim: 28 -input_dim: 28 +input_shape { + dim: 64 + dim: 1 + dim: 28 + dim: 28 +} layer { name: "conv1" type: "Convolution" diff --git a/examples/net_surgery/bvlc_caffenet_full_conv.prototxt b/examples/net_surgery/bvlc_caffenet_full_conv.prototxt index 3c951970..0cadde9b 100644 --- a/examples/net_surgery/bvlc_caffenet_full_conv.prototxt +++ b/examples/net_surgery/bvlc_caffenet_full_conv.prototxt @@ -1,10 +1,12 @@ # Fully convolutional network version of CaffeNet. name: "CaffeNetConv" input: "data" -input_dim: 1 -input_dim: 3 -input_dim: 451 -input_dim: 451 +input_shape { + dim: 1 + dim: 3 + dim: 451 + dim: 451 +} layer { name: "conv1" type: "Convolution" diff --git a/examples/net_surgery/conv.prototxt b/examples/net_surgery/conv.prototxt index 9444c63a..6b3e5c76 100644 --- a/examples/net_surgery/conv.prototxt +++ b/examples/net_surgery/conv.prototxt @@ -1,10 +1,12 @@ # Simple single-layer network to showcase editing model parameters. 
name: "convolution" input: "data" -input_dim: 1 -input_dim: 1 -input_dim: 100 -input_dim: 100 +input_shape { + dim: 1 + dim: 1 + dim: 100 + dim: 100 +} layer { name: "conv" type: "Convolution" diff --git a/examples/siamese/mnist_siamese.prototxt b/examples/siamese/mnist_siamese.prototxt index 0e903f85..332731bd 100644 --- a/examples/siamese/mnist_siamese.prototxt +++ b/examples/siamese/mnist_siamese.prototxt @@ -1,9 +1,11 @@ name: "mnist_siamese" input: "data" -input_dim: 10000 -input_dim: 1 -input_dim: 28 -input_dim: 28 +input_shape { + dim: 10000 + dim: 1 + dim: 28 + dim: 28 +} layer { name: "conv1" type: "Convolution" diff --git a/models/bvlc_alexnet/deploy.prototxt b/models/bvlc_alexnet/deploy.prototxt index ced055b8..ff10daa9 100644 --- a/models/bvlc_alexnet/deploy.prototxt +++ b/models/bvlc_alexnet/deploy.prototxt @@ -1,9 +1,11 @@ name: "AlexNet" input: "data" -input_dim: 10 -input_dim: 3 -input_dim: 227 -input_dim: 227 +input_shape { + dim: 10 + dim: 3 + dim: 227 + dim: 227 +} layer { name: "conv1" type: "Convolution" diff --git a/models/bvlc_googlenet/deploy.prototxt b/models/bvlc_googlenet/deploy.prototxt index 4648bf26..1f90ee21 100644 --- a/models/bvlc_googlenet/deploy.prototxt +++ b/models/bvlc_googlenet/deploy.prototxt @@ -1,9 +1,11 @@ name: "GoogleNet" input: "data" -input_dim: 10 -input_dim: 3 -input_dim: 224 -input_dim: 224 +input_shape { + dim: 10 + dim: 3 + dim: 224 + dim: 224 +} layer { name: "conv1/7x7_s2" type: "Convolution" diff --git a/models/bvlc_reference_caffenet/deploy.prototxt b/models/bvlc_reference_caffenet/deploy.prototxt index 29ccf146..127f1e26 100644 --- a/models/bvlc_reference_caffenet/deploy.prototxt +++ b/models/bvlc_reference_caffenet/deploy.prototxt @@ -1,9 +1,11 @@ name: "CaffeNet" input: "data" -input_dim: 10 -input_dim: 3 -input_dim: 227 -input_dim: 227 +input_shape { + dim: 10 + dim: 3 + dim: 227 + dim: 227 +} layer { name: "conv1" type: "Convolution" diff --git a/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt b/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt index ea9cf98a..ae1df967 100644 --- a/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt +++ b/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt @@ -1,9 +1,11 @@ name: "R-CNN-ilsvrc13" input: "data" -input_dim: 10 -input_dim: 3 -input_dim: 227 -input_dim: 227 +input_shape { + dim: 10 + dim: 3 + dim: 227 + dim: 227 +} layer { name: "conv1" type: "Convolution" diff --git a/models/finetune_flickr_style/deploy.prototxt b/models/finetune_flickr_style/deploy.prototxt index 4a924f74..0f07e47a 100644 --- a/models/finetune_flickr_style/deploy.prototxt +++ b/models/finetune_flickr_style/deploy.prototxt @@ -1,9 +1,11 @@ name: "FlickrStyleCaffeNet" input: "data" -input_dim: 10 -input_dim: 3 -input_dim: 227 -input_dim: 227 +input_shape { + dim: 10 + dim: 3 + dim: 227 + dim: 227 +} layer { name: "conv1" type: "Convolution" From 7146e596347db81869b5bfa9b4cb014e80be9732 Mon Sep 17 00:00:00 2001 From: David Larson Date: Thu, 20 Aug 2015 15:18:03 -0700 Subject: [PATCH 049/223] [examples] fix link to feature visualization notebook --- examples/feature_extraction/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/feature_extraction/readme.md b/examples/feature_extraction/readme.md index a980b8b3..2bc3dacb 100644 --- a/examples/feature_extraction/readme.md +++ b/examples/feature_extraction/readme.md @@ -64,7 +64,7 @@ If you meet with the error "Check failed: status.ok() Failed to open leveldb exa rm -rf examples/_temp/features/ -If you'd like to use the Python wrapper 
for extracting features, check out the [layer visualization notebook](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb). +If you'd like to use the Python wrapper for extracting features, check out the [filter visualization notebook](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb). Clean Up -------- From 08086c7580ddfd0d2b996157b3f3b4c1a52fd2b5 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Fri, 21 Aug 2015 21:23:22 -0700 Subject: [PATCH 050/223] remove superfluous code in Net::ToProto --- src/caffe/net.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index a18ee638..554343f9 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -763,12 +763,6 @@ void Net::ToProto(NetParameter* param, bool write_diff) const { DLOG(INFO) << "Serializing " << layers_.size() << " layers"; for (int i = 0; i < layers_.size(); ++i) { LayerParameter* layer_param = param->add_layer(); - for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) { - layer_param->add_bottom(blob_names_[bottom_id_vecs_[i][j]]); - } - for (int j = 0; j < top_id_vecs_[i].size(); ++j) { - layer_param->add_top(blob_names_[top_id_vecs_[i][j]]); - } layers_[i]->ToProto(layer_param, write_diff); } } From ff19d5f5c010dd8d6bfcf768b4fe27d0458f17df Mon Sep 17 00:00:00 2001 From: J Yegerlehner Date: Fri, 3 Apr 2015 16:11:23 -0500 Subject: [PATCH 051/223] Add signal handler and early exit/snapshot to Solver. Add signal handler and early exit/snapshot to Solver. Add signal handler and early exit/snapshot to Solver. Also check for exit and snapshot when testing. Skip running test after early exit. Fix more lint. Rebase on master. Finish rebase on master. Fixups per review comments. Redress review comments. Lint. Correct error message wording. --- include/caffe/solver.hpp | 37 ++++++++- include/caffe/util/signal_handler.h | 24 ++++++ src/caffe/solver.cpp | 70 +++++++++++++++-- src/caffe/util/signal_handler.cpp | 115 ++++++++++++++++++++++++++++ tools/caffe.cpp | 32 +++++++- 5 files changed, 268 insertions(+), 10 deletions(-) create mode 100644 include/caffe/util/signal_handler.h create mode 100644 src/caffe/util/signal_handler.cpp diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index ab12ef1b..aba3e036 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -1,6 +1,6 @@ #ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_ #define CAFFE_OPTIMIZATION_SOLVER_HPP_ - +#include #include #include @@ -8,6 +8,28 @@ namespace caffe { +/** + * @brief Enumeration of actions that a client of the Solver may request by + * implementing the Solver's action request function, which a + * a client may optionally provide in order to request early termination + * or saving a snapshot without exiting. In the executable caffe, this + * mechanism is used to allow the snapshot to be saved when stopping + * execution with a SIGINT (Ctrl-C). + */ + namespace SolverAction { + enum Enum { + NONE = 0, // Take no special action. + STOP = 1, // Stop training. snapshot_after_train controls whether a + // snapshot is created. + SNAPSHOT = 2 // Take a snapshot, and keep training. + }; + } + +/** + * @brief Type of a function that returns a Solver Action enumeration. + */ +typedef boost::function ActionCallback; + /** * @brief An interface for classes that perform optimization on Net%s. 
* @@ -23,6 +45,12 @@ class Solver { void Init(const SolverParameter& param); void InitTrainNet(); void InitTestNets(); + + // Client of the Solver optionally may call this in order to set the function + // that the solver uses to see what action it should take (e.g. snapshot or + // exit training early). + void SetActionFunction(ActionCallback func); + SolverAction::Enum GetRequestedAction(); // The main entry of the solver function. In default, iter will be zero. Pass // in a non-zero iter number to resume training for a pre-trained net. virtual void Solve(const char* resume_file = NULL); @@ -84,6 +112,13 @@ class Solver { // in data parallelism const Solver* const root_solver_; + // A function that can be set by a client of the Solver to provide indication + // that it wants a snapshot saved and/or to exit early. + ActionCallback action_request_function_; + + // True iff a request to stop early was received. + bool requested_early_exit_; + DISABLE_COPY_AND_ASSIGN(Solver); }; diff --git a/include/caffe/util/signal_handler.h b/include/caffe/util/signal_handler.h new file mode 100644 index 00000000..fb84c65b --- /dev/null +++ b/include/caffe/util/signal_handler.h @@ -0,0 +1,24 @@ +#ifndef INCLUDE_CAFFE_UTIL_SIGNAL_HANDLER_H_ +#define INCLUDE_CAFFE_UTIL_SIGNAL_HANDLER_H_ + +#include "caffe/proto/caffe.pb.h" +#include "caffe/solver.hpp" + +namespace caffe { + +class SignalHandler { + public: + // Contructor. Specify what action to take when a signal is received. + SignalHandler(SolverAction::Enum SIGINT_action, + SolverAction::Enum SIGHUP_action); + ~SignalHandler(); + ActionCallback GetActionFunction(); + private: + SolverAction::Enum CheckForSignals() const; + SolverAction::Enum SIGINT_action_; + SolverAction::Enum SIGHUP_action_; +}; + +} // namespace caffe + +#endif // INCLUDE_CAFFE_UTIL_SIGNAL_HANDLER_H_ diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 9348e11c..394ec3b3 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -17,15 +17,31 @@ namespace caffe { +template +void Solver::SetActionFunction(ActionCallback func) { + action_request_function_ = func; +} + +template +SolverAction::Enum Solver::GetRequestedAction() { + if (action_request_function_) { + // If the external request function has been set, call it. + return action_request_function_(); + } + return SolverAction::NONE; +} + template Solver::Solver(const SolverParameter& param, const Solver* root_solver) - : net_(), callbacks_(), root_solver_(root_solver) { + : net_(), callbacks_(), root_solver_(root_solver), + requested_early_exit_(false) { Init(param); } template Solver::Solver(const string& param_file, const Solver* root_solver) - : net_(), callbacks_(), root_solver_(root_solver) { + : net_(), callbacks_(), root_solver_(root_solver), + requested_early_exit_(false) { SolverParameter param; ReadProtoFromTextFileOrDie(param_file, ¶m); Init(param); @@ -195,6 +211,10 @@ void Solver::Step(int iters) { && (iter_ > 0 || param_.test_initialization()) && Caffe::root_solver()) { TestAll(); + if (requested_early_exit_) { + // Break out of the while loop because stop was requested while testing. + break; + } } for (int i = 0; i < callbacks_.size(); ++i) { @@ -250,12 +270,20 @@ void Solver::Step(int iters) { // the number of times the weights have been updated. ++iter_; + SolverAction::Enum request = GetRequestedAction(); + // Save a snapshot if needed. 
- if (param_.snapshot() - && iter_ % param_.snapshot() == 0 - && Caffe::root_solver()) { + if ((param_.snapshot() + && iter_ % param_.snapshot() == 0 + && Caffe::root_solver()) || + (request == SolverAction::SNAPSHOT)) { Snapshot(); } + if (SolverAction::STOP == request) { + requested_early_exit_ = true; + // Break out of training loop. + break; + } } } @@ -265,6 +293,9 @@ void Solver::Solve(const char* resume_file) { LOG(INFO) << "Solving " << net_->name(); LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy(); + // Initialize to false every time we start solving. + requested_early_exit_ = false; + if (resume_file) { LOG(INFO) << "Restoring previous solver status from " << resume_file; Restore(resume_file); @@ -279,6 +310,10 @@ void Solver::Solve(const char* resume_file) { && (!param_.snapshot() || iter_ % param_.snapshot() != 0)) { Snapshot(); } + if (requested_early_exit_) { + LOG(INFO) << "Optimization stopped early."; + return; + } // After the optimization is done, run an additional train and test pass to // display the train and test loss/outputs if appropriate (based on the // display and test_interval settings, respectively). Unlike in the rest of @@ -296,10 +331,11 @@ void Solver::Solve(const char* resume_file) { LOG(INFO) << "Optimization Done."; } - template void Solver::TestAll() { - for (int test_net_id = 0; test_net_id < test_nets_.size(); ++test_net_id) { + for (int test_net_id = 0; + test_net_id < test_nets_.size() && !requested_early_exit_; + ++test_net_id) { Test(test_net_id); } } @@ -317,6 +353,21 @@ void Solver::Test(const int test_net_id) { const shared_ptr >& test_net = test_nets_[test_net_id]; Dtype loss = 0; for (int i = 0; i < param_.test_iter(test_net_id); ++i) { + SolverAction::Enum request = GetRequestedAction(); + // Check to see if stoppage of testing/training has been requested. + while (request != SolverAction::NONE) { + if (SolverAction::SNAPSHOT == request) { + Snapshot(); + } else if (SolverAction::STOP == request) { + requested_early_exit_ = true; + } + request = GetRequestedAction(); + } + if (requested_early_exit_) { + // break out of test loop. 
+ break; + } + Dtype iter_loss; const vector*>& result = test_net->Forward(bottom_vec, &iter_loss); @@ -341,6 +392,10 @@ void Solver::Test(const int test_net_id) { } } } + if (requested_early_exit_) { + LOG(INFO) << "Test interrupted."; + return; + } if (param_.test_compute_loss()) { loss /= param_.test_iter(test_net_id); LOG(INFO) << "Test loss: " << loss; @@ -361,7 +416,6 @@ void Solver::Test(const int test_net_id) { } } - template void Solver::Snapshot() { CHECK(Caffe::root_solver()); diff --git a/src/caffe/util/signal_handler.cpp b/src/caffe/util/signal_handler.cpp new file mode 100644 index 00000000..5d764ec5 --- /dev/null +++ b/src/caffe/util/signal_handler.cpp @@ -0,0 +1,115 @@ +#include +#include + +#include +#include + +#include "caffe/util/signal_handler.h" + +namespace { + static volatile sig_atomic_t got_sigint = false; + static volatile sig_atomic_t got_sighup = false; + static bool already_hooked_up = false; + + void handle_signal(int signal) { + switch (signal) { + case SIGHUP: + got_sighup = true; + break; + case SIGINT: + got_sigint = true; + break; + } + } + + void HookupHandler() { + if (already_hooked_up) { + LOG(FATAL) << "Tried to hookup signal handlers more than once."; + } + already_hooked_up = true; + + struct sigaction sa; + // Setup the handler + sa.sa_handler = &handle_signal; + // Restart the system call, if at all possible + sa.sa_flags = SA_RESTART; + // Block every signal during the handler + sigfillset(&sa.sa_mask); + // Intercept SIGHUP and SIGINT + if (sigaction(SIGHUP, &sa, NULL) == -1) { + LOG(FATAL) << "Cannot install SIGHUP handler."; + } + if (sigaction(SIGINT, &sa, NULL) == -1) { + LOG(FATAL) << "Cannot install SIGINT handler."; + } + } + + // Set the signal handlers to the default. + void UnhookHandler() { + if (already_hooked_up) { + struct sigaction sa; + // Setup the sighub handler + sa.sa_handler = SIG_DFL; + // Restart the system call, if at all possible + sa.sa_flags = SA_RESTART; + // Block every signal during the handler + sigfillset(&sa.sa_mask); + // Intercept SIGHUP and SIGINT + if (sigaction(SIGHUP, &sa, NULL) == -1) { + LOG(FATAL) << "Cannot uninstall SIGHUP handler."; + } + if (sigaction(SIGINT, &sa, NULL) == -1) { + LOG(FATAL) << "Cannot uninstall SIGINT handler."; + } + + already_hooked_up = false; + } + } + + // Return true iff a SIGINT has been received since the last time this + // function was called. + bool GotSIGINT() { + bool result = got_sigint; + got_sigint = false; + return result; + } + + // Return true iff a SIGHUP has been received since the last time this + // function was called. + bool GotSIGHUP() { + bool result = got_sighup; + got_sighup = false; + return result; + } +} // namespace + +namespace caffe { + +SignalHandler::SignalHandler(SolverAction::Enum SIGINT_action, + SolverAction::Enum SIGHUP_action): + SIGINT_action_(SIGINT_action), + SIGHUP_action_(SIGHUP_action) { + HookupHandler(); +} + +SignalHandler::~SignalHandler() { + UnhookHandler(); +} + +SolverAction::Enum SignalHandler::CheckForSignals() const { + if (GotSIGHUP()) { + return SIGHUP_action_; + } + if (GotSIGINT()) { + return SIGINT_action_; + } + return SolverAction::NONE; +} + +// Return the function that the solver can use to find out if a snapshot or +// early exit is being requested. 
+ActionCallback SignalHandler::GetActionFunction() { + return boost::bind(&SignalHandler::CheckForSignals, this); +} + +} // namespace caffe diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 9f31b37a..ff63860a 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -12,6 +12,7 @@ namespace bp = boost::python; #include "boost/algorithm/string.hpp" #include "caffe/caffe.hpp" +#include "caffe/util/signal_handler.h" using caffe::Blob; using caffe::Caffe; @@ -39,6 +40,12 @@ DEFINE_string(weights, "", "separated by ','. Cannot be set simultaneously with snapshot."); DEFINE_int32(iterations, 50, "The number of iterations to run."); +DEFINE_string(sigint_effect, "stop", + "Optional; action to take when a SIGINT signal is received: " + "snapshot, stop or none."); +DEFINE_string(sighup_effect, "snapshot", + "Optional; action to take when a SIGHUP signal is received: " + "snapshot, stop or none."); // A simple registry for caffe commands. typedef int (*BrewFunction)(); @@ -126,6 +133,22 @@ void CopyLayers(caffe::Solver* solver, const std::string& model_list) { } } +// Translate the signal effect the user specified on the command-line to the +// corresponding enumeration. +caffe::SolverAction::Enum GetRequestedAction( + const std::string& flag_value) { + if (flag_value == "stop") { + return caffe::SolverAction::STOP; + } + if (flag_value == "snapshot") { + return caffe::SolverAction::SNAPSHOT; + } + if (flag_value == "none") { + return caffe::SolverAction::NONE; + } + LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified"; +} + // Train / Finetune a model. int train() { CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train."; @@ -165,7 +188,14 @@ int train() { Caffe::set_solver_count(gpus.size()); } - shared_ptr > solver(caffe::GetSolver(solver_param)); + caffe::SignalHandler signal_handler( + GetRequestedAction(FLAGS_sigint_effect), + GetRequestedAction(FLAGS_sighup_effect)); + + shared_ptr > + solver(caffe::GetSolver(solver_param)); + + solver->SetActionFunction(signal_handler.GetActionFunction()); if (FLAGS_snapshot.size()) { LOG(INFO) << "Resuming from " << FLAGS_snapshot; From 374fb8c79c3f23ee36c46d0bcaeb2176037aa4b8 Mon Sep 17 00:00:00 2001 From: Ran Date: Sat, 15 Aug 2015 20:09:43 +0300 Subject: [PATCH 052/223] Output accuracies per class. Fixed case where number of samples in class can be zero. - Fixed ignore_label case, also added a test. - Two other fixes. Fixed lint errors. Small fix. --- include/caffe/loss_layers.hpp | 8 +- src/caffe/layers/accuracy_layer.cpp | 20 +++++ src/caffe/test/test_accuracy_layer.cpp | 107 +++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 52826639..02687a94 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -39,7 +39,11 @@ class AccuracyLayer : public Layer { virtual inline const char* type() const { return "Accuracy"; } virtual inline int ExactNumBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 1; } + + // If there are two top blobs, then the second blob will contain + // accuracies per class. + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlos() const { return 2; } protected: /** @@ -86,6 +90,8 @@ class AccuracyLayer : public Layer { bool has_ignore_label_; /// The label indicating that an instance should be ignored. int ignore_label_; + /// Keeps counts of the number of samples per class. 
+ Blob nums_buffer_; }; /** diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index 90aad675..e2d8d9f8 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -38,6 +38,13 @@ void AccuracyLayer::Reshape( << "with integer values in {0, 1, ..., C-1}."; vector top_shape(0); // Accuracy is a scalar; 0 axes. top[0]->Reshape(top_shape); + if (top.size() > 1) { + // Per-class accuracy is a vector; 1 axes. + vector top_shape_per_class(1); + top_shape_per_class[0] = bottom[0]->shape(label_axis_); + top[1]->Reshape(top_shape_per_class); + nums_buffer_.Reshape(top_shape_per_class); + } } template @@ -50,6 +57,10 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, const int num_labels = bottom[0]->shape(label_axis_); vector maxval(top_k_+1); vector max_id(top_k_+1); + if (top.size() > 1) { + caffe_set(nums_buffer_.count(), Dtype(0), nums_buffer_.mutable_cpu_data()); + caffe_set(top[1]->count(), Dtype(0), top[1]->mutable_cpu_data()); + } int count = 0; for (int i = 0; i < outer_num_; ++i) { for (int j = 0; j < inner_num_; ++j) { @@ -58,6 +69,7 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, if (has_ignore_label_ && label_value == ignore_label_) { continue; } + if (top.size() > 1) ++nums_buffer_.mutable_cpu_data()[label_value]; DCHECK_GE(label_value, 0); DCHECK_LT(label_value, num_labels); // Top-k accuracy @@ -73,6 +85,7 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, for (int k = 0; k < top_k_; k++) { if (bottom_data_vector[k].second == label_value) { ++accuracy; + if (top.size() > 1) ++top[1]->mutable_cpu_data()[label_value]; break; } } @@ -82,6 +95,13 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, // LOG(INFO) << "Accuracy: " << accuracy; top[0]->mutable_cpu_data()[0] = accuracy / count; + if (top.size() > 1) { + for (int i = 0; i < top[1]->count(); ++i) { + top[1]->mutable_cpu_data()[i] = + nums_buffer_.cpu_data()[i] == 0 ? 0 + : top[1]->cpu_data()[i] / nums_buffer_.cpu_data()[i]; + } + } // Accuracy layer should not be used as a loss function. 
} diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index c14b67cc..94e529b5 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -22,6 +22,7 @@ class AccuracyLayerTest : public CPUDeviceTest { : blob_bottom_data_(new Blob()), blob_bottom_label_(new Blob()), blob_top_(new Blob()), + blob_top_per_class_(new Blob()), top_k_(3) { vector shape(2); shape[0] = 100; @@ -34,6 +35,8 @@ class AccuracyLayerTest : public CPUDeviceTest { blob_bottom_vec_.push_back(blob_bottom_data_); blob_bottom_vec_.push_back(blob_bottom_label_); blob_top_vec_.push_back(blob_top_); + blob_top_per_class_vec_.push_back(blob_top_); + blob_top_per_class_vec_.push_back(blob_top_per_class_); } virtual void FillBottoms() { @@ -56,12 +59,15 @@ class AccuracyLayerTest : public CPUDeviceTest { delete blob_bottom_data_; delete blob_bottom_label_; delete blob_top_; + delete blob_top_per_class_; } Blob* const blob_bottom_data_; Blob* const blob_bottom_label_; Blob* const blob_top_; + Blob* const blob_top_per_class_; vector*> blob_bottom_vec_; vector*> blob_top_vec_; + vector*> blob_top_per_class_vec_; int top_k_; }; @@ -90,6 +96,20 @@ TYPED_TEST(AccuracyLayerTest, TestSetupTopK) { EXPECT_EQ(this->blob_top_->width(), 1); } +TYPED_TEST(AccuracyLayerTest, TestSetupOutputPerClass) { + LayerParameter layer_param; + AccuracyLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_); + EXPECT_EQ(this->blob_top_->num(), 1); + EXPECT_EQ(this->blob_top_->channels(), 1); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); + EXPECT_EQ(this->blob_top_per_class_->num(), 10); + EXPECT_EQ(this->blob_top_per_class_->channels(), 1); + EXPECT_EQ(this->blob_top_per_class_->height(), 1); + EXPECT_EQ(this->blob_top_per_class_->width(), 1); +} + TYPED_TEST(AccuracyLayerTest, TestForwardCPU) { LayerParameter layer_param; AccuracyLayer layer(layer_param); @@ -228,4 +248,91 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUTopK) { num_correct_labels / 100.0, 1e-4); } +TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) { + LayerParameter layer_param; + Caffe::set_mode(Caffe::CPU); + AccuracyLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_); + + TypeParam max_value; + int max_id; + int num_correct_labels = 0; + const int num_class = this->blob_top_per_class_->num(); + vector correct_per_class(num_class, 0); + vector num_per_class(num_class, 0); + for (int i = 0; i < 100; ++i) { + max_value = -FLT_MAX; + max_id = 0; + for (int j = 0; j < 10; ++j) { + if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) { + max_value = this->blob_bottom_data_->data_at(i, j, 0, 0); + max_id = j; + } + } + ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)]; + if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) { + ++num_correct_labels; + ++correct_per_class[max_id]; + } + } + EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), + num_correct_labels / 100.0, 1e-4); + for (int i = 0; i < num_class; ++i) { + EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), + static_cast(correct_per_class[i]) / num_per_class[i], + 1e-4); + } +} + + +TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) { + LayerParameter layer_param; + Caffe::set_mode(Caffe::CPU); + const TypeParam kIgnoreLabelValue = -1; + 
layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue); + AccuracyLayer layer(layer_param); + // Manually set some labels to the ignore label value (-1). + this->blob_bottom_label_->mutable_cpu_data()[2] = kIgnoreLabelValue; + this->blob_bottom_label_->mutable_cpu_data()[5] = kIgnoreLabelValue; + this->blob_bottom_label_->mutable_cpu_data()[32] = kIgnoreLabelValue; + layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_); + + TypeParam max_value; + int max_id; + int num_correct_labels = 0; + const int num_class = this->blob_top_per_class_->num(); + vector correct_per_class(num_class, 0); + vector num_per_class(num_class, 0); + int count = 0; + for (int i = 0; i < 100; ++i) { + if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) { + continue; + } + ++count; + max_value = -FLT_MAX; + max_id = 0; + for (int j = 0; j < 10; ++j) { + if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) { + max_value = this->blob_bottom_data_->data_at(i, j, 0, 0); + max_id = j; + } + } + ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)]; + if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) { + ++num_correct_labels; + ++correct_per_class[max_id]; + } + } + EXPECT_EQ(count, 97); + EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), + num_correct_labels / TypeParam(count), 1e-4); + for (int i = 0; i < 10; ++i) { + EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), + TypeParam(correct_per_class[i]) / num_per_class[i], + 1e-4); + } +} + } // namespace caffe From 4bed0ac9bab45246183184d84ff1b742e60574c7 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 24 Aug 2015 19:44:18 -0700 Subject: [PATCH 053/223] TestConcatLayer: add gradient check for bottom[1] only (to verify propagate_down[0] == false correctness) --- src/caffe/test/test_concat_layer.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 662a50fa..088e0a41 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -173,4 +173,13 @@ TYPED_TEST(ConcatLayerTest, TestGradientChannels) { this->blob_top_vec_); } +TYPED_TEST(ConcatLayerTest, TestGradientChannelsBottomOneOnly) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradient(&layer, this->blob_bottom_vec_0_, + this->blob_top_vec_, 1); +} + } // namespace caffe From 6a7d4d6652018245f7bde1499d5208996912f3fb Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 24 Aug 2015 19:22:54 -0700 Subject: [PATCH 054/223] bugfix for ConcatLayer with propagate_down set if propagate_down[i] was set, offset_concat_axis was not correctly updated for subsequent bottoms i+1, i+2, ... 
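Concretely, the old backward pass hit a continue whenever propagate_down[i] was false, so offset_concat_axis never advanced for that bottom and every later bottom copied its gradient slice from the wrong offset in top_diff. A minimal standalone sketch of the corrected bookkeeping (illustrative only; plain std::vector stand-ins for the layer's Blob/caffe_copy machinery, and the per-concat striding is collapsed into a flat copy):

    #include <vector>

    // Illustrative only: each bottom takes its slice of the concatenated
    // gradient, and the offset advances for every bottom, propagated or not.
    void ConcatBackwardSketch(const std::vector<float>& top_diff,
                              const std::vector<int>& bottom_sizes,
                              const std::vector<bool>& propagate_down,
                              std::vector<std::vector<float> >* bottom_diffs) {
      int offset = 0;
      for (size_t i = 0; i < bottom_sizes.size(); ++i) {
        if (propagate_down[i]) {
          (*bottom_diffs)[i].assign(top_diff.begin() + offset,
                                    top_diff.begin() + offset + bottom_sizes[i]);
        }
        offset += bottom_sizes[i];  // must always run; the old 'continue' skipped it
      }
    }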
--- src/caffe/layers/concat_layer.cpp | 13 +++++++------ src/caffe/layers/concat_layer.cu | 17 +++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 1cac8fc3..95fba105 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -76,13 +76,14 @@ void ConcatLayer::Backward_cpu(const vector*>& top, int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); for (int i = 0; i < bottom.size(); ++i) { - if (!propagate_down[i]) { continue; } - Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); const int bottom_concat_axis = bottom[i]->shape(concat_axis_); - for (int n = 0; n < num_concats_; ++n) { - caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + - (n * top_concat_axis + offset_concat_axis) * concat_input_size_, - bottom_diff + n * bottom_concat_axis * concat_input_size_); + if (propagate_down[i]) { + Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); + } } offset_concat_axis += bottom_concat_axis; } diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 8f2e85d8..3c64c7ef 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -53,15 +53,16 @@ void ConcatLayer::Backward_gpu(const vector*>& top, const int top_concat_axis = top[0]->shape(concat_axis_); const bool kForward = false; for (int i = 0; i < bottom.size(); ++i) { - if (!propagate_down[i]) { continue; } - Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); const int bottom_concat_axis = bottom[i]->shape(concat_axis_); - const int bottom_concat_size = bottom_concat_axis * concat_input_size_; - const int nthreads = bottom_concat_size * num_concats_; - Concat // NOLINT_NEXT_LINE(whitespace/operators) - <<>>( - nthreads, top_diff, kForward, num_concats_, concat_input_size_, - top_concat_axis, bottom_concat_axis, offset_concat_axis, bottom_diff); + if (propagate_down[i]) { + Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); + const int bottom_concat_size = bottom_concat_axis * concat_input_size_; + const int nthreads = bottom_concat_size * num_concats_; + Concat // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + nthreads, top_diff, kForward, num_concats_, concat_input_size_, + top_concat_axis, bottom_concat_axis, offset_concat_axis, bottom_diff); + } offset_concat_axis += bottom_concat_axis; } } From 6aecb431319df1e6c97a6d15cda77ed62cb04148 Mon Sep 17 00:00:00 2001 From: philkr Date: Wed, 19 Aug 2015 15:11:30 -0700 Subject: [PATCH 055/223] Allow the python layer have weight/parameter blobs. --- python/caffe/_caffe.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 020a5bee..1638e9d7 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -189,7 +189,18 @@ bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) { // We need to explicitly return None to use bp::raw_function. 
return bp::object(); } - +bp::object BlobVec_add_blob(bp::tuple args, bp::dict kwargs) { + if (bp::len(kwargs) > 0) + throw std::runtime_error("BlobVec.add_blob takes no kwargs"); + typedef vector > > BlobVec; + BlobVec* self = bp::extract(args[0]); + vector shape(bp::len(args) - 1); + for (int i = 1; i < bp::len(args); ++i) + shape[i - 1] = bp::extract(args[i]); + self->push_back(shared_ptr >(new Blob(shape))); + // We need to explicitly return None to use bp::raw_function. + return bp::object(); +} BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -288,7 +299,8 @@ BOOST_PYTHON_MODULE(_caffe) { // vector wrappers for all the vector types we use bp::class_ > > >("BlobVec") - .def(bp::vector_indexing_suite > >, true>()); + .def(bp::vector_indexing_suite > >, true>()) + .def("add_blob", bp::raw_function(&BlobVec_add_blob)); bp::class_*> >("RawBlobVec") .def(bp::vector_indexing_suite*>, true>()); bp::class_ > > >("LayerVec") From 60c0d58baab7be6c770d81f4c5a7cc1fce0ef7af Mon Sep 17 00:00:00 2001 From: philkr Date: Tue, 25 Aug 2015 10:20:53 -0700 Subject: [PATCH 056/223] Python parameter test added --- python/caffe/_caffe.cpp | 8 +++- python/caffe/test/test_python_layer.py | 54 ++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 1638e9d7..cc49f60a 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -189,18 +189,22 @@ bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) { // We need to explicitly return None to use bp::raw_function. return bp::object(); } + bp::object BlobVec_add_blob(bp::tuple args, bp::dict kwargs) { - if (bp::len(kwargs) > 0) + if (bp::len(kwargs) > 0) { throw std::runtime_error("BlobVec.add_blob takes no kwargs"); + } typedef vector > > BlobVec; BlobVec* self = bp::extract(args[0]); vector shape(bp::len(args) - 1); - for (int i = 1; i < bp::len(args); ++i) + for (int i = 1; i < bp::len(args); ++i) { shape[i - 1] = bp::extract(args[i]); + } self->push_back(shared_ptr >(new Blob(shape))); // We need to explicitly return None to use bp::raw_function. 
return bp::object(); } + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index a1e11bc2..8ed86655 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -28,6 +28,21 @@ class ExceptionLayer(caffe.Layer): def setup(self, bottom, top): raise RuntimeError +class ParameterLayer(caffe.Layer): + """A layer that just multiplies by ten""" + + def setup(self, bottom, top): + self.blobs.add_blob(1) + self.blobs[0].data[0] = 0 + + def reshape(self, bottom, top): + top[0].reshape(*bottom[0].data.shape) + + def forward(self, bottom, top): + pass + + def backward(self, top, propagate_down, bottom): + self.blobs[0].diff[0] = 1 def python_net_file(): with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: @@ -52,6 +67,16 @@ def exception_net_file(): return f.name +def parameter_net_file(): + with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + input: 'data' input_shape { dim: 10 dim: 9 dim: 8 } + layer { type: 'Python' name: 'layer' bottom: 'data' top: 'top' + python_param { module: 'test_python_layer' layer: 'ParameterLayer' } } + """) + return f.name + + class TestPythonLayer(unittest.TestCase): def setUp(self): net_file = python_net_file() @@ -84,3 +109,32 @@ def test_exception(self): net_file = exception_net_file() self.assertRaises(RuntimeError, caffe.Net, net_file, caffe.TEST) os.remove(net_file) + + def test_parameter(self): + net_file = parameter_net_file() + net = caffe.Net(net_file, caffe.TRAIN) + # Test forward and backward + net.forward() + net.backward() + layer = net.layers[list(net._layer_names).index('layer')] + self.assertEqual(layer.blobs[0].data[0], 0) + self.assertEqual(layer.blobs[0].diff[0], 1) + layer.blobs[0].data[0] += layer.blobs[0].diff[0] + self.assertEqual(layer.blobs[0].data[0], 1) + + # Test saving and loading + h, caffemodel_file = tempfile.mkstemp() + net.save(caffemodel_file) + layer.blobs[0].data[0] = -1 + self.assertEqual(layer.blobs[0].data[0], -1) + net.copy_from(caffemodel_file) + self.assertEqual(layer.blobs[0].data[0], 1) + os.remove(caffemodel_file) + + # Test weight sharing + net2 = caffe.Net(net_file, caffe.TRAIN) + net2.share_with(net) + layer = net.layers[list(net2._layer_names).index('layer')] + self.assertEqual(layer.blobs[0].data[0], 1) + + os.remove(net_file) From 251e67ab3141bc8ac2adf97ea4e961e5664ae008 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 31 Dec 2014 14:07:00 -0800 Subject: [PATCH 057/223] Add TileLayer --- include/caffe/common_layers.hpp | 29 ++++++ src/caffe/layers/tile_layer.cpp | 62 +++++++++++ src/caffe/layers/tile_layer.cu | 42 ++++++++ src/caffe/proto/caffe.proto | 13 ++- src/caffe/test/test_tile_layer.cpp | 162 +++++++++++++++++++++++++++++ 5 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/tile_layer.cpp create mode 100644 src/caffe/layers/tile_layer.cu create mode 100644 src/caffe/test/test_tile_layer.cpp diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 691e755f..8e64b3e5 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -644,6 +644,35 @@ class SliceLayer : public Layer { vector slice_point_; }; +/** + * @brief Copy a Blob along specified dimensions. 
+ */ +template +class TileLayer : public Layer { + public: + explicit TileLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Tile"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + unsigned int axis_, tiles_, outer_dim_, inner_dim_; +}; + } // namespace caffe #endif // CAFFE_COMMON_LAYERS_HPP_ diff --git a/src/caffe/layers/tile_layer.cpp b/src/caffe/layers/tile_layer.cpp new file mode 100644 index 00000000..f55008cc --- /dev/null +++ b/src/caffe/layers/tile_layer.cpp @@ -0,0 +1,62 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void TileLayer::Reshape( + const vector*>& bottom, const vector*>& top) { + const TileParameter& tile_param = this->layer_param_.tile_param(); + axis_ = bottom[0]->CanonicalAxisIndex(tile_param.axis()); + CHECK(tile_param.has_tiles()) << "Number of tiles must be specified"; + tiles_ = tile_param.tiles(); + CHECK_GT(tiles_, 0) << "Number of tiles must be positive."; + vector top_shape = bottom[0]->shape(); + top_shape[axis_] = bottom[0]->shape(axis_) * tiles_; + top[0]->Reshape(top_shape); + outer_dim_ = bottom[0]->count(0, axis_); + inner_dim_ = bottom[0]->count(axis_); +} + +template +void TileLayer::Forward_cpu( + const vector*>& bottom, const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + for (int i = 0; i < outer_dim_; ++i) { + for (int t = 0; t < tiles_; ++t) { + caffe_copy(inner_dim_, bottom_data, top_data); + top_data += inner_dim_; + } + bottom_data += inner_dim_; + } +} + +template +void TileLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (!propagate_down[0]) { return; } + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + for (int i = 0; i < outer_dim_; ++i) { + caffe_copy(inner_dim_, top_diff, bottom_diff); + top_diff += inner_dim_; + for (int t = 1; t < tiles_; ++t) { + caffe_axpy(inner_dim_, Dtype(1), top_diff, bottom_diff); + top_diff += inner_dim_; + } + bottom_diff += inner_dim_; + } +} + +#ifdef CPU_ONLY +STUB_GPU(TileLayer); +#endif + +INSTANTIATE_CLASS(TileLayer); +REGISTER_LAYER_CLASS(Tile); + +} // namespace caffe diff --git a/src/caffe/layers/tile_layer.cu b/src/caffe/layers/tile_layer.cu new file mode 100644 index 00000000..3af8e2eb --- /dev/null +++ b/src/caffe/layers/tile_layer.cu @@ -0,0 +1,42 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void TileLayer::Forward_gpu( + const vector*>& bottom, const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + for (int i = 0; i < outer_dim_; ++i) { + for (int t = 0; t < tiles_; ++t) { + caffe_copy(inner_dim_, bottom_data, top_data); + top_data += inner_dim_; + } + 
bottom_data += inner_dim_; + } +} + +template +void TileLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (!propagate_down[0]) { return; } + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + for (int i = 0; i < outer_dim_; ++i) { + caffe_copy(inner_dim_, top_diff, bottom_diff); + top_diff += inner_dim_; + for (int t = 1; t < tiles_; ++t) { + caffe_gpu_axpy(inner_dim_, Dtype(1), top_diff, bottom_diff); + top_diff += inner_dim_; + } + bottom_diff += inner_dim_; + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(TileLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 35264610..aa299f86 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -301,7 +301,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 138 (last added: embed_param) +// LayerParameter next available layer-specific ID: 139 (last added: tile_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -383,6 +383,7 @@ message LayerParameter { optional SliceParameter slice_param = 126; optional TanHParameter tanh_param = 127; optional ThresholdParameter threshold_param = 128; + optional TileParameter tile_param = 138; optional WindowDataParameter window_data_param = 129; } @@ -919,6 +920,16 @@ message TanHParameter { optional Engine engine = 1 [default = DEFAULT]; } +// Message that stores parameters used by TileLayer +message TileParameter { + // The index of the axis to tile. + optional int32 axis = 1 [default = 1]; + + // The number of copies (tiles) of the blob to output. 
+ optional int32 tiles = 2; +} + +// Message that stores parameters used by ThresholdLayer message ThresholdParameter { optional float threshold = 1 [default = 0]; // Strictly positive values } diff --git a/src/caffe/test/test_tile_layer.cpp b/src/caffe/test/test_tile_layer.cpp new file mode 100644 index 00000000..540aac3c --- /dev/null +++ b/src/caffe/test/test_tile_layer.cpp @@ -0,0 +1,162 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class TileLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + TileLayerTest() + : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_top_(new Blob()) {} + virtual void SetUp() { + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + FillerParameter filler_param; + filler_param.set_mean(0.0); + filler_param.set_std(1.0); + GaussianFiller filler(filler_param); + filler.Fill(blob_bottom_); + } + + virtual ~TileLayerTest() { + delete blob_bottom_; + delete blob_top_; + } + + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(TileLayerTest, TestDtypesAndDevices); + +TYPED_TEST(TileLayerTest, TestTrivialSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kNumTiles = 1; + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + for (int i = 0; i < this->blob_bottom_->num_axes(); ++i) { + layer_param.mutable_tile_param()->set_axis(i); + TileLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_top_->num_axes(), this->blob_bottom_->num_axes()); + for (int j = 0; j < this->blob_bottom_->num_axes(); ++j) { + EXPECT_EQ(this->blob_top_->shape(j), this->blob_bottom_->shape(j)); + } + } +} + +TYPED_TEST(TileLayerTest, TestSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kNumTiles = 3; + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + for (int i = 0; i < this->blob_bottom_->num_axes(); ++i) { + layer_param.mutable_tile_param()->set_axis(i); + TileLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_top_->num_axes(), this->blob_bottom_->num_axes()); + for (int j = 0; j < this->blob_bottom_->num_axes(); ++j) { + const int top_dim = + ((i == j) ? 
kNumTiles : 1) * this->blob_bottom_->shape(j); + EXPECT_EQ(top_dim, this->blob_top_->shape(j)); + } + } +} + +TYPED_TEST(TileLayerTest, TestForwardNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kTileAxis = 0; + const int kNumTiles = 3; + layer_param.mutable_tile_param()->set_axis(kTileAxis); + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + TileLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_top_->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + const int bottom_n = n % this->blob_bottom_->num(); + EXPECT_EQ(this->blob_bottom_->data_at(bottom_n, c, h, w), + this->blob_top_->data_at(n, c, h, w)); + } + } + } + } +} + +TYPED_TEST(TileLayerTest, TestForwardChannels) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kNumTiles = 3; + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + TileLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_top_->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + const int bottom_c = c % this->blob_bottom_->channels(); + EXPECT_EQ(this->blob_bottom_->data_at(n, bottom_c, h, w), + this->blob_top_->data_at(n, c, h, w)); + } + } + } + } +} + +TYPED_TEST(TileLayerTest, TestTrivialGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kNumTiles = 1; + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + TileLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(TileLayerTest, TestGradientNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kTileAxis = 0; + const int kNumTiles = 3; + layer_param.mutable_tile_param()->set_axis(kTileAxis); + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + TileLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(TileLayerTest, TestGradientChannels) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + const int kTileAxis = 1; + const int kNumTiles = 3; + layer_param.mutable_tile_param()->set_axis(kTileAxis); + layer_param.mutable_tile_param()->set_tiles(kNumTiles); + TileLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe From cbff2255bc8470299e15cc155ae7957a3acdd688 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 18 Aug 2015 18:15:20 -0700 Subject: [PATCH 058/223] TileLayer: add CUDA kernels --- src/caffe/layers/tile_layer.cu | 53 +++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/src/caffe/layers/tile_layer.cu b/src/caffe/layers/tile_layer.cu index 3af8e2eb..7fd3bc47 100644 --- a/src/caffe/layers/tile_layer.cu +++ b/src/caffe/layers/tile_layer.cu @@ -6,17 +6,45 @@ namespace caffe { 
+template +__global__ void Tile(const int nthreads, const Dtype* bottom_data, + const int tile_size, const int num_tiles, const int bottom_tile_axis, + Dtype* top_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int d = index % tile_size; + const int b = (index / tile_size / num_tiles) % bottom_tile_axis; + const int n = index / tile_size / num_tiles / bottom_tile_axis; + const int bottom_index = (n * bottom_tile_axis + b) * tile_size + d; + top_data[index] = bottom_data[bottom_index]; + } +} + template void TileLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); - for (int i = 0; i < outer_dim_; ++i) { - for (int t = 0; t < tiles_; ++t) { - caffe_copy(inner_dim_, bottom_data, top_data); - top_data += inner_dim_; + const int bottom_tile_axis = bottom[0]->shape(axis_); + const int nthreads = top[0]->count(); + Tile // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + nthreads, bottom_data, inner_dim_, tiles_, bottom_tile_axis, top_data); +} + +template +__global__ void TileBackward(const int nthreads, const Dtype* top_diff, + const int tile_size, const int num_tiles, const int bottom_tile_axis, + Dtype* bottom_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int d = index % tile_size; + const int b = (index / tile_size) % bottom_tile_axis; + const int n = index / tile_size / bottom_tile_axis; + bottom_diff[index] = 0; + int top_index = (n * num_tiles * bottom_tile_axis + b) * tile_size + d; + for (int t = 0; t < num_tiles; ++t) { + bottom_diff[index] += top_diff[top_index]; + top_index += bottom_tile_axis * tile_size; } - bottom_data += inner_dim_; } } @@ -26,15 +54,12 @@ void TileLayer::Backward_gpu(const vector*>& top, if (!propagate_down[0]) { return; } const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - for (int i = 0; i < outer_dim_; ++i) { - caffe_copy(inner_dim_, top_diff, bottom_diff); - top_diff += inner_dim_; - for (int t = 1; t < tiles_; ++t) { - caffe_gpu_axpy(inner_dim_, Dtype(1), top_diff, bottom_diff); - top_diff += inner_dim_; - } - bottom_diff += inner_dim_; - } + const int bottom_tile_axis = bottom[0]->shape(axis_); + const int tile_size = inner_dim_ / bottom_tile_axis; + const int nthreads = bottom[0]->count(); + TileBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + nthreads, top_diff, tile_size, tiles_, bottom_tile_axis, bottom_diff); } INSTANTIATE_LAYER_GPU_FUNCS(TileLayer); From 1f3f9529df6285a5be5f8e72bd1922a6a0cec4d8 Mon Sep 17 00:00:00 2001 From: J Yegerlehner Date: Sun, 23 Aug 2015 18:57:16 -0500 Subject: [PATCH 059/223] MVNLayer fixes. Fix the MVNLayer tests so they actually test what they claim. MVNLayer fixes: sum_multiplier_ sized correctly; backward gradient calculation. Gradient calculation per analysis of seanbell, found here: https://github.com/BVLC/caffe/issues/1938 Fixes according to review comments. 
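For the mean-only path (normalize_variance: false) the forward pass computes y_i = x_i - (1/D) * sum_j x_j over each row of D elements, so the backward pass is dL/dx_i = dL/dy_i - (1/D) * sum_j dL/dy_j; the previous code simply copied the top gradient and dropped the subtracted-mean term. A minimal per-row sketch of the same computation that the gemv/gemm sequence below performs (illustrative only; top_diff, bottom_diff, and D are stand-in names, and the real layer operates on Blobs via BLAS):

    // Illustrative only: per-row gradient of y = x - mean(x),
    //   dL/dx[i] = dL/dy[i] - mean(dL/dy),
    // computed here with plain loops instead of caffe_cpu_gemv/gemm.
    void MvnMeanOnlyBackwardSketch(const float* top_diff, int D,
                                   float* bottom_diff) {
      float mean_diff = 0.f;
      for (int j = 0; j < D; ++j) { mean_diff += top_diff[j]; }
      mean_diff /= D;
      for (int i = 0; i < D; ++i) { bottom_diff[i] = top_diff[i] - mean_diff; }
    }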
--- src/caffe/layers/mvn_layer.cpp | 15 ++++++++++++--- src/caffe/layers/mvn_layer.cu | 7 ++++++- src/caffe/test/test_mvn_layer.cpp | 13 +++++++++---- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index 3e79bddc..325691b1 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -18,8 +18,12 @@ void MVNLayer::Reshape(const vector*>& bottom, 1, 1); temp_.Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()); - sum_multiplier_.Reshape(1, 1, - bottom[0]->height(), bottom[0]->width()); + if ( this->layer_param_.mvn_param().across_channels() ) { + sum_multiplier_.Reshape(1, bottom[0]->channels(), bottom[0]->height(), + bottom[0]->width()); + } else { + sum_multiplier_.Reshape(1, 1, bottom[0]->height(), bottom[0]->width()); + } Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); eps_ = this->layer_param_.mvn_param().eps(); @@ -130,7 +134,12 @@ void MVNLayer::Backward_cpu(const vector*>& top, caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff); } else { - caffe_copy(temp_.count(), top_diff, bottom_diff); + caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, top_diff, + sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., + temp_.mutable_cpu_data()); + caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff); } } diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index 3888a0c7..d86a2e73 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -113,7 +113,12 @@ void MVNLayer::Backward_gpu(const vector*>& top, caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff); } else { - caffe_copy(temp_.count(), top_diff, bottom_diff); + caffe_gpu_gemv(CblasNoTrans, num, dim, 1. 
/ dim, top_diff, + sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + mean_.gpu_data(), sum_multiplier_.gpu_data(), 0., + temp_.mutable_gpu_data()); + caffe_gpu_add(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff); } } diff --git a/src/caffe/test/test_mvn_layer.cpp b/src/caffe/test/test_mvn_layer.cpp index 933b4326..be23d86e 100644 --- a/src/caffe/test/test_mvn_layer.cpp +++ b/src/caffe/test/test_mvn_layer.cpp @@ -6,6 +6,7 @@ #include "caffe/common.hpp" #include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "caffe/test/test_caffe_main.hpp" @@ -73,7 +74,8 @@ TYPED_TEST(MVNLayerTest, TestForward) { TYPED_TEST(MVNLayerTest, TestForwardMeanOnly) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.ParseFromString("mvn_param{normalize_variance: false}"); + CHECK(google::protobuf::TextFormat::ParseFromString( + "mvn_param{normalize_variance: false}", &layer_param)); MVNLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -105,7 +107,8 @@ TYPED_TEST(MVNLayerTest, TestForwardMeanOnly) { TYPED_TEST(MVNLayerTest, TestForwardAcrossChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.ParseFromString("mvn_param{across_channels: true}"); + CHECK(google::protobuf::TextFormat::ParseFromString( + "mvn_param{across_channels: true}", &layer_param)); MVNLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -149,7 +152,8 @@ TYPED_TEST(MVNLayerTest, TestGradient) { TYPED_TEST(MVNLayerTest, TestGradientMeanOnly) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.ParseFromString("mvn_param{normalize_variance: false}"); + CHECK(google::protobuf::TextFormat::ParseFromString( + "mvn_param{normalize_variance: false}", &layer_param)); MVNLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, @@ -159,7 +163,8 @@ TYPED_TEST(MVNLayerTest, TestGradientMeanOnly) { TYPED_TEST(MVNLayerTest, TestGradientAcrossChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.ParseFromString("mvn_param{across_channels: true}"); + CHECK(google::protobuf::TextFormat::ParseFromString( + "mvn_param{across_channels: true}", &layer_param)); MVNLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, From c548a7972b554b3ababaa0dc52db15a5f5a28be8 Mon Sep 17 00:00:00 2001 From: Jonas Maaskola Date: Sun, 2 Aug 2015 23:47:12 +0200 Subject: [PATCH 060/223] Draw Deconvolution layers like Convolution layers --- python/caffe/draw.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 324929de..a002b60b 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -40,7 +40,7 @@ def get_edge_label(layer): if layer.type == 'Data': edge_label = 'Batch ' + str(layer.data_param.batch_size) - elif layer.type == 'Convolution': + elif layer.type == 'Convolution' or layer.type == 'Deconvolution': edge_label = str(layer.convolution_param.num_output) elif layer.type == 'InnerProduct': edge_label = str(layer.inner_product_param.num_output) @@ -74,7 +74,7 @@ def 
get_layer_label(layer, rankdir): # horizontal space is not; separate words with newlines separator = '\\n' - if layer.type == 'Convolution': + if layer.type == 'Convolution' or layer.type == 'Deconvolution': # Outer double quotes needed or else colon characters don't parse # properly node_label = '"%s%s(%s)%skernel size: %d%sstride: %d%spad: %d"' %\ @@ -109,7 +109,7 @@ def choose_color_by_layertype(layertype): """Define colors for nodes based on the layer type. """ color = '#6495ED' # Default - if layertype == 'Convolution': + if layertype == 'Convolution' or layertype == 'Deconvolution': color = '#FF5050' elif layertype == 'Pooling': color = '#FF9900' From 292dbc5866c0b0b2ad56278591dce8b519166b20 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Mon, 24 Aug 2015 14:46:23 -0700 Subject: [PATCH 061/223] Fix previous mistake on unimplemented top and address pyramid_height_==1 in SPPLayer also, do nothing in SPPLayer Reshape if already reshaped once and bottom size unchanged --- include/caffe/vision_layers.hpp | 10 +++------ src/caffe/layers/spp_layer.cpp | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index a6bd86a9..211e3d90 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -471,13 +471,7 @@ class SPPLayer : public Layer { virtual inline const char* type() const { return "SPP"; } virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - // MAX POOL layers can output an extra top blob for the mask; - // others can only output the pooled inputs. - virtual inline int MaxTopBlobs() const { - return (this->layer_param_.pooling_param().pool() == - PoolingParameter_PoolMethod_MAX) ? 
2 : 1; - } + virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, @@ -491,9 +485,11 @@ class SPPLayer : public Layer { int pyramid_height_; int bottom_h_, bottom_w_; + int num_; int channels_; int kernel_h_, kernel_w_; int pad_h_, pad_w_; + bool reshaped_first_time_; /// the internal Split layer that feeds the pooling layers shared_ptr > split_layer_; diff --git a/src/caffe/layers/spp_layer.cpp b/src/caffe/layers/spp_layer.cpp index 795dd716..d7622910 100644 --- a/src/caffe/layers/spp_layer.cpp +++ b/src/caffe/layers/spp_layer.cpp @@ -66,8 +66,11 @@ void SPPLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { SPPParameter spp_param = this->layer_param_.spp_param(); + num_ = bottom[0]->num(); + channels_ = bottom[0]->channels(); bottom_h_ = bottom[0]->height(); bottom_w_ = bottom[0]->width(); + reshaped_first_time_ = false; CHECK_GT(bottom_h_, 0) << "Input dimensions cannot be zero."; CHECK_GT(bottom_w_, 0) << "Input dimensions cannot be zero."; @@ -82,6 +85,15 @@ void SPPLayer::LayerSetUp(const vector*>& bottom, flatten_outputs_.clear(); concat_bottom_vec_.clear(); + if (pyramid_height_ == 1) { + // pooling layer setup + LayerParameter pooling_param = GetPoolingParam(0, bottom_h_, bottom_w_, + spp_param); + pooling_layers_.push_back(shared_ptr > ( + new PoolingLayer(pooling_param))); + pooling_layers_[0]->SetUp(bottom, top); + return; + } // split layer output holders setup for (int i = 0; i < pyramid_height_; i++) { split_top_vec_.push_back(new Blob()); @@ -135,10 +147,26 @@ void SPPLayer::Reshape(const vector*>& bottom, const vector*>& top) { CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " << "corresponding to (num, channels, height, width)"; + // Do nothing if bottom shape is unchanged since last Reshape + if (num_ == bottom[0]->num() && channels_ == bottom[0]->channels() && + bottom_h_ == bottom[0]->height() && bottom_w_ == bottom[0]->width() && + reshaped_first_time_) { + return; + } + num_ = bottom[0]->num(); channels_ = bottom[0]->channels(); bottom_h_ = bottom[0]->height(); bottom_w_ = bottom[0]->width(); + reshaped_first_time_ = true; SPPParameter spp_param = this->layer_param_.spp_param(); + if (pyramid_height_ == 1) { + LayerParameter pooling_param = GetPoolingParam(0, bottom_h_, bottom_w_, + spp_param); + pooling_layers_[0].reset(new PoolingLayer(pooling_param)); + pooling_layers_[0]->SetUp(bottom, top); + pooling_layers_[0]->Reshape(bottom, top); + return; + } split_layer_->Reshape(bottom, split_top_vec_); for (int i = 0; i < pyramid_height_; i++) { LayerParameter pooling_param = GetPoolingParam( @@ -159,6 +187,10 @@ void SPPLayer::Reshape(const vector*>& bottom, template void SPPLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { + if (pyramid_height_ == 1) { + pooling_layers_[0]->Forward(bottom, top); + return; + } split_layer_->Forward(bottom, split_top_vec_); for (int i = 0; i < pyramid_height_; i++) { pooling_layers_[i]->Forward( @@ -175,6 +207,10 @@ void SPPLayer::Backward_cpu(const vector*>& top, if (!propagate_down[0]) { return; } + if (pyramid_height_ == 1) { + pooling_layers_[0]->Backward(top, propagate_down, bottom); + return; + } vector concat_propagate_down(pyramid_height_, true); concat_layer_->Backward(top, concat_propagate_down, concat_bottom_vec_); for (int i = 0; i < pyramid_height_; i++) { From 4f64b9ee3ed6c1267c4252cf79b2ccf0d042f0b2 Mon Sep 17 00:00:00 2001 From: Matt Dawkins Date: Thu, 27 Aug 2015 10:51:36 -0400 Subject: [PATCH 062/223] Add extra 
openblas search path --- cmake/Modules/FindOpenBLAS.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake index b8434927..a6512ae7 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake @@ -2,8 +2,10 @@ SET(Open_BLAS_INCLUDE_SEARCH_PATHS /usr/include + /usr/include/openblas /usr/include/openblas-base /usr/local/include + /usr/local/include/openblas /usr/local/include/openblas-base /opt/OpenBLAS/include $ENV{OpenBLAS_HOME} From 4d7fe4de7e7bddfe107f4a37a7ec85c6f6178469 Mon Sep 17 00:00:00 2001 From: J Yegerlehner Date: Thu, 27 Aug 2015 10:47:14 -0500 Subject: [PATCH 063/223] Fix EmbedLayer compiler warning for unused variable. --- src/caffe/layers/embed_layer.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu index 672fb9c6..62a4db81 100644 --- a/src/caffe/layers/embed_layer.cu +++ b/src/caffe/layers/embed_layer.cu @@ -64,7 +64,6 @@ void EmbedLayer::Backward_gpu(const vector*>& top, CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; if (this->param_propagate_down_[0]) { const int top_count = top[0]->count(); - const int count = this->blobs_[0]->count(); const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); From 846f2c3cce8b937637e0b46a7f62be068b835ade Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Fri, 28 Aug 2015 21:27:11 -0700 Subject: [PATCH 064/223] fix GPU data race Previously, the prefetch GPU -> top GPU and prefetch CPU -> prefetch GPU copies were launched concurrently in separate streams, allowing the next batch to be copied in before the current one is read. This patch explicitly synchronizes the prefetch -> top copy wrt the host, preventing the CPU -> GPU from being launched until its completion. --- src/caffe/layers/base_data_layer.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/base_data_layer.cu b/src/caffe/layers/base_data_layer.cu index 56439bc5..ff6e412a 100644 --- a/src/caffe/layers/base_data_layer.cu +++ b/src/caffe/layers/base_data_layer.cu @@ -20,7 +20,9 @@ void BasePrefetchingDataLayer::Forward_gpu( caffe_copy(batch->label_.count(), batch->label_.gpu_data(), top[1]->mutable_gpu_data()); } - + // Ensure the copy is synchronous wrt the host, so that the next batch isn't + // copied in meanwhile. + CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); prefetch_free_.push(batch); } From a6751723234926bdd03b6167ea6414da109854a3 Mon Sep 17 00:00:00 2001 From: philkr Date: Tue, 1 Sep 2015 13:11:26 -0700 Subject: [PATCH 065/223] Compute backward for negative lr_mult --- src/caffe/net.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index f1fc63ab..89d14013 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -182,7 +182,7 @@ void Net::Init(const NetParameter& in_param) { for (int param_id = 0; param_id < num_param_blobs; ++param_id) { const ParamSpec* param_spec = (param_id < param_size) ? 
&layer_param.param(param_id) : &default_param_spec; - const bool param_need_backward = param_spec->lr_mult() > 0; + const bool param_need_backward = param_spec->lr_mult() != 0; need_backward |= param_need_backward; layers_[layer_id]->set_param_propagate_down(param_id, param_need_backward); From b04000e4204753803f4cdead66af8a830bc7c4ca Mon Sep 17 00:00:00 2001 From: Darren Garvey Date: Tue, 1 Sep 2015 15:42:26 +0100 Subject: [PATCH 066/223] Cleanup: Fixup capitalisation of Caffe_POSTFIX. Replaces CAffe_POSTFIX -> Caffe_POSTFIX. --- cmake/Misc.cmake | 4 ++-- examples/CMakeLists.txt | 2 +- matlab/CMakeLists.txt | 4 ++-- python/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/Misc.cmake b/cmake/Misc.cmake index 7676754f..9dd2609b 100644 --- a/cmake/Misc.cmake +++ b/cmake/Misc.cmake @@ -46,7 +46,7 @@ endif() # ---[ Set debug postfix set(Caffe_DEBUG_POSTFIX "-d") -set(CAffe_POSTFIX "") +set(Caffe_POSTFIX "") if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(CAffe_POSTFIX ${Caffe_DEBUG_POSTFIX}) + set(Caffe_POSTFIX ${Caffe_DEBUG_POSTFIX}) endif() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f29fc7e5..663d7360 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -24,7 +24,7 @@ foreach(source_file ${examples_srcs}) if(UNIX OR APPLE) # Funny command to make tutorials work # TODO: remove in future as soon as naming is standartaized everywhere - set(__outname ${PROJECT_BINARY_DIR}/examples/${folder}/${name}${CAffe_POSTFIX}) + set(__outname ${PROJECT_BINARY_DIR}/examples/${folder}/${name}${Caffe_POSTFIX}) add_custom_command(TARGET ${name} POST_BUILD COMMAND ln -sf "${__outname}" "${__outname}.bin") endif() diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 4b0d549f..f420df8d 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -43,7 +43,7 @@ string(REPLACE ";" ";-L" link_folders "-L${folders}") string(REPLACE ";" ":" rpath_folders "${folders}") if(build_using MATCHES "Matlab") - set(libflags -lcaffe${CAffe_POSTFIX} ${libflags}) # Matlab R2014a complans for -Wl,--whole-archive + set(libflags -lcaffe${Caffe_POSTFIX} ${libflags}) # Matlab R2014a complans for -Wl,--whole-archive caffe_fetch_and_set_proper_mexext(Matlab_caffe_mex) add_custom_command(OUTPUT ${Matlab_caffe_mex} COMMAND ${Matlab_mex} @@ -56,7 +56,7 @@ elseif(build_using MATCHES "Octave") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(libflags -Wl,-force_load,$ ${libflags}) elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(libflags -Wl,--whole-archive -lcaffe${CAffe_POSTFIX} -Wl,--no-whole-archive ${libflags}) + set(libflags -Wl,--whole-archive -lcaffe${Caffe_POSTFIX} -Wl,--no-whole-archive ${libflags}) endif() add_custom_command(OUTPUT ${Matlab_caffe_mex} COMMAND ${Octave_compiler} diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index df0401da..0e2bc7e6 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -18,7 +18,7 @@ if(UNIX OR APPLE) COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJECT_SOURCE_DIR}/python/caffe/proto COMMAND touch ${PROJECT_SOURCE_DIR}/python/caffe/proto/__init__.py COMMAND cp ${proto_gen_folder}/*.py ${PROJECT_SOURCE_DIR}/python/caffe/proto/ - COMMENT "Creating symlink ${__linkname} -> ${PROJECT_BINARY_DIR}/lib/_caffe${CAffe_POSTFIX}.so") + COMMENT "Creating symlink ${__linkname} -> ${PROJECT_BINARY_DIR}/lib/_caffe${Caffe_POSTFIX}.so") endif() # ---[ Install From e8f96f58aa6b64726f62f7304964d1c0a82b5c38 Mon Sep 17 00:00:00 2001 From: Darren Garvey Date: Mon, 10 Aug 2015 02:16:20 +0100 
Subject: [PATCH 067/223] Fix memory leak in convert_mnist_siamese_data. This fixes a memory leak by using delete[] rather than plain delete. --- examples/siamese/convert_mnist_siamese_data.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/siamese/convert_mnist_siamese_data.cpp b/examples/siamese/convert_mnist_siamese_data.cpp index 71c56a0a..8008b443 100644 --- a/examples/siamese/convert_mnist_siamese_data.cpp +++ b/examples/siamese/convert_mnist_siamese_data.cpp @@ -102,7 +102,7 @@ void convert_dataset(const char* image_filename, const char* label_filename, } delete db; - delete pixels; + delete [] pixels; } int main(int argc, char** argv) { From 583194a00811a312e0bdfa151a8ad6a222c2dd4d Mon Sep 17 00:00:00 2001 From: Darren Garvey Date: Tue, 1 Sep 2015 20:09:25 +0100 Subject: [PATCH 068/223] cifar10: Fix examples by setting snapshot_format. Commit 4227828a set the default binary format from HDF5 to BINARYPROTO to fix #2885. This broke the cifar10 examples which relied on this default. This commit specifies the snapshot_format explicitly since the rest of the example relies on this being HDF5. --- examples/cifar10/cifar10_full_solver.prototxt | 1 + examples/cifar10/cifar10_full_solver_lr1.prototxt | 1 + examples/cifar10/cifar10_full_solver_lr2.prototxt | 1 + examples/cifar10/cifar10_quick_solver.prototxt | 1 + examples/cifar10/cifar10_quick_solver_lr1.prototxt | 1 + 5 files changed, 5 insertions(+) diff --git a/examples/cifar10/cifar10_full_solver.prototxt b/examples/cifar10/cifar10_full_solver.prototxt index f30b3986..882daa2d 100644 --- a/examples/cifar10/cifar10_full_solver.prototxt +++ b/examples/cifar10/cifar10_full_solver.prototxt @@ -21,6 +21,7 @@ display: 200 max_iter: 60000 # snapshot intermediate results snapshot: 10000 +snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_full" # solver mode: CPU or GPU solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_solver_lr1.prototxt b/examples/cifar10/cifar10_full_solver_lr1.prototxt index 59bc5721..55f4be44 100644 --- a/examples/cifar10/cifar10_full_solver_lr1.prototxt +++ b/examples/cifar10/cifar10_full_solver_lr1.prototxt @@ -21,6 +21,7 @@ display: 200 max_iter: 65000 # snapshot intermediate results snapshot: 5000 +snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_full" # solver mode: CPU or GPU solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_solver_lr2.prototxt b/examples/cifar10/cifar10_full_solver_lr2.prototxt index d4ed5d8e..7c3d2da3 100644 --- a/examples/cifar10/cifar10_full_solver_lr2.prototxt +++ b/examples/cifar10/cifar10_full_solver_lr2.prototxt @@ -21,6 +21,7 @@ display: 200 max_iter: 70000 # snapshot intermediate results snapshot: 5000 +snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_full" # solver mode: CPU or GPU solver_mode: GPU diff --git a/examples/cifar10/cifar10_quick_solver.prototxt b/examples/cifar10/cifar10_quick_solver.prototxt index 14b4401b..5de276f7 100644 --- a/examples/cifar10/cifar10_quick_solver.prototxt +++ b/examples/cifar10/cifar10_quick_solver.prototxt @@ -20,6 +20,7 @@ display: 100 max_iter: 4000 # snapshot intermediate results snapshot: 4000 +snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_quick" # solver mode: CPU or GPU solver_mode: GPU diff --git a/examples/cifar10/cifar10_quick_solver_lr1.prototxt b/examples/cifar10/cifar10_quick_solver_lr1.prototxt index d3af70c0..f8f1efd5 100644 --- a/examples/cifar10/cifar10_quick_solver_lr1.prototxt +++ 
b/examples/cifar10/cifar10_quick_solver_lr1.prototxt @@ -20,6 +20,7 @@ display: 100 max_iter: 5000 # snapshot intermediate results snapshot: 5000 +snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_quick" # solver mode: CPU or GPU solver_mode: GPU From 6f5812c4547dd912dd0569330ebdd44a5afd278e Mon Sep 17 00:00:00 2001 From: Darren Garvey Date: Wed, 2 Sep 2015 00:54:06 +0100 Subject: [PATCH 069/223] Fix up documentation errors. Fix some doxygen warnings about an undocumented argument in Blob and incorrect documentation for SoftmaxWithLossLayer::Forward_cpu(). --- include/caffe/blob.hpp | 2 +- include/caffe/loss_layers.hpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 9b813e73..dda7b1f8 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -109,7 +109,7 @@ class Blob { * @brief Returns the 'canonical' version of a (usually) user-specified axis, * allowing for negative indexing (e.g., -1 for the last axis). * - * @param index the axis index. + * @param axis_index the axis index. * If 0 <= index < num_axes(), return index. * If -num_axes <= index <= -1, return (num_axes() - (-index)), * e.g., the last axis index (num_axes() - 1) if index == -1, diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 02687a94..8d41af34 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -712,7 +712,6 @@ class SoftmaxWithLossLayer : public LossLayer { virtual inline int MaxTopBlobs() const { return 2; } protected: - /// @copydoc SoftmaxWithLossLayer virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, From 6ca0ab66077c578dff14aa775858035b2a69fed6 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Tue, 1 Sep 2015 17:20:37 -0700 Subject: [PATCH 070/223] Show output from convert_imageset tool --- tools/convert_imageset.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index 816a91f9..aad1f1fe 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -44,6 +44,8 @@ DEFINE_string(encode_type, "", int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); + // Print output to stderr (while still logging) + FLAGS_alsologtostderr = 1; #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; @@ -140,13 +142,13 @@ int main(int argc, char** argv) { // Commit db txn->Commit(); txn.reset(db->NewTransaction()); - LOG(ERROR) << "Processed " << count << " files."; + LOG(INFO) << "Processed " << count << " files."; } } // write the last batch if (count % 1000 != 0) { txn->Commit(); - LOG(ERROR) << "Processed " << count << " files."; + LOG(INFO) << "Processed " << count << " files."; } return 0; } From d1a0345eaaff501d11a1705986cac8f124dd2545 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 7 Oct 2014 11:55:54 -0700 Subject: [PATCH 071/223] SliceLayer: allow trivial operation with single top Blob --- include/caffe/common_layers.hpp | 2 +- src/caffe/layers/slice_layer.cpp | 7 ++++++- src/caffe/layers/slice_layer.cu | 3 ++- src/caffe/test/test_slice_layer.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 8e64b3e5..6d4a9e3c 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -625,7 +625,7 @@ class SliceLayer : public Layer { virtual inline const 
char* type() const { return "Slice"; } virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 2; } + virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector*>& bottom, diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index e4418c9c..0a059ae8 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -67,11 +67,16 @@ void SliceLayer::Reshape(const vector*>& bottom, } } CHECK_EQ(count, bottom[0]->count()); + if (top.size() == 1) { + top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); + } } template void SliceLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { + if (top.size() == 1) { return; } int offset_slice_axis = 0; const Dtype* bottom_data = bottom[0]->cpu_data(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); @@ -92,7 +97,7 @@ void SliceLayer::Forward_cpu(const vector*>& bottom, template void SliceLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0]) { return; } + if (!propagate_down[0] || top.size() == 1) { return; } int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index 796841d3..e8dc6cd9 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -28,6 +28,7 @@ __global__ void Slice(const int nthreads, const Dtype* in_data, template void SliceLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { + if (top.size() == 1) { return; } int offset_slice_axis = 0; const Dtype* bottom_data = bottom[0]->gpu_data(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); @@ -48,7 +49,7 @@ void SliceLayer::Forward_gpu(const vector*>& bottom, template void SliceLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0]) { return; } + if (!propagate_down[0] || top.size() == 1) { return; } int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); const int bottom_slice_axis = bottom[0]->shape(slice_axis_); diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index ccd03646..2d2d0fdc 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -88,6 +88,21 @@ TYPED_TEST(SliceLayerTest, TestSetupChannels) { EXPECT_EQ(this->blob_bottom_->width(), this->blob_top_0_->width()); } +TYPED_TEST(SliceLayerTest, TestTrivialSlice) { + // Test the trivial (single output) "slice" operation -- + // should be the identity. + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + SliceLayer layer(layer_param); + this->blob_top_vec_0_.resize(1); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_0_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_0_->shape()); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_EQ(this->blob_bottom_->cpu_data()[i], + this->blob_top_0_->cpu_data()[i]); + } +} + TYPED_TEST(SliceLayerTest, TestSliceAcrossNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -161,6 +176,18 @@ TYPED_TEST(SliceLayerTest, TestSliceAcrossChannels) { } } +TYPED_TEST(SliceLayerTest, TestGradientTrivial) { + // Test the trivial (single output) "slice" operation -- + // should be the identity. 
+ typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + SliceLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + this->blob_top_vec_0_.resize(1); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_0_); +} + TYPED_TEST(SliceLayerTest, TestGradientAcrossNum) { typedef typename TypeParam::Dtype Dtype; // Gradient checks are slow; reduce blob size. From 784dfddd42cc787fd9661a954f2a360990867df2 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 23 Jan 2015 12:52:44 -0800 Subject: [PATCH 072/223] ConcatLayer: allow trivial operation with single bottom Blob --- include/caffe/common_layers.hpp | 2 +- src/caffe/layers/concat_layer.cpp | 6 ++++++ src/caffe/layers/concat_layer.cu | 2 ++ src/caffe/test/test_concat_layer.cpp | 23 +++++++++++++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 6d4a9e3c..89bab8d6 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -85,7 +85,7 @@ class ConcatLayer : public Layer { const vector*>& top); virtual inline const char* type() const { return "Concat"; } - virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MinBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 95fba105..86b500de 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -48,11 +48,16 @@ void ConcatLayer::Reshape(const vector*>& bottom, } top[0]->Reshape(top_shape); CHECK_EQ(bottom_count_sum, top[0]->count()); + if (bottom.size() == 1) { + top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); + } } template void ConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { + if (bottom.size() == 1) { return; } Dtype* top_data = top[0]->mutable_cpu_data(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); @@ -72,6 +77,7 @@ void ConcatLayer::Forward_cpu(const vector*>& bottom, template void ConcatLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { + if (bottom.size() == 1) { return; } const Dtype* top_diff = top[0]->cpu_diff(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 3c64c7ef..617701e2 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -28,6 +28,7 @@ __global__ void Concat(const int nthreads, const Dtype* in_data, template void ConcatLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { + if (bottom.size() == 1) { return; } Dtype* top_data = top[0]->mutable_gpu_data(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); @@ -48,6 +49,7 @@ void ConcatLayer::Forward_gpu(const vector*>& bottom, template void ConcatLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { + if (bottom.size() == 1) { return; } const Dtype* top_diff = top[0]->gpu_diff(); int offset_concat_axis = 0; const int top_concat_axis = top[0]->shape(concat_axis_); diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 088e0a41..ccd97eb1 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -99,6 +99,19 @@ 
TYPED_TEST(ConcatLayerTest, TestSetupChannelsNegativeIndexing) { EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } +TYPED_TEST(ConcatLayerTest, TestForwardTrivial) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + this->blob_bottom_vec_0_.resize(1); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_0_, this->blob_top_vec_); + for (int i = 0; i < this->blob_bottom_0_->count(); ++i) { + EXPECT_EQ(this->blob_bottom_0_->cpu_data()[i], + this->blob_top_->cpu_data()[i]); + } +} + TYPED_TEST(ConcatLayerTest, TestForwardNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -154,6 +167,16 @@ TYPED_TEST(ConcatLayerTest, TestForwardChannels) { } } +TYPED_TEST(ConcatLayerTest, TestGradientTrivial) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + this->blob_bottom_vec_0_.resize(1); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_0_, + this->blob_top_vec_); +} + TYPED_TEST(ConcatLayerTest, TestGradientNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; From cf1516634d677cb8d2b2068e2b795c9b58a7c098 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sun, 15 Feb 2015 15:18:56 -0800 Subject: [PATCH 073/223] Net: expose param_display_names_ --- include/caffe/net.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 1bf07d28..bed241d2 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -179,6 +179,9 @@ class Net { return param_names_index_; } inline const vector& param_owners() const { return param_owners_; } + inline const vector& param_display_names() const { + return param_display_names_; + } /// @brief Input and output blob numbers inline int num_inputs() const { return net_input_blobs_.size(); } inline int num_outputs() const { return net_output_blobs_.size(); } From 46c3f00bc91819668fb384b7982706d19b2a9fd6 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 3 Sep 2015 14:57:19 -0700 Subject: [PATCH 074/223] net.cpp fix debug_info params -> learnable_params --- src/caffe/net.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 89d14013..ebb8b5d2 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -810,12 +810,11 @@ void Net::Backward() { BackwardFromTo(layers_.size() - 1, 0); if (debug_info_) { Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0; - for (int i = 0; i < params_.size(); ++i) { - if (param_owners_[i] >= 0) { continue; } - asum_data += params_[i]->asum_data(); - asum_diff += params_[i]->asum_diff(); - sumsq_data += params_[i]->sumsq_data(); - sumsq_diff += params_[i]->sumsq_diff(); + for (int i = 0; i < learnable_params_.size(); ++i) { + asum_data += learnable_params_[i]->asum_data(); + asum_diff += learnable_params_[i]->asum_diff(); + sumsq_data += learnable_params_[i]->sumsq_data(); + sumsq_diff += learnable_params_[i]->sumsq_diff(); } const Dtype l2norm_data = std::sqrt(sumsq_data); const Dtype l2norm_diff = std::sqrt(sumsq_diff); From c2484747d813d616bcb504d97f93071b26bb372d Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 21 Aug 2015 17:29:06 -0700 Subject: [PATCH 075/223] NetSpec: don't require lists to specify single-element repeated fields --- python/caffe/net_spec.py | 10 ++++++++-- python/caffe/test/test_net_spec.py | 3 +-- 2 
files changed, 9 insertions(+), 4 deletions(-) diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 77a0e007..93fc0192 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -56,8 +56,14 @@ def to_proto(*tops): def assign_proto(proto, name, val): """Assign a Python object to a protobuf message, based on the Python type (in recursive fashion). Lists become repeated fields/messages, dicts - become messages, and other types are assigned directly.""" - + become messages, and other types are assigned directly. For convenience, + repeated fields whose values are not lists are converted to single-element + lists; e.g., `my_repeated_int_field=3` is converted to + `my_repeated_int_field=[3]`.""" + + is_repeated_field = hasattr(getattr(proto, name), 'extend') + if is_repeated_field and not isinstance(val, list): + val = [val] if isinstance(val, list): if isinstance(val[0], dict): for item in val: diff --git a/python/caffe/test/test_net_spec.py b/python/caffe/test/test_net_spec.py index b4595e65..fee3c0aa 100644 --- a/python/caffe/test/test_net_spec.py +++ b/python/caffe/test/test_net_spec.py @@ -43,8 +43,7 @@ def anon_lenet(batch_size): def silent_net(): n = caffe.NetSpec() - n.data, n.data2 = L.DummyData(shape=[dict(dim=[3]), dict(dim=[4, 2])], - ntop=2) + n.data, n.data2 = L.DummyData(shape=dict(dim=3), ntop=2) n.silence_data = L.Silence(n.data, ntop=0) n.silence_data2 = L.Silence(n.data2, ntop=0) return n.to_proto() From 1bdc18c5beb4c6e679ed359eb707be8822306ea5 Mon Sep 17 00:00:00 2001 From: Lumin Zhou Date: Fri, 4 Sep 2015 04:38:43 +0000 Subject: [PATCH 076/223] Update extract_features.cpp --- tools/extract_features.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 365dd495..084c9bf8 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -42,7 +42,7 @@ int feature_extraction_pipeline(int argc, char** argv) { " save_feature_dataset_name1[,name2,...] num_mini_batches db_type" " [CPU/GPU] [DEVICE_ID=0]\n" "Note: you can extract multiple features in one pass by specifying" - " multiple feature blob names and dataset names seperated by ','." + " multiple feature blob names and dataset names separated by ','." " The names cannot contain white space characters and the number of blobs" " and datasets must be equal."; return 1; From aa40ab98717507a60c23fb9cd4bf61c8b0bbb38d Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Thu, 3 Sep 2015 21:44:45 -0700 Subject: [PATCH 077/223] Fix AccuracyLayerTest for per-class accuracy. Fix AccuracyLayerTest for per-class accuracy. Previously in #2935, it crashes since the test accuracy is nan (0/0) when a class never appear. 
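The guard is a simple ternary on the per-class count; roughly (a sketch using the test's variable names):

    // A class with num_per_class[i] == 0 would otherwise produce 0/0 == nan.
    TypeParam accuracy_per_class = (num_per_class[i] > 0)
        ? static_cast<TypeParam>(correct_per_class[i]) / num_per_class[i]
        : 0;
    EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
                accuracy_per_class, 1e-4);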
--- src/caffe/test/test_accuracy_layer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index 94e529b5..ef0e57a3 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -250,7 +250,6 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUTopK) { TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) { LayerParameter layer_param; - Caffe::set_mode(Caffe::CPU); AccuracyLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_); @@ -279,16 +278,16 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) { EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), num_correct_labels / 100.0, 1e-4); for (int i = 0; i < num_class; ++i) { + TypeParam accuracy_per_class = (num_per_class[i] > 0 ? + static_cast(correct_per_class[i]) / num_per_class[i] : 0); EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), - static_cast(correct_per_class[i]) / num_per_class[i], - 1e-4); + accuracy_per_class, 1e-4); } } TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) { LayerParameter layer_param; - Caffe::set_mode(Caffe::CPU); const TypeParam kIgnoreLabelValue = -1; layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue); AccuracyLayer layer(layer_param); @@ -329,9 +328,10 @@ TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) { EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0), num_correct_labels / TypeParam(count), 1e-4); for (int i = 0; i < 10; ++i) { + TypeParam accuracy_per_class = (num_per_class[i] > 0 ? + static_cast(correct_per_class[i]) / num_per_class[i] : 0); EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0), - TypeParam(correct_per_class[i]) / num_per_class[i], - 1e-4); + accuracy_per_class, 1e-4); } } From 5cc76ad2e38f19a140497ff09c475500da9d76cf Mon Sep 17 00:00:00 2001 From: Daniel Gordon Date: Fri, 4 Sep 2015 10:12:00 -0700 Subject: [PATCH 078/223] enabling the alternate solvers to be accessed by the python interface --- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 9 +++++++++ python/caffe/pycaffe.py | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 6cc44e72..ccda1bca 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,4 +1,4 @@ -from .pycaffe import Net, SGDSolver +from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index cc49f60a..ccd5776a 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -297,6 +297,15 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_, bp::bases >, shared_ptr >, boost::noncopyable>( "AdaGradSolver", bp::init()); + bp::class_, bp::bases >, + shared_ptr >, boost::noncopyable>( + "RMSPropSolver", bp::init()); + bp::class_, bp::bases >, + shared_ptr >, boost::noncopyable>( + "AdaDeltaSolver", bp::init()); + bp::class_, bp::bases >, + shared_ptr >, boost::noncopyable>( + "AdamSolver", bp::init()); bp::def("get_solver", &GetSolverFromFile, bp::return_value_policy()); diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 4f980a92..8ea24da4 100644 
--- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -10,7 +10,8 @@ from itertools import zip_longest as izip_longest import numpy as np -from ._caffe import Net, SGDSolver +from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \ + RMSPropSolver, AdaDeltaSolver, AdamSolver import caffe.io # We directly update methods from Net here (rather than using composition or From 1394cdc383e2f41d7435862442b15151e8ac1237 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Fri, 4 Sep 2015 14:16:31 -0700 Subject: [PATCH 079/223] disallow PythonLayer in Multi-GPU training --- include/caffe/python_layer.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp index c43c1e8a..b839d526 100644 --- a/include/caffe/python_layer.hpp +++ b/include/caffe/python_layer.hpp @@ -18,6 +18,12 @@ class PythonLayer : public Layer { virtual void LayerSetUp(const vector*>& bottom, const vector*>& top) { + // Disallow PythonLayer in MultiGPU training stage, due to GIL issues + // Details: https://github.com/BVLC/caffe/issues/2936 + if (this->phase_ == TRAIN && Caffe::solver_count() > 1 + && !ShareInParallel()) { + LOG(FATAL) << "PythonLayer is not implemented in Multi-GPU training"; + } self_.attr("param_str") = bp::str( this->layer_param_.python_param().param_str()); self_.attr("setup")(bottom, top); From 8fbac04ac32672ae8a97f8a1171f1d39456b97ca Mon Sep 17 00:00:00 2001 From: Sean Bell Date: Wed, 9 Sep 2015 12:52:52 -0400 Subject: [PATCH 080/223] Minor: missing space in string formatting --- src/caffe/solver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 394ec3b3..2e59a881 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -732,7 +732,7 @@ void SGDSolver::SnapshotSolverStateToBinaryProto( } string snapshot_filename = Solver::SnapshotFilename(".solverstate"); LOG(INFO) - << "Snapshotting solver state to binary proto file" << snapshot_filename; + << "Snapshotting solver state to binary proto file " << snapshot_filename; WriteProtoToBinaryFile(state, snapshot_filename.c_str()); } From 3456259d400f7eef27e07c15c34f22b8d5e13bdd Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sun, 13 Sep 2015 20:46:24 -0700 Subject: [PATCH 081/223] Use EXPECT_NEAR in EltwiseLayer test Otherwise there seem to be some numerical issues causing BLAS results not exactly same as evaluated results in test code. 
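Floating-point addition is not associative, so a sum computed through BLAS (which may reorder or block the operations) can differ from the reference expression evaluated in the test by a last-bit rounding error. Comparing with an absolute tolerance instead, roughly (sketch):

    // EXPECT_EQ(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i]) can fail
    // on rounding differences; a tolerance of 1e-4 covers float and double.
    EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i], 1e-4);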
--- src/caffe/test/test_eltwise_layer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp index be0c1347..8031f6e9 100644 --- a/src/caffe/test/test_eltwise_layer.cpp +++ b/src/caffe/test/test_eltwise_layer.cpp @@ -80,7 +80,7 @@ TYPED_TEST(EltwiseLayerTest, TestProd) { const Dtype* in_data_b = this->blob_bottom_b_->cpu_data(); const Dtype* in_data_c = this->blob_bottom_c_->cpu_data(); for (int i = 0; i < count; ++i) { - EXPECT_EQ(data[i], in_data_a[i] * in_data_b[i] * in_data_c[i]); + EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i] * in_data_c[i], 1e-4); } } @@ -99,7 +99,7 @@ TYPED_TEST(EltwiseLayerTest, TestSum) { const Dtype* in_data_b = this->blob_bottom_b_->cpu_data(); const Dtype* in_data_c = this->blob_bottom_c_->cpu_data(); for (int i = 0; i < count; ++i) { - EXPECT_EQ(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i]); + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i] + in_data_c[i], 1e-4); } } From ab554cb4918cf7bccfada00339b4d1d5ccf3b4af Mon Sep 17 00:00:00 2001 From: Sean Bell Date: Wed, 9 Sep 2015 12:49:27 -0400 Subject: [PATCH 082/223] Check that the snapshot directory is writeable before starting training --- include/caffe/solver.hpp | 2 ++ src/caffe/solver.cpp | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index aba3e036..8d52785a 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -82,6 +82,8 @@ class Solver { callbacks_.push_back(value); } + void CheckSnapshotWritePermissions(); + protected: // Make and apply the update value for the current iteration. virtual void ApplyUpdate() = 0; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 394ec3b3..47493174 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -55,6 +55,7 @@ void Solver::Init(const SolverParameter& param) { << std::endl << param.DebugString(); param_ = param; CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative."; + CheckSnapshotWritePermissions(); if (Caffe::root_solver() && param_.random_seed() >= 0) { Caffe::set_random_seed(param_.random_seed()); } @@ -434,6 +435,24 @@ void Solver::Snapshot() { SnapshotSolverState(model_filename); } +template +void Solver::CheckSnapshotWritePermissions() { + if (Caffe::root_solver() && param_.snapshot()) { + CHECK(param_.has_snapshot_prefix()) + << "In solver params, snapshot is specified but snapshot_prefix is not"; + string probe_filename = SnapshotFilename(".tempfile"); + std::ofstream probe_ofs(probe_filename.c_str()); + if (probe_ofs.good()) { + probe_ofs.close(); + std::remove(probe_filename.c_str()); + } else { + LOG(FATAL) << "Cannot write to snapshot prefix '" + << param_.snapshot_prefix() << "'. 
Make sure " + << "that the directory exists and is writeable."; + } + } +} + template string Solver::SnapshotFilename(const string extension) { string filename(param_.snapshot_prefix()); From b7f9cba875c6db5c4ae33446dc80cd010c1c392c Mon Sep 17 00:00:00 2001 From: Mohamed Omran Date: Tue, 15 Sep 2015 17:18:32 +0200 Subject: [PATCH 083/223] removed bug in caffe.io.resize_image when applied to Nd images --- python/caffe/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index fc962660..0cad7211 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -329,7 +329,7 @@ def resize_image(im, new_dims, interp_order=1): return ret else: # ndimage interpolates anything but more slowly. - scale = tuple(np.array(new_dims) / np.array(im.shape[:2])) + scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2])) resized_im = zoom(im, scale + (1,), order=interp_order) return resized_im.astype(np.float32) From 3d3a8b2ca09b64d94836652d0c9b5ffbb31551f6 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Wed, 16 Sep 2015 12:06:16 -0700 Subject: [PATCH 084/223] Get back 'USE CPU' print for caffe train --- tools/caffe.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/caffe.cpp b/tools/caffe.cpp index ff63860a..e3f684b5 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -174,6 +174,7 @@ int train() { vector gpus; get_gpus(&gpus); if (gpus.size() == 0) { + LOG(INFO) << "Use CPU."; Caffe::set_mode(Caffe::CPU); } else { ostringstream s; From f3a933a620b8b089a7fe29ba76ec27f5408ff68d Mon Sep 17 00:00:00 2001 From: Tea Date: Sat, 27 Jun 2015 11:44:56 +0800 Subject: [PATCH 085/223] Separate IO dependencies OpenCV, LMDB, LevelDB and Snappy are made optional via switches (USE_OPENCV, USE_LMDB, USE_LEVELDB) available for Make and CMake builds. Since Snappy is a LevelDB dependency, its use is determined by USE_LEVELDB. HDF5 is left bundled because it is used for serializing weights and solverstates. 
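Code that depends on one of the optional libraries is fenced behind the corresponding preprocessor define, which both the Make and CMake builds set from the switches above. The recurring pattern looks roughly like this (illustrative sketch, not a specific hunk):

    #ifdef USE_OPENCV
      cv::Mat cv_img = DecodeDatumToCVMat(datum, /*is_color=*/true);
      // ... OpenCV-dependent processing of cv_img ...
    #else
      LOG(FATAL) << "This path requires OpenCV; compile with USE_OPENCV.";
    #endif  // USE_OPENCV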
--- .travis.yml | 11 ++--- CMakeLists.txt | 7 +++- Makefile | 28 +++++++++++-- Makefile.config.example | 5 +++ cmake/ConfigGen.cmake | 12 ++++++ cmake/Dependencies.cmake | 41 ++++++++++++------- cmake/Summary.cmake | 18 +++++--- cmake/Templates/CaffeConfig.cmake.in | 26 ++++++------ cmake/Templates/caffe_config.h.in | 5 +++ docs/installation.md | 9 ++-- .../cpp_classification/classification.cpp | 8 ++++ examples/mnist/convert_mnist_data.cpp | 12 ++++++ .../siamese/convert_mnist_siamese_data.cpp | 9 +++- include/caffe/data_layers.hpp | 3 +- include/caffe/data_transformer.hpp | 5 ++- include/caffe/util/db_leveldb.hpp | 2 + include/caffe/util/db_lmdb.hpp | 2 + include/caffe/util/io.hpp | 2 + python/caffe/test/test_layer_type_list.py | 1 + scripts/travis/travis_build_and_test.sh | 14 ++++++- .../travis/travis_setup_makefile_config.sh | 6 +++ src/caffe/data_transformer.cpp | 16 +++++++- src/caffe/layers/data_layer.cpp | 3 +- src/caffe/layers/image_data_layer.cpp | 2 + src/caffe/layers/memory_data_layer.cpp | 4 ++ src/caffe/layers/window_data_layer.cpp | 2 + src/caffe/test/test_data_layer.cpp | 6 +++ src/caffe/test/test_data_transformer.cpp | 2 + src/caffe/test/test_db.cpp | 2 + src/caffe/test/test_image_data_layer.cpp | 2 + src/caffe/test/test_io.cpp | 2 + src/caffe/test/test_layer_factory.cpp | 4 ++ src/caffe/test/test_memory_data_layer.cpp | 5 ++- src/caffe/test/test_upgrade_proto.cpp | 12 +++++- src/caffe/util/db.cpp | 14 +++++-- src/caffe/util/db_leveldb.cpp | 2 + src/caffe/util/db_lmdb.cpp | 2 + src/caffe/util/io.cpp | 10 ++++- tools/compute_image_mean.cpp | 4 ++ tools/convert_imageset.cpp | 4 ++ 40 files changed, 264 insertions(+), 60 deletions(-) diff --git a/.travis.yml b/.travis.yml index b920a935..4dc7ed72 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,12 @@ # one using CMake, and one using make. 
env: matrix: - - WITH_CUDA=false WITH_CMAKE=false - - WITH_CUDA=false WITH_CMAKE=true - - WITH_CUDA=true WITH_CMAKE=false - - WITH_CUDA=true WITH_CMAKE=true - - WITH_CUDA=false WITH_CMAKE=true PYTHON_VERSION=3 + - WITH_CUDA=false WITH_CMAKE=false WITH_IO=true + - WITH_CUDA=false WITH_CMAKE=true WITH_IO=true PYTHON_VERSION=3 + - WITH_CUDA=true WITH_CMAKE=false WITH_IO=true + - WITH_CUDA=true WITH_CMAKE=true WITH_IO=true + - WITH_CUDA=false WITH_CMAKE=false WITH_IO=false + - WITH_CUDA=false WITH_CMAKE=true WITH_IO=false PYTHON_VERSION=3 language: cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ef599b68..838723be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,13 +16,16 @@ include(cmake/ConfigGen.cmake) # ---[ Options caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA -caffe_option(USE_CUDNN "Build Caffe with cuDNN libary support" ON IF NOT CPU_ONLY) +caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) set(python_version "2" CACHE STRING "Specify which python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) -caffe_option(BUILD_python_layer "Build the Caffe python layer" ON) +caffe_option(BUILD_python_layer "Build the caffe python layer" ON) +caffe_option(USE_LMDB "Build with lmdb" ON) +caffe_option(USE_LEVELDB "Build with levelDB" ON) +caffe_option(USE_OPENCV "Build with OpenCV support" ON) # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/Makefile b/Makefile index 80bc3737..ddaed59b 100644 --- a/Makefile +++ b/Makefile @@ -169,9 +169,18 @@ ifneq ($(CPU_ONLY), 1) LIBRARY_DIRS += $(CUDA_LIB_DIR) LIBRARIES := cudart cublas curand endif -LIBRARIES += glog gflags protobuf leveldb snappy \ - lmdb boost_system hdf5_hl hdf5 m \ - opencv_core opencv_highgui opencv_imgproc + +LIBRARIES += glog gflags protobuf boost_system m hdf5_hl hdf5 + +ifeq ($(USE_LEVELDB), 1) + LIBRARIES += leveldb snappy +endif +ifeq ($(USE_LMDB), 1) + LIBRARIES += lmdb +endif +ifeq ($(USE_OPENCV), 1) + LIBRARIES += opencv_core opencv_highgui opencv_imgproc +endif PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare @@ -290,6 +299,17 @@ ifeq ($(USE_CUDNN), 1) COMMON_FLAGS += -DUSE_CUDNN endif +# i/o libraries configuration +ifeq ($(USE_OPENCV), 1) + COMMON_FLAGS += -DUSE_OPENCV +endif +ifeq ($(USE_LEVELDB), 1) + COMMON_FLAGS += -DUSE_LEVELDB +endif +ifeq ($(USE_LMDB), 1) + COMMON_FLAGS += -DUSE_LMDB +endif + # CPU-only configuration ifeq ($(CPU_ONLY), 1) OBJS := $(PROTO_OBJS) $(CXX_OBJS) @@ -472,7 +492,7 @@ runtest: $(TEST_ALL_BIN) pytest: py cd python; python -m unittest discover -s caffe/test - + mattest: mat cd matlab; $(MATLAB_DIR)/bin/matlab -nodisplay -r 'caffe.run_tests(), exit()' diff --git a/Makefile.config.example b/Makefile.config.example index a8735025..32e67ee4 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -7,6 +7,11 @@ # CPU-only switch (uncomment to build without GPU support). # CPU_ONLY := 1 +# comment out to disable IO dependencies +USE_LEVELDB := 1 +USE_LMDB := 1 +USE_OPENCV := 1 + # To customize your choice of compiler, uncomment and set the following. # N.B. 
the default for Linux is g++ and the default for OSX is clang++ # CUSTOM_CXX := g++ diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 566d6ca0..8b259965 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -56,6 +56,18 @@ function(caffe_generate_export_configs) list(APPEND Caffe_DEFINITIONS -DCPU_ONLY) endif() + if(USE_OPENCV) + list(APPEND Caffe_DEFINITIONS -DUSE_OPENCV) + endif() + + if(USE_LMDB) + list(APPEND Caffe_DEFINITIONS -DUSE_LMDB) + endif() + + if(USE_LEVELDB) + list(APPEND Caffe_DEFINITIONS -DUSE_LEVELDB) + endif() + if(NOT HAVE_CUDNN) set(HAVE_CUDNN FALSE) else() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 7c86dd55..d68d7bfb 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -29,19 +29,27 @@ include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) # ---[ LMDB -find_package(LMDB REQUIRED) -include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) -list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) +if(USE_LMDB) + find_package(LMDB REQUIRED) + include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) + add_definitions(-DUSE_LMDB) +endif() # ---[ LevelDB -find_package(LevelDB REQUIRED) -include_directories(SYSTEM ${LevelDB_INCLUDE}) -list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) +if(USE_LEVELDB) + find_package(LevelDB REQUIRED) + include_directories(SYSTEM ${LevelDB_INCLUDE}) + list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) + add_definitions(-DUSE_LEVELDB) +endif() # ---[ Snappy -find_package(Snappy REQUIRED) -include_directories(SYSTEM ${Snappy_INCLUDE_DIR}) -list(APPEND Caffe_LINKER_LIBS ${Snappy_LIBRARIES}) +if(USE_LEVELDB) + find_package(Snappy REQUIRED) + include_directories(SYSTEM ${Snappy_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS ${Snappy_LIBRARIES}) +endif() # ---[ CUDA include(cmake/Cuda.cmake) @@ -57,13 +65,16 @@ if(NOT HAVE_CUDA) endif() # ---[ OpenCV -find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) -if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found - find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) +if(USE_OPENCV) + find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) + if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found + find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) + endif() + include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) + list(APPEND Caffe_LINKER_LIBS ${OpenCV_LIBS}) + message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})") + add_definitions(-DUSE_OPENCV) endif() -include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) -list(APPEND Caffe_LINKER_LIBS ${OpenCV_LIBS}) -message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})") # ---[ BLAS if(NOT APPLE) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index e094ac00..3d12e81a 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -114,6 +114,9 @@ function(caffe_print_configuration_summary) caffe_status(" BUILD_matlab : ${BUILD_matlab}") caffe_status(" BUILD_docs : ${BUILD_docs}") caffe_status(" CPU_ONLY : ${CPU_ONLY}") + caffe_status(" USE_LMDB : ${USE_LMDB}") + caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") + caffe_status(" USE_OPENCV : ${USE_OPENCV}") caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") @@ -121,10 +124,16 @@ function(caffe_print_configuration_summary) caffe_status(" glog : Yes") caffe_status(" gflags : Yes") caffe_status(" protobuf : " 
PROTOBUF_FOUND THEN "Yes (ver. ${PROTOBUF_VERSION})" ELSE "No" ) - caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") - caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) - caffe_status(" LevelDB : " LEVELDB_FOUND THEN "Yes (ver. ${LEVELDB_VERSION})" ELSE "No") - caffe_status(" OpenCV : Yes (ver. ${OpenCV_VERSION})") + if(USE_LMDB) + caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") + endif() + if(USE_LEVELDB) + caffe_status(" LevelDB : " LEVELDB_FOUND THEN "Yes (ver. ${LEVELDB_VERSION})" ELSE "No") + caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) + endif() + if(USE_OPENCV) + caffe_status(" OpenCV : Yes (ver. ${OpenCV_VERSION})") + endif() caffe_status(" CUDA : " HAVE_CUDA THEN "Yes (ver. ${CUDA_VERSION})" ELSE "No" ) caffe_status("") if(HAVE_CUDA) @@ -165,4 +174,3 @@ function(caffe_print_configuration_summary) caffe_status(" Install path : ${CMAKE_INSTALL_PREFIX}") caffe_status("") endfunction() - diff --git a/cmake/Templates/CaffeConfig.cmake.in b/cmake/Templates/CaffeConfig.cmake.in index 8f23742e..73f57ac2 100644 --- a/cmake/Templates/CaffeConfig.cmake.in +++ b/cmake/Templates/CaffeConfig.cmake.in @@ -17,22 +17,24 @@ # Caffe_HAVE_CUDNN - signals about cuDNN support -# OpenCV dependency +# OpenCV dependency (optional) -if(NOT OpenCV_FOUND) - set(Caffe_OpenCV_CONFIG_PATH "@OpenCV_CONFIG_PATH@") - if(Caffe_OpenCV_CONFIG_PATH) - get_filename_component(Caffe_OpenCV_CONFIG_PATH ${Caffe_OpenCV_CONFIG_PATH} ABSOLUTE) +if(@USE_OPENCV@) + if(NOT OpenCV_FOUND) + set(Caffe_OpenCV_CONFIG_PATH "@OpenCV_CONFIG_PATH@") + if(Caffe_OpenCV_CONFIG_PATH) + get_filename_component(Caffe_OpenCV_CONFIG_PATH ${Caffe_OpenCV_CONFIG_PATH} ABSOLUTE) - if(EXISTS ${Caffe_OpenCV_CONFIG_PATH} AND NOT TARGET opencv_core) - message(STATUS "Caffe: using OpenCV config from ${Caffe_OpenCV_CONFIG_PATH}") - include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVModules.cmake) - endif() + if(EXISTS ${Caffe_OpenCV_CONFIG_PATH} AND NOT TARGET opencv_core) + message(STATUS "Caffe: using OpenCV config from ${Caffe_OpenCV_CONFIG_PATH}") + include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVModules.cmake) + endif() - else() - find_package(OpenCV REQUIRED) + else() + find_package(OpenCV REQUIRED) + endif() + unset(Caffe_OpenCV_CONFIG_PATH) endif() - unset(Caffe_OpenCV_CONFIG_PATH) endif() # Compute paths diff --git a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 6039e8f6..9302022d 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -30,3 +30,8 @@ /* Matlab */ #cmakedefine HAVE_MATLAB + +/* IO libraries */ +#cmakedefine USE_OPENCV +#cmakedefine USE_LMDB +#cmakedefine USE_LEVELDB diff --git a/docs/installation.md b/docs/installation.md index d535c6d0..89a8c71c 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -17,16 +17,19 @@ When updating Caffe, it's best to `make clean` before re-compiling. ## Prerequisites -Caffe has several dependencies. +Caffe has several dependencies: * [CUDA](https://developer.nvidia.com/cuda-zone) is required for GPU mode. * library version 7.0 and the latest driver version are recommended, but 6.* is fine too * 5.5, and 5.0 are compatible but considered legacy * [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) via ATLAS, MKL, or OpenBLAS. 
* [Boost](http://www.boost.org/) >= 1.55 +* `protobuf`, `glog`, `gflags`, `hdf5` + +Optional dependencies: + * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 -* `protobuf`, `glog`, `gflags` -* IO libraries `hdf5`, `leveldb`, `snappy`, `lmdb` +* IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) Pycaffe and Matcaffe interfaces have their own natural needs. diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index dc8b863f..de48fb69 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -1,7 +1,9 @@ #include +#ifdef USE_OPENCV #include #include #include +#endif // USE_OPENCV #include #include #include @@ -9,6 +11,7 @@ #include #include +#ifdef USE_OPENCV using namespace caffe; // NOLINT(build/namespaces) using std::string; @@ -255,3 +258,8 @@ int main(int argc, char** argv) { << p.first << "\"" << std::endl; } } +#else +int main(int argc, char** argv) { + LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV."; +} +#endif // USE_OPENCV diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 54443f11..8f29bafd 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -9,9 +9,13 @@ #include #include #include + +#if defined(USE_LEVELDB) && defined(USE_LMDB) #include #include #include +#endif + #include #include @@ -20,6 +24,8 @@ #include "caffe/proto/caffe.pb.h" +#if defined(USE_LEVELDB) && defined(USE_LMDB) + using namespace caffe; // NOLINT(build/namespaces) using std::string; @@ -196,3 +202,9 @@ int main(int argc, char** argv) { } return 0; } +#else +int main(int argc, char** argv) { + LOG(FATAL) << "This example requires LevelDB and LMDB; " << + "compile with USE_LEVELDB and USE_LMDB."; +} +#endif // USE_LEVELDB and USE_LMDB diff --git a/examples/siamese/convert_mnist_siamese_data.cpp b/examples/siamese/convert_mnist_siamese_data.cpp index 8008b443..ad08036f 100644 --- a/examples/siamese/convert_mnist_siamese_data.cpp +++ b/examples/siamese/convert_mnist_siamese_data.cpp @@ -10,12 +10,14 @@ #include "glog/logging.h" #include "google/protobuf/text_format.h" -#include "leveldb/db.h" #include "stdint.h" #include "caffe/proto/caffe.pb.h" #include "caffe/util/math_functions.hpp" +#ifdef USE_LEVELDB +#include "leveldb/db.h" + uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16); @@ -121,3 +123,8 @@ int main(int argc, char** argv) { } return 0; } +#else +int main(int argc, char** argv) { + LOG(FATAL) << "This example requires LevelDB; compile with USE_LEVELDB."; +} +#endif // USE_LEVELDB diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 552d8141..90fd0d19 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -4,7 +4,6 @@ #include #include #include - #include "hdf5.h" #include "caffe/blob.hpp" @@ -275,8 +274,10 @@ class MemoryDataLayer : public BaseDataLayer { virtual inline int ExactNumTopBlobs() const { return 2; } virtual void AddDatumVector(const vector& datum_vector); +#ifdef USE_OPENCV virtual void AddMatVector(const vector& mat_vector, const vector& labels); +#endif // USE_OPENCV // Reset should accept const pointers, but can't, because the memory // will be given to Blob, which is mutable diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp index 0ad68c80..97b4ee6a 100644 --- 
a/include/caffe/data_transformer.hpp +++ b/include/caffe/data_transformer.hpp @@ -50,6 +50,7 @@ class DataTransformer { void Transform(const vector & datum_vector, Blob* transformed_blob); +#ifdef USE_OPENCV /** * @brief Applies the transformation defined in the data layer's * transform_param block to a vector of Mat. @@ -74,6 +75,7 @@ class DataTransformer { * set_cpu_data() is used. See image_data_layer.cpp for an example. */ void Transform(const cv::Mat& cv_img, Blob* transformed_blob); +#endif // USE_OPENCV /** * @brief Applies the same transformation defined in the data layer's @@ -113,6 +115,7 @@ class DataTransformer { * @param mat_vector * A vector of Mat containing the data to be transformed. */ +#ifdef USE_OPENCV vector InferBlobShape(const vector & mat_vector); /** * @brief Infers the shape of transformed_blob will have when @@ -122,6 +125,7 @@ class DataTransformer { * cv::Mat containing the data to be transformed. */ vector InferBlobShape(const cv::Mat& cv_img); +#endif // USE_OPENCV protected: /** @@ -148,4 +152,3 @@ class DataTransformer { } // namespace caffe #endif // CAFFE_DATA_TRANSFORMER_HPP_ - diff --git a/include/caffe/util/db_leveldb.hpp b/include/caffe/util/db_leveldb.hpp index 10623554..e9fa0d32 100644 --- a/include/caffe/util/db_leveldb.hpp +++ b/include/caffe/util/db_leveldb.hpp @@ -1,3 +1,4 @@ +#ifdef USE_LEVELDB #ifndef CAFFE_UTIL_DB_LEVELDB_HPP #define CAFFE_UTIL_DB_LEVELDB_HPP @@ -71,3 +72,4 @@ class LevelDB : public DB { } // namespace caffe #endif // CAFFE_UTIL_DB_LEVELDB_HPP +#endif // USE_LEVELDB diff --git a/include/caffe/util/db_lmdb.hpp b/include/caffe/util/db_lmdb.hpp index cc7c90af..4e1568ac 100644 --- a/include/caffe/util/db_lmdb.hpp +++ b/include/caffe/util/db_lmdb.hpp @@ -1,3 +1,4 @@ +#ifdef USE_LMDB #ifndef CAFFE_UTIL_DB_LMDB_HPP #define CAFFE_UTIL_DB_LMDB_HPP @@ -89,3 +90,4 @@ class LMDB : public DB { } // namespace caffe #endif // CAFFE_UTIL_DB_LMDB_HPP +#endif // USE_LMDB diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index c0938ad0..6070b4c7 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -120,6 +120,7 @@ inline bool ReadImageToDatum(const string& filename, const int label, bool DecodeDatumNative(Datum* datum); bool DecodeDatum(Datum* datum, bool is_color); +#ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool is_color); @@ -135,6 +136,7 @@ cv::Mat DecodeDatumToCVMatNative(const Datum& datum); cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color); void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); +#endif // USE_OPENCV } // namespace caffe diff --git a/python/caffe/test/test_layer_type_list.py b/python/caffe/test/test_layer_type_list.py index 7edc80df..47f4cf6d 100644 --- a/python/caffe/test/test_layer_type_list.py +++ b/python/caffe/test/test_layer_type_list.py @@ -5,6 +5,7 @@ class TestLayerTypeList(unittest.TestCase): def test_standard_types(self): + #removing 'Data' from list for type_name in ['Data', 'Convolution', 'InnerProduct']: self.assertIn(type_name, caffe.layer_type_list(), '%s not in layer_type_list()' % type_name) diff --git a/scripts/travis/travis_build_and_test.sh b/scripts/travis/travis_build_and_test.sh index 9ba737e2..bbc82133 100755 --- a/scripts/travis/travis_build_and_test.sh +++ b/scripts/travis/travis_build_and_test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Script called by Travis to do a CPU-only build of and test Caffe. +# Script called by Travis to build and test Caffe. 
set -e MAKE="make --jobs=$NUM_THREADS --keep-going" @@ -15,7 +15,12 @@ if $WITH_CMAKE; then if [ "$PYTHON_VERSION" = "3" ]; then PYTHON_ARGS="$PYTHON_ARGS -Dpython_version=3 -DBOOST_LIBRARYDIR=$CONDA_DIR/lib/" fi - cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" .. + if $WITH_IO; then + IO_ARGS="-DUSE_OPENCV=ON -DUSE_LMDB=ON -DUSE_LEVELDB=ON" + else + IO_ARGS="-DUSE_OPENCV=OFF -DUSE_LMDB=OFF -DUSE_LEVELDB=OFF" + fi + cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" $IO_ARGS .. $MAKE $MAKE pytest if ! $WITH_CUDA; then @@ -28,6 +33,11 @@ else if ! $WITH_CUDA; then export CPU_ONLY=1 fi + if $WITH_IO; then + export USE_LMDB=1 + export USE_LEVELDB=1 + export USE_OPENCV=1 + fi $MAKE all test pycaffe warn lint || true if ! $WITH_CUDA; then $MAKE runtest diff --git a/scripts/travis/travis_setup_makefile_config.sh b/scripts/travis/travis_setup_makefile_config.sh index 1440be2a..83aacf11 100755 --- a/scripts/travis/travis_setup_makefile_config.sh +++ b/scripts/travis/travis_setup_makefile_config.sh @@ -11,6 +11,12 @@ if $WITH_CUDA; then echo "CUDA_ARCH := $GENCODE" >> Makefile.config fi +# Remove IO library settings from Makefile.config +# to avoid conflicts with CI configuration +sed -i -e '/USE_LMDB/d' Makefile.config +sed -i -e '/USE_LEVELDB/d' Makefile.config +sed -i -e '/USE_OPENCV/d' Makefile.config + cat << 'EOF' >> Makefile.config # Travis' nvcc doesn't like newer boost versions NVCCFLAGS := -Xcudafe --diag_suppress=cc_clobber_ignored -Xcudafe --diag_suppress=useless_using_declaration -Xcudafe --diag_suppress=set_but_not_used diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 4666d9bd..7189d67e 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -1,4 +1,6 @@ +#ifdef USE_OPENCV #include +#endif // USE_OPENCV #include #include @@ -124,11 +126,13 @@ void DataTransformer::Transform(const Datum& datum, } } + template void DataTransformer::Transform(const Datum& datum, Blob* transformed_blob) { // If datum is encoded, decoded and transform the cv::image. if (datum.encoded()) { +#ifdef USE_OPENCV CHECK(!(param_.force_color() && param_.force_gray())) << "cannot set both force_color and force_gray"; cv::Mat cv_img; @@ -140,6 +144,9 @@ void DataTransformer::Transform(const Datum& datum, } // Transform the cv::image into blob. 
return Transform(cv_img, transformed_blob); +#else + LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; +#endif // USE_OPENCV } else { if (param_.force_color() || param_.force_gray()) { LOG(ERROR) << "force_color and force_gray only for encoded datum"; @@ -194,6 +201,7 @@ void DataTransformer::Transform(const vector & datum_vector, } } +#ifdef USE_OPENCV template void DataTransformer::Transform(const vector & mat_vector, Blob* transformed_blob) { @@ -315,6 +323,7 @@ void DataTransformer::Transform(const cv::Mat& cv_img, } } } +#endif // USE_OPENCV template void DataTransformer::Transform(Blob* input_blob, @@ -432,6 +441,7 @@ void DataTransformer::Transform(Blob* input_blob, template vector DataTransformer::InferBlobShape(const Datum& datum) { if (datum.encoded()) { +#ifdef USE_OPENCV CHECK(!(param_.force_color() && param_.force_gray())) << "cannot set both force_color and force_gray"; cv::Mat cv_img; @@ -443,8 +453,10 @@ vector DataTransformer::InferBlobShape(const Datum& datum) { } // InferBlobShape using the cv::image. return InferBlobShape(cv_img); +#else + LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; +#endif // USE_OPENCV } - const int crop_size = param_.crop_size(); const int datum_channels = datum.channels(); const int datum_height = datum.height(); @@ -474,6 +486,7 @@ vector DataTransformer::InferBlobShape( return shape; } +#ifdef USE_OPENCV template vector DataTransformer::InferBlobShape(const cv::Mat& cv_img) { const int crop_size = param_.crop_size(); @@ -504,6 +517,7 @@ vector DataTransformer::InferBlobShape( shape[0] = num; return shape; } +#endif // USE_OPENCV template void DataTransformer::InitRand() { diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 0932d9fe..71f8cb09 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -1,5 +1,6 @@ +#ifdef USE_OPENCV #include - +#endif // USE_OPENCV #include #include diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 223ba3a7..3d2190f8 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include // NOLINT(readability/streams) @@ -164,3 +165,4 @@ INSTANTIATE_CLASS(ImageDataLayer); REGISTER_LAYER_CLASS(ImageData); } // namespace caffe +#endif // USE_OPENCV diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index 42de4198..2370aa04 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -1,4 +1,6 @@ +#ifdef USE_OPENCV #include +#endif // USE_OPENCV #include @@ -53,6 +55,7 @@ void MemoryDataLayer::AddDatumVector(const vector& datum_vector) { has_new_data_ = true; } +#ifdef USE_OPENCV template void MemoryDataLayer::AddMatVector(const vector& mat_vector, const vector& labels) { @@ -76,6 +79,7 @@ void MemoryDataLayer::AddMatVector(const vector& mat_vector, Reset(top_data, top_label, num); has_new_data_ = true; } +#endif // USE_OPENCV template void MemoryDataLayer::Reset(Dtype* data, Dtype* labels, int n) { diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index f637f2ec..f8db61c9 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include @@ -468,3 +469,4 @@ INSTANTIATE_CLASS(WindowDataLayer); REGISTER_LAYER_CLASS(WindowData); } // namespace caffe +#endif // USE_OPENCV diff --git 
a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index afe2a40d..9e03954a 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include @@ -348,6 +349,7 @@ class DataLayerTest : public MultiDeviceTest { TYPED_TEST_CASE(DataLayerTest, TestDtypesAndDevices); +#ifdef USE_LEVELDB TYPED_TEST(DataLayerTest, TestReadLevelDB) { const bool unique_pixels = false; // all pixels the same; images different this->Fill(unique_pixels, DataParameter_DB_LEVELDB); @@ -385,7 +387,9 @@ TYPED_TEST(DataLayerTest, TestReadCropTestLevelDB) { this->Fill(unique_pixels, DataParameter_DB_LEVELDB); this->TestReadCrop(TEST); } +#endif // USE_LEVELDB +#ifdef USE_LMDB TYPED_TEST(DataLayerTest, TestReadLMDB) { const bool unique_pixels = false; // all pixels the same; images different this->Fill(unique_pixels, DataParameter_DB_LMDB); @@ -424,4 +428,6 @@ TYPED_TEST(DataLayerTest, TestReadCropTestLMDB) { this->TestReadCrop(TEST); } +#endif // USE_LMDB } // namespace caffe +#endif // USE_OPENCV diff --git a/src/caffe/test/test_data_transformer.cpp b/src/caffe/test/test_data_transformer.cpp index 16570e20..8a101374 100644 --- a/src/caffe/test/test_data_transformer.cpp +++ b/src/caffe/test/test_data_transformer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include @@ -353,3 +354,4 @@ TYPED_TEST(DataTransformTest, TestMeanFile) { } } // namespace caffe +#endif // USE_OPENCV diff --git a/src/caffe/test/test_db.cpp b/src/caffe/test/test_db.cpp index 5b2ac230..1b487b14 100644 --- a/src/caffe/test/test_db.cpp +++ b/src/caffe/test/test_db.cpp @@ -1,3 +1,4 @@ +#if defined(USE_LEVELDB) && defined(USE_LMDB) && defined(USE_OPENCV) #include #include "boost/scoped_ptr.hpp" @@ -132,3 +133,4 @@ TYPED_TEST(DBTest, TestWrite) { } } // namespace caffe +#endif // USE_LEVELDB, USE_LMDB and USE_OPENCV diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index 931a5ebf..481fcef7 100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include #include @@ -177,3 +178,4 @@ TYPED_TEST(ImageDataLayerTest, TestShuffle) { } } // namespace caffe +#endif // USE_OPENCV diff --git a/src/caffe/test/test_io.cpp b/src/caffe/test/test_io.cpp index 4ab96311..c2c919e9 100644 --- a/src/caffe/test/test_io.cpp +++ b/src/caffe/test/test_io.cpp @@ -1,3 +1,4 @@ +#ifdef USE_OPENCV #include #include #include @@ -420,3 +421,4 @@ TEST_F(IOTest, TestDecodeDatumToCVMatContentNative) { } } // namespace caffe +#endif // USE_OPENCV diff --git a/src/caffe/test/test_layer_factory.cpp b/src/caffe/test/test_layer_factory.cpp index c86fafd0..7d5d39d8 100644 --- a/src/caffe/test/test_layer_factory.cpp +++ b/src/caffe/test/test_layer_factory.cpp @@ -31,12 +31,16 @@ TYPED_TEST(LayerFactoryTest, TestCreateLayer) { LayerParameter layer_param; // Data layers expect a DB if (iter->first == "Data") { +#ifdef USE_LEVELDB string tmp; MakeTempDir(&tmp); boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); db->Open(tmp, db::NEW); db->Close(); layer_param.mutable_data_param()->set_source(tmp); +#else + continue; +#endif // USE_LEVELDB } layer_param.set_type(iter->first); layer = LayerRegistry::CreateLayer(layer_param); diff --git a/src/caffe/test/test_memory_data_layer.cpp b/src/caffe/test/test_memory_data_layer.cpp index a79033f5..7269a4d4 100644 --- a/src/caffe/test/test_memory_data_layer.cpp +++ b/src/caffe/test/test_memory_data_layer.cpp @@ -1,4 +1,6 
@@ +#ifdef USE_OPENCV #include +#endif // USE_OPENCV #include #include @@ -113,6 +115,7 @@ TYPED_TEST(MemoryDataLayerTest, TestForward) { } } +#ifdef USE_OPENCV TYPED_TEST(MemoryDataLayerTest, AddDatumVectorDefaultTransform) { typedef typename TypeParam::Dtype Dtype; @@ -292,5 +295,5 @@ TYPED_TEST(MemoryDataLayerTest, TestSetBatchSize) { } } } - +#endif // USE_OPENCV } // namespace caffe diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index 00672023..ee05b151 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -2892,6 +2892,7 @@ TEST_F(NetUpgradeTest, TestImageNet) { this->RunV1UpgradeTest(expected_v1_proto, expected_v2_proto); } // NOLINT(readability/fn_size) +#ifdef USE_OPENCV TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { LayerParameter layer_param; shared_ptr > layer; @@ -2906,16 +2907,25 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { layer_param.set_type(v2_layer_type); // Data layers expect a DB if (v2_layer_type == "Data") { + #ifdef USE_LEVELDB string tmp; MakeTempDir(&tmp); boost::scoped_ptr db(db::GetDB(DataParameter_DB_LEVELDB)); db->Open(tmp, db::NEW); db->Close(); layer_param.mutable_data_param()->set_source(tmp); + #else + continue; + #endif // USE_LEVELDB } + #ifndef USE_OPENCV + if (v2_layer_type == "ImageData" || v2_layer_type == "WindowData") { + continue; + } + #endif // !USE_OPENCV layer = LayerRegistry::CreateLayer(layer_param); EXPECT_EQ(v2_layer_type, layer->type()); } } - +#endif // USE_OPENCV } // NOLINT(readability/fn_size) // namespace caffe diff --git a/src/caffe/util/db.cpp b/src/caffe/util/db.cpp index f55420e9..ccda054d 100644 --- a/src/caffe/util/db.cpp +++ b/src/caffe/util/db.cpp @@ -8,23 +8,31 @@ namespace caffe { namespace db { DB* GetDB(DataParameter::DB backend) { switch (backend) { +#ifdef USE_LEVELDB case DataParameter_DB_LEVELDB: return new LevelDB(); +#endif // USE_LEVELDB +#ifdef USE_LMDB case DataParameter_DB_LMDB: return new LMDB(); +#endif // USE_LMDB default: LOG(FATAL) << "Unknown database backend"; } } DB* GetDB(const string& backend) { +#ifdef USE_LEVELDB if (backend == "leveldb") { return new LevelDB(); - } else if (backend == "lmdb") { + } +#endif // USE_LEVELDB +#ifdef USE_LMDB + if (backend == "lmdb") { return new LMDB(); - } else { - LOG(FATAL) << "Unknown database backend"; } +#endif // USE_LMDB + LOG(FATAL) << "Unknown database backend"; } } // namespace db diff --git a/src/caffe/util/db_leveldb.cpp b/src/caffe/util/db_leveldb.cpp index 06c46627..f5c4d8a6 100644 --- a/src/caffe/util/db_leveldb.cpp +++ b/src/caffe/util/db_leveldb.cpp @@ -1,3 +1,4 @@ +#ifdef USE_LEVELDB #include "caffe/util/db_leveldb.hpp" #include @@ -19,3 +20,4 @@ void LevelDB::Open(const string& source, Mode mode) { } // namespace db } // namespace caffe +#endif // USE_LEVELDB diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index a054b796..78dd880a 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -1,3 +1,4 @@ +#ifdef USE_LMDB #include "caffe/util/db_lmdb.hpp" #include @@ -49,3 +50,4 @@ void LMDBTransaction::Put(const string& key, const string& value) { } // namespace db } // namespace caffe +#endif // USE_LMDB diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index 6f033142..f2b1dd98 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -3,9 +3,11 @@ #include #include #include +#ifdef USE_OPENCV #include #include #include +#endif // USE_OPENCV #include #include @@ -67,6 +69,7 @@ void 
WriteProtoToBinaryFile(const Message& proto, const char* filename) { CHECK(proto.SerializeToOstream(&output)); } +#ifdef USE_OPENCV cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool is_color) { cv::Mat cv_img; @@ -98,6 +101,7 @@ cv::Mat ReadImageToCVMat(const string& filename, cv::Mat ReadImageToCVMat(const string& filename) { return ReadImageToCVMat(filename, 0, 0, true); } + // Do the file extension and encoding match? static bool matchExt(const std::string & fn, std::string en) { @@ -111,6 +115,7 @@ static bool matchExt(const std::string & fn, return true; return false; } + bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, const bool is_color, const std::string & encoding, Datum* datum) { @@ -135,6 +140,7 @@ bool ReadImageToDatum(const string& filename, const int label, return false; } } +#endif // USE_OPENCV bool ReadFileToDatum(const string& filename, const int label, Datum* datum) { @@ -156,6 +162,7 @@ bool ReadFileToDatum(const string& filename, const int label, } } +#ifdef USE_OPENCV cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { cv::Mat cv_img; CHECK(datum.encoded()) << "Datum not encoded"; @@ -227,6 +234,5 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) { } datum->set_data(buffer); } - - +#endif // USE_OPENCV } // namespace caffe diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp index b1fc7cae..2035d515 100644 --- a/tools/compute_image_mean.cpp +++ b/tools/compute_image_mean.cpp @@ -24,6 +24,7 @@ DEFINE_string(backend, "lmdb", int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); +#ifdef USE_OPENCV #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif @@ -115,5 +116,8 @@ int main(int argc, char** argv) { } LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim; } +#else + LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; +#endif // USE_OPENCV return 0; } diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index aad1f1fe..e51a2631 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -43,6 +43,7 @@ DEFINE_string(encode_type, "", "Optional: What type should we encode the image as ('png','jpg',...)."); int main(int argc, char** argv) { +#ifdef USE_OPENCV ::google::InitGoogleLogging(argv[0]); // Print output to stderr (while still logging) FLAGS_alsologtostderr = 1; @@ -150,5 +151,8 @@ int main(int argc, char** argv) { txn->Commit(); LOG(INFO) << "Processed " << count << " files."; } +#else + LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; +#endif // USE_OPENCV return 0; } From 2349c6de69bf5043508cde41bb1d337fdb78e188 Mon Sep 17 00:00:00 2001 From: Tea Date: Thu, 17 Sep 2015 15:02:45 +0800 Subject: [PATCH 086/223] Fix case in CMake notices --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 838723be..37f937fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,10 +19,10 @@ caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) -set(python_version "2" CACHE STRING "Specify which python version to use") +set(python_version "2" CACHE STRING "Specify which Python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR 
APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) -caffe_option(BUILD_python_layer "Build the caffe python layer" ON) +caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON) caffe_option(USE_LMDB "Build with lmdb" ON) caffe_option(USE_LEVELDB "Build with levelDB" ON) caffe_option(USE_OPENCV "Build with OpenCV support" ON) From 68c9e2b4703ce18fd9a7ab541addf701129a8080 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Tue, 8 Sep 2015 12:20:40 +0800 Subject: [PATCH 087/223] Add a comment indicating that Travis CI tests are CPU only --- scripts/travis/travis_build_and_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/travis/travis_build_and_test.sh b/scripts/travis/travis_build_and_test.sh index bbc82133..174f1ee5 100755 --- a/scripts/travis/travis_build_and_test.sh +++ b/scripts/travis/travis_build_and_test.sh @@ -1,5 +1,6 @@ #!/bin/bash # Script called by Travis to build and test Caffe. +# Travis CI tests are CPU-only for lack of compatible hardware. set -e MAKE="make --jobs=$NUM_THREADS --keep-going" From e75ae965519444fb64d67c0aa6323bc2ef4049ef Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 17 Sep 2015 15:05:12 -0700 Subject: [PATCH 088/223] [build] include IO dependencies by default keep old behavior by including leveldb, lmdb, and opencv by default --- Makefile | 7 ++++++- Makefile.config.example | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ddaed59b..a9111336 100644 --- a/Makefile +++ b/Makefile @@ -172,6 +172,11 @@ endif LIBRARIES += glog gflags protobuf boost_system m hdf5_hl hdf5 +# handle IO dependencies +USE_LEVELDB ?= 1 +USE_LMDB ?= 1 +USE_OPENCV ?= 1 + ifeq ($(USE_LEVELDB), 1) LIBRARIES += leveldb snappy endif @@ -299,7 +304,7 @@ ifeq ($(USE_CUDNN), 1) COMMON_FLAGS += -DUSE_CUDNN endif -# i/o libraries configuration +# configure IO libraries ifeq ($(USE_OPENCV), 1) COMMON_FLAGS += -DUSE_OPENCV endif diff --git a/Makefile.config.example b/Makefile.config.example index 32e67ee4..a20bad2f 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -7,10 +7,10 @@ # CPU-only switch (uncomment to build without GPU support). # CPU_ONLY := 1 -# comment out to disable IO dependencies -USE_LEVELDB := 1 -USE_LMDB := 1 -USE_OPENCV := 1 +# uncomment to disable IO dependencies and corresponding data layers +# USE_LEVELDB := 0 +# USE_LMDB := 0 +# USE_OPENCV := 0 # To customize your choice of compiler, uncomment and set the following. # N.B. the default for Linux is g++ and the default for OSX is clang++ From b4f9add57fa468ab43aa40f0a95badf3e9ace243 Mon Sep 17 00:00:00 2001 From: Gustav Larsson Date: Thu, 17 Sep 2015 20:32:33 -0500 Subject: [PATCH 089/223] Expose `Snapshot` to pycaffe - Solver::Snapshot is made public - It is also added as `snapshot` to pycaffe Addressing #3077 --- include/caffe/solver.hpp | 10 +++++----- python/caffe/_caffe.cpp | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 8d52785a..51f8d495 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -60,6 +60,11 @@ class Solver { // RestoreSolverStateFrom___ protected methods. You should implement these // methods to restore the state from the appropriate snapshot type. void Restore(const char* resume_file); + // The Solver::Snapshot function implements the basic snapshotting utility + // that stores the learned net. 
You should implement the SnapshotSolverState() + // function that produces a SolverState protocol buffer that needs to be + // written to disk together with the learned net. + void Snapshot(); virtual ~Solver() {} inline const SolverParameter& param() const { return param_; } inline shared_ptr > net() { return net_; } @@ -87,11 +92,6 @@ class Solver { protected: // Make and apply the update value for the current iteration. virtual void ApplyUpdate() = 0; - // The Solver::Snapshot function implements the basic snapshotting utility - // that stores the learned net. You should implement the SnapshotSolverState() - // function that produces a SolverState protocol buffer that needs to be - // written to disk together with the learned net. - void Snapshot(); string SnapshotFilename(const string extension); string SnapshotToBinaryProto(); string SnapshotToHDF5(); diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index ccd5776a..6c2ccaa5 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -286,7 +286,8 @@ BOOST_PYTHON_MODULE(_caffe) { .def("solve", static_cast::*)(const char*)>( &Solver::Solve), SolveOverloads()) .def("step", &Solver::Step) - .def("restore", &Solver::Restore); + .def("restore", &Solver::Restore) + .def("snapshot", &Solver::Snapshot); bp::class_, bp::bases >, shared_ptr >, boost::noncopyable>( From f75d594bbec1efab69cdc09c04bed1762aebd0e1 Mon Sep 17 00:00:00 2001 From: Yan Chen Date: Fri, 18 Sep 2015 17:02:16 +0800 Subject: [PATCH 090/223] refine format of switch case in solver --- include/caffe/solver.hpp | 14 +++++++------- src/caffe/solver.cpp | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 8d52785a..2ecf539b 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -283,19 +283,19 @@ Solver* GetSolver(const SolverParameter& param) { switch (type) { case SolverParameter_SolverType_SGD: - return new SGDSolver(param); + return new SGDSolver(param); case SolverParameter_SolverType_NESTEROV: - return new NesterovSolver(param); + return new NesterovSolver(param); case SolverParameter_SolverType_ADAGRAD: - return new AdaGradSolver(param); + return new AdaGradSolver(param); case SolverParameter_SolverType_RMSPROP: - return new RMSPropSolver(param); + return new RMSPropSolver(param); case SolverParameter_SolverType_ADADELTA: - return new AdaDeltaSolver(param); + return new AdaDeltaSolver(param); case SolverParameter_SolverType_ADAM: - return new AdamSolver(param); + return new AdamSolver(param); default: - LOG(FATAL) << "Unknown SolverType: " << type; + LOG(FATAL) << "Unknown SolverType: " << type; } return (Solver*) NULL; } diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 3574ce75..12c13dd8 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -422,14 +422,14 @@ void Solver::Snapshot() { CHECK(Caffe::root_solver()); string model_filename; switch (param_.snapshot_format()) { - case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: - model_filename = SnapshotToBinaryProto(); - break; - case caffe::SolverParameter_SnapshotFormat_HDF5: - model_filename = SnapshotToHDF5(); - break; - default: - LOG(FATAL) << "Unsupported snapshot format."; + case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: + model_filename = SnapshotToBinaryProto(); + break; + case caffe::SolverParameter_SnapshotFormat_HDF5: + model_filename = SnapshotToHDF5(); + break; + default: + LOG(FATAL) << "Unsupported snapshot format."; } 
SnapshotSolverState(model_filename); From 4c2ff1693ea509dc4758e73b913f4cbec6c1ac3a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Mar 2015 19:27:56 -0800 Subject: [PATCH 091/223] caffe.proto: generalize ConvolutionParameter to N spatial axes --- src/caffe/proto/caffe.proto | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index aa299f86..86683eb4 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -471,18 +471,24 @@ message ContrastiveLossParameter { message ConvolutionParameter { optional uint32 num_output = 1; // The number of outputs for the layer optional bool bias_term = 2 [default = true]; // whether to have bias terms + // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X) - optional uint32 pad_h = 9 [default = 0]; // The padding height - optional uint32 pad_w = 10 [default = 0]; // The padding width - optional uint32 kernel_size = 4; // The kernel size (square) - optional uint32 kernel_h = 11; // The kernel height - optional uint32 kernel_w = 12; // The kernel width + // dimensions in all spatial dimensions, or once per spatial dimension. + repeated uint32 pad = 3; // The padding size; defaults to 0 + repeated uint32 kernel_size = 4; // The kernel size + repeated uint32 stride = 6; // The stride; defaults to 1 + + // For 2D convolution only, the *_h and *_w versions may also be used to + // specify both spatial dimensions. + optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) + optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) + optional uint32 kernel_h = 11; // The kernel height (2D only) + optional uint32 kernel_w = 12; // The kernel width (2D only) + optional uint32 stride_h = 13; // The stride height (2D only) + optional uint32 stride_w = 14; // The stride width (2D only) + optional uint32 group = 5 [default = 1]; // The group size for group conv - optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X) - optional uint32 stride_h = 13; // The stride height - optional uint32 stride_w = 14; // The stride width + optional FillerParameter weight_filler = 7; // The filler for the weight optional FillerParameter bias_filler = 8; // The filler for the bias enum Engine { @@ -491,6 +497,17 @@ message ConvolutionParameter { CUDNN = 2; } optional Engine engine = 15 [default = DEFAULT]; + + // The axis to interpret as "channels" when performing convolution. + // Preceding dimensions are treated as independent inputs; + // succeeding dimensions are treated as "spatial". + // With (N, C, H, W) inputs, and axis == 1 (the default), we perform + // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for + // groups g>1) filters across the spatial axes (H, W) of the input. + // With (N, C, D, H, W) inputs, and axis == 1, we perform + // N independent 3D convolutions, sliding (C/g)-channels + // filters across the spatial axes (D, H, W) of the input. 
+ optional int32 axis = 16 [default = 1]; } message DataParameter { From 0813f32038bf7477d343ae369981166cfed783b5 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Mar 2015 21:31:34 -0800 Subject: [PATCH 092/223] Blob: add SyncedMemory shape accessor for GPU shape access --- include/caffe/blob.hpp | 2 ++ src/caffe/blob.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index dda7b1f8..fea5117e 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -219,6 +219,7 @@ class Blob { const Dtype* cpu_data() const; void set_cpu_data(Dtype* data); + const int* gpu_shape() const; const Dtype* gpu_data() const; const Dtype* cpu_diff() const; const Dtype* gpu_diff() const; @@ -268,6 +269,7 @@ class Blob { protected: shared_ptr data_; shared_ptr diff_; + shared_ptr shape_data_; vector shape_; int count_; int capacity_; diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 8450aa14..c86fd5d1 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -24,11 +24,16 @@ void Blob::Reshape(const vector& shape) { CHECK_LE(shape.size(), kMaxBlobAxes); count_ = 1; shape_.resize(shape.size()); + if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) { + shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int))); + } + int* shape_data = static_cast(shape_data_->mutable_cpu_data()); for (int i = 0; i < shape.size(); ++i) { CHECK_GE(shape[i], 0); CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; count_ *= shape[i]; shape_[i] = shape[i]; + shape_data[i] = shape[i]; } if (count_ > capacity_) { capacity_ = count_; @@ -67,6 +72,12 @@ Blob::Blob(const vector& shape) Reshape(shape); } +template +const int* Blob::gpu_shape() const { + CHECK(shape_data_); + return (const int*)shape_data_->gpu_data(); +} + template const Dtype* Blob::cpu_data() const { CHECK(data_); From 9d8206e0f906069e7c04f08dfddefa1357f3915c Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Mar 2015 19:30:17 -0800 Subject: [PATCH 093/223] Im2col and Convolution layers support N spatial axes --- include/caffe/util/im2col.hpp | 24 ++ include/caffe/vision_layers.hpp | 108 +++++- src/caffe/layers/base_conv_layer.cpp | 241 ++++++++---- src/caffe/layers/conv_layer.cpp | 32 +- src/caffe/layers/conv_layer.cu | 16 +- src/caffe/layers/cudnn_conv_layer.cpp | 46 ++- src/caffe/layers/cudnn_conv_layer.cu | 18 +- src/caffe/layers/deconv_layer.cpp | 32 +- src/caffe/layers/deconv_layer.cu | 16 +- src/caffe/layers/im2col_layer.cpp | 171 +++++--- src/caffe/layers/im2col_layer.cu | 41 +- src/caffe/proto/caffe.proto | 7 + src/caffe/test/test_convolution_layer.cpp | 409 ++++++++++++++++---- src/caffe/test/test_deconvolution_layer.cpp | 159 +++++++- src/caffe/test/test_im2col_kernel.cu | 87 ++++- src/caffe/test/test_im2col_layer.cpp | 30 +- src/caffe/util/im2col.cpp | 116 ++++++ src/caffe/util/im2col.cu | 306 ++++++++++++++- src/caffe/util/upgrade_proto.cpp | 6 +- 19 files changed, 1554 insertions(+), 311 deletions(-) diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp index 0051e2fa..531fd29c 100644 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp @@ -3,24 +3,48 @@ namespace caffe { +template +void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col); + template void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const 
int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_col); +template +void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_im); + template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im); +template +void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, + const int col_size, const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col); + template void im2col_gpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_col); +template +void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, + const int im_size, const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_im); + template void col2im_gpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 211e3d90..eae65820 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -64,46 +64,101 @@ class BaseConvolutionLayer : public Layer { // Compute height_out_ and width_out_ from other parameters. virtual void compute_output_shape() = 0; - int kernel_h_, kernel_w_; - int stride_h_, stride_w_; + /// @brief The spatial dimensions of a filter kernel. + Blob kernel_shape_; + /// @brief The spatial dimensions of the stride. + Blob stride_; + /// @brief The spatial dimensions of the padding. + Blob pad_; + /// @brief The spatial dimensions of the convolution input. + Blob conv_input_shape_; + /// @brief The spatial dimensions of the input. + Blob input_shape_; + /// @brief The spatial dimensions of the col_buffer. + vector col_buffer_shape_; + /// @brief The spatial dimensions of the output. 
+ vector output_shape_; + + int num_spatial_axes_; + int bottom_dim_; + int top_dim_; + + int channel_axis_; int num_; int channels_; - int pad_h_, pad_w_; - int height_, width_; int group_; + int out_spatial_dim_; + int weight_offset_; int num_output_; - int height_out_, width_out_; bool bias_term_; bool is_1x1_; + bool force_nd_im2col_; private: // wrap im2col/col2im so we don't have to remember the (long) argument lists inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) { - im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff); + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_cpu(data, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + } else { + im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), + col_buffer_shape_.data(), kernel_shape_.cpu_data(), + pad_.cpu_data(), stride_.cpu_data(), col_buff); + } } inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { - col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data); + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_cpu(col_buff, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], data); + } else { + col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), + col_buffer_shape_.data(), kernel_shape_.cpu_data(), + pad_.cpu_data(), stride_.cpu_data(), data); + } } #ifndef CPU_ONLY inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { - im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff); + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_gpu(data, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + } else { + im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, + conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), + kernel_shape_.gpu_data(), pad_.gpu_data(), + stride_.gpu_data(), col_buff); + } } inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { - col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data); + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_gpu(col_buff, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], data); + } else { + col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, + conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), + kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), + data); + } } #endif + int num_kernels_im2col_; + int num_kernels_col2im_; int conv_out_channels_; int conv_in_channels_; int conv_out_spatial_dim_; - int conv_in_height_; - int 
conv_in_width_; int kernel_dim_; - int weight_offset_; int col_offset_; int output_offset_; @@ -250,7 +305,7 @@ class CuDNNConvolutionLayer : public ConvolutionLayer { cudnnTensorDescriptor_t bias_desc_; cudnnFilterDescriptor_t filter_desc_; vector conv_descs_; - int bottom_offset_, top_offset_, weight_offset_, bias_offset_; + int bottom_offset_, top_offset_, bias_offset_; size_t workspaceSizeInBytes; void *workspace; }; @@ -287,11 +342,22 @@ class Im2colLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - int kernel_h_, kernel_w_; - int stride_h_, stride_w_; + /// @brief The spatial dimensions of a filter kernel. + Blob kernel_shape_; + /// @brief The spatial dimensions of the stride. + Blob stride_; + /// @brief The spatial dimensions of the padding. + Blob pad_; + + int num_spatial_axes_; + int bottom_dim_; + int top_dim_; + + int channel_axis_; + int num_; int channels_; - int height_, width_; - int pad_h_, pad_w_; + + bool force_nd_im2col_; }; // Forward declare PoolingLayer and SplitLayer for use in LRNLayer. diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index ccb3adc7..a5b90a54 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -1,3 +1,4 @@ +#include #include #include "caffe/filler.hpp" @@ -11,50 +12,103 @@ namespace caffe { template void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " - << "corresponding to (num, channels, height, width)"; // Configure the kernel size, padding, stride, and inputs. ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - CHECK(!conv_param.has_kernel_size() != - !(conv_param.has_kernel_h() && conv_param.has_kernel_w())) - << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; - CHECK(conv_param.has_kernel_size() || - (conv_param.has_kernel_h() && conv_param.has_kernel_w())) - << "For non-square filters both kernel_h and kernel_w are required."; - CHECK((!conv_param.has_pad() && conv_param.has_pad_h() - && conv_param.has_pad_w()) - || (!conv_param.has_pad_h() && !conv_param.has_pad_w())) - << "pad is pad OR pad_h and pad_w are required."; - CHECK((!conv_param.has_stride() && conv_param.has_stride_h() - && conv_param.has_stride_w()) - || (!conv_param.has_stride_h() && !conv_param.has_stride_w())) - << "Stride is stride OR stride_h and stride_w are required."; - if (conv_param.has_kernel_size()) { - kernel_h_ = kernel_w_ = conv_param.kernel_size(); + force_nd_im2col_ = conv_param.force_nd_im2col(); + channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); + const int first_spatial_axis = channel_axis_ + 1; + const int num_axes = bottom[0]->num_axes(); + num_spatial_axes_ = num_axes - first_spatial_axis; + CHECK_GE(num_spatial_axes_, 0); + // Setup input dimensions (input_shape_). + vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); + input_shape_.Reshape(bottom_dim_blob_shape); + int* input_shape_data = input_shape_.mutable_cpu_data(); + for (int i = 0; i < num_spatial_axes_ + 1; ++i) { + input_shape_data[i] = bottom[0]->shape(channel_axis_ + i); + } + vector spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1)); + // Setup filter kernel dimensions (kernel_shape_). 
+ kernel_shape_.Reshape(spatial_dim_blob_shape); + int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); + if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "kernel_h & kernel_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.kernel_size_size()) + << "Either kernel_size or kernel_h/w should be specified; not both."; + kernel_shape_data[0] = conv_param.kernel_h(); + kernel_shape_data[1] = conv_param.kernel_w(); } else { - kernel_h_ = conv_param.kernel_h(); - kernel_w_ = conv_param.kernel_w(); + const int num_kernel_dims = conv_param.kernel_size_size(); + CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) + << "kernel_size must be specified once, or once per spatial dimension " + << "(kernel_size specified " << num_kernel_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + for (int i = 0; i < num_spatial_axes_; ++i) { + kernel_shape_data[i] = + conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i); + } + } + for (int i = 0; i < num_spatial_axes_; ++i) { + CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; - if (!conv_param.has_pad_h()) { - pad_h_ = pad_w_ = conv_param.pad(); + // Setup stride dimensions (stride_). + stride_.Reshape(spatial_dim_blob_shape); + int* stride_data = stride_.mutable_cpu_data(); + if (conv_param.has_stride_h() || conv_param.has_stride_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "stride_h & stride_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.stride_size()) + << "Either stride or stride_h/w should be specified; not both."; + stride_data[0] = conv_param.stride_h(); + stride_data[1] = conv_param.stride_w(); } else { - pad_h_ = conv_param.pad_h(); - pad_w_ = conv_param.pad_w(); + const int num_stride_dims = conv_param.stride_size(); + CHECK(num_stride_dims == 0 || num_stride_dims == 1 || + num_stride_dims == num_spatial_axes_) + << "stride must be specified once, or once per spatial dimension " + << "(stride specified " << num_stride_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + const int kDefaultStride = 1; + for (int i = 0; i < num_spatial_axes_; ++i) { + stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : + conv_param.stride((num_stride_dims == 1) ? 0 : i); + CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; + } } - if (!conv_param.has_stride_h()) { - stride_h_ = stride_w_ = conv_param.stride(); + // Setup pad dimensions (pad_). + pad_.Reshape(spatial_dim_blob_shape); + int* pad_data = pad_.mutable_cpu_data(); + if (conv_param.has_pad_h() || conv_param.has_pad_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "pad_h & pad_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.pad_size()) + << "Either pad or pad_h/w should be specified; not both."; + pad_data[0] = conv_param.pad_h(); + pad_data[1] = conv_param.pad_w(); } else { - stride_h_ = conv_param.stride_h(); - stride_w_ = conv_param.stride_w(); + const int num_pad_dims = conv_param.pad_size(); + CHECK(num_pad_dims == 0 || num_pad_dims == 1 || + num_pad_dims == num_spatial_axes_) + << "pad must be specified once, or once per spatial dimension " + << "(pad specified " << num_pad_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + const int kDefaultPad = 0; + for (int i = 0; i < num_spatial_axes_; ++i) { + pad_data[i] = (num_pad_dims == 0) ? 
kDefaultPad : + conv_param.pad((num_pad_dims == 1) ? 0 : i); + } } // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. - is_1x1_ = kernel_w_ == 1 && kernel_h_ == 1 - && stride_h_ == 1 && stride_w_ == 1 && pad_h_ == 0 && pad_w_ == 0; + is_1x1_ = true; + for (int i = 0; i < num_spatial_axes_; ++i) { + is_1x1_ &= + kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0; + if (!is_1x1_) { break; } + } // Configure output channels and groups. - channels_ = bottom[0]->channels(); + channels_ = bottom[0]->shape(channel_axis_); num_output_ = this->layer_param_.convolution_param().num_output(); CHECK_GT(num_output_, 0); group_ = this->layer_param_.convolution_param().group(); @@ -71,8 +125,29 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, // Handle the parameters: weights and biases. // - blobs_[0] holds the filter weights // - blobs_[1] holds the biases (optional) + vector weight_shape(2); + weight_shape[0] = conv_out_channels_; + weight_shape[1] = conv_in_channels_ / group_; + for (int i = 0; i < num_spatial_axes_; ++i) { + weight_shape.push_back(kernel_shape_data[i]); + } bias_term_ = this->layer_param_.convolution_param().bias_term(); + vector bias_shape(bias_term_, num_output_); if (this->blobs_.size() > 0) { + CHECK_EQ(1 + bias_term_, this->blobs_.size()) + << "Incorrect number of weight blobs."; + if (weight_shape != this->blobs_[0]->shape()) { + Blob weight_shaped_blob(weight_shape); + LOG(FATAL) << "Incorrect weight shape: expected shape " + << weight_shaped_blob.shape_string() << "; instead, shape was " + << this->blobs_[0]->shape_string(); + } + if (bias_term_ && bias_shape != this->blobs_[1]->shape()) { + Blob bias_shaped_blob(bias_shape); + LOG(FATAL) << "Incorrect bias shape: expected shape " + << bias_shaped_blob.shape_string() << "; instead, shape was " + << this->blobs_[1]->shape_string(); + } LOG(INFO) << "Skipping parameter initialization"; } else { if (bias_term_) { @@ -82,20 +157,20 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, } // Initialize and fill the weights: // output channels x input channels per-group x kernel height x kernel width - this->blobs_[0].reset(new Blob( - conv_out_channels_, conv_in_channels_ / group_, kernel_h_, kernel_w_)); + this->blobs_[0].reset(new Blob(weight_shape)); shared_ptr > weight_filler(GetFiller( this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, initialize and fill the biases. if (bias_term_) { - vector bias_shape(1, num_output_); this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } + kernel_dim_ = this->blobs_[0]->count(1); + weight_offset_ = conv_out_channels_ * kernel_dim_ / group_; // Propagate gradients to the parameters (as directed by backward pass). 
this->param_propagate_down_.resize(this->blobs_.size(), true); } @@ -103,52 +178,68 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, template void BaseConvolutionLayer::Reshape(const vector*>& bottom, const vector*>& top) { - CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " - << "corresponding to (num, channels, height, width)"; - num_ = bottom[0]->num(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with" - " convolution kernel."; + const int first_spatial_axis = channel_axis_ + 1; + CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_) + << "bottom num_axes may not change."; + num_ = bottom[0]->count(0, channel_axis_); + CHECK_EQ(bottom[0]->shape(channel_axis_), channels_) + << "Input size incompatible with convolution kernel."; // TODO: generalize to handle inputs of different shapes. for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { - CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num."; - CHECK_EQ(channels_, bottom[bottom_id]->channels()) - << "Inputs must have same channels."; - CHECK_EQ(height_, bottom[bottom_id]->height()) - << "Inputs must have same height."; - CHECK_EQ(width_, bottom[bottom_id]->width()) - << "Inputs must have same width."; + CHECK(bottom[0]->shape() == bottom[bottom_id]->shape()) + << "All inputs must have the same shape."; } // Shape the tops. compute_output_shape(); + vector top_shape(bottom[0]->shape().begin(), + bottom[0]->shape().begin() + channel_axis_); + top_shape.push_back(num_output_); + for (int i = 0; i < num_spatial_axes_; ++i) { + top_shape.push_back(output_shape_[i]); + } for (int top_id = 0; top_id < top.size(); ++top_id) { - top[top_id]->Reshape(num_, num_output_, height_out_, width_out_); + top[top_id]->Reshape(top_shape); } if (reverse_dimensions()) { - conv_in_height_ = height_out_; - conv_in_width_ = width_out_; - conv_out_spatial_dim_ = height_ * width_; + conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis); } else { - conv_in_height_ = height_; - conv_in_width_ = width_; - conv_out_spatial_dim_ = height_out_ * width_out_; + conv_out_spatial_dim_ = top[0]->count(first_spatial_axis); } - kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_; - weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_; - col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_; + col_offset_ = kernel_dim_ * conv_out_spatial_dim_; output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_; + // Setup input dimensions (conv_input_shape_). + vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); + conv_input_shape_.Reshape(bottom_dim_blob_shape); + int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data(); + for (int i = 0; i < num_spatial_axes_ + 1; ++i) { + if (reverse_dimensions()) { + conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i); + } else { + conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i); + } + } // The im2col result buffer will only hold one image at a time to avoid // overly large memory usage. In the special case of 1x1 convolution // it goes lazily unused to save memory. 
- if (reverse_dimensions()) { - col_buffer_.Reshape(1, kernel_dim_, height_, width_); - } else { - col_buffer_.Reshape(1, kernel_dim_, height_out_, width_out_); + col_buffer_shape_.clear(); + col_buffer_shape_.push_back(kernel_dim_ * group_); + const int* input_shape_data = input_shape_.cpu_data() + 1; + for (int i = 0; i < num_spatial_axes_; ++i) { + if (reverse_dimensions()) { + col_buffer_shape_.push_back(input_shape_data[i]); + } else { + col_buffer_shape_.push_back(output_shape_[i]); + } } + col_buffer_.Reshape(col_buffer_shape_); + bottom_dim_ = bottom[0]->count(channel_axis_); + top_dim_ = top[0]->count(channel_axis_); + num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_; + num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_; // Set up the all ones "bias multiplier" for adding biases by BLAS + out_spatial_dim_ = top[0]->count(first_spatial_axis); if (bias_term_) { - vector bias_multiplier_shape(1, height_out_ * width_out_); + vector bias_multiplier_shape(1, out_spatial_dim_); bias_multiplier_.Reshape(bias_multiplier_shape); caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); @@ -167,7 +258,7 @@ void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_ / group_, + group_, conv_out_spatial_dim_, kernel_dim_, (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, (Dtype)0., output + output_offset_ * g); } @@ -177,7 +268,7 @@ template void BaseConvolutionLayer::forward_cpu_bias(Dtype* output, const Dtype* bias) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(), + out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(), (Dtype)1., output); } @@ -189,7 +280,7 @@ void BaseConvolutionLayer::backward_cpu_gemm(const Dtype* output, col_buff = input; } for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_ / group_, + caffe_cpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_, conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, (Dtype)0., col_buff + col_offset_ * g); @@ -209,7 +300,7 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_ / group_, conv_out_spatial_dim_, + kernel_dim_, conv_out_spatial_dim_, (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, (Dtype)1., weights + weight_offset_ * g); } @@ -218,7 +309,7 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, template void BaseConvolutionLayer::backward_cpu_bias(Dtype* bias, const Dtype* input) { - caffe_cpu_gemv(CblasNoTrans, num_output_, height_out_ * width_out_, 1., + caffe_cpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., input, bias_multiplier_.cpu_data(), 1., bias); } @@ -236,7 +327,7 @@ void BaseConvolutionLayer::forward_gpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_ / group_, + group_, conv_out_spatial_dim_, kernel_dim_, (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, (Dtype)0., output + output_offset_ * g); } @@ -246,7 +337,7 @@ template void BaseConvolutionLayer::forward_gpu_bias(Dtype* output, const 
Dtype* bias) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(), + out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(), (Dtype)1., output); } @@ -258,7 +349,7 @@ void BaseConvolutionLayer::backward_gpu_gemm(const Dtype* output, col_buff = input; } for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_ / group_, + caffe_gpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_, conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, (Dtype)0., col_buff + col_offset_ * g); @@ -278,7 +369,7 @@ void BaseConvolutionLayer::weight_gpu_gemm(const Dtype* input, } for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_ / group_, conv_out_spatial_dim_, + kernel_dim_, conv_out_spatial_dim_, (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, (Dtype)1., weights + weight_offset_ * g); } @@ -287,7 +378,7 @@ void BaseConvolutionLayer::weight_gpu_gemm(const Dtype* input, template void BaseConvolutionLayer::backward_gpu_bias(Dtype* bias, const Dtype* input) { - caffe_gpu_gemv(CblasNoTrans, num_output_, height_out_ * width_out_, 1., + caffe_gpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., input, bias_multiplier_.gpu_data(), 1., bias); } diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 928ef5ee..5cf26970 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -10,10 +10,18 @@ namespace caffe { template void ConvolutionLayer::compute_output_shape() { - this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) - / this->stride_h_ + 1; - this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) - / this->stride_w_ + 1; + // input_shape_ + 1 to skip channel axis + const int* input_shape_data = this->input_shape_.cpu_data() + 1; + const int* kernel_shape_data = this->kernel_shape_.cpu_data(); + const int* stride_data = this->stride_.cpu_data(); + const int* pad_data = this->pad_.cpu_data(); + this->output_shape_.clear(); + for (int i = 0; i < this->num_spatial_axes_; ++i) { + const int input_dim = input_shape_data[i]; + const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) + / stride_data[i] + 1; + this->output_shape_.push_back(output_dim); + } } template @@ -24,11 +32,11 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* top_data = top[i]->mutable_cpu_data(); for (int n = 0; n < this->num_; ++n) { - this->forward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight, - top_data + top[i]->offset(n)); + this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, + top_data + n * this->top_dim_); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->cpu_data(); - this->forward_cpu_bias(top_data + top[i]->offset(n), bias); + this->forward_cpu_bias(top_data + n * this->top_dim_, bias); } } } @@ -47,20 +55,20 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n)); + this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // 
gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_cpu_gemm(bottom_data + bottom[i]->offset(n), - top_diff + top[i]->offset(n), weight_diff); + this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_, + top_diff + n * this->top_dim_, weight_diff); } // gradient w.r.t. bottom data, if necessary. if (propagate_down[i]) { - this->backward_cpu_gemm(top_diff + top[i]->offset(n), weight, - bottom_diff + bottom[i]->offset(n)); + this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight, + bottom_diff + n * this->bottom_dim_); } } } diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index b8a98ff7..b429d2b4 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -16,11 +16,11 @@ void ConvolutionLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); for (int n = 0; n < this->num_; ++n) { - this->forward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight, - top_data + top[i]->offset(n)); + this->forward_gpu_gemm(bottom_data + n * this->bottom_dim_, weight, + top_data + n * this->top_dim_); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->gpu_data(); - this->forward_gpu_bias(top_data + top[i]->offset(n), bias); + this->forward_gpu_bias(top_data + n * this->top_dim_, bias); } } } @@ -37,7 +37,7 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n)); + this->backward_gpu_bias(bias_diff, top_diff + n * this->top_dim_); } } if (this->param_propagate_down_[0] || propagate_down[i]) { @@ -46,13 +46,13 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_gpu_gemm(bottom_data + bottom[i]->offset(n), - top_diff + top[i]->offset(n), weight_diff); + this->weight_gpu_gemm(bottom_data + n * this->bottom_dim_, + top_diff + n * this->top_dim_, weight_diff); } // gradient w.r.t. bottom data, if necessary. if (propagate_down[i]) { - this->backward_gpu_gemm(top_diff + top[i]->offset(n), weight, - bottom_diff + bottom[i]->offset(n)); + this->backward_gpu_gemm(top_diff + n * this->top_dim_, weight, + bottom_diff + n * this->bottom_dim_); } } } diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 104d2b9d..3514fe2a 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -34,14 +34,15 @@ void CuDNNConvolutionLayer::LayerSetUp( } // Set the indexing parameters. - weight_offset_ = (this->num_output_ / this->group_) - * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_; bias_offset_ = (this->num_output_ / this->group_); // Create filter descriptor. + const int* kernel_shape_data = this->kernel_shape_.cpu_data(); + const int kernel_h = kernel_shape_data[0]; + const int kernel_w = kernel_shape_data[1]; cudnn::createFilterDesc(&filter_desc_, this->num_output_ / this->group_, this->channels_ / this->group_, - this->kernel_h_, this->kernel_w_); + kernel_h, kernel_w); // Create tensor descriptor(s) for data and corresponding convolution(s). 
for (int i = 0; i < bottom.size(); i++) { @@ -68,29 +69,36 @@ template void CuDNNConvolutionLayer::Reshape( const vector*>& bottom, const vector*>& top) { ConvolutionLayer::Reshape(bottom, top); - bottom_offset_ = (this->channels_ / this->group_) - * this->height_ * this->width_; - top_offset_ = (this->num_output_ / this->group_) - * this->height_out_ * this->width_out_; + CHECK_EQ(2, this->num_spatial_axes_) + << "CuDNNConvolution input must have 2 spatial axes " + << "(e.g., height and width). " + << "Use 'engine: CAFFE' for general ND convolution."; + bottom_offset_ = this->bottom_dim_ / this->group_; + top_offset_ = this->top_dim_ / this->group_; + const int height = bottom[0]->shape(this->channel_axis_ + 1); + const int width = bottom[0]->shape(this->channel_axis_ + 2); + const int height_out = top[0]->shape(this->channel_axis_ + 1); + const int width_out = top[0]->shape(this->channel_axis_ + 2); + const int* pad_data = this->pad_.cpu_data(); + const int pad_h = pad_data[0]; + const int pad_w = pad_data[1]; + const int* stride_data = this->stride_.cpu_data(); + const int stride_h = stride_data[0]; + const int stride_w = stride_data[1]; for (int i = 0; i < bottom.size(); i++) { cudnn::setTensor4dDesc(&bottom_descs_[i], this->num_, - this->channels_ / this->group_, - this->height_, this->width_, - this->channels_ * this->height_ * this->width_, - this->height_ * this->width_, - this->width_, 1); + this->channels_ / this->group_, height, width, + this->channels_ * height * width, + height * width, width, 1); cudnn::setTensor4dDesc(&top_descs_[i], this->num_, - this->num_output_ / this->group_, - this->height_out_, this->width_out_, - this->num_output_ * this->height_out_ * this->width_out_, - this->height_out_ * this->width_out_, - this->width_out_, 1); + this->num_output_ / this->group_, height_out, width_out, + this->num_output_ * this->out_spatial_dim_, + this->out_spatial_dim_, width_out, 1); cudnn::setConvolutionDesc(&conv_descs_[i], bottom_descs_[i], - filter_desc_, this->pad_h_, this->pad_w_, - this->stride_h_, this->stride_w_); + filter_desc_, pad_h, pad_w, stride_h, stride_w); } // Tensor descriptor for bias. diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index b4e802e1..69115202 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -14,15 +14,15 @@ __global__ void sync_conv_groups() { } template void CuDNNConvolutionLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { + const int* kernel_shape_data = this->kernel_shape_.cpu_data(); + const int kernel_h = kernel_shape_data[0]; + const int kernel_w = kernel_shape_data[1]; + const size_t workspace_limit_bytes = + kernel_h * kernel_w * this->channels_ * sizeof(int) + 1; + const Dtype* weight = this->blobs_[0]->gpu_data(); for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - - size_t workspace_limit_bytes = this->kernel_h_ * - this->kernel_w_ * - this->channels_ * - sizeof(int) + 1; // Forward through cuDNN in parallel over groups. 
for (int g = 0; g < this->group_; g++) { @@ -69,7 +69,7 @@ void CuDNNConvolutionLayer::Forward_gpu( CUDNN_CHECK(cudnnConvolutionForward(handle_[g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, - filter_desc_, weight + weight_offset_ * g, + filter_desc_, weight + this->weight_offset_ * g, conv_descs_[i], algo, workspace, workspaceSizeInBytes, cudnn::dataType::zero, @@ -128,7 +128,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], cudnn::dataType::one, - filter_desc_, weight_diff + weight_offset_ * g)); + filter_desc_, weight_diff + this->weight_offset_ * g)); } // Gradient w.r.t. bottom data. @@ -139,7 +139,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], cudnn::dataType::one, - filter_desc_, weight + weight_offset_ * g, + filter_desc_, weight + this->weight_offset_ * g, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], cudnn::dataType::zero, diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index a4612963..f1d1abf2 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -10,10 +10,18 @@ namespace caffe { template void DeconvolutionLayer::compute_output_shape() { - this->height_out_ = this->stride_h_ * (this->height_ - 1) + this->kernel_h_ - - 2 * this->pad_h_; - this->width_out_ = this->stride_w_ * (this->width_ - 1) + this->kernel_w_ - - 2 * this->pad_w_; + // input_shape_ + 1 to skip channel axis + const int* input_shape_data = this->input_shape_.cpu_data() + 1; + const int* kernel_shape_data = this->kernel_shape_.cpu_data(); + const int* stride_data = this->stride_.cpu_data(); + const int* pad_data = this->pad_.cpu_data(); + this->output_shape_.clear(); + for (int i = 0; i < this->num_spatial_axes_; ++i) { + const int input_dim = input_shape_data[i]; + const int output_dim = stride_data[i] * (input_dim - 1) + + kernel_shape_data[i] - 2 * pad_data[i]; + this->output_shape_.push_back(output_dim); + } } template @@ -24,11 +32,11 @@ void DeconvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* top_data = top[i]->mutable_cpu_data(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight, - top_data + top[i]->offset(n)); + this->backward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, + top_data + n * this->top_dim_); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->cpu_data(); - this->forward_cpu_bias(top_data + top[i]->offset(n), bias); + this->forward_cpu_bias(top_data + n * this->top_dim_, bias); } } } @@ -47,21 +55,21 @@ void DeconvolutionLayer::Backward_cpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n)); + this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // Gradient w.r.t. weight. Note that we will accumulate diffs. 
if (this->param_propagate_down_[0]) { - this->weight_cpu_gemm(top_diff + top[i]->offset(n), - bottom_data + bottom[i]->offset(n), weight_diff); + this->weight_cpu_gemm(top_diff + n * this->top_dim_, + bottom_data + n * this->bottom_dim_, weight_diff); } // Gradient w.r.t. bottom data, if necessary, reusing the column buffer // we might have just computed above. if (propagate_down[i]) { - this->forward_cpu_gemm(top_diff + top[i]->offset(n), weight, - bottom_diff + bottom[i]->offset(n), + this->forward_cpu_gemm(top_diff + n * this->top_dim_, weight, + bottom_diff + n * this->bottom_dim_, this->param_propagate_down_[0]); } } diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 8a1eed8a..ea83f56f 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -16,11 +16,11 @@ void DeconvolutionLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight, - top_data + top[i]->offset(n)); + this->backward_gpu_gemm(bottom_data + n * this->bottom_dim_, weight, + top_data + n * this->top_dim_); if (this->bias_term_) { const Dtype* bias = this->blobs_[1]->gpu_data(); - this->forward_gpu_bias(top_data + top[i]->offset(n), bias); + this->forward_gpu_bias(top_data + n * this->top_dim_, bias); } } } @@ -39,20 +39,20 @@ void DeconvolutionLayer::Backward_gpu(const vector*>& top, if (this->bias_term_ && this->param_propagate_down_[1]) { Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); for (int n = 0; n < this->num_; ++n) { - this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n)); + this->backward_gpu_bias(bias_diff, top_diff + n * this->top_dim_); } } if (this->param_propagate_down_[0] || propagate_down[i]) { for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - this->weight_gpu_gemm(top_diff + top[i]->offset(n), - bottom_data + bottom[i]->offset(n), weight_diff); + this->weight_gpu_gemm(top_diff + n * this->top_dim_, + bottom_data + n * this->bottom_dim_, weight_diff); } // gradient w.r.t. bottom data, if necessary. 
if (propagate_down[i]) { - this->forward_gpu_gemm(top_diff + top[i]->offset(n), weight, - bottom_diff + bottom[i]->offset(n), + this->forward_gpu_gemm(top_diff + this->top_dim_, weight, + bottom_diff + n * this->bottom_dim_, this->param_propagate_down_[0]); } } diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 1c802714..595c9dbb 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -11,54 +11,106 @@ template void Im2colLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - CHECK(!conv_param.has_kernel_size() != - !(conv_param.has_kernel_h() && conv_param.has_kernel_w())) - << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; - CHECK(conv_param.has_kernel_size() || - (conv_param.has_kernel_h() && conv_param.has_kernel_w())) - << "For non-square filters both kernel_h and kernel_w are required."; - CHECK((!conv_param.has_pad() && conv_param.has_pad_h() - && conv_param.has_pad_w()) - || (!conv_param.has_pad_h() && !conv_param.has_pad_w())) - << "pad is pad OR pad_h and pad_w are required."; - CHECK((!conv_param.has_stride() && conv_param.has_stride_h() - && conv_param.has_stride_w()) - || (!conv_param.has_stride_h() && !conv_param.has_stride_w())) - << "Stride is stride OR stride_h and stride_w are required."; - if (conv_param.has_kernel_size()) { - kernel_h_ = kernel_w_ = conv_param.kernel_size(); + force_nd_im2col_ = conv_param.force_nd_im2col(); + const int input_num_dims = bottom[0]->shape().size(); + channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); + const int first_spatial_dim = channel_axis_ + 1; + num_spatial_axes_ = input_num_dims - first_spatial_dim; + CHECK_GE(num_spatial_axes_, 1); + vector dim_blob_shape(1, num_spatial_axes_); + // Setup filter kernel dimensions (kernel_shape_). + kernel_shape_.Reshape(dim_blob_shape); + int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); + if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "kernel_h & kernel_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.kernel_size_size()) + << "Either kernel_size or kernel_h/w should be specified; not both."; + kernel_shape_data[0] = conv_param.kernel_h(); + kernel_shape_data[1] = conv_param.kernel_w(); } else { - kernel_h_ = conv_param.kernel_h(); - kernel_w_ = conv_param.kernel_w(); + const int num_kernel_dims = conv_param.kernel_size_size(); + CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) + << "kernel_size must be specified once, or once per spatial dimension " + << "(kernel_size specified " << num_kernel_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + for (int i = 0; i < num_spatial_axes_; ++i) { + kernel_shape_data[i] = + conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i); + } } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; - if (!conv_param.has_pad_h()) { - pad_h_ = pad_w_ = conv_param.pad(); + for (int i = 0; i < num_spatial_axes_; ++i) { + CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; + } + // Setup stride dimensions (stride_). 
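In the new LayerSetUp above, kernel_shape_ is filled either from the legacy kernel_h/kernel_w pair (accepted only when there are exactly two spatial axes) or from the repeated kernel_size field, which may be given once and broadcast to every axis or once per axis; stride and pad follow the same pattern just below, with defaults of 1 and 0. A hedged sketch of the three ways a caller can express this through the generated protobuf API (the include path is assumed to be the generated header's usual location):

    #include "caffe/proto/caffe.pb.h"

    int main() {
      // One repeated entry: a square/cubic 3x3(x3...) kernel on every spatial axis.
      caffe::ConvolutionParameter square;
      square.add_kernel_size(3);

      // One entry per spatial axis of a 3-D volume (D, H, W): anisotropic kernel.
      caffe::ConvolutionParameter aniso;
      aniso.add_kernel_size(3);
      aniso.add_kernel_size(5);
      aniso.add_kernel_size(5);

      // Legacy 2-D pair; per the CHECKs above it cannot be mixed with kernel_size.
      caffe::ConvolutionParameter legacy;
      legacy.set_kernel_h(5);
      legacy.set_kernel_w(3);
      return 0;
    }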
+ stride_.Reshape(dim_blob_shape); + int* stride_data = stride_.mutable_cpu_data(); + if (conv_param.has_stride_h() || conv_param.has_stride_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "stride_h & stride_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.stride_size()) + << "Either stride or stride_h/w should be specified; not both."; + stride_data[0] = conv_param.stride_h(); + stride_data[1] = conv_param.stride_w(); } else { - pad_h_ = conv_param.pad_h(); - pad_w_ = conv_param.pad_w(); + const int num_stride_dims = conv_param.stride_size(); + CHECK(num_stride_dims == 0 || num_stride_dims == 1 || + num_stride_dims == num_spatial_axes_) + << "stride must be specified once, or once per spatial dimension " + << "(stride specified " << num_stride_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + const int kDefaultStride = 1; + for (int i = 0; i < num_spatial_axes_; ++i) { + stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : + conv_param.stride((num_stride_dims == 1) ? 0 : i); + CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; + } } - if (!conv_param.has_stride_h()) { - stride_h_ = stride_w_ = conv_param.stride(); + // Setup pad dimensions (pad_). + pad_.Reshape(dim_blob_shape); + int* pad_data = pad_.mutable_cpu_data(); + if (conv_param.has_pad_h() || conv_param.has_pad_w()) { + CHECK_EQ(num_spatial_axes_, 2) + << "pad_h & pad_w can only be used for 2D convolution."; + CHECK_EQ(0, conv_param.pad_size()) + << "Either pad or pad_h/w should be specified; not both."; + pad_data[0] = conv_param.pad_h(); + pad_data[1] = conv_param.pad_w(); } else { - stride_h_ = conv_param.stride_h(); - stride_w_ = conv_param.stride_w(); + const int num_pad_dims = conv_param.pad_size(); + CHECK(num_pad_dims == 0 || num_pad_dims == 1 || + num_pad_dims == num_spatial_axes_) + << "pad must be specified once, or once per spatial dimension " + << "(pad specified " << num_pad_dims << " times; " + << num_spatial_axes_ << " spatial dims);"; + const int kDefaultPad = 0; + for (int i = 0; i < num_spatial_axes_; ++i) { + pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : + conv_param.pad((num_pad_dims == 1) ? 
0 : i); + } } } template void Im2colLayer::Reshape(const vector*>& bottom, const vector*>& top) { - CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " - << "corresponding to (num, channels, height, width)"; - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - top[0]->Reshape( - bottom[0]->num(), channels_ * kernel_h_ * kernel_w_, - (height_ + 2 * pad_h_ - kernel_h_) / stride_h_ + 1, - (width_ + 2 * pad_w_ - kernel_w_) / stride_w_ + 1); + vector top_shape = bottom[0]->shape(); + const int* kernel_shape_data = kernel_shape_.cpu_data(); + const int* stride_data = stride_.cpu_data(); + const int* pad_data = pad_.cpu_data(); + for (int i = 0; i < num_spatial_axes_; ++i) { + top_shape[channel_axis_] *= kernel_shape_data[i]; + const int input_dim = bottom[0]->shape(channel_axis_ + i + 1); + const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) + / stride_data[i] + 1; + top_shape[channel_axis_ + i + 1] = output_dim; + } + top[0]->Reshape(top_shape); + num_ = bottom[0]->count(0, channel_axis_); + bottom_dim_ = bottom[0]->count(channel_axis_); + top_dim_ = top[0]->count(channel_axis_); + + channels_ = bottom[0]->shape(channel_axis_); } template @@ -66,10 +118,27 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - for (int n = 0; n < bottom[0]->num(); ++n) { - im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_h_, kernel_w_, pad_h_, pad_w_, - stride_h_, stride_w_, top_data + top[0]->offset(n)); + for (int n = 0; n < num_; ++n) { + DCHECK_EQ(bottom[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); + DCHECK_EQ(top[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); + DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_); + DCHECK_EQ(pad_.count(), num_spatial_axes_); + DCHECK_EQ(stride_.count(), num_spatial_axes_); + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_cpu(bottom_data + n * bottom_dim_, channels_, + bottom[0]->shape(channel_axis_ + 1), + bottom[0]->shape(channel_axis_ + 2), + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], + top_data + n * top_dim_); + } else { + im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_, + bottom[0]->shape().data() + channel_axis_, + top[0]->shape().data() + channel_axis_, + kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), + top_data + n * top_dim_); + } } } @@ -78,10 +147,22 @@ void Im2colLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - for (int n = 0; n < top[0]->num(); ++n) { - col2im_cpu(top_diff + top[0]->offset(n), channels_, height_, width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, - stride_h_, stride_w_, bottom_diff + bottom[0]->offset(n)); + for (int n = 0; n < num_; ++n) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_cpu(top_diff + n * top_dim_, channels_, + bottom[0]->shape(channel_axis_ + 1), + bottom[0]->shape(channel_axis_ + 2), + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], + bottom_diff + n * bottom_dim_); + } else { + col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_, + 
bottom[0]->shape().data() + channel_axis_, + top[0]->shape().data() + channel_axis_, + kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), + bottom_diff + n * bottom_dim_); + } } } diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index 9c338b14..cd507623 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -12,10 +12,23 @@ void Im2colLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); - for (int n = 0; n < bottom[0]->num(); ++n) { - im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_h_, kernel_w_, pad_h_, pad_w_, - stride_h_, stride_w_, top_data + top[0]->offset(n)); + const int num_kernels = channels_ * top[0]->count(channel_axis_ + 1); + for (int n = 0; n < num_; ++n) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_gpu(bottom_data + n * bottom_dim_, channels_, + bottom[0]->shape(channel_axis_ + 1), + bottom[0]->shape(channel_axis_ + 2), + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], + top_data + n * top_dim_); + } else { + im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_, + num_kernels, bottom[0]->gpu_shape() + channel_axis_, + top[0]->gpu_shape() + channel_axis_, + kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), + top_data + n * top_dim_); + } } } @@ -24,10 +37,22 @@ void Im2colLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - for (int n = 0; n < top[0]->num(); ++n) { - col2im_gpu(top_diff + top[0]->offset(n), channels_, height_, width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, - stride_h_, stride_w_, bottom_diff + bottom[0]->offset(n)); + for (int n = 0; n < num_; ++n) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_gpu(top_diff + n * top_dim_, channels_, + bottom[0]->shape(channel_axis_ + 1), + bottom[0]->shape(channel_axis_ + 2), + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], + bottom_diff + n * bottom_dim_); + } else { + col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_, + bottom[0]->gpu_shape() + channel_axis_, + top[0]->gpu_shape() + channel_axis_, + kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), + bottom_diff + n * bottom_dim_); + } } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 86683eb4..f52c941b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -508,6 +508,13 @@ message ConvolutionParameter { // N independent 3D convolutions, sliding (C/g)-channels // filters across the spatial axes (D, H, W) of the input. optional int32 axis = 16 [default = 1]; + + // Whether to force use of the general ND convolution, even if a specific + // implementation for blobs of the appropriate number of spatial dimensions + // is available. (Currently, there is only a 2D-specific convolution + // implementation; for input blobs with num_axes != 2, this option is + // ignored and the ND implementation will be used.) 
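Both the CPU and GPU Forward/Backward paths above choose the specialized 2-D im2col/col2im only when force_nd_im2col is off and there are exactly two spatial axes; everything else goes through the generic N-D kernels. A hedged sketch of switching the option on from code, using the proto field introduced just below (the include path and the local dispatch variables are assumptions for illustration):

    #include <cstdio>
    #include "caffe/proto/caffe.pb.h"

    int main() {
      caffe::LayerParameter layer_param;
      caffe::ConvolutionParameter* conv = layer_param.mutable_convolution_param();
      conv->add_kernel_size(3);
      conv->add_stride(2);
      // Route even ordinary 2-D inputs through the generic N-D im2col path,
      // e.g. to exercise it in tests, as TestNDAgainst2D does later in this patch.
      conv->set_force_nd_im2col(true);

      // The layer-side decision then reduces to this predicate:
      const int num_spatial_axes = 2;  // hypothetical input
      const bool use_2d_path = !conv->force_nd_im2col() && num_spatial_axes == 2;
      std::printf("use 2-D path: %s\n", use_2d_path ? "yes" : "no");  // "no" once forced
      return 0;
    }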
+ optional bool force_nd_im2col = 17 [default = false]; } message DataParameter { diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index 67d41fff..9df979a2 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -19,54 +19,87 @@ template void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, const vector > >& weights, Blob* out) { + const bool has_depth = (out->num_axes() == 5); + if (!has_depth) { CHECK_EQ(4, out->num_axes()); } // Kernel size, stride, and pad int kernel_h, kernel_w; - if (conv_param->has_kernel_size()) { - kernel_h = kernel_w = conv_param->kernel_size(); - } else { + if (conv_param->has_kernel_h() || conv_param->has_kernel_w()) { kernel_h = conv_param->kernel_h(); kernel_w = conv_param->kernel_w(); + } else { + kernel_h = kernel_w = conv_param->kernel_size(0); } int pad_h, pad_w; - if (!conv_param->has_pad_h()) { - pad_h = pad_w = conv_param->pad(); - } else { + if (conv_param->has_pad_h() || conv_param->has_pad_w()) { pad_h = conv_param->pad_h(); pad_w = conv_param->pad_w(); + } else { + pad_h = pad_w = conv_param->pad_size() ? conv_param->pad(0) : 0; } int stride_h, stride_w; - if (!conv_param->has_stride_h()) { - stride_h = stride_w = conv_param->stride(); - } else { + if (conv_param->has_stride_h() || conv_param->has_stride_w()) { stride_h = conv_param->stride_h(); stride_w = conv_param->stride_w(); + } else { + stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1; + } + int kernel_d, pad_d, stride_d; + if (has_depth) { + kernel_d = kernel_h; + stride_d = stride_h; + pad_d = pad_h; + } else { + kernel_d = stride_d = 1; + pad_d = 0; } // Groups int groups = conv_param->group(); - int o_g = out->channels() / groups; - int k_g = in->channels() / groups; + int o_g = out->shape(1) / groups; + int k_g = in->shape(1) / groups; int o_head, k_head; // Convolution - const Dtype* in_data = in->cpu_data(); - const Dtype* weight_data = weights[0]->cpu_data(); + vector weight_offset(4 + has_depth); + vector in_offset(4 + has_depth); + vector out_offset(4 + has_depth); Dtype* out_data = out->mutable_cpu_data(); - for (int n = 0; n < out->num(); n++) { + for (int n = 0; n < out->shape(0); n++) { for (int g = 0; g < groups; g++) { o_head = o_g * g; k_head = k_g * g; for (int o = 0; o < o_g; o++) { for (int k = 0; k < k_g; k++) { - for (int y = 0; y < out->height(); y++) { - for (int x = 0; x < out->width(); x++) { - for (int p = 0; p < kernel_h; p++) { - for (int q = 0; q < kernel_w; q++) { - int in_y = y * stride_h - pad_h + p; - int in_x = x * stride_w - pad_w + q; - if (in_y >= 0 && in_y < in->height() - && in_x >= 0 && in_x < in->width()) { - out_data[out->offset(n, o + o_head, y, x)] += - in_data[in->offset(n, k + k_head, in_y, in_x)] - * weight_data[weights[0]->offset(o + o_head, k, p, q)]; + for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { + for (int y = 0; y < out->shape(2 + has_depth); y++) { + for (int x = 0; x < out->shape(3 + has_depth); x++) { + for (int r = 0; r < kernel_d; r++) { + for (int p = 0; p < kernel_h; p++) { + for (int q = 0; q < kernel_w; q++) { + int in_z = z * stride_d - pad_d + r; + int in_y = y * stride_h - pad_h + p; + int in_x = x * stride_w - pad_w + q; + if (in_z >= 0 && in_z < (has_depth ? 
in->shape(2) : 1) + && in_y >= 0 && in_y < in->shape(2 + has_depth) + && in_x >= 0 && in_x < in->shape(3 + has_depth)) { + weight_offset[0] = o + o_head; + weight_offset[1] = k; + if (has_depth) { weight_offset[2] = r; } + weight_offset[2 + has_depth] = p; + weight_offset[3 + has_depth] = q; + in_offset[0] = n; + in_offset[1] = k + k_head; + if (has_depth) { in_offset[2] = in_z; } + in_offset[2 + has_depth] = in_y; + in_offset[3 + has_depth] = in_x; + out_offset[0] = n; + out_offset[1] = o + o_head; + if (has_depth) { out_offset[2] = z; } + out_offset[2 + has_depth] = y; + out_offset[3 + has_depth] = x; + out_data[out->offset(out_offset)] += + in->data_at(in_offset) + * weights[0]->data_at(weight_offset); + } + } } } } @@ -79,11 +112,18 @@ void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, // Bias if (conv_param->bias_term()) { const Dtype* bias_data = weights[1]->cpu_data(); - for (int n = 0; n < out->num(); n++) { - for (int o = 0; o < out->channels(); o++) { - for (int y = 0; y < out->height(); y++) { - for (int x = 0; x < out->width(); x++) { - out_data[out->offset(n, o, y, x)] += bias_data[o]; + for (int n = 0; n < out->shape(0); n++) { + for (int o = 0; o < out->shape(1); o++) { + for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) { + for (int y = 0; y < out->shape(2 + has_depth); y++) { + for (int x = 0; x < out->shape(3 + has_depth); x++) { + out_offset[0] = n; + out_offset[1] = o; + if (has_depth) { out_offset[2] = z; } + out_offset[2 + has_depth] = y; + out_offset[3 + has_depth] = x; + out_data[out->offset(out_offset)] += bias_data[o]; + } } } } @@ -150,8 +190,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -188,8 +228,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -217,13 +257,98 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { } } +TYPED_TEST(ConvolutionLayerTest, Test0DConvolution) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + const int kNumOutput = 3; + convolution_param->set_num_output(kNumOutput); + convolution_param->set_axis(3); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new ConvolutionLayer(layer_param)); + vector top_shape = this->blob_bottom_->shape(); + top_shape[3] = kNumOutput; + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(top_shape, this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. 
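Test0DConvolution sets the convolution axis to the last dimension, so there are no spatial axes left and the layer reduces to an independent inner product at every leading index: out[n][d] = bias[d] + sum_k weight[d][k] * in[n][k]. The check loop that follows verifies exactly this; a compact reference sketch of the same computation, with hypothetical flat buffers and sizes:

    #include <vector>

    // num leading positions, bottom_dim inputs and dim outputs per position.
    std::vector<float> conv0d(const std::vector<float>& in,      // num x bottom_dim
                              const std::vector<float>& weight,  // dim x bottom_dim
                              const std::vector<float>& bias,    // dim
                              int num, int bottom_dim, int dim) {
      std::vector<float> out(num * dim, 0.f);
      for (int n = 0; n < num; ++n) {
        for (int d = 0; d < dim; ++d) {
          float value = bias[d];
          for (int k = 0; k < bottom_dim; ++k) {
            value += weight[d * bottom_dim + k] * in[n * bottom_dim + k];
          }
          out[n * dim + d] = value;
        }
      }
      return out;
    }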
+ vector weight_offset(2); + const Blob* weight = layer->blobs()[0].get(); + const Blob* bias = layer->blobs()[1].get(); + const int num = this->blob_top_->count(3); + const int dim = this->blob_top_->shape(3); + const int bottom_dim = this->blob_bottom_->shape(3); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < dim; ++d) { + weight_offset[0] = d; + Dtype value = bias->cpu_data()[d]; + for (int bottom_d = 0; bottom_d < bottom_dim; ++bottom_d) { + weight_offset[1] = bottom_d; + value += weight->data_at(weight_offset) * + this->blob_bottom_->cpu_data()[n * bottom_dim + bottom_d]; + } + EXPECT_NEAR(value, this->blob_top_->cpu_data()[n * dim + d], 1e-4); + } + } +} + +TYPED_TEST(ConvolutionLayerTest, TestSimple3DConvolution) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 5; + bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); + bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new ConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. 
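TestSimple3DConvolution reshapes the test bottoms into 5-D (N, C, D, H, W) volumes with a depth of 5 and checks the layer against the depth-aware caffe_conv reference above. Assuming the fixture's usual 2 x 3 x 6 x 4 bottoms (the constructor is outside this excerpt), the reshaped input is 2 x 3 x 5 x 6 x 4, and with kernel 3, stride 2 and no padding each spatial axis follows (input - kernel) / stride + 1:

    #include <cstdio>

    int main() {
      const int in[3] = {5, 6, 4};  // D, H, W, assumed as described above
      const int kernel = 3, stride = 2, pad = 0;
      for (int i = 0; i < 3; ++i) {
        const int out = (in[i] + 2 * pad - kernel) / stride + 1;
        std::printf("%d -> %d\n", in[i], out);  // prints 5 -> 2, 6 -> 2, 4 -> 1
      }
      return 0;
    }

With num_output set to 4, the expected top shape under these assumptions is 2 x 4 x 2 x 2 x 1.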
+ const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + TYPED_TEST(ConvolutionLayerTest, Test1x1Convolution) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(1); - convolution_param->set_stride(1); + convolution_param->add_kernel_size(1); + convolution_param->add_stride(1); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -249,8 +374,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -288,8 +413,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(1); convolution_param->set_bias_term(false); shared_ptr > layer( @@ -350,14 +475,11 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { convolution_param->set_bias_term(false); layer.reset(new ConvolutionLayer(layer_param)); layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 3, 1, 3)); + layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); Dtype* weights_2 = layer->blobs()[0]->mutable_cpu_data(); - for (int c = 0; c < 3; ++c) { - int i = c * 3; // 1 x 3 filter - weights_2[i + 0] = -1; - weights_2[i + 1] = 0; - weights_2[i + 2] = 1; - } + weights_2[0] = -1; + weights_2[1] = 0; + weights_2[2] = 1; layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); // Test equivalence of full and separable filters. 
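The Sobel tests rely on separability: the 3 x 3 kernel [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]] factors into a 3 x 1 smoothing column [1, 2, 1]^T followed by a 1 x 3 differencing row [-1, 0, 1]. Because the first stage (set up outside the hunk shown) already produces a single-channel map, the second stage needs exactly one 1 x 3 filter, which is why the weight blob shrinks from (1, 3, 1, 3) to (1, 1, 1, 3) above. A quick numeric check of the factorization:

    #include <cassert>

    int main() {
      const int col[3] = {1, 2, 1};    // 3x1 smoothing stage
      const int row[3] = {-1, 0, 1};   // 1x3 differencing stage
      const int sobel[3][3] = {{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}};
      for (int y = 0; y < 3; ++y) {
        for (int x = 0; x < 3; ++x) {
          assert(col[y] * row[x] == sobel[y][x]);  // outer product rebuilds the full kernel
        }
      }
      return 0;
    }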
@@ -368,6 +490,124 @@ TYPED_TEST(ConvolutionLayerTest, TestSobelConvolution) { } } +TYPED_TEST(ConvolutionLayerTest, TestNDAgainst2D) { + typedef typename TypeParam::Dtype Dtype; + const int kernel_h = 11; + const int kernel_w = 13; + vector bottom_shape(4); + bottom_shape[0] = 15; + bottom_shape[1] = 18; + bottom_shape[2] = kernel_h * 2; + bottom_shape[3] = kernel_w * 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_num_output(12); + convolution_param->set_bias_term(false); + convolution_param->set_group(6); + convolution_param->set_kernel_h(kernel_h); + convolution_param->set_kernel_w(kernel_w); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + Blob weights; + Blob top_diff; + // Shape and fill weights and top_diff. + bool copy_diff; + bool reshape; + { + ConvolutionLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + top_diff.ReshapeLike(*this->blob_top_); + filler.Fill(&top_diff); + ASSERT_EQ(1, layer.blobs().size()); + copy_diff = false; reshape = true; + weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); + } + vector propagate_down(1, true); + Blob result_2d; + Blob backward_result_2d; + Blob backward_weight_result_2d; + // Test with 2D im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_2d. + convolution_param->set_force_nd_im2col(false); + ConvolutionLayer layer_2d(layer_param); + layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_2d.blobs().size()); + copy_diff = false; reshape = false; + layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_2d. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_2d.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); + } + Blob result_nd; + Blob backward_result_nd; + Blob backward_weight_result_nd; + // Test with ND im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_nd. 
+ convolution_param->set_force_nd_im2col(true); + ConvolutionLayer layer_nd(layer_param); + layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_nd.blobs().size()); + copy_diff = false; reshape = false; + layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_nd. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_nd.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); + } + ASSERT_EQ(result_nd.count(), result_2d.count()); + for (int i = 0; i < result_2d.count(); ++i) { + EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); + } + ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); + for (int i = 0; i < backward_result_2d.count(); ++i) { + EXPECT_EQ(backward_result_2d.cpu_diff()[i], + backward_result_nd.cpu_diff()[i]); + } + ASSERT_EQ(backward_weight_result_nd.count(), + backward_weight_result_2d.count()); + for (int i = 0; i < backward_weight_result_2d.count(); ++i) { + EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], + backward_weight_result_nd.cpu_diff()[i]); + } +} + TYPED_TEST(ConvolutionLayerTest, TestGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -375,8 +615,36 @@ TYPED_TEST(ConvolutionLayerTest, TestGradient) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + ConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ConvolutionLayerTest, TestGradient3D) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 5; + bottom_shape[3] = this->blob_bottom_vec_[0]->shape(2); + bottom_shape[4] = this->blob_bottom_vec_[0]->shape(3); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -393,8 +661,8 @@ TYPED_TEST(ConvolutionLayerTest, Test1x1Gradient) { layer_param.mutable_convolution_param(); 
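The gradient tests here and below (TestGradient, TestGradient3D, Test1x1Gradient, and the deconvolution variants) all construct GradientChecker(1e-2, 1e-3), whose arguments are the finite-difference step and the relative error threshold: the checker perturbs each input by the step, re-runs Forward, and compares the resulting numerical derivative against what Backward reports. A one-function sketch of the central-difference estimate it is built on (the helper name is made up for illustration):

    #include <functional>

    // Central-difference estimate of df/dx, the idea behind GradientChecker;
    // the default step mirrors the 1e-2 used in the tests above.
    double numeric_gradient(const std::function<double(double)>& f, double x,
                            double step = 1e-2) {
      return (f(x + step) - f(x - step)) / (2.0 * step);
    }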
this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->set_kernel_size(1); - convolution_param->set_stride(1); + convolution_param->add_kernel_size(1); + convolution_param->add_stride(1); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -409,8 +677,8 @@ TYPED_TEST(ConvolutionLayerTest, TestGradientGroup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -472,8 +740,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSetupCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -509,8 +777,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSimpleConvolutionCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("constant"); @@ -542,8 +810,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSimpleConvolutionGroupCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); @@ -581,8 +849,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSobelConvolutionCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(1); convolution_param->set_bias_term(false); shared_ptr > layer( @@ -643,14 +911,11 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestSobelConvolutionCuDNN) { convolution_param->set_bias_term(false); layer.reset(new CuDNNConvolutionLayer(layer_param)); layer->blobs().resize(1); - layer->blobs()[0].reset(new Blob(1, 3, 1, 3)); + layer->blobs()[0].reset(new Blob(1, 1, 1, 3)); TypeParam* weights_2 = layer->blobs()[0]->mutable_cpu_data(); - for (int c = 0; c < 3; ++c) { - int i = c * 3; // 1 x 3 filter - weights_2[i + 0] = -1; - weights_2[i + 1] = 0; - weights_2[i + 2] = 1; - } + weights_2[0] = -1; + weights_2[1] = 0; + weights_2[2] = 1; layer->SetUp(sep_blob_bottom_vec, sep_blob_top_vec); layer->Forward(sep_blob_bottom_vec, sep_blob_top_vec); // Test 
equivalence of full and separable filters. @@ -667,8 +932,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestGradientCuDNN) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(2); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -682,8 +947,8 @@ TYPED_TEST(CuDNNConvolutionLayerTest, TestGradientGroupCuDNN) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(3); convolution_param->set_group(3); convolution_param->mutable_weight_filler()->set_type("gaussian"); diff --git a/src/caffe/test/test_deconvolution_layer.cpp b/src/caffe/test/test_deconvolution_layer.cpp index fc63d5ef..770e7b27 100644 --- a/src/caffe/test/test_deconvolution_layer.cpp +++ b/src/caffe/test/test_deconvolution_layer.cpp @@ -58,8 +58,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); @@ -96,8 +96,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestSimpleDeconvolution) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_num_output(4); convolution_param->mutable_weight_filler()->set_type("constant"); convolution_param->mutable_weight_filler()->set_value(1); @@ -144,8 +144,8 @@ TYPED_TEST(DeconvolutionLayerTest, TestGradient) { layer_param.mutable_convolution_param(); this->blob_bottom_vec_.push_back(this->blob_bottom_2_); this->blob_top_vec_.push_back(this->blob_top_2_); - convolution_param->set_kernel_size(2); - convolution_param->set_stride(1); + convolution_param->add_kernel_size(2); + convolution_param->add_stride(1); convolution_param->set_num_output(1); convolution_param->mutable_weight_filler()->set_type("gaussian"); convolution_param->mutable_bias_filler()->set_type("gaussian"); @@ -155,4 +155,151 @@ TYPED_TEST(DeconvolutionLayerTest, TestGradient) { this->blob_top_vec_); } +TYPED_TEST(DeconvolutionLayerTest, TestNDAgainst2D) { + typedef typename TypeParam::Dtype Dtype; + const int kernel_h = 11; + const int kernel_w = 13; + vector bottom_shape(4); + bottom_shape[0] = 15; + bottom_shape[1] = 12; + bottom_shape[2] = kernel_h * 2; + bottom_shape[3] = kernel_w * 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + 
layer_param.mutable_convolution_param(); + convolution_param->set_num_output(18); + convolution_param->set_bias_term(false); + convolution_param->set_group(6); + convolution_param->set_kernel_h(kernel_h); + convolution_param->set_kernel_w(kernel_w); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + Blob weights; + Blob top_diff; + // Shape and fill weights and top_diff. + bool copy_diff; + bool reshape; + { + DeconvolutionLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + top_diff.ReshapeLike(*this->blob_top_); + filler.Fill(&top_diff); + ASSERT_EQ(1, layer.blobs().size()); + copy_diff = false; reshape = true; + weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); + } + vector propagate_down(1, true); + Blob result_2d; + Blob backward_result_2d; + Blob backward_weight_result_2d; + // Test with 2D im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_2d. + convolution_param->set_force_nd_im2col(false); + DeconvolutionLayer layer_2d(layer_param); + layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_2d.blobs().size()); + copy_diff = false; reshape = false; + layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_2d. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_2d.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); + } + Blob result_nd; + Blob backward_result_nd; + Blob backward_weight_result_nd; + // Test with ND im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_nd. + convolution_param->set_force_nd_im2col(true); + DeconvolutionLayer layer_nd(layer_param); + layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_nd.blobs().size()); + copy_diff = false; reshape = false; + layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_nd. 
+ ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_nd.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); + } + ASSERT_EQ(result_nd.count(), result_2d.count()); + for (int i = 0; i < result_2d.count(); ++i) { + EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); + } + ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); + for (int i = 0; i < backward_result_2d.count(); ++i) { + EXPECT_EQ(backward_result_2d.cpu_diff()[i], + backward_result_nd.cpu_diff()[i]); + } + ASSERT_EQ(backward_weight_result_nd.count(), + backward_weight_result_2d.count()); + for (int i = 0; i < backward_weight_result_2d.count(); ++i) { + EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], + backward_weight_result_nd.cpu_diff()[i]); + } +} + +TYPED_TEST(DeconvolutionLayerTest, TestGradient3D) { + typedef typename TypeParam::Dtype Dtype; + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 2; + bottom_shape[3] = 3; + bottom_shape[4] = 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(2); + convolution_param->add_stride(2); + convolution_param->add_pad(1); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + DeconvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + } // namespace caffe diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index 0017ac23..f0b75fcc 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -22,6 +22,12 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int height_col, const int width_col, Dtype* data_col); +template +__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col); + extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; template @@ -30,11 +36,18 @@ class Im2colKernelTest : public GPUDeviceTest { Im2colKernelTest() // big so launches > 1024 threads : blob_bottom_(new Blob(5, 500, 10, 10)), + blob_kernel_shape_(new Blob()), + blob_stride_(new Blob()), + blob_pad_(new Blob()), blob_top_(new Blob()), blob_top_cpu_(new Blob()) { FillerParameter filler_param; GaussianFiller filler(filler_param); filler.Fill(this->blob_bottom_); + vector dim_blob_shape(1, 2); + blob_kernel_shape_->Reshape(dim_blob_shape); + blob_stride_->Reshape(dim_blob_shape); + blob_pad_->Reshape(dim_blob_shape); height_ = blob_bottom_->height(); width_ = blob_bottom_->width(); @@ -44,14 +57,26 @@ class Im2colKernelTest : public GPUDeviceTest { kernel_size_ = 3; height_col_ = (height_ + 2 * pad_ - 
kernel_size_) / stride_ + 1; width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + + for (int i = 0; i < 2; ++i) { + blob_kernel_shape_->mutable_cpu_data()[i] = kernel_size_; + blob_stride_->mutable_cpu_data()[i] = stride_; + blob_pad_->mutable_cpu_data()[i] = pad_; + } } virtual ~Im2colKernelTest() { - delete blob_bottom_; - delete blob_top_; - delete blob_top_cpu_; + delete blob_bottom_; + delete blob_top_; + delete blob_top_cpu_; + delete blob_kernel_shape_; + delete blob_stride_; + delete blob_pad_; } + Blob* const blob_kernel_shape_; + Blob* const blob_stride_; + Blob* const blob_pad_; Blob* const blob_bottom_; Blob* const blob_top_; Blob* const blob_top_cpu_; @@ -67,7 +92,7 @@ class Im2colKernelTest : public GPUDeviceTest { TYPED_TEST_CASE(Im2colKernelTest, TestDtypes); -TYPED_TEST(Im2colKernelTest, TestGPU) { +TYPED_TEST(Im2colKernelTest, Test2D) { // Reshape the blobs to correct size for im2col output this->blob_top_->Reshape(this->blob_bottom_->num(), this->channels_ * this->kernel_size_ * this->kernel_size_, @@ -122,4 +147,58 @@ TYPED_TEST(Im2colKernelTest, TestGPU) { } } +TYPED_TEST(Im2colKernelTest, TestND) { + // Reshape the blobs to correct size for im2col output + this->blob_top_->Reshape(this->blob_bottom_->num(), + this->channels_ * this->kernel_size_ * this->kernel_size_, + this->height_col_, + this->width_col_); + + this->blob_top_cpu_->ReshapeLike(*this->blob_top_); + + const TypeParam* bottom_data_cpu = this->blob_bottom_->cpu_data(); + TypeParam* top_data_cpu = this->blob_top_cpu_->mutable_cpu_data(); + + // CPU Version + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + im2col_nd_cpu(bottom_data_cpu + this->blob_bottom_->offset(n), 2, + this->blob_bottom_->shape().data() + 1, + this->blob_top_cpu_->shape().data() + 1, + this->blob_kernel_shape_->cpu_data(), + this->blob_pad_->cpu_data(), this->blob_stride_->cpu_data(), + top_data_cpu + this->blob_top_cpu_->offset(n)); + } + + // GPU version + int num_kernels = this->channels_ * this->height_col_ * this->width_col_; + int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels); + const TypeParam* bottom_data_gpu = this->blob_bottom_->gpu_data(); + + // Launch with different grid sizes + for (int grid_div = 2; grid_div <= 8; grid_div++) { + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + const int grid_dim = default_grid_dim / grid_div; + TypeParam* top_data_gpu = this->blob_top_->mutable_gpu_data(); + // NOLINT_NEXT_LINE(whitespace/operators) + im2col_nd_gpu_kernel<<>>( + num_kernels, bottom_data_gpu + this->blob_bottom_->offset(n), + this->blob_bottom_->gpu_shape() + 1, this->blob_top_->gpu_shape() + 1, + this->blob_kernel_shape_->gpu_data(), this->blob_pad_->gpu_data(), + this->blob_stride_->gpu_data(), + top_data_gpu + this->blob_top_->offset(n)); + CUDA_POST_KERNEL_CHECK; + } + + // Compare results against CPU version + for (int i = 0; i < this->blob_top_->count(); ++i) { + TypeParam cpuval = top_data_cpu[i]; + TypeParam gpuval = this->blob_top_->cpu_data()[i]; + EXPECT_EQ(cpuval, gpuval); + if (cpuval != gpuval) { + break; + } + } + } +} + } // namespace caffe diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index f50abe10..293aa262 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -21,6 +21,7 @@ class Im2colLayerTest : public MultiDeviceTest { : blob_bottom_(new Blob(2, 3, 6, 5)), blob_top_(new Blob()) { // fill the values + Caffe::set_random_seed(1701); FillerParameter filler_param; GaussianFiller 
filler(filler_param); filler.Fill(this->blob_bottom_); @@ -41,8 +42,8 @@ TYPED_TEST(Im2colLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), 2); @@ -56,8 +57,8 @@ TYPED_TEST(Im2colLayerTest, TestForward) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -73,14 +74,27 @@ TYPED_TEST(Im2colLayerTest, TestGradient) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); - convolution_param->set_kernel_size(3); - convolution_param->set_stride(2); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, this->blob_top_vec_); } +TYPED_TEST(Im2colLayerTest, TestGradientForceND) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_force_nd_im2col(true); + Im2colLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} TYPED_TEST(Im2colLayerTest, TestRect) { typedef typename TypeParam::Dtype Dtype; @@ -89,7 +103,7 @@ TYPED_TEST(Im2colLayerTest, TestRect) { layer_param.mutable_convolution_param(); convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); - convolution_param->set_stride(2); + convolution_param->add_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); @@ -108,7 +122,7 @@ TYPED_TEST(Im2colLayerTest, TestRectGradient) { layer_param.mutable_convolution_param(); convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); - convolution_param->set_stride(2); + convolution_param->add_stride(2); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index c48f31f3..b0a7be50 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "caffe/util/im2col.hpp" #include "caffe/util/math_functions.hpp" @@ -44,6 +45,98 @@ template void im2col_cpu(const double* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_col); +template +inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, + const int num_spatial_axes, const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_output) { + if 
(!im2col) { + int im_size = im_shape[0]; + for (int i = 0; i < num_spatial_axes; ++i) { + im_size *= im_shape[1 + i]; + } + caffe_set(im_size, Dtype(0), data_output); + } + int kernel_size = 1; + for (int i = 0; i < num_spatial_axes; ++i) { + kernel_size *= kernel_shape[i]; + } + const int channels_col = col_shape[0]; + vector d_offset(num_spatial_axes, 0); + vector d_iter(num_spatial_axes, 0); + for (int c = 0; c < channels_col; ++c) { + // Loop over spatial axes in reverse order to compute a per-axis offset. + int offset = c; + for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { + if (d_i < num_spatial_axes - 1) { + offset /= kernel_shape[d_i + 1]; + } + d_offset[d_i] = offset % kernel_shape[d_i]; + } + for (bool incremented = true; incremented; ) { + // Loop over spatial axes in forward order to compute the indices in the + // image and column, and whether the index lies in the padding. + int index_col = c; + int index_im = c / kernel_size; + bool is_padding = false; + for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { + const int d = d_iter[d_i]; + const int d_pad = d * stride[d_i] - pad[d_i] + d_offset[d_i]; + is_padding |= d_pad < 0 || d_pad >= im_shape[d_i + 1]; + index_col *= col_shape[d_i + 1]; + index_col += d; + index_im *= im_shape[d_i + 1]; + index_im += d_pad; + } + if (im2col) { + if (is_padding) { + data_output[index_col] = 0; + } else { + data_output[index_col] = data_input[index_im]; + } + } else if (!is_padding) { // col2im + data_output[index_im] += data_input[index_col]; + } + // Loop over spatial axes in reverse order to choose an index, + // like counting. + incremented = false; + for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { + const int d_max = col_shape[d_i + 1]; + DCHECK_LT(d_iter[d_i], d_max); + if (d_iter[d_i] == d_max - 1) { + d_iter[d_i] = 0; + } else { // d_iter[d_i] < d_max - 1 + ++d_iter[d_i]; + incremented = true; + break; + } + } + } // while(incremented) { + } // for (int c = 0; c < channels_col; ++c) { +} + +template +void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col) { + const bool kIm2Col = true; + im2col_nd_core_cpu(data_im, kIm2Col, num_spatial_axes, im_shape, col_shape, + kernel_shape, pad, stride, data_col); +} + +// Explicit instantiation +template void im2col_nd_cpu(const float* data_im, + const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + float* data_col); +template void im2col_nd_cpu(const double* data_im, + const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + double* data_col); + template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int patch_h, const int patch_w, @@ -80,4 +173,27 @@ template void col2im_cpu(const double* data_col, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); +template +void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_im) { + const bool kIm2Col = false; + im2col_nd_core_cpu(data_col, kIm2Col, num_spatial_axes, im_shape, col_shape, + kernel_shape, pad, stride, data_im); +} + +// Explicit instantiation +template void col2im_nd_cpu(const float* 
data_col, + const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + float* data_im); +template void col2im_nd_cpu(const double* data_col, + const int num_spatial_axes, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + double* data_im); + + } // namespace caffe diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index c90f93eb..5a478ba6 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -59,7 +59,6 @@ void im2col_gpu(const Dtype* data_im, const int channels, CUDA_POST_KERNEL_CHECK; } - // Explicit instantiation template void im2col_gpu(const float* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, @@ -70,6 +69,156 @@ template void im2col_gpu(const double* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_col); +template +__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col) { + int d_temp[num_axes]; // NOLINT(runtime/arrays) + int d_iter[num_axes]; // NOLINT(runtime/arrays) + int i; + CUDA_KERNEL_LOOP(index, n) { + // Initialize channel_in, computed in the loop below, with intermediate + // computations used to compute the spatial indices. + int channel_in = index; + int channel_out = 1; + for (i = num_axes - 1; i >= 0; --i) { + d_temp[i] = channel_in % col_shape[i + 1]; + channel_in /= col_shape[i + 1]; + channel_out *= kernel_shape[i]; + } + channel_out *= channel_in; + int data_col_inc = 1; + for (i = 0; i < num_axes; ++i) { + channel_out *= col_shape[i + 1]; + channel_out += d_temp[i]; + d_temp[i] = d_temp[i] * stride[i] - pad[i]; + channel_in *= im_shape[i + 1]; + channel_in += d_temp[i]; + data_col_inc *= col_shape[i + 1]; + d_iter[i] = 0; + } + Dtype* data_col_ptr = data_col + channel_out; + const Dtype* data_im_ptr = data_im + channel_in; + bool incremented; + do { + bool in_range = true; + for (i = 0; i < num_axes; ++i) { + const int d_iter_im = d_iter[i] + d_temp[i]; + in_range &= d_iter_im >= 0 && d_iter_im < im_shape[i + 1]; + if (!in_range) { break; } + } + if (in_range) { + int data_im_offset = d_iter[0]; + for (i = 1; i < num_axes; ++i) { + data_im_offset *= im_shape[i + 1]; + data_im_offset += d_iter[i]; + } + *data_col_ptr = data_im_ptr[data_im_offset]; + } else { + *data_col_ptr = 0; + } + data_col_ptr += data_col_inc; + incremented = false; + for (i = num_axes - 1; i >= 0; --i) { + const int d_max = kernel_shape[i]; + if (d_iter[i] == d_max - 1) { + d_iter[i] = 0; + } else { // d_iter[i] < d_max - 1 + ++d_iter[i]; + incremented = true; + break; + } + } // for (int i = num_axes - 1; i >= 0; --i) + } while (incremented); // do + } // CUDA_KERNEL_LOOP(index, n) +} + +template +void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, + const int num_kernels, const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_col) { + switch (num_spatial_axes) { + case 1: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 2: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, 
stride, data_col); + break; + case 3: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 4: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 5: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 6: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 7: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 8: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 9: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + case 10: + im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + num_kernels, data_im, im_shape, col_shape, + kernel_shape, pad, stride, data_col); + break; + default: + LOG(FATAL) << "im2col_nd_gpu does not support computation with " + << num_spatial_axes << " spatial axes"; + } + CUDA_POST_KERNEL_CHECK; +} + +// Explicit instantiation +template void im2col_nd_gpu(const float* data_im, + const int num_spatial_axes, const int col_size, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + float* data_col); +template void im2col_nd_gpu(const double* data_im, + const int num_spatial_axes, const int col_size, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + double* data_col); + template __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, const int height, const int width, const int channels, @@ -141,4 +290,159 @@ template void col2im_gpu(const double* data_col, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); +template +__global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_im) { + int d_im[num_axes]; // NOLINT(runtime/arrays) + int d_col_iter[num_axes]; // NOLINT(runtime/arrays) + int d_col_start[num_axes]; // NOLINT(runtime/arrays) + int d_col_end[num_axes]; // NOLINT(runtime/arrays) + CUDA_KERNEL_LOOP(index, n) { + // Initialize channel_in, computed in the loop below, with intermediate + // computations used to compute the spatial indices. + int channel_im = index; + // Calculate d_im (image dimensions). + for (int i = num_axes - 1; i >= 0; --i) { + d_im[i] = channel_im % im_shape[i + 1] + pad[i]; + channel_im /= im_shape[i + 1]; + } + // Calculate col start/end indices. + bool done = false; + for (int i = 0; i < num_axes; ++i) { + d_col_start[i] = d_col_iter[i] = + (d_im[i] < kernel_shape[i]) ? 
+ 0 : (d_im[i] - kernel_shape[i]) / stride[i] + 1; + d_col_end[i] = min(d_im[i] / stride[i] + 1, col_shape[i + 1]); + if (d_col_start[i] >= d_col_end[i]) { + // Skip computation if the dimension is 0 at any spatial axis -- + // final val will be 0. + data_im[index] = 0; + done = true; + break; // for (int i = 0; i < num_axes; ++i) + } + } + if (done) { + continue; // CUDA_KERNEL_LOOP(index, n) + } + // Loop over the col to compute the output val. + Dtype val = 0; + bool incremented = true; + do { + // Compute the final offset. + int final_offset = 0; + int kernel_shape_prod = 1; + for (int i = num_axes - 1; i >= 0; --i) { + final_offset += + (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod; + kernel_shape_prod *= kernel_shape[i]; + } + final_offset += kernel_shape_prod * channel_im; + for (int i = 0; i < num_axes; ++i) { + final_offset *= col_shape[i + 1]; + final_offset += d_col_iter[i]; + } + val += data_col[final_offset]; + incremented = false; + for (int i = num_axes - 1; i >= 0; --i) { + const int d_max = d_col_end[i]; + if (d_col_iter[i] == d_max - 1) { + d_col_iter[i] = d_col_start[i]; + } else { // d_col_iter[i] < d_max - 1 + ++d_col_iter[i]; + incremented = true; + break; // for (int i = num_axes - 1; i >= 0; --i) + } + } // for (int i = num_axes - 1; i >= 0; --i) + } while (incremented); + data_im[index] = val; + } // CUDA_KERNEL_LOOP(index, n) +} + +template +void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, + const int im_size, const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + Dtype* data_im) { + switch (num_spatial_axes) { + case 1: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 2: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 3: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 4: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 5: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 6: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 7: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 8: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 9: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + case 10: + col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + im_size, data_col, im_shape, col_shape, + kernel_shape, pad, stride, data_im); + break; + default: + LOG(FATAL) << "col2im_nd_gpu does not support computation with " + << num_spatial_axes << " spatial axes"; + } + CUDA_POST_KERNEL_CHECK; +} + +// Explicit 
instantiation +template void col2im_nd_gpu(const float* data_col, + const int num_spatial_axes, const int im_size, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + float* data_im); +template void col2im_nd_gpu(const double* data_col, + const int num_spatial_axes, const int im_size, + const int* im_shape, const int* col_shape, + const int* kernel_shape, const int* pad, const int* stride, + double* data_im); + } // namespace caffe diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index 92e5cf55..ac379e50 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -193,7 +193,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_pad()) { if (type == "conv") { - layer_param->mutable_convolution_param()->set_pad(v0_layer_param.pad()); + layer_param->mutable_convolution_param()->add_pad(v0_layer_param.pad()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad()); } else { @@ -203,7 +203,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_kernelsize()) { if (type == "conv") { - layer_param->mutable_convolution_param()->set_kernel_size( + layer_param->mutable_convolution_param()->add_kernel_size( v0_layer_param.kernelsize()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_kernel_size( @@ -224,7 +224,7 @@ bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, } if (v0_layer_param.has_stride()) { if (type == "conv") { - layer_param->mutable_convolution_param()->set_stride( + layer_param->mutable_convolution_param()->add_stride( v0_layer_param.stride()); } else if (type == "pool") { layer_param->mutable_pooling_param()->set_stride( From 328df2450c534119f239ce1d606f8502922c6825 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 19 Sep 2015 13:50:57 -0700 Subject: [PATCH 094/223] clarify im2col + col2im var names - clarify indices by naming *_im for indices in image and *_col for indices in column - mark corresonding im2col + col2im quantities by renaming patch_* -> kernel_* - fix out-of-date names in equivalent col2im loop --- include/caffe/util/im2col.hpp | 4 +- src/caffe/util/im2col.cpp | 72 +++++++++++++++++------------------ src/caffe/util/im2col.cu | 69 ++++++++++++++++----------------- 3 files changed, 73 insertions(+), 72 deletions(-) diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp index 531fd29c..d3eb6ccd 100644 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp @@ -23,7 +23,7 @@ void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, template void col2im_cpu(const Dtype* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im); @@ -47,7 +47,7 @@ void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, template void col2im_gpu(const Dtype* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im); diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index b0a7be50..afeb5e5d 
100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -17,19 +17,19 @@ void im2col_cpu(const Dtype* data_im, const int channels, int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; int channels_col = channels * kernel_h * kernel_w; - for (int c = 0; c < channels_col; ++c) { - int w_offset = c % kernel_w; - int h_offset = (c / kernel_w) % kernel_h; - int c_im = c / kernel_h / kernel_w; - for (int h = 0; h < height_col; ++h) { - for (int w = 0; w < width_col; ++w) { - int h_pad = h * stride_h - pad_h + h_offset; - int w_pad = w * stride_w - pad_w + w_offset; - if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) - data_col[(c * height_col + h) * width_col + w] = - data_im[(c_im * height + h_pad) * width + w_pad]; + for (int c_col = 0; c_col < channels_col; ++c_col) { + int w_offset = c_col % kernel_w; + int h_offset = (c_col / kernel_w) % kernel_h; + int c_im = c_col / kernel_h / kernel_w; + for (int h_col = 0; h_col < height_col; ++h_col) { + for (int w_col = 0; w_col < width_col; ++w_col) { + int h_im = h_col * stride_h - pad_h + h_offset; + int w_im = w_col * stride_w - pad_w + w_offset; + if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) + data_col[(c_col * height_col + h_col) * width_col + w_col] = + data_im[(c_im * height + h_im) * width + w_im]; else - data_col[(c * height_col + h) * width_col + w] = 0; + data_col[(c_col * height_col + h_im) * width_col + w_im] = 0; } } } @@ -64,9 +64,9 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, const int channels_col = col_shape[0]; vector d_offset(num_spatial_axes, 0); vector d_iter(num_spatial_axes, 0); - for (int c = 0; c < channels_col; ++c) { + for (int c_col = 0; c_col < channels_col; ++c_col) { // Loop over spatial axes in reverse order to compute a per-axis offset. - int offset = c; + int offset = c_col; for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { if (d_i < num_spatial_axes - 1) { offset /= kernel_shape[d_i + 1]; @@ -76,17 +76,17 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, for (bool incremented = true; incremented; ) { // Loop over spatial axes in forward order to compute the indices in the // image and column, and whether the index lies in the padding. 
- int index_col = c; - int index_im = c / kernel_size; + int index_col = c_col; + int index_im = c_col / kernel_size; bool is_padding = false; for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { const int d = d_iter[d_i]; - const int d_pad = d * stride[d_i] - pad[d_i] + d_offset[d_i]; - is_padding |= d_pad < 0 || d_pad >= im_shape[d_i + 1]; + const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i]; + is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1]; index_col *= col_shape[d_i + 1]; index_col += d; index_im *= im_shape[d_i + 1]; - index_im += d_pad; + index_im += d_im; } if (im2col) { if (is_padding) { @@ -139,25 +139,25 @@ template void im2col_nd_cpu(const double* data_im, template void col2im_cpu(const Dtype* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im) { caffe_set(height * width * channels, Dtype(0), data_im); - int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1; - int channels_col = channels * patch_h * patch_w; - for (int c = 0; c < channels_col; ++c) { - int w_offset = c % patch_w; - int h_offset = (c / patch_w) % patch_h; - int c_im = c / patch_h / patch_w; - for (int h = 0; h < height_col; ++h) { - for (int w = 0; w < width_col; ++w) { - int h_pad = h * stride_h - pad_h + h_offset; - int w_pad = w * stride_w - pad_w + w_offset; - if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) - data_im[(c_im * height + h_pad) * width + w_pad] += - data_col[(c * height_col + h) * width_col + w]; + int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; + int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + int channels_col = channels * kernel_h * kernel_w; + for (int c_col = 0; c_col < channels_col; ++c_col) { + int w_offset = c_col % kernel_w; + int h_offset = (c_col / kernel_w) % kernel_h; + int c_im = c_col / kernel_h / kernel_w; + for (int h_col = 0; h_col < height_col; ++h_col) { + for (int w_col = 0; w_col < width_col; ++w_col) { + int h_im = h_col * stride_h - pad_h + h_offset; + int w_im = w_col * stride_w - pad_w + w_offset; + if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) + data_im[(c_im * height + h_im) * width + w_im] += + data_col[(c_col * height_col + h_col) * width_col + w_col]; } } } @@ -165,11 +165,11 @@ void col2im_cpu(const Dtype* data_col, const int channels, // Explicit instantiation template void col2im_cpu(const float* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, float* data_im); template void col2im_cpu(const double* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index 5a478ba6..897e3c92 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -16,22 +16,23 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int height_col, const int width_col, Dtype* data_col) { CUDA_KERNEL_LOOP(index, 
n) { - int w_out = index % width_col; int h_index = index / width_col; - int h_out = h_index % height_col; - int channel_in = h_index / height_col; - int channel_out = channel_in * kernel_h * kernel_w; - int h_in = h_out * stride_h - pad_h; - int w_in = w_out * stride_w - pad_w; + int h_col = h_index % height_col; + int w_col = index % width_col; + int c_im = h_index / height_col; + int c_col = c_im * kernel_h * kernel_w; + int h_offset = h_col * stride_h - pad_h; + int w_offset = w_col * stride_w - pad_w; Dtype* data_col_ptr = data_col; - data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; + data_col_ptr += (c_col * height_col + h_col) * width_col + w_col; const Dtype* data_im_ptr = data_im; - data_im_ptr += (channel_in * height + h_in) * width + w_in; + data_im_ptr += (c_im * height + h_offset) * width + w_offset; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { - int h = h_in + i; - int w = w_in + j; - *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? + int h_im = h_offset + i; + int w_im = w_offset + j; + *data_col_ptr = + (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? data_im_ptr[i * width + j] : 0; data_col_ptr += height_col * width_col; } @@ -222,35 +223,35 @@ template void im2col_nd_gpu(const double* data_im, template __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, const int height, const int width, const int channels, - const int patch_h, const int patch_w, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int height_col, const int width_col, Dtype* data_im) { CUDA_KERNEL_LOOP(index, n) { Dtype val = 0; - int w = index % width + pad_w; - int h = (index / width) % height + pad_h; - int c = index / (width * height); + int w_im = index % width + pad_w; + int h_im = (index / width) % height + pad_h; + int c_im = index / (width * height); // compute the start and end of the output - int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1; - int w_col_end = min(w / stride_w + 1, width_col); - int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1; - int h_col_end = min(h / stride_h + 1, height_col); + int w_col_start = (w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1; + int w_col_end = min(w_im / stride_w + 1, width_col); + int h_col_start = (h_im < kernel_h) ? 
0 : (h_im - kernel_h) / stride_h + 1; + int h_col_end = min(h_im / stride_h + 1, height_col); /* for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { // the col location: [c * width * height + h_out, w_out] - int c_col = c * patch_h * patch_w + (h - h_col * stride_h) * ksize - + (w - w_col * stride_w); + int c_col = c_im * kernel_h * kernel_w + + (h_im - h_col * stride_h) * kernel_w + (w_im - w_col * stride_w); val += data_col[(c_col * height_col + h_col) * width_col + w_col]; } } */ // equivalent implementation - int offset = - (c * patch_h * patch_w + h * patch_w + w) * height_col * width_col; - int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col; + int offset = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im) + * height_col * width_col; + int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col; int coeff_w_col = (1 - stride_w * height_col * width_col); for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { @@ -263,18 +264,18 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, template void col2im_gpu(const Dtype* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_im) { - int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1; + int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; + int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; int num_kernels = channels * height * width; // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. // NOLINT_NEXT_LINE(whitespace/operators) col2im_gpu_kernel<<>>( - num_kernels, data_col, height, width, channels, patch_h, patch_w, + num_kernels, data_col, height, width, channels, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, height_col, width_col, data_im); CUDA_POST_KERNEL_CHECK; @@ -282,11 +283,11 @@ void col2im_gpu(const Dtype* data_col, const int channels, // Explicit instantiation template void col2im_gpu(const float* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, float* data_im); template void col2im_gpu(const double* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, + const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, double* data_im); @@ -302,11 +303,11 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, CUDA_KERNEL_LOOP(index, n) { // Initialize channel_in, computed in the loop below, with intermediate // computations used to compute the spatial indices. - int channel_im = index; + int c_im = index; // Calculate d_im (image dimensions). for (int i = num_axes - 1; i >= 0; --i) { - d_im[i] = channel_im % im_shape[i + 1] + pad[i]; - channel_im /= im_shape[i + 1]; + d_im[i] = c_im % im_shape[i + 1] + pad[i]; + c_im /= im_shape[i + 1]; } // Calculate col start/end indices. 
bool done = false; @@ -338,7 +339,7 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod; kernel_shape_prod *= kernel_shape[i]; } - final_offset += kernel_shape_prod * channel_im; + final_offset += kernel_shape_prod * c_im; for (int i = 0; i < num_axes; ++i) { final_offset *= col_shape[i + 1]; final_offset += d_col_iter[i]; From ec77358c2d2e05b3aa39221bd3ec093789bd40f6 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 19 Sep 2015 14:00:14 -0700 Subject: [PATCH 095/223] harmonize the im2col_{cpu,gpu} assignment --- src/caffe/util/im2col.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index afeb5e5d..018ff0cd 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -25,11 +25,9 @@ void im2col_cpu(const Dtype* data_im, const int channels, for (int w_col = 0; w_col < width_col; ++w_col) { int h_im = h_col * stride_h - pad_h + h_offset; int w_im = w_col * stride_w - pad_w + w_offset; - if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) - data_col[(c_col * height_col + h_col) * width_col + w_col] = - data_im[(c_im * height + h_im) * width + w_im]; - else - data_col[(c_col * height_col + h_im) * width_col + w_im] = 0; + data_col[(c_col * height_col + h_col) * width_col + w_col] = + (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? + data_im[(c_im * height + h_im) * width + w_im] : 0; } } } From d292a162b3659685b5f4399b8adf743bdcac49a1 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 19 Sep 2015 14:07:10 -0700 Subject: [PATCH 096/223] mark const im2col + col2im terms --- src/caffe/util/im2col.cpp | 12 ++++++------ src/caffe/util/im2col.cu | 32 ++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 018ff0cd..09da23d4 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -14,9 +14,9 @@ void im2col_cpu(const Dtype* data_im, const int channels, const int pad_h, const int pad_w, const int stride_h, const int stride_w, Dtype* data_col) { - int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; - int channels_col = channels * kernel_h * kernel_w; + const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; + const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + const int channels_col = channels * kernel_h * kernel_w; for (int c_col = 0; c_col < channels_col; ++c_col) { int w_offset = c_col % kernel_w; int h_offset = (c_col / kernel_w) % kernel_h; @@ -142,9 +142,9 @@ void col2im_cpu(const Dtype* data_col, const int channels, const int stride_h, const int stride_w, Dtype* data_im) { caffe_set(height * width * channels, Dtype(0), data_im); - int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; - int channels_col = channels * kernel_h * kernel_w; + const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; + const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + const int channels_col = channels * kernel_h * kernel_w; for (int c_col = 0; c_col < channels_col; ++c_col) { int w_offset = c_col % kernel_w; int h_offset = (c_col / kernel_w) % kernel_h; diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index 897e3c92..451097f8 100644 --- 
a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -16,13 +16,13 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int height_col, const int width_col, Dtype* data_col) { CUDA_KERNEL_LOOP(index, n) { - int h_index = index / width_col; - int h_col = h_index % height_col; - int w_col = index % width_col; - int c_im = h_index / height_col; - int c_col = c_im * kernel_h * kernel_w; - int h_offset = h_col * stride_h - pad_h; - int w_offset = w_col * stride_w - pad_w; + const int h_index = index / width_col; + const int h_col = h_index % height_col; + const int w_col = index % width_col; + const int c_im = h_index / height_col; + const int c_col = c_im * kernel_h * kernel_w; + const int h_offset = h_col * stride_h - pad_h; + const int w_offset = w_col * stride_w - pad_w; Dtype* data_col_ptr = data_col; data_col_ptr += (c_col * height_col + h_col) * width_col + w_col; const Dtype* data_im_ptr = data_im; @@ -230,14 +230,18 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, Dtype* data_im) { CUDA_KERNEL_LOOP(index, n) { Dtype val = 0; - int w_im = index % width + pad_w; - int h_im = (index / width) % height + pad_h; - int c_im = index / (width * height); + const int w_im = index % width + pad_w; + const int h_im = (index / width) % height + pad_h; + const int c_im = index / (width * height); // compute the start and end of the output - int w_col_start = (w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1; - int w_col_end = min(w_im / stride_w + 1, width_col); - int h_col_start = (h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1; - int h_col_end = min(h_im / stride_h + 1, height_col); + const int w_col_start = + (w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1; + const int w_col_end = + min(w_im / stride_w + 1, width_col); + const int h_col_start = + (h_im < kernel_h) ? 
0 : (h_im - kernel_h) / stride_h + 1; + const int h_col_end = + min(h_im / stride_h + 1, height_col); /* for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { From da75a0e715f3d434c6b4c23d55947e114b332337 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 19 Sep 2015 20:38:18 -0700 Subject: [PATCH 097/223] [build] check xcode command line tools version >= 6 future-proof version check for BLAS libraries on OS X fix #3092 --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a9111336..5fb6394e 100644 --- a/Makefile +++ b/Makefile @@ -354,8 +354,9 @@ else # OS X packages atlas as the vecLib framework LIBRARIES += cblas # 10.10 has accelerate while 10.9 has veclib - XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep -o 'version: 6') - ifneq (,$(findstring version: 6,$(XCODE_CLT_VER))) + XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/') + XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1) + ifeq ($(XCODE_CLT_GEQ_6), 1) BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate else From 84eb44e6cf9623e09c354a863e201971270ba25b Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Sat, 19 Sep 2015 14:14:03 -0700 Subject: [PATCH 098/223] [tools] add Python script for at-a-glance prototxt summary --- tools/extra/summarize.py | 140 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100755 tools/extra/summarize.py diff --git a/tools/extra/summarize.py b/tools/extra/summarize.py new file mode 100755 index 00000000..7e2d22fd --- /dev/null +++ b/tools/extra/summarize.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python + +"""Net summarization tool. + +This tool summarizes the structure of a net in a concise but comprehensive +tabular listing, taking a prototxt file as input. + +Use this tool to check at a glance that the computation you've specified is the +computation you expect. +""" + +from caffe.proto import caffe_pb2 +from google import protobuf +import re +import argparse + +# ANSI codes for coloring blobs (used cyclically) +COLORS = ['92', '93', '94', '95', '97', '96', '42', '43;30', '100', + '444', '103;30', '107;30'] +DISCONNECTED_COLOR = '41' + +def read_net(filename): + net = caffe_pb2.NetParameter() + with open(filename) as f: + protobuf.text_format.Parse(f.read(), net) + return net + +def format_param(param): + out = [] + if len(param.name) > 0: + out.append(param.name) + if param.lr_mult != 1: + out.append('x{}'.format(param.lr_mult)) + if param.decay_mult != 1: + out.append('Dx{}'.format(param.decay_mult)) + return ' '.join(out) + +def printed_len(s): + return len(re.sub(r'\033\[[\d;]+m', '', s)) + +def print_table(table, max_width): + """Print a simple nicely-aligned table. + + table must be a list of (equal-length) lists. Columns are space-separated, + and as narrow as possible, but no wider than max_width. 
Text may overflow + columns; note that unlike string.format, this will not affect subsequent + columns, if possible.""" + + max_widths = [max_width] * len(table[0]) + column_widths = [max(printed_len(row[j]) + 1 for row in table) + for j in range(len(table[0]))] + column_widths = [min(w, max_w) for w, max_w in zip(column_widths, max_widths)] + + for row in table: + row_str = '' + right_col = 0 + for cell, width in zip(row, column_widths): + right_col += width + row_str += cell + ' ' + row_str += ' ' * max(right_col - printed_len(row_str), 0) + print row_str + +def summarize_net(net): + disconnected_tops = set() + for lr in net.layer: + disconnected_tops |= set(lr.top) + disconnected_tops -= set(lr.bottom) + + table = [] + colors = {} + for lr in net.layer: + tops = [] + for ind, top in enumerate(lr.top): + color = colors.setdefault(top, COLORS[len(colors) % len(COLORS)]) + if top in disconnected_tops: + top = '\033[1;4m' + top + if len(lr.loss_weight) > 0: + top = '{} * {}'.format(lr.loss_weight[ind], top) + tops.append('\033[{}m{}\033[0m'.format(color, top)) + top_str = ', '.join(tops) + + bottoms = [] + for bottom in lr.bottom: + color = colors.get(bottom, DISCONNECTED_COLOR) + bottoms.append('\033[{}m{}\033[0m'.format(color, bottom)) + bottom_str = ', '.join(bottoms) + + if lr.type == 'Python': + type_str = lr.python_param.module + '.' + lr.python_param.layer + else: + type_str = lr.type + + # Summarize conv/pool parameters. + # TODO support rectangular/ND parameters + conv_param = lr.convolution_param + if (lr.type in ['Convolution', 'Deconvolution'] + and len(conv_param.kernel_size) == 1): + arg_str = str(conv_param.kernel_size[0]) + if len(conv_param.stride) > 0 and conv_param.stride[0] != 1: + arg_str += '/' + str(conv_param.stride[0]) + if len(conv_param.pad) > 0 and conv_param.pad[0] != 0: + arg_str += '+' + str(conv_param.pad[0]) + arg_str += ' ' + str(conv_param.num_output) + if conv_param.group != 1: + arg_str += '/' + str(conv_param.group) + elif lr.type == 'Pooling': + arg_str = str(lr.pooling_param.kernel_size) + if lr.pooling_param.stride != 1: + arg_str += '/' + str(lr.pooling_param.stride) + if lr.pooling_param.pad != 0: + arg_str += '+' + str(lr.pooling_param.pad) + else: + arg_str = '' + + if len(lr.param) > 0: + param_strs = map(format_param, lr.param) + if max(map(len, param_strs)) > 0: + param_str = '({})'.format(', '.join(param_strs)) + else: + param_str = '' + else: + param_str = '' + + table.append([lr.name, type_str, param_str, bottom_str, '->', top_str, + arg_str]) + return table + +def main(): + parser = argparse.ArgumentParser(description="Print a concise summary of net computation.") + parser.add_argument('filename', help='net prototxt file to summarize') + parser.add_argument('-w', '--max-width', help='maximum field width', + type=int, default=30) + args = parser.parse_args() + + net = read_net(args.filename) + table = summarize_net(net) + print_table(table, max_width=args.max_width) + +if __name__ == '__main__': + main() From a40c2a08421ebf9a164e198a70752f2d5cb1c93d Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Sun, 20 Sep 2015 14:20:28 -0700 Subject: [PATCH 099/223] fix broken DeconvolutionLayer GPU backward caused by typo --- src/caffe/layers/deconv_layer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index ea83f56f..5dbdcc31 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -51,7 +51,7 @@ void 
DeconvolutionLayer::Backward_gpu(const vector*>& top, } // gradient w.r.t. bottom data, if necessary. if (propagate_down[i]) { - this->forward_gpu_gemm(top_diff + this->top_dim_, weight, + this->forward_gpu_gemm(top_diff + n * this->top_dim_, weight, bottom_diff + n * this->bottom_dim_, this->param_propagate_down_[0]); } From 6a00ecae67a95cf39e1961aaddc3be1f5a828bb4 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Sun, 20 Sep 2015 15:31:59 -0700 Subject: [PATCH 100/223] fix broken conv/deconv reshaping caused by reading bottom shape in LayerSetUp This also eliminates the extra copying of bottom's shape. --- include/caffe/vision_layers.hpp | 7 +++++-- src/caffe/layers/base_conv_layer.cpp | 10 ++-------- src/caffe/layers/conv_layer.cpp | 5 ++--- src/caffe/layers/deconv_layer.cpp | 5 ++--- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index eae65820..06bc0457 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -58,6 +58,10 @@ class BaseConvolutionLayer : public Layer { void backward_gpu_bias(Dtype* bias, const Dtype* input); #endif + /// @brief The spatial dimensions of the input. + inline int input_shape(int i) { + return (*bottom_shape_)[channel_axis_ + i]; + } // reverse_dimensions should return true iff we are implementing deconv, so // that conv helpers know which dimensions are which. virtual bool reverse_dimensions() = 0; @@ -72,12 +76,11 @@ class BaseConvolutionLayer : public Layer { Blob pad_; /// @brief The spatial dimensions of the convolution input. Blob conv_input_shape_; - /// @brief The spatial dimensions of the input. - Blob input_shape_; /// @brief The spatial dimensions of the col_buffer. vector col_buffer_shape_; /// @brief The spatial dimensions of the output. vector output_shape_; + const vector* bottom_shape_; int num_spatial_axes_; int bottom_dim_; diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index a5b90a54..c6b47550 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -20,13 +20,7 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const int num_axes = bottom[0]->num_axes(); num_spatial_axes_ = num_axes - first_spatial_axis; CHECK_GE(num_spatial_axes_, 0); - // Setup input dimensions (input_shape_). vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); - input_shape_.Reshape(bottom_dim_blob_shape); - int* input_shape_data = input_shape_.mutable_cpu_data(); - for (int i = 0; i < num_spatial_axes_ + 1; ++i) { - input_shape_data[i] = bottom[0]->shape(channel_axis_ + i); - } vector spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1)); // Setup filter kernel dimensions (kernel_shape_). kernel_shape_.Reshape(spatial_dim_blob_shape); @@ -190,6 +184,7 @@ void BaseConvolutionLayer::Reshape(const vector*>& bottom, << "All inputs must have the same shape."; } // Shape the tops. + bottom_shape_ = &bottom[0]->shape(); compute_output_shape(); vector top_shape(bottom[0]->shape().begin(), bottom[0]->shape().begin() + channel_axis_); @@ -223,10 +218,9 @@ void BaseConvolutionLayer::Reshape(const vector*>& bottom, // it goes lazily unused to save memory. 
col_buffer_shape_.clear(); col_buffer_shape_.push_back(kernel_dim_ * group_); - const int* input_shape_data = input_shape_.cpu_data() + 1; for (int i = 0; i < num_spatial_axes_; ++i) { if (reverse_dimensions()) { - col_buffer_shape_.push_back(input_shape_data[i]); + col_buffer_shape_.push_back(input_shape(i + 1)); } else { col_buffer_shape_.push_back(output_shape_[i]); } diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 5cf26970..fb50bb09 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -10,14 +10,13 @@ namespace caffe { template void ConvolutionLayer::compute_output_shape() { - // input_shape_ + 1 to skip channel axis - const int* input_shape_data = this->input_shape_.cpu_data() + 1; const int* kernel_shape_data = this->kernel_shape_.cpu_data(); const int* stride_data = this->stride_.cpu_data(); const int* pad_data = this->pad_.cpu_data(); this->output_shape_.clear(); for (int i = 0; i < this->num_spatial_axes_; ++i) { - const int input_dim = input_shape_data[i]; + // i + 1 to skip channel axis + const int input_dim = this->input_shape(i + 1); const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) / stride_data[i] + 1; this->output_shape_.push_back(output_dim); diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index f1d1abf2..91aabb31 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -10,14 +10,13 @@ namespace caffe { template void DeconvolutionLayer::compute_output_shape() { - // input_shape_ + 1 to skip channel axis - const int* input_shape_data = this->input_shape_.cpu_data() + 1; const int* kernel_shape_data = this->kernel_shape_.cpu_data(); const int* stride_data = this->stride_.cpu_data(); const int* pad_data = this->pad_.cpu_data(); this->output_shape_.clear(); for (int i = 0; i < this->num_spatial_axes_; ++i) { - const int input_dim = input_shape_data[i]; + // i + 1 to skip channel axis + const int input_dim = this->input_shape(i + 1); const int output_dim = stride_data[i] * (input_dim - 1) + kernel_shape_data[i] - 2 * pad_data[i]; this->output_shape_.push_back(output_dim); From 74e174537418b6a3c0c8708e444edf45ab491e94 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 23 Sep 2015 13:38:29 -0700 Subject: [PATCH 101/223] [test] TestReshape: check small then large checking large then small can mask failure since the smaller shape memory will fit within the larger shape. --- src/caffe/test/test_net.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 12998d89..ec01053c 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2269,8 +2269,10 @@ TYPED_TEST(NetTest, TestReshape) { FillerParameter filler_param; filler_param.set_std(1); GaussianFiller filler(filler_param); - Blob blob1(4, 3, 9, 11); - Blob blob2(2, 3, 12, 10); + // Check smaller shape first as larger first could hide realloc failures. 
+ Blob blob1(2, 3, 12, 10); + Blob blob2(4, 3, 9, 11); + ASSERT_LT(blob1.count(), blob2.count()); filler.Fill(&blob1); filler.Fill(&blob2); From ae77b15495d4c2a83202c49991bfc0885765de03 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 23 Sep 2015 13:40:16 -0700 Subject: [PATCH 102/223] [test] TestReshape: expect instead of check --- src/caffe/test/test_net.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index ec01053c..16c1d35f 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2306,7 +2306,7 @@ TYPED_TEST(NetTest, TestReshape) { this->net_->ForwardPrefilled(); this->net_->Backward(); for (int i = 0; i < output1.count(); ++i) { - CHECK_EQ(*(output1.cpu_data() + i), *(output_blob->cpu_data() + i)); + EXPECT_FLOAT_EQ(*(output1.cpu_data() + i), *(output_blob->cpu_data() + i)); } input_blob->Reshape(blob2.num(), blob2.channels(), blob2.height(), @@ -2315,7 +2315,7 @@ TYPED_TEST(NetTest, TestReshape) { this->net_->ForwardPrefilled(); this->net_->Backward(); for (int i = 0; i < output2.count(); ++i) { - CHECK_EQ(*(output2.cpu_data() + i), *(output_blob->cpu_data() + i)); + EXPECT_FLOAT_EQ(*(output2.cpu_data() + i), *(output_blob->cpu_data() + i)); } } From b8c81bd2bfbc5bc2e394395bf2c1f435cb32b2a1 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 23 Sep 2015 13:40:24 -0700 Subject: [PATCH 103/223] [test] TestReshape: check that shapes actually change Check that output spatial shape varies with input shape while the output num matches the input num. --- src/caffe/test/test_net.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 16c1d35f..ab4afba1 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2262,8 +2262,8 @@ TEST_F(FilterNetTest, TestFilterInOutByExcludeMultiRule) { TYPED_TEST(NetTest, TestReshape) { typedef typename TypeParam::Dtype Dtype; // We set up bottom blobs of two different sizes, switch between - // them, and check that forward and backward both run and the results - // are the same. + // them, check that forward and backward both run and the results + // are the same, and check that the output shapes change. 
Caffe::set_random_seed(this->seed_); Caffe::set_mode(Caffe::CPU); FillerParameter filler_param; @@ -2317,6 +2317,18 @@ TYPED_TEST(NetTest, TestReshape) { for (int i = 0; i < output2.count(); ++i) { EXPECT_FLOAT_EQ(*(output2.cpu_data() + i), *(output_blob->cpu_data() + i)); } + + EXPECT_EQ(output1.num(), blob1.num()); + EXPECT_EQ(output2.num(), blob2.num()); + bool same_spatial_shape = true; + const int kFirstSpatialAxis = 2; + for (int i = kFirstSpatialAxis; i < output1.num_axes(); ++i) { + if (output1.shape(i) != output2.shape(i)) { + same_spatial_shape = false; + break; + } + } + EXPECT_FALSE(same_spatial_shape); } TYPED_TEST(NetTest, TestSkipPropagateDown) { From 84e390c5a16347c7369f6c92cb62526e42ce73ac Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 24 Sep 2015 12:35:35 -0700 Subject: [PATCH 104/223] Allow H5T_INTEGER in HDF5 files --- src/caffe/util/hdf5.cpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/caffe/util/hdf5.cpp b/src/caffe/util/hdf5.cpp index d0d05f70..7730e76a 100644 --- a/src/caffe/util/hdf5.cpp +++ b/src/caffe/util/hdf5.cpp @@ -27,7 +27,34 @@ void hdf5_load_nd_dataset_helper( status = H5LTget_dataset_info( file_id, dataset_name_, dims.data(), &class_, NULL); CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; - CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data"; + switch (class_) { + case H5T_FLOAT: + LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT"; + break; + case H5T_INTEGER: + LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER"; + break; + case H5T_TIME: + LOG(FATAL) << "Unsupported datatype class: H5T_TIME"; + case H5T_STRING: + LOG(FATAL) << "Unsupported datatype class: H5T_STRING"; + case H5T_BITFIELD: + LOG(FATAL) << "Unsupported datatype class: H5T_BITFIELD"; + case H5T_OPAQUE: + LOG(FATAL) << "Unsupported datatype class: H5T_OPAQUE"; + case H5T_COMPOUND: + LOG(FATAL) << "Unsupported datatype class: H5T_COMPOUND"; + case H5T_REFERENCE: + LOG(FATAL) << "Unsupported datatype class: H5T_REFERENCE"; + case H5T_ENUM: + LOG(FATAL) << "Unsupported datatype class: H5T_ENUM"; + case H5T_VLEN: + LOG(FATAL) << "Unsupported datatype class: H5T_VLEN"; + case H5T_ARRAY: + LOG(FATAL) << "Unsupported datatype class: H5T_ARRAY"; + default: + LOG(FATAL) << "Datatype class unknown"; + } vector blob_dims(dims.size()); for (int i = 0; i < dims.size(); ++i) { From ebc9963fea7b72f397c446a10a9aeab576979566 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Tue, 25 Aug 2015 18:58:45 -0700 Subject: [PATCH 105/223] Modify HDF5DataLayerTest to test H5T_INTEGER data --- .../test/test_data/generate_sample_data.py | 14 ++++++++------ .../test/test_data/sample_data_2_gzip.h5 | Bin 15446 -> 15446 bytes 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/caffe/test/test_data/generate_sample_data.py b/src/caffe/test/test_data/generate_sample_data.py index 3703b418..8349dbbc 100644 --- a/src/caffe/test/test_data/generate_sample_data.py +++ b/src/caffe/test/test_data/generate_sample_data.py @@ -36,23 +36,25 @@ f['label'] = label f['label2'] = label2 -with h5py.File(script_dir + '/sample_data_2_gzip.h5', 'w') as f: +with h5py.File(script_dir + '/sample_data_uint8_gzip.h5', 'w') as f: f.create_dataset( 'data', data=data + total_size, compression='gzip', compression_opts=1 ) f.create_dataset( 'label', data=label, - compression='gzip', compression_opts=1 + compression='gzip', compression_opts=1, + dtype='uint8', ) f.create_dataset( 'label2', data=label2, - compression='gzip', 
compression_opts=1 + compression='gzip', compression_opts=1, + dtype='uint8', ) with open(script_dir + '/sample_data_list.txt', 'w') as f: - f.write(script_dir + '/sample_data.h5\n') - f.write(script_dir + '/sample_data_2_gzip.h5\n') + f.write('src/caffe/test/test_data/sample_data.h5\n') + f.write('src/caffe/test/test_data/sample_uint8_gzip.h5\n') # Generate GradientBasedSolver solver_data.h5 @@ -76,4 +78,4 @@ f['targets'] = targets with open(script_dir + '/solver_data_list.txt', 'w') as f: - f.write(script_dir + '/solver_data.h5\n') + f.write('src/caffe/test/test_data/solver_data.h5\n') diff --git a/src/caffe/test/test_data/sample_data_2_gzip.h5 b/src/caffe/test/test_data/sample_data_2_gzip.h5 index a138e0367be3d4b4ce4b51dcf0d7895056018883..0cb9ef92241d049b699b65f87e800f97337cae54 100644 GIT binary patch delta 225 zcmcasajjwl4+~4C%-zt<0xUly1qB!w85kG@fEYwGFmOy(l#7r6vxOKqz(ODnNCN|d z$Ha}klMPrT7=H51ueFFg#LWgGis1!kei3Wf$yr%6iSb&3kmDU2qQ`Q_!o GjsXC!dpCsu From 859f93891e4bf47d02899f03f0620fd1f29ca224 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 24 Sep 2015 13:33:11 -0700 Subject: [PATCH 106/223] Fix generate_sample_data.py - bug from #2978 --- src/caffe/test/test_data/generate_sample_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_data/generate_sample_data.py b/src/caffe/test/test_data/generate_sample_data.py index 8349dbbc..26450735 100644 --- a/src/caffe/test/test_data/generate_sample_data.py +++ b/src/caffe/test/test_data/generate_sample_data.py @@ -36,7 +36,7 @@ f['label'] = label f['label2'] = label2 -with h5py.File(script_dir + '/sample_data_uint8_gzip.h5', 'w') as f: +with h5py.File(script_dir + '/sample_data_2_gzip.h5', 'w') as f: f.create_dataset( 'data', data=data + total_size, compression='gzip', compression_opts=1 @@ -54,7 +54,7 @@ with open(script_dir + '/sample_data_list.txt', 'w') as f: f.write('src/caffe/test/test_data/sample_data.h5\n') - f.write('src/caffe/test/test_data/sample_uint8_gzip.h5\n') + f.write('src/caffe/test/test_data/sample_data_2_gzip.h5\n') # Generate GradientBasedSolver solver_data.h5 From 200bd40391bc1c072730ea4bd80a6fe42b7a3901 Mon Sep 17 00:00:00 2001 From: Dmytro Mishkin Date: Fri, 25 Sep 2015 10:00:23 +0300 Subject: [PATCH 107/223] Fix parse_log.sh against "prefetch queue empty" messages --- tools/extra/parse_log.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/extra/parse_log.sh b/tools/extra/parse_log.sh index 98ef0a05..9892c897 100755 --- a/tools/extra/parse_log.sh +++ b/tools/extra/parse_log.sh @@ -14,7 +14,12 @@ echo "Usage parse_log.sh /path/to/your.log" exit fi LOG=`basename $1` -grep -B 1 'Test ' $1 > aux.txt +sed -n '/Iteration .* Testing net/,/Iteration *. 
loss/p' $1 > aux.txt +sed -i '/Waiting for data/d' aux.txt +sed -i '/prefetch queue empty/d' aux.txt +sed -i '/Iteration .* loss/d' aux.txt +sed -i '/Iteration .* lr/d' aux.txt +sed -i '/Train net/d' aux.txt grep 'Iteration ' aux.txt | sed 's/.*Iteration \([[:digit:]]*\).*/\1/g' > aux0.txt grep 'Test net output #0' aux.txt | awk '{print $11}' > aux1.txt grep 'Test net output #1' aux.txt | awk '{print $11}' > aux2.txt From 6c02c8b7daf123f64b944ede407d0022e98d6e0b Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Tue, 15 Sep 2015 16:55:26 +0200 Subject: [PATCH 108/223] Add argmax_param axis --- include/caffe/common_layers.hpp | 2 ++ src/caffe/layers/argmax_layer.cpp | 22 +++++++++++++++++----- src/caffe/proto/caffe.proto | 5 +++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 89bab8d6..491f9edb 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -68,6 +68,8 @@ class ArgMaxLayer : public Layer { } bool out_max_val_; size_t top_k_; + bool has_axis_; + int axis_; }; /** diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index c4040cdc..dad3d08b 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -11,11 +11,23 @@ namespace caffe { template void ArgMaxLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - out_max_val_ = this->layer_param_.argmax_param().out_max_val(); - top_k_ = this->layer_param_.argmax_param().top_k(); - CHECK_GE(top_k_, 1) << " top k must not be less than 1."; - CHECK_LE(top_k_, bottom[0]->count() / bottom[0]->num()) - << "top_k must be less than or equal to the number of classes."; + const ArgMaxParameter& argmax_param = this->layer_param_.argmax_param(); + out_max_val_ = argmax_param.out_max_val(); + top_k_ = argmax_param.top_k(); + has_axis_ = argmax_param.has_axis(); + CHECK_GE(top_k_, 1) << "top k must not be less than 1."; + if (has_axis_) { + axis_ = bottom[0]->CanonicalAxisIndex(argmax_param.axis()); + CHECK_GE(axis_, 0) << "axis must not be less than 0."; + CHECK_LE(axis_, bottom[0]->num_axes()) << + "axis must be less than or equal to the number of axis."; + CHECK_LE(top_k_, bottom[0]->shape(axis_)) + << "top_k must be less than or equal to the dimension of the axis."; + } else { + CHECK_LE(top_k_, bottom[0]->count(1)) + << "top_k must be less than or equal to" + " the dimension of the flattened bottom blob per instance."; + } } template diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f52c941b..a8747c12 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -443,6 +443,11 @@ message ArgMaxParameter { // If true produce pairs (argmax, maxval) optional bool out_max_val = 1 [default = false]; optional uint32 top_k = 2 [default = 1]; + // The axis along which to maximise -- may be negative to index from the + // end (e.g., -1 for the last axis). + // By default ArgMaxLayer maximizes over the flattened trailing dimensions + // for each index of the first / num dimension. 
+ optional int32 axis = 3; } message ConcatParameter { From c77d5e5156f94720c1decd13f7f87fe78df9d4eb Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Tue, 15 Sep 2015 16:56:16 +0200 Subject: [PATCH 109/223] Implement ArgMaxLayer forward_cpu and reshape for axis param --- src/caffe/layers/argmax_layer.cpp | 53 ++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index dad3d08b..18ff5f5a 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -33,13 +33,19 @@ void ArgMaxLayer::LayerSetUp(const vector*>& bottom, template void ArgMaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - if (out_max_val_) { + std::vector shape(4, 1); + shape[0] = bottom[0]->shape(0); + // Produces max_ind + shape[2] = top_k_; + if (has_axis_) { + // Produces max_ind or max_val per axis + shape = bottom[0]->shape(); + shape[axis_] = top_k_; + } else if (out_max_val_) { // Produces max_ind and max_val - top[0]->Reshape(bottom[0]->num(), 2, top_k_, 1); - } else { - // Produces only max_ind - top[0]->Reshape(bottom[0]->num(), 1, top_k_, 1); + shape[1] = 2; } + top[0]->Reshape(shape); } template @@ -47,23 +53,40 @@ void ArgMaxLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); + int dim, axis_dist; + if (has_axis_) { + dim = bottom[0]->shape(axis_); + // Distance between values of axis in blob + axis_dist = bottom[0]->count(axis_) / dim; + } else { + dim = bottom[0]->count(1); + axis_dist = 1; + } + int num = bottom[0]->count() / dim; + std::vector > bottom_data_vector(dim); for (int i = 0; i < num; ++i) { - std::vector > bottom_data_vector; for (int j = 0; j < dim; ++j) { - bottom_data_vector.push_back( - std::make_pair(bottom_data[i * dim + j], j)); + bottom_data_vector[j] = std::make_pair( + bottom_data[(i / axis_dist * dim + j) * axis_dist + i % axis_dist], j); } std::partial_sort( bottom_data_vector.begin(), bottom_data_vector.begin() + top_k_, bottom_data_vector.end(), std::greater >()); for (int j = 0; j < top_k_; ++j) { - top_data[top[0]->offset(i, 0, j)] = bottom_data_vector[j].second; - } - if (out_max_val_) { - for (int j = 0; j < top_k_; ++j) { - top_data[top[0]->offset(i, 1, j)] = bottom_data_vector[j].first; + if (out_max_val_) { + if (has_axis_) { + // Produces max_val per axis + top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = + bottom_data_vector[j].first; + } else { + // Produces max_ind and max_val + top_data[top[0]->offset(i, 0, j)] = bottom_data_vector[j].second; + top_data[top[0]->offset(i, 1, j)] = bottom_data_vector[j].first; + } + } else { + // Produces max_ind per axis + top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = + bottom_data_vector[j].second; } } } From 9b2d267941411d9727a88ead18e3531bad50d14d Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Tue, 15 Sep 2015 16:56:45 +0200 Subject: [PATCH 110/223] Update ArgMaxLayer documentation for axis param --- include/caffe/common_layers.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 491f9edb..d1ddaee4 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -21,7 +21,8 @@ namespace caffe { * * Intended for use after a 
classification layer to produce a prediction. * If parameter out_max_val is set to true, output is a vector of pairs - * (max_ind, max_val) for each image. + * (max_ind, max_val) for each image. The axis parameter specifies an axis + * along which to maximise. * * NOTE: does not implement Backwards operation. */ @@ -34,7 +35,11 @@ class ArgMaxLayer : public Layer { * - top_k (\b optional uint, default 1). * the number @f$ K @f$ of maximal items to output. * - out_max_val (\b optional bool, default false). - * if set, output a vector of pairs (max_ind, max_val) for each image. + * if set, output a vector of pairs (max_ind, max_val) unless axis is set then + * output max_val along the specified axis. + * - axis (\b optional int). + * if set, maximise along the specified axis else maximise the flattened + * trailing dimensions for each index of the first / num dimension. */ explicit ArgMaxLayer(const LayerParameter& param) : Layer(param) {} @@ -54,7 +59,8 @@ class ArgMaxLayer : public Layer { * the inputs @f$ x @f$ * @param top output Blob vector (length 1) * -# @f$ (N \times 1 \times K \times 1) @f$ or, if out_max_val - * @f$ (N \times 2 \times K \times 1) @f$ + * @f$ (N \times 2 \times K \times 1) @f$ unless axis set than e.g. + * @f$ (N \times K \times H \times W) @f$ if axis == 1 * the computed outputs @f$ * y_n = \arg\max\limits_i x_{ni} * @f$ (for @f$ K = 1 @f$). From a2a5e22d0b7d44a5e577edd53181d4802f057740 Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Tue, 15 Sep 2015 16:57:37 +0200 Subject: [PATCH 111/223] Generalise ArgMaxLayerTest bottom blob shape --- src/caffe/test/test_argmax_layer.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp index 895c3d37..d3018f90 100644 --- a/src/caffe/test/test_argmax_layer.cpp +++ b/src/caffe/test/test_argmax_layer.cpp @@ -16,7 +16,7 @@ template class ArgMaxLayerTest : public CPUDeviceTest { protected: ArgMaxLayerTest() - : blob_bottom_(new Blob(10, 20, 1, 1)), + : blob_bottom_(new Blob(10, 10, 20, 20)), blob_top_(new Blob()), top_k_(5) { Caffe::set_random_seed(1701); @@ -112,6 +112,7 @@ TYPED_TEST(ArgMaxLayerTest, TestCPUTopK) { layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); // Now, check values + const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); int max_ind; TypeParam max_val; int num = this->blob_bottom_->num(); @@ -121,10 +122,10 @@ TYPED_TEST(ArgMaxLayerTest, TestCPUTopK) { EXPECT_LE(this->blob_top_->data_at(i, 0, 0, 0), dim); for (int j = 0; j < this->top_k_; ++j) { max_ind = this->blob_top_->data_at(i, 0, j, 0); - max_val = this->blob_bottom_->data_at(i, max_ind, 0, 0); + max_val = bottom_data[i * dim + max_ind]; int count = 0; for (int k = 0; k < dim; ++k) { - if (this->blob_bottom_->data_at(i, k, 0, 0) > max_val) { + if (bottom_data[i * dim + k] > max_val) { ++count; } } @@ -142,6 +143,7 @@ TYPED_TEST(ArgMaxLayerTest, TestCPUMaxValTopK) { layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); // Now, check values + const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); int max_ind; TypeParam max_val; int num = this->blob_bottom_->num(); @@ -152,10 +154,10 @@ TYPED_TEST(ArgMaxLayerTest, TestCPUMaxValTopK) { for (int j = 0; j < this->top_k_; ++j) { max_ind = this->blob_top_->data_at(i, 0, j, 0); max_val = this->blob_top_->data_at(i, 1, j, 0); - 
EXPECT_EQ(this->blob_bottom_->data_at(i, max_ind, 0, 0), max_val); + EXPECT_EQ(bottom_data[i * dim + max_ind], max_val); int count = 0; for (int k = 0; k < dim; ++k) { - if (this->blob_bottom_->data_at(i, k, 0, 0) > max_val) { + if (bottom_data[i * dim + k] > max_val) { ++count; } } From def3d3cc49b908e54f787be377c299e6e6cbf16c Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Tue, 15 Sep 2015 16:57:55 +0200 Subject: [PATCH 112/223] Implement ArgMaxLayerTest for axis param --- src/caffe/layers/argmax_layer.cpp | 28 +++--- src/caffe/test/test_argmax_layer.cpp | 125 +++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 13 deletions(-) diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index 18ff5f5a..0c0a932d 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -33,17 +33,19 @@ void ArgMaxLayer::LayerSetUp(const vector*>& bottom, template void ArgMaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - std::vector shape(4, 1); - shape[0] = bottom[0]->shape(0); - // Produces max_ind - shape[2] = top_k_; + std::vector shape(bottom[0]->num_axes(), 1); if (has_axis_) { // Produces max_ind or max_val per axis shape = bottom[0]->shape(); shape[axis_] = top_k_; - } else if (out_max_val_) { - // Produces max_ind and max_val - shape[1] = 2; + } else { + shape[0] = bottom[0]->shape(0); + // Produces max_ind + shape[2] = top_k_; + if (out_max_val_) { + // Produces max_ind and max_val + shape[1] = 2; + } } top[0]->Reshape(shape); } @@ -76,17 +78,17 @@ void ArgMaxLayer::Forward_cpu(const vector*>& bottom, if (out_max_val_) { if (has_axis_) { // Produces max_val per axis - top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = - bottom_data_vector[j].first; + top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] + = bottom_data_vector[j].first; } else { // Produces max_ind and max_val - top_data[top[0]->offset(i, 0, j)] = bottom_data_vector[j].second; - top_data[top[0]->offset(i, 1, j)] = bottom_data_vector[j].first; + top_data[2 * i * top_k_ + j] = bottom_data_vector[j].second; + top_data[2 * i * top_k_ + top_k_ + j] = bottom_data_vector[j].first; } } else { // Produces max_ind per axis - top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = - bottom_data_vector[j].second; + top_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] + = bottom_data_vector[j].second; } } } diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp index d3018f90..bbf19099 100644 --- a/src/caffe/test/test_argmax_layer.cpp +++ b/src/caffe/test/test_argmax_layer.cpp @@ -55,6 +55,43 @@ TYPED_TEST(ArgMaxLayerTest, TestSetupMaxVal) { EXPECT_EQ(this->blob_top_->channels(), 2); } +TYPED_TEST(ArgMaxLayerTest, TestSetupAxis) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(0); + ArgMaxLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->shape(0), argmax_param->top_k()); + EXPECT_EQ(this->blob_top_->shape(1), this->blob_bottom_->shape(0)); + EXPECT_EQ(this->blob_top_->shape(2), this->blob_bottom_->shape(2)); + EXPECT_EQ(this->blob_top_->shape(3), this->blob_bottom_->shape(3)); +} + +TYPED_TEST(ArgMaxLayerTest, TestSetupAxisNegativeIndexing) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(-2); + ArgMaxLayer layer(layer_param); + 
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->shape(0), this->blob_bottom_->shape(0)); + EXPECT_EQ(this->blob_top_->shape(1), this->blob_bottom_->shape(1)); + EXPECT_EQ(this->blob_top_->shape(2), argmax_param->top_k()); + EXPECT_EQ(this->blob_top_->shape(3), this->blob_bottom_->shape(3)); +} + +TYPED_TEST(ArgMaxLayerTest, TestSetupAxisMaxVal) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(2); + argmax_param->set_out_max_val(true); + ArgMaxLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->shape(0), this->blob_bottom_->shape(0)); + EXPECT_EQ(this->blob_top_->shape(1), this->blob_bottom_->shape(1)); + EXPECT_EQ(this->blob_top_->shape(2), argmax_param->top_k()); + EXPECT_EQ(this->blob_top_->shape(3), this->blob_bottom_->shape(3)); +} + TYPED_TEST(ArgMaxLayerTest, TestCPU) { LayerParameter layer_param; ArgMaxLayer layer(layer_param); @@ -166,5 +203,93 @@ TYPED_TEST(ArgMaxLayerTest, TestCPUMaxValTopK) { } } +TYPED_TEST(ArgMaxLayerTest, TestCPUAxis) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(0); + ArgMaxLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + int max_ind; + TypeParam max_val; + std::vector shape = this->blob_bottom_->shape(); + for (int i = 0; i < shape[1]; ++i) { + for (int j = 0; j < shape[2]; ++j) { + for (int k = 0; k < shape[3]; ++k) { + max_ind = this->blob_top_->data_at(0, i, j, k); + max_val = this->blob_bottom_->data_at(max_ind, i, j, k); + EXPECT_GE(max_ind, 0); + EXPECT_LE(max_ind, shape[0]); + for (int l = 0; l < shape[0]; ++l) { + EXPECT_LE(this->blob_bottom_->data_at(l, i, j, k), max_val); + } + } + } + } +} + +TYPED_TEST(ArgMaxLayerTest, TestCPUAxisTopK) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(2); + argmax_param->set_top_k(this->top_k_); + ArgMaxLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + int max_ind; + TypeParam max_val; + std::vector shape = this->blob_bottom_->shape(); + for (int i = 0; i < shape[0]; ++i) { + for (int j = 0; j < shape[1]; ++j) { + for (int k = 0; k < shape[3]; ++k) { + for (int m = 0; m < this->top_k_; ++m) { + max_ind = this->blob_top_->data_at(i, j, m, k); + max_val = this->blob_bottom_->data_at(i, j, max_ind, k); + EXPECT_GE(max_ind, 0); + EXPECT_LE(max_ind, shape[2]); + int count = 0; + for (int l = 0; l < shape[2]; ++l) { + if (this->blob_bottom_->data_at(i, j, l, k) > max_val) { + ++count; + } + } + EXPECT_EQ(m, count); + } + } + } + } +} + +TYPED_TEST(ArgMaxLayerTest, TestCPUAxisMaxValTopK) { + LayerParameter layer_param; + ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param(); + argmax_param->set_axis(-1); + argmax_param->set_top_k(this->top_k_); + argmax_param->set_out_max_val(true); + ArgMaxLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + TypeParam max_val; + std::vector shape = this->blob_bottom_->shape(); + for (int i = 0; i < shape[0]; ++i) { + for (int j = 0; j < shape[1]; ++j) { + for (int k = 0; k < 
shape[2]; ++k) { + for (int m = 0; m < this->top_k_; ++m) { + max_val = this->blob_top_->data_at(i, j, k, m); + int count = 0; + for (int l = 0; l < shape[3]; ++l) { + if (this->blob_bottom_->data_at(i, j, k, l) > max_val) { + ++count; + } + } + EXPECT_EQ(m, count); + } + } + } + } +} } // namespace caffe From bd5f15427cc2f008f80378a5948ce379d93ebde6 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Wed, 16 Sep 2015 13:39:23 -0700 Subject: [PATCH 113/223] Add flag on how host memory is allocated Add a bool flag to record whether a host memory is allocated using malloc or cudaMallocHost, and free correspondingly using this flag, instead of depending on Caffe::mode(), which is mutable during runtime. --- include/caffe/syncedmem.hpp | 15 ++++++++++----- src/caffe/syncedmem.cpp | 8 ++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 62aadef4..3d92a0ea 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -13,20 +13,22 @@ namespace caffe { // The improvement in performance seems negligible in the single GPU case, // but might be more significant for parallel training. Most importantly, // it improved stability for large models on many GPUs. -inline void CaffeMallocHost(void** ptr, size_t size) { +inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) { #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaMallocHost(ptr, size)); + *use_cuda = true; return; } #endif *ptr = malloc(size); + *use_cuda = false; CHECK(*ptr) << "host allocation of size " << size << " failed"; } -inline void CaffeFreeHost(void* ptr) { +inline void CaffeFreeHost(void* ptr, bool use_cuda) { #ifndef CPU_ONLY - if (Caffe::mode() == Caffe::GPU) { + if (use_cuda) { CUDA_CHECK(cudaFreeHost(ptr)); return; } @@ -45,10 +47,12 @@ class SyncedMemory { public: SyncedMemory() : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), - own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), + gpu_device_(-1) {} explicit SyncedMemory(size_t size) : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), - own_cpu_data_(false), own_gpu_data_(false), gpu_device_(-1) {} + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), + gpu_device_(-1) {} ~SyncedMemory(); const void* cpu_data(); void set_cpu_data(void* data); @@ -72,6 +76,7 @@ class SyncedMemory { size_t size_; SyncedHead head_; bool own_cpu_data_; + bool cpu_malloc_use_cuda_; bool own_gpu_data_; int gpu_device_; diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index a667a867..632bf1f1 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -8,7 +8,7 @@ namespace caffe { SyncedMemory::~SyncedMemory() { if (cpu_ptr_ && own_cpu_data_) { - CaffeFreeHost(cpu_ptr_); + CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); } #ifndef CPU_ONLY @@ -27,7 +27,7 @@ SyncedMemory::~SyncedMemory() { inline void SyncedMemory::to_cpu() { switch (head_) { case UNINITIALIZED: - CaffeMallocHost(&cpu_ptr_, size_); + CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); caffe_memset(size_, 0, cpu_ptr_); head_ = HEAD_AT_CPU; own_cpu_data_ = true; @@ -35,7 +35,7 @@ inline void SyncedMemory::to_cpu() { case HEAD_AT_GPU: #ifndef CPU_ONLY if (cpu_ptr_ == NULL) { - CaffeMallocHost(&cpu_ptr_, size_); + CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); own_cpu_data_ = true; } caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_); @@ -86,7 
+86,7 @@ const void* SyncedMemory::cpu_data() { void SyncedMemory::set_cpu_data(void* data) { CHECK(data); if (own_cpu_data_) { - CaffeFreeHost(cpu_ptr_); + CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); } cpu_ptr_ = data; head_ = HEAD_AT_CPU; From aaf4a4557668dfb75c540903ec02ed5821f75835 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 25 Sep 2015 15:43:47 -0700 Subject: [PATCH 114/223] Re-ordering some lines in build files Enforcing a consistent ordering - OpenCV, LevelDB, LMDB This will allow me to add the ALLOW_LMDB_NOLOCK option just after the USE_LMDB option, while keeping the IO dependency options together. --- CMakeLists.txt | 4 ++-- Makefile.config.example | 2 +- cmake/Summary.cmake | 4 ++-- cmake/Templates/caffe_config.h.in | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37f937fe..277c3dc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,9 +23,9 @@ set(python_version "2" CACHE STRING "Specify which Python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON) -caffe_option(USE_LMDB "Build with lmdb" ON) -caffe_option(USE_LEVELDB "Build with levelDB" ON) caffe_option(USE_OPENCV "Build with OpenCV support" ON) +caffe_option(USE_LEVELDB "Build with levelDB" ON) +caffe_option(USE_LMDB "Build with lmdb" ON) # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/Makefile.config.example b/Makefile.config.example index a20bad2f..42f86db4 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -8,9 +8,9 @@ # CPU_ONLY := 1 # uncomment to disable IO dependencies and corresponding data layers +# USE_OPENCV := 0 # USE_LEVELDB := 0 # USE_LMDB := 0 -# USE_OPENCV := 0 # To customize your choice of compiler, uncomment and set the following. # N.B. the default for Linux is g++ and the default for OSX is clang++ diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 3d12e81a..703e22ac 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -114,9 +114,9 @@ function(caffe_print_configuration_summary) caffe_status(" BUILD_matlab : ${BUILD_matlab}") caffe_status(" BUILD_docs : ${BUILD_docs}") caffe_status(" CPU_ONLY : ${CPU_ONLY}") - caffe_status(" USE_LMDB : ${USE_LMDB}") - caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") caffe_status(" USE_OPENCV : ${USE_OPENCV}") + caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") + caffe_status(" USE_LMDB : ${USE_LMDB}") caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") diff --git a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 9302022d..84377493 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -33,5 +33,5 @@ /* IO libraries */ #cmakedefine USE_OPENCV -#cmakedefine USE_LMDB #cmakedefine USE_LEVELDB +#cmakedefine USE_LMDB From b93afe8378cd66d9bf375a0f492a30f9db77e8ae Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 25 Sep 2015 15:53:54 -0700 Subject: [PATCH 115/223] Add ALLOW_LMDB_NOLOCK build option This option lets you open LMDB files with the MDB_NOLOCK flag. You should not set this flag if you will be reading LMDBs with any possibility of simultaneous read and write. 
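Usage sketch (assuming the build-file changes in this patch; the exact lines are in the diff below): the option is switched on at build time, for Make builds by uncommenting the new Makefile.config line

# allow MDB_NOLOCK when reading LMDB files; only safe if no process can be
# writing the database while another process reads it
ALLOW_LMDB_NOLOCK := 1

or, for CMake builds, by configuring with the new ALLOW_LMDB_NOLOCK option set to ON. With the flag compiled in, an EACCES ("Permission denied") return from mdb_env_open is retried with MDB_NOLOCK instead of aborting, which typically matters when the LMDB directory is read-only and the lock file cannot be created.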
--- CMakeLists.txt | 1 + Makefile | 3 +++ Makefile.config.example | 5 +++++ cmake/ConfigGen.cmake | 3 +++ cmake/Dependencies.cmake | 3 +++ cmake/Summary.cmake | 1 + cmake/Templates/caffe_config.h.in | 1 + src/caffe/util/db_lmdb.cpp | 17 ++++++++++++++++- 8 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 277c3dc4..f8f75305 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON) caffe_option(USE_OPENCV "Build with OpenCV support" ON) caffe_option(USE_LEVELDB "Build with levelDB" ON) caffe_option(USE_LMDB "Build with lmdb" ON) +caffe_option(ALLOW_LMDB_NOLOCK "Allow MDB_NOLOCK when reading LMDB files (only if necessary)" OFF) # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/Makefile b/Makefile index 5fb6394e..7cc73931 100644 --- a/Makefile +++ b/Makefile @@ -313,6 +313,9 @@ ifeq ($(USE_LEVELDB), 1) endif ifeq ($(USE_LMDB), 1) COMMON_FLAGS += -DUSE_LMDB +ifeq ($(ALLOW_LMDB_NOLOCK), 1) + COMMON_FLAGS += -DALLOW_LMDB_NOLOCK +endif endif # CPU-only configuration diff --git a/Makefile.config.example b/Makefile.config.example index 42f86db4..bda66ea1 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -12,6 +12,11 @@ # USE_LEVELDB := 0 # USE_LMDB := 0 +# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) +# You should not set this flag if you will be reading LMDBs with any +# possibility of simultaneous read and write +# ALLOW_LMDB_NOLOCK := 1 + # To customize your choice of compiler, uncomment and set the following. # N.B. the default for Linux is g++ and the default for OSX is clang++ # CUSTOM_CXX := g++ diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 8b259965..05637111 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -62,6 +62,9 @@ function(caffe_generate_export_configs) if(USE_LMDB) list(APPEND Caffe_DEFINITIONS -DUSE_LMDB) + if (ALLOW_LMDB_NOLOCK) + list(APPEND Caffe_DEFINITIONS -DALLOW_LMDB_NOLOCK) + endif() endif() if(USE_LEVELDB) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index d68d7bfb..a77ac6df 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -34,6 +34,9 @@ if(USE_LMDB) include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) add_definitions(-DUSE_LMDB) + if(ALLOW_LMDB_NOLOCK) + add_definitions(-DALLOW_LMDB_NOLOCK) + endif() endif() # ---[ LevelDB diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 703e22ac..6984f417 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -117,6 +117,7 @@ function(caffe_print_configuration_summary) caffe_status(" USE_OPENCV : ${USE_OPENCV}") caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") caffe_status(" USE_LMDB : ${USE_LMDB}") + caffe_status(" ALLOW_LMDB_NOLOCK : ${ALLOW_LMDB_NOLOCK}") caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") diff --git a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 84377493..8a31b43c 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -35,3 +35,4 @@ #cmakedefine USE_OPENCV #cmakedefine USE_LEVELDB #cmakedefine USE_LMDB +#cmakedefine ALLOW_LMDB_NOLOCK diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 78dd880a..0bc82b53 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -19,7 +19,22 @@ void LMDB::Open(const string& source, Mode mode) 
{ if (mode == READ) { flags = MDB_RDONLY | MDB_NOTLS; } - MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664)); + int rc = mdb_env_open(mdb_env_, source.c_str(), flags, 0664); +#ifndef ALLOW_LMDB_NOLOCK + MDB_CHECK(rc); +#else + if (rc == EACCES) { + LOG(WARNING) << "Permission denied. Trying with MDB_NOLOCK ..."; + // Close and re-open environment handle + mdb_env_close(mdb_env_); + MDB_CHECK(mdb_env_create(&mdb_env_)); + // Try again with MDB_NOLOCK + flags |= MDB_NOLOCK; + MDB_CHECK(mdb_env_open(mdb_env_, source.c_str(), flags, 0664)); + } else { + MDB_CHECK(rc); + } +#endif LOG(INFO) << "Opened lmdb " << source; } From e98b84762fb55daed5092225f71c3b76015aa4a4 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 28 Sep 2015 15:35:24 -0700 Subject: [PATCH 116/223] Install libs as non-executable files According to the Debian policy manual, "Shared libraries should not be installed executable, since the dynamic linker does not require this and trying to execute a shared library usually results in a core dump." https://www.debian.org/doc/debian-policy/ch-sharedlibs.html#s-sharedlibs-runtime --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5fb6394e..fbda44be 100644 --- a/Makefile +++ b/Makefile @@ -652,7 +652,7 @@ $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS) cp $(EXAMPLE_BINS) $(DISTRIBUTE_DIR)/bin # add libraries cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib - cp $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib + install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib # add python - it's not the standard way, indeed... cp -r python $(DISTRIBUTE_DIR)/python From 96ba513f54ac7bfc62c40a2481c1556c2f743120 Mon Sep 17 00:00:00 2001 From: Yang Song Date: Tue, 29 Sep 2015 20:07:52 +0800 Subject: [PATCH 117/223] Fix a typo Fix a typo in the message. --- python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 0e2bc7e6..a2264140 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,5 @@ if(NOT HAVE_PYTHON) - message(STATUS "Python interface is disabled or not all required dependecies found. Building without it...") + message(STATUS "Python interface is disabled or not all required dependencies found. Building without it...") return() endif() From 98cc023939641482432d4082db061306a7ab1654 Mon Sep 17 00:00:00 2001 From: Youssef Kashef Date: Thu, 1 Oct 2015 18:20:23 +0200 Subject: [PATCH 118/223] add badge for travis build and license --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index ebec286d..44b9e62c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # Caffe +[![Build Status](https://travis-ci.org/BVLC/caffe.svg?branch=master)](https://travis-ci.org/BVLC/caffe) +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) + Caffe is a deep learning framework made with expression, speed, and modularity in mind. It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and community contributors. From 552a84aaddeabc074f4a0184b90b7194e7f7a44b Mon Sep 17 00:00:00 2001 From: zoharby Date: Fri, 11 Sep 2015 15:06:28 +0300 Subject: [PATCH 119/223] Add a caffe.io.write_mean function to the MATLAB interface Useful for exporting models from MATLAB (e.g. 
MatConvNet) to Caffe --- matlab/+caffe/io.m | 8 ++++++++ matlab/+caffe/private/caffe_.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/matlab/+caffe/io.m b/matlab/+caffe/io.m index af8369dd..4b072fec 100644 --- a/matlab/+caffe/io.m +++ b/matlab/+caffe/io.m @@ -29,5 +29,13 @@ CHECK_FILE_EXIST(mean_proto_file); mean_data = caffe_('read_mean', mean_proto_file); end + function write_mean(mean_data, mean_proto_file) + % write_mean(mean_data, mean_proto_file) + % write image mean data to binaryproto file + % mean_data should be W x H x C with BGR channels + CHECK(ischar(mean_proto_file), 'mean_proto_file must be a string'); + CHECK(isa(mean_data, 'single'), 'mean_data must be a SINGLE matrix'); + caffe_('write_mean', mean_data, mean_proto_file); + end end end diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp index 4e0ebc1c..7883f79e 100644 --- a/matlab/+caffe/private/caffe_.cpp +++ b/matlab/+caffe/private/caffe_.cpp @@ -478,6 +478,29 @@ static void read_mean(MEX_ARGS) { mxFree(mean_proto_file); } +// Usage: caffe_('write_mean', mean_data, mean_proto_file) +static void write_mean(MEX_ARGS) { + mxCHECK(nrhs == 2 && mxIsSingle(prhs[0]) && mxIsChar(prhs[1]), + "Usage: caffe_('write_mean', mean_data, mean_proto_file)"); + char* mean_proto_file = mxArrayToString(prhs[1]); + int ndims = mxGetNumberOfDimensions(prhs[0]); + mxCHECK(ndims >= 2 && ndims <= 3, "mean_data must have at 2 or 3 dimensions"); + const mwSize *dims = mxGetDimensions(prhs[0]); + int width = dims[0]; + int height = dims[1]; + int channels; + if (ndims == 3) + channels = dims[2]; + else + channels = 1; + Blob data_mean(1, channels, height, width); + mx_mat_to_blob(prhs[0], &data_mean, DATA); + BlobProto blob_proto; + data_mean.ToProto(&blob_proto, false); + WriteProtoToBinaryFile(blob_proto, mean_proto_file); + mxFree(mean_proto_file); +} + /** ----------------------------------------------------------------- ** Available commands. **/ @@ -515,6 +538,7 @@ static handler_registry handlers[] = { { "get_init_key", get_init_key }, { "reset", reset }, { "read_mean", read_mean }, + { "write_mean", write_mean }, // The end. 
{ "END", NULL }, }; From 30dfb864f0e7ad8f39bcdb48200eccc2a0efa7d3 Mon Sep 17 00:00:00 2001 From: Jan Issac Date: Mon, 5 Oct 2015 11:13:26 +0200 Subject: [PATCH 120/223] minor typo fix --- cmake/Cuda.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index ff58d31c..98aef268 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -132,7 +132,7 @@ function(caffe_select_nvcc_arch_flags out_variable) endfunction() ################################################################################################ -# Short command for cuda comnpilation +# Short command for cuda compilation # Usage: # caffe_cuda_compile( ) macro(caffe_cuda_compile objlist_variable) From 64f948a6829c53031632d87f78183dd87d5d6f71 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 5 Oct 2015 14:15:08 -0700 Subject: [PATCH 121/223] SilenceLayer Backward bugfix (fixes #3151) --- src/caffe/layers/silence_layer.cpp | 2 +- src/caffe/layers/silence_layer.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/silence_layer.cpp b/src/caffe/layers/silence_layer.cpp index 4abf9eff..7e70ab43 100644 --- a/src/caffe/layers/silence_layer.cpp +++ b/src/caffe/layers/silence_layer.cpp @@ -12,7 +12,7 @@ void SilenceLayer::Backward_cpu(const vector*>& top, for (int i = 0; i < bottom.size(); ++i) { if (propagate_down[i]) { caffe_set(bottom[i]->count(), Dtype(0), - bottom[i]->mutable_cpu_data()); + bottom[i]->mutable_cpu_diff()); } } } diff --git a/src/caffe/layers/silence_layer.cu b/src/caffe/layers/silence_layer.cu index 8d044ee7..34faef22 100644 --- a/src/caffe/layers/silence_layer.cu +++ b/src/caffe/layers/silence_layer.cu @@ -18,7 +18,7 @@ void SilenceLayer::Backward_gpu(const vector*>& top, for (int i = 0; i < bottom.size(); ++i) { if (propagate_down[i]) { caffe_gpu_set(bottom[i]->count(), Dtype(0), - bottom[i]->mutable_gpu_data()); + bottom[i]->mutable_gpu_diff()); } } } From 19d9927d76d6655a3efc090611e59aa2ea0f25a5 Mon Sep 17 00:00:00 2001 From: Gustav Larsson Date: Mon, 5 Oct 2015 21:55:00 -0500 Subject: [PATCH 122/223] Add pycaffe test for solver.snapshot() --- python/caffe/test/test_solver.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/caffe/test/test_solver.py b/python/caffe/test/test_solver.py index 9cfc10d2..f618fded 100644 --- a/python/caffe/test/test_solver.py +++ b/python/caffe/test/test_solver.py @@ -16,7 +16,8 @@ def setUp(self): f.write("""net: '""" + net_f + """' test_iter: 10 test_interval: 10 base_lr: 0.01 momentum: 0.9 weight_decay: 0.0005 lr_policy: 'inv' gamma: 0.0001 power: 0.75 - display: 100 max_iter: 100 snapshot_after_train: false""") + display: 100 max_iter: 100 snapshot_after_train: false + snapshot_prefix: "model" """) f.close() self.solver = caffe.SGDSolver(f.name) # also make sure get_solver runs @@ -51,3 +52,11 @@ def test_net_memory(self): total += p.data.sum() + p.diff.sum() for bl in six.itervalues(net.blobs): total += bl.data.sum() + bl.diff.sum() + + def test_snapshot(self): + self.solver.snapshot() + # Check that these files exist and then remove them + files = ['model_iter_0.caffemodel', 'model_iter_0.solverstate'] + for fn in files: + assert os.path.isfile(fn) + os.remove(fn) From e0615464ddf550ee57c17733ba9c5a0fa71b8edb Mon Sep 17 00:00:00 2001 From: e3 Date: Wed, 7 Oct 2015 11:52:45 -0700 Subject: [PATCH 123/223] fixes BVLC/caffe#3163 --- docs/tutorial/layers.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorial/layers.md 
b/docs/tutorial/layers.md index eabc792b..7362aac2 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -39,7 +39,7 @@ In contrast, other layers (with few exceptions) ignore the spatial structure of - `n * c_i * h_i * w_i` * Output - `n * c_o * h_o * w_o`, where `h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1` and `w_o` likewise. -* Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) +* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) layer { name: "conv1" @@ -83,7 +83,7 @@ The `Convolution` layer convolves the input image with a set of learnable filter - `n * c * h_i * w_i` * Output - `n * c * h_o * w_o`, where h_o and w_o are computed in the same way as convolution. -* Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) +* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) layer { name: "pool1" @@ -197,7 +197,7 @@ In general, activation / Neuron layers are element-wise operators, taking one bo * Parameters (`ReLUParameter relu_param`) - Optional - `negative_slope` [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0. -* Sample (as seen in `./examples/imagenet/imagenet_train_val.prototxt`) +* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) layer { name: "relu1" From bda1a633ec874e313f5d5dddfc0afc70573847d7 Mon Sep 17 00:00:00 2001 From: Carl Doersch Date: Sun, 23 Aug 2015 20:47:25 -0700 Subject: [PATCH 124/223] BatchReindexLayer to shuffle, subsample, and replicate examples in a batch --- include/caffe/common_layers.hpp | 69 ++++++++++++ src/caffe/layers/batch_reindex_layer.cpp | 79 +++++++++++++ src/caffe/layers/batch_reindex_layer.cu | 107 ++++++++++++++++++ src/caffe/test/test_batch_reindex_layer.cpp | 119 ++++++++++++++++++++ 4 files changed, 374 insertions(+) create mode 100644 src/caffe/layers/batch_reindex_layer.cpp create mode 100644 src/caffe/layers/batch_reindex_layer.cu create mode 100644 src/caffe/test/test_batch_reindex_layer.cpp diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index d2c0ce6d..5d68e865 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -70,6 +70,75 @@ class ArgMaxLayer : public Layer { size_t top_k_; }; +/** + * @brief Index into the input blob along its first axis. + * + * This layer can be used to select, reorder, and even replicate examples in a + * batch. The second blob is cast to int and treated as an index into the + * first axis of the first blob. + */ +template +class BatchReindexLayer : public Layer { + public: + explicit BatchReindexLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "BatchReindex"; } + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 2+) + * -# @f$ (N \times ...) @f$ + * the inputs @f$ x_1 @f$ + * -# @f$ (M) @f$ + * the inputs @f$ x_2 @f$ + * @param top output Blob vector (length 1) + * -# @f$ (M \times ...) @f$: + * the reindexed array @f$ + * y = x_1[x_2] + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the reordered input. 
+ * + * @param top output Blob vector (length 1), providing the error gradient + * with respect to the outputs + * -# @f$ (M \times ...) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to concatenated outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 2): + * - @f$ \frac{\partial E}{\partial y} @f$ is de-indexed (summing where + * required) back to the input x_1 + * - This layer cannot backprop to x_2, i.e. propagate_down[1] must be + * false. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + private: + struct pair_sort_first { + bool operator()(const std::pair &left, + const std::pair &right) { + return left.first < right.first; + } + }; + void check_batch_reindex(int initial_num, int final_num, + const Dtype* ridx_data); +}; + + /** * @brief Takes at least two Blob%s and concatenates them along either the num * or channel dimension, outputting the result. diff --git a/src/caffe/layers/batch_reindex_layer.cpp b/src/caffe/layers/batch_reindex_layer.cpp new file mode 100644 index 00000000..3bf757c7 --- /dev/null +++ b/src/caffe/layers/batch_reindex_layer.cpp @@ -0,0 +1,79 @@ +#include + +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void BatchReindexLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(1, bottom[1]->num_axes()); + vector newshape; + newshape.push_back(bottom[1]->shape(0)); + for (int i = 1; i < bottom[0]->shape().size(); ++i) { + newshape.push_back(bottom[0]->shape()[i]); + } + top[0]->Reshape(newshape); +} + +template +void BatchReindexLayer::check_batch_reindex(int initial_num, + int final_num, + const Dtype* ridx_data) { + for (int i = 0; i < final_num; ++i) { + CHECK_GE(ridx_data[i], 0) + << "Index specified for reindex layer was negative."; + CHECK_LT(ridx_data[i], initial_num) + << "Index specified for reindex layer was greater than batch size."; + } +} + +template +void BatchReindexLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + check_batch_reindex(bottom[0]->shape(0), bottom[1]->count(), + bottom[1]->cpu_data()); + if (top[0]->count() == 0) { + return; + } + int inner_dim = bottom[0]->count() / bottom[0]->shape(0); + const Dtype* in = bottom[0]->cpu_data(); + const Dtype* permut = bottom[1]->cpu_data(); + Dtype* out = top[0]->mutable_cpu_data(); + for (int index = 0; index < top[0]->count(); ++index) { + int n = index / (inner_dim); + int in_n = static_cast(permut[n]); + out[index] = in[in_n * (inner_dim) + index % (inner_dim)]; + } +} + +template +void BatchReindexLayer::Backward_cpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom) { + CHECK(!propagate_down[1]) << "Cannot backprop to index."; + if (!propagate_down[0]) { + return; + } + int inner_dim = bottom[0]->count() / bottom[0]->shape(0); + Dtype* bot_diff = bottom[0]->mutable_cpu_diff(); + const Dtype* permut = bottom[1]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + caffe_set(bottom[0]->count(), Dtype(0), bot_diff); + for (int index = 0; index < top[0]->count(); ++index) { + int n = index / (inner_dim); + int in_n = static_cast(permut[n]); + bot_diff[in_n * (inner_dim) + index % (inner_dim)] += top_diff[index]; + } +} + +#ifdef CPU_ONLY 
+STUB_GPU(BatchReindexLayer); +#endif + +INSTANTIATE_CLASS(BatchReindexLayer); +REGISTER_LAYER_CLASS(BatchReindex); + +} // namespace caffe diff --git a/src/caffe/layers/batch_reindex_layer.cu b/src/caffe/layers/batch_reindex_layer.cu new file mode 100644 index 00000000..c418cab9 --- /dev/null +++ b/src/caffe/layers/batch_reindex_layer.cu @@ -0,0 +1,107 @@ +#include +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +__global__ void BRForward(const int count, const int inner_dim, const Dtype* in, + const Dtype* permut, Dtype* out) { + CUDA_KERNEL_LOOP(index, count) { + int n = index / (inner_dim); + int in_n = static_cast(permut[n]); + out[index] = in[in_n * (inner_dim) + index % (inner_dim)]; + } +} + +template +void BatchReindexLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + check_batch_reindex(bottom[0]->shape(0), bottom[1]->count(), + bottom[1]->cpu_data()); + if (top[0]->count() == 0) { + return; + } + int threads = top[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + BRForward <<>>( + top[0]->count(), bottom[0]->count() / bottom[0]->shape(0), + bottom[0]->gpu_data(), bottom[1]->gpu_data(), top[0]->mutable_gpu_data()); + CUDA_POST_KERNEL_CHECK; +} + +template +__global__ void BRBackward(const int count, const int inner_dim, + const Dtype* in, const Dtype* top_indexes, + const Dtype* begins, const Dtype* counts, + Dtype* out) { + CUDA_KERNEL_LOOP(index, count) { + int n = index / (inner_dim); + out[index] = 0; + int lower = static_cast(begins[n]); + int upper = lower + static_cast(counts[n]); + for (int i = lower; i < upper; ++i) { + int in_n = static_cast(top_indexes[i]); + out[index] += in[in_n * (inner_dim) + index % (inner_dim)]; + } + } +} + +template +void BatchReindexLayer::Backward_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom) { + CHECK(!propagate_down[1]) << "Cannot backprop to index."; + if (!propagate_down[0]) { + return; + } + + vector > mapping; + const Dtype* perm = bottom[1]->cpu_data(); + for (int i = 0; i < bottom[1]->count(); ++i) { + mapping.push_back(pair(static_cast(perm[i]), i)); + } + std::sort(mapping.begin(), mapping.end(), pair_sort_first()); + + // Each element of the bottom diff is potentially the sum of many top diffs. + // However, we'd like each CUDA thread to handle exactly one output. Hence, + // we first pre-compute a list of lists of indices that need to be summed for + // each output. `top_indexes` holds the data of this list of lists. The + // k'th element of `begins` points to the location in `top_indexes` where the + // list for the k'th example begin, and the k'th element of `counts` is the + // length of that list. 
+ vector shape; + shape.push_back(bottom[1]->count()); + Blob top_indexes(shape); + shape[0] = bottom[0]->shape(0); + Blob counts(shape); + Blob begins(shape); + Dtype* t_i_data = top_indexes.mutable_cpu_data(); + Dtype* c_data = counts.mutable_cpu_data(); + Dtype* b_data = begins.mutable_cpu_data(); + caffe_set(begins.count(), Dtype(-1), b_data); + caffe_set(counts.count(), Dtype(0), c_data); + for (int i = 0; i < mapping.size(); ++i) { + t_i_data[i] = mapping[i].second; + if (b_data[mapping[i].first] == -1) { + b_data[mapping[i].first] = i; + } + c_data[mapping[i].first] += 1; + } + + int threads = bottom[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + BRBackward <<>>( + bottom[0]->count(), bottom[0]->count() / bottom[0]->shape(0), + top[0]->gpu_diff(), top_indexes.gpu_data(), begins.gpu_data(), + counts.gpu_data(), bottom[0]->mutable_gpu_diff()); + CUDA_POST_KERNEL_CHECK; +} + +INSTANTIATE_LAYER_GPU_FUNCS(BatchReindexLayer); + +} // namespace caffe diff --git a/src/caffe/test/test_batch_reindex_layer.cpp b/src/caffe/test/test_batch_reindex_layer.cpp new file mode 100644 index 00000000..985db343 --- /dev/null +++ b/src/caffe/test/test_batch_reindex_layer.cpp @@ -0,0 +1,119 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class BatchReindexLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + BatchReindexLayerTest() + : blob_bottom_(new Blob()), + blob_bottom_permute_(new Blob()), + blob_top_(new Blob()) { + } + virtual void SetUp() { + Caffe::set_random_seed(1701); + vector sz; + sz.push_back(5); + sz.push_back(4); + sz.push_back(3); + sz.push_back(2); + blob_bottom_->Reshape(sz); + vector permsz; + permsz.push_back(6); + blob_bottom_permute_->Reshape(permsz); + + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + int perm[] = { 4, 0, 4, 0, 1, 2 }; + for (int i = 0; i < blob_bottom_permute_->count(); ++i) { + blob_bottom_permute_->mutable_cpu_data()[i] = perm[i]; + } + + blob_bottom_vec_.push_back(blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_permute_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~BatchReindexLayerTest() { + delete blob_bottom_permute_; + delete blob_bottom_; + delete blob_top_; + } + Blob* const blob_bottom_; + Blob* const blob_bottom_permute_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + + void TestForward() { + LayerParameter layer_param; + + vector sz; + sz.push_back(5); + sz.push_back(4); + sz.push_back(3); + sz.push_back(2); + blob_bottom_->Reshape(sz); + for (int i = 0; i < blob_bottom_->count(); ++i) { + blob_bottom_->mutable_cpu_data()[i] = i; + } + + vector permsz; + permsz.push_back(6); + blob_bottom_permute_->Reshape(permsz); + int perm[] = { 4, 0, 4, 0, 1, 2 }; + for (int i = 0; i < blob_bottom_permute_->count(); ++i) { + blob_bottom_permute_->mutable_cpu_data()[i] = perm[i]; + } + BatchReindexLayer layer(layer_param); + layer.SetUp(blob_bottom_vec_, blob_top_vec_); + EXPECT_EQ(blob_top_->num(), blob_bottom_permute_->num()); + EXPECT_EQ(blob_top_->channels(), blob_bottom_->channels()); + EXPECT_EQ(blob_top_->height(), blob_bottom_->height()); + EXPECT_EQ(blob_top_->width(), blob_bottom_->width()); + + 
layer.Forward(blob_bottom_vec_, blob_top_vec_); + int channels = blob_top_->channels(); + int height = blob_top_->height(); + int width = blob_top_->width(); + for (int i = 0; i < blob_top_->count(); ++i) { + int n = i / (channels * width * height); + int inner_idx = (i % (channels * width * height)); + EXPECT_EQ( + blob_top_->cpu_data()[i], + blob_bottom_->cpu_data()[perm[n] * channels * width * height + + inner_idx]); + } + } +}; + +TYPED_TEST_CASE(BatchReindexLayerTest, TestDtypesAndDevices); + +TYPED_TEST(BatchReindexLayerTest, TestForward) { + this->TestForward(); +} + +TYPED_TEST(BatchReindexLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BatchReindexLayer layer(layer_param); + GradientChecker checker(1e-4, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); + } + +} // namespace caffe From c65ba61bdf273604c3edcd24ba7a80cc3835441a Mon Sep 17 00:00:00 2001 From: sh1r0 Date: Fri, 9 Oct 2015 00:31:05 +0800 Subject: [PATCH 125/223] Remove the 4D constraint of blobproto IO in python --- python/caffe/io.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index 0cad7211..40b7ac1e 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -21,22 +21,18 @@ def blobproto_to_array(blob, return_diff=False): unless return_diff is True, in which case we will return the diff. """ if return_diff: - return np.array(blob.diff).reshape( - blob.num, blob.channels, blob.height, blob.width) + return np.array(blob.diff).reshape(*blob.shape.dim) else: - return np.array(blob.data).reshape( - blob.num, blob.channels, blob.height, blob.width) + return np.array(blob.data).reshape(*blob.shape.dim) def array_to_blobproto(arr, diff=None): - """Converts a 4-dimensional array to blob proto. If diff is given, also + """Converts a N-dimensional array to blob proto. If diff is given, also convert the diff. You need to make sure that arr and diff have the same shape, and this function does not do sanity check. """ - if arr.ndim != 4: - raise ValueError('Incorrect array shape.') blob = caffe_pb2.BlobProto() - blob.num, blob.channels, blob.height, blob.width = arr.shape + blob.shape.dim.extend(arr.shape) blob.data.extend(arr.astype(float).flat) if diff is not None: blob.diff.extend(diff.astype(float).flat) From ee5191b3e41fddae73653e0d61172360b89526ca Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Thu, 8 Oct 2015 01:26:25 +0900 Subject: [PATCH 126/223] Improve numerical stability of variance computation in MVNLayer --- src/caffe/layers/mvn_layer.cpp | 42 ++++++++++----------------------- src/caffe/layers/mvn_layer.cu | 43 ++++++++++------------------------ 2 files changed, 25 insertions(+), 60 deletions(-) diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index 325691b1..61c2141e 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -42,29 +42,21 @@ void MVNLayer::Forward_cpu(const vector*>& bottom, int dim = bottom[0]->count() / num; - if (this->layer_param_.mvn_param().normalize_variance()) { - // put the squares of bottom into temp_ - caffe_powx(bottom[0]->count(), bottom_data, Dtype(2), - temp_.mutable_cpu_data()); + // subtract mean + caffe_cpu_gemv(CblasNoTrans, num, dim, 1. 
/ dim, bottom_data, + sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., + temp_.mutable_cpu_data()); + caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data); // X-EX - // computes variance using var(X) = E(X^2) - (EX)^2 - caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX + if (this->layer_param_.mvn_param().normalize_variance()) { + // compute variance using var(X) = E((X-EX)^2) + caffe_powx(bottom[0]->count(), top_data, Dtype(2), + temp_.mutable_cpu_data()); // (X-EX)^2 caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(), sum_multiplier_.cpu_data(), 0., - variance_.mutable_cpu_data()); // E(X^2) - caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2), - temp_.mutable_cpu_data()); // (EX)^2 - caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(), - variance_.mutable_cpu_data()); // variance - - // do mean and variance normalization - // subtract mean - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., - temp_.mutable_cpu_data()); - - caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data); + variance_.mutable_cpu_data()); // E((X-EX)^2) // normalize variance caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), @@ -77,16 +69,6 @@ void MVNLayer::Forward_cpu(const vector*>& bottom, temp_.mutable_cpu_data()); caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data); - } else { - caffe_cpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); // EX - - // subtract mean - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., - temp_.mutable_cpu_data()); - - caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data); } } diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index d86a2e73..5cbb112d 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -20,29 +20,22 @@ void MVNLayer::Forward_gpu(const vector*>& bottom, int dim = bottom[0]->count() / num; - if (this->layer_param_.mvn_param().normalize_variance()) { - // put the squares of bottom into temp_ - caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2), - temp_.mutable_gpu_data()); + // subtract mean + caffe_gpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, + sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + mean_.gpu_data(), sum_multiplier_.gpu_data(), 0., + temp_.mutable_gpu_data()); + caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), + top_data); // X-EX - // computes variance using var(X) = E(X^2) - (EX)^2 - caffe_gpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX + if (this->layer_param_.mvn_param().normalize_variance()) { + // compute variance using var(X) = E((X-EX)^2) + caffe_gpu_powx(bottom[0]->count(), top_data, Dtype(2), + temp_.mutable_gpu_data()); // (X-EX)^2 caffe_gpu_gemv(CblasNoTrans, num, dim, 1. 
/ dim, temp_.gpu_data(), sum_multiplier_.gpu_data(), 0., - variance_.mutable_gpu_data()); // E(X^2) - caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2), - temp_.mutable_gpu_data()); // (EX)^2 - caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(), - variance_.mutable_gpu_data()); // variance - - // do mean and variance normalization - // subtract mean - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - mean_.gpu_data(), sum_multiplier_.gpu_data(), 0., - temp_.mutable_gpu_data()); - - caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data); + variance_.mutable_gpu_data()); // E((X-EX)^2) // normalize variance caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), @@ -55,16 +48,6 @@ void MVNLayer::Forward_gpu(const vector*>& bottom, temp_.mutable_gpu_data()); caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data); - } else { - caffe_gpu_gemv(CblasNoTrans, num, dim, 1. / dim, bottom_data, - sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); // EX - - // subtract mean - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - mean_.gpu_data(), sum_multiplier_.gpu_data(), 0., - temp_.mutable_gpu_data()); - - caffe_gpu_add(temp_.count(), bottom_data, temp_.gpu_data(), top_data); } } From e5990b3dafa2b95fae7b7bfaac4dcd309a20d151 Mon Sep 17 00:00:00 2001 From: Brian Chu Date: Tue, 13 Oct 2015 03:50:53 -0700 Subject: [PATCH 127/223] In 00-classification example, get correct class label index --- examples/00-classification.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/00-classification.ipynb b/examples/00-classification.ipynb index 46bbb193..89b7dd34 100644 --- a/examples/00-classification.ipynb +++ b/examples/00-classification.ipynb @@ -119,7 +119,7 @@ "source": [ "net.blobs['data'].data[...] 
= transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))\n", "out = net.forward()\n", - "print(\"Predicted class is #{}.\".format(out['prob'].argmax()))" + "print(\"Predicted class is #{}.\".format(out['prob'][0].argmax()))" ] }, { From ec94055a6c5f0f86b88f98f1659cc9f317df2e3e Mon Sep 17 00:00:00 2001 From: Alessandro Giusti Date: Tue, 13 Oct 2015 14:30:45 +0200 Subject: [PATCH 128/223] Update store2hdf5.m Fixed a bug in two assertions (the condition input argument must be a scalar logical) --- matlab/hdf5creation/store2hdf5.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/matlab/hdf5creation/store2hdf5.m b/matlab/hdf5creation/store2hdf5.m index 0a0016dc..4e8c81d9 100644 --- a/matlab/hdf5creation/store2hdf5.m +++ b/matlab/hdf5creation/store2hdf5.m @@ -39,8 +39,8 @@ info=h5info(filename); prev_dat_sz=info.Datasets(1).Dataspace.Size; prev_lab_sz=info.Datasets(2).Dataspace.Size; - assert(prev_dat_sz(1:end-1)==dat_dims(1:end-1), 'Data dimensions must match existing dimensions in dataset'); - assert(prev_lab_sz(1:end-1)==lab_dims(1:end-1), 'Label dimensions must match existing dimensions in dataset'); + assert(all(prev_dat_sz(1:end-1)==dat_dims(1:end-1)), 'Data dimensions must match existing dimensions in dataset'); + assert(all(prev_lab_sz(1:end-1)==lab_dims(1:end-1)), 'Label dimensions must match existing dimensions in dataset'); startloc.dat=[ones(1,length(dat_dims)-1), prev_dat_sz(end)+1]; startloc.lab=[ones(1,length(lab_dims)-1), prev_lab_sz(end)+1]; end From a8839dbcb3b16f8f5d3f8d17209a3c8c0142a51b Mon Sep 17 00:00:00 2001 From: Akash A Date: Tue, 13 Oct 2015 17:53:35 +0100 Subject: [PATCH 129/223] Add pyyaml as a requirement In getting the [web demo](http://caffe.berkeleyvision.org/gathered/examples/web_demo.html) started I get an `ImportError: No module named yaml` error when running `./scripts/download_model_binary.py models/bvlc_reference_caffenet`. 
--- examples/web_demo/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/web_demo/requirements.txt b/examples/web_demo/requirements.txt index 8fb1d2cc..43e1b98c 100644 --- a/examples/web_demo/requirements.txt +++ b/examples/web_demo/requirements.txt @@ -4,3 +4,4 @@ tornado numpy pandas pillow +pyyaml From ca4c6fb4e2106bbd3d6a2c09c34567558edde891 Mon Sep 17 00:00:00 2001 From: Brian Chu Date: Tue, 13 Oct 2015 13:24:42 -0700 Subject: [PATCH 130/223] Set CaffeNet train_val test mirroring to false --- models/bvlc_reference_caffenet/train_val.prototxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/bvlc_reference_caffenet/train_val.prototxt b/models/bvlc_reference_caffenet/train_val.prototxt index c79472e0..e3e42796 100644 --- a/models/bvlc_reference_caffenet/train_val.prototxt +++ b/models/bvlc_reference_caffenet/train_val.prototxt @@ -45,7 +45,7 @@ layer { # mean_value: 104 # mean_value: 117 # mean_value: 123 -# mirror: true +# mirror: false # } data_param { source: "examples/imagenet/ilsvrc12_val_lmdb" From e0c34cedde6e0d12d420a51cea7a98df50069559 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Wed, 14 Oct 2015 12:00:14 +0900 Subject: [PATCH 131/223] Fixed drawing problems with repeated convolution --- python/caffe/draw.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index a002b60b..f8bf5722 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -82,11 +82,11 @@ def get_layer_label(layer, rankdir): separator, layer.type, separator, - layer.convolution_param.kernel_size, + layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size._values) else 1, separator, - layer.convolution_param.stride, + layer.convolution_param.stride[0] if len(layer.convolution_param.stride._values) else 1, separator, - layer.convolution_param.pad) + layer.convolution_param.pad[0] if len(layer.convolution_param.pad._values) else 0) elif layer.type == 'Pooling': pooling_types_dict = get_pooling_types_dict() node_label = '"%s%s(%s %s)%skernel size: %d%sstride: %d%spad: %d"' %\ From 75e859a522fdbf78a2ea58393500af6103bcce56 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 15 Oct 2015 11:03:09 -0700 Subject: [PATCH 132/223] Allow old-style shape in blobproto_to_array Fixes #3199 Bug introduced in #3170 --- python/caffe/io.py | 11 ++++++++-- python/caffe/test/test_io.py | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 python/caffe/test/test_io.py diff --git a/python/caffe/io.py b/python/caffe/io.py index 40b7ac1e..11c84260 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -20,11 +20,18 @@ def blobproto_to_array(blob, return_diff=False): Convert a blob proto to an array. In default, we will just return the data, unless return_diff is True, in which case we will return the diff. """ + # Read the data into an array if return_diff: - return np.array(blob.diff).reshape(*blob.shape.dim) + data = np.array(blob.diff) else: - return np.array(blob.data).reshape(*blob.shape.dim) + data = np.array(blob.data) + # Reshape the array + if blob.HasField('num') or blob.HasField('channels') or blob.HasField('height') or blob.HasField('width'): + # Use legacy 4D shape + return data.reshape(blob.num, blob.channels, blob.height, blob.width) + else: + return data.reshape(blob.shape.dim) def array_to_blobproto(arr, diff=None): """Converts a N-dimensional array to blob proto. 
If diff is given, also diff --git a/python/caffe/test/test_io.py b/python/caffe/test/test_io.py new file mode 100644 index 00000000..8c86ef75 --- /dev/null +++ b/python/caffe/test/test_io.py @@ -0,0 +1,41 @@ +import numpy as np +import unittest + +import caffe + +class TestBlobProtoToArray(unittest.TestCase): + + def test_old_format(self): + data = np.zeros((10,10)) + blob = caffe.proto.caffe_pb2.BlobProto() + blob.data.extend(list(data.flatten())) + shape = (1,1,10,10) + blob.num, blob.channels, blob.height, blob.width = shape + + arr = caffe.io.blobproto_to_array(blob) + self.assertEqual(arr.shape, shape) + + def test_new_format(self): + data = np.zeros((10,10)) + blob = caffe.proto.caffe_pb2.BlobProto() + blob.data.extend(list(data.flatten())) + blob.shape.dim.extend(list(data.shape)) + + arr = caffe.io.blobproto_to_array(blob) + self.assertEqual(arr.shape, data.shape) + + def test_no_shape(self): + data = np.zeros((10,10)) + blob = caffe.proto.caffe_pb2.BlobProto() + blob.data.extend(list(data.flatten())) + + with self.assertRaises(ValueError): + caffe.io.blobproto_to_array(blob) + + def test_scalar(self): + data = np.ones((1)) * 123 + blob = caffe.proto.caffe_pb2.BlobProto() + blob.data.extend(list(data.flatten())) + + arr = caffe.io.blobproto_to_array(blob) + self.assertEqual(arr, 123) From ecac7ff6286642420eb5db723c382e74bf82c9d7 Mon Sep 17 00:00:00 2001 From: Simon Layton Date: Wed, 8 Jul 2015 15:35:55 -0400 Subject: [PATCH 133/223] Initial cuDNN v3 support --- include/caffe/vision_layers.hpp | 74 +++++++++- src/caffe/layer_factory.cpp | 43 +++++- src/caffe/layers/cudnn_conv_layer.cpp | 138 +++++++++++++++++- src/caffe/layers/cudnn_conv_layer.cu | 58 ++------ src/caffe/layers/cudnn_lcn_layer.cpp | 77 ++++++++++ src/caffe/layers/cudnn_lcn_layer.cu | 50 +++++++ src/caffe/layers/cudnn_lrn_layer.cpp | 57 ++++++++ src/caffe/layers/cudnn_lrn_layer.cu | 48 +++++++ src/caffe/layers/lrn_layer.cpp | 1 - src/caffe/proto/caffe.proto | 6 + src/caffe/test/test_lrn_layer.cpp | 196 ++++++++++++++++++++++++++ 11 files changed, 692 insertions(+), 56 deletions(-) create mode 100644 src/caffe/layers/cudnn_lcn_layer.cpp create mode 100644 src/caffe/layers/cudnn_lcn_layer.cu create mode 100644 src/caffe/layers/cudnn_lrn_layer.cpp create mode 100644 src/caffe/layers/cudnn_lrn_layer.cu diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 06bc0457..237b05d6 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -304,13 +304,24 @@ class CuDNNConvolutionLayer : public ConvolutionLayer { bool handles_setup_; cudnnHandle_t* handle_; cudaStream_t* stream_; + + // algorithms for forward and backwards convolutions + cudnnConvolutionFwdAlgo_t *fwd_algo_; + cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_; + cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_; + vector bottom_descs_, top_descs_; cudnnTensorDescriptor_t bias_desc_; cudnnFilterDescriptor_t filter_desc_; vector conv_descs_; int bottom_offset_, top_offset_, bias_offset_; - size_t workspaceSizeInBytes; - void *workspace; + + size_t *workspace_fwd_sizes_; + size_t *workspace_bwd_data_sizes_; + size_t *workspace_bwd_filter_sizes_; + size_t workspaceSizeInBytes; // size of underlying storage + void *workspaceData; // underlying storage + void **workspace; // aliases into workspaceData }; #endif @@ -442,6 +453,65 @@ class LRNLayer : public Layer { vector*> product_bottom_vec_; }; +#ifdef USE_CUDNN + +template +class CuDNNLRNLayer : public LRNLayer { + public: + explicit CuDNNLRNLayer(const 
LayerParameter& param) + : LRNLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNLRNLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnLRNDescriptor_t norm_desc_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; + + int size_; + Dtype alpha_, beta_, k_; +}; + +template +class CuDNNLCNLayer : public LRNLayer { + public: + explicit CuDNNLCNLayer(const LayerParameter& param) + : LRNLayer(param), handles_setup_(false), tempDataSize(0), + tempData1(NULL), tempData2(NULL) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNLCNLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnLRNDescriptor_t norm_desc_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; + + int size_, pre_pad_; + Dtype alpha_, beta_, k_; + + size_t tempDataSize; + void *tempData1, *tempData2; +}; + +#endif /** * @brief Pools the input image by taking the max, average, etc. within regions. diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 926c7d8f..417ffe98 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -54,10 +54,8 @@ shared_ptr > GetPoolingLayer(const LayerParameter& param) { return shared_ptr >(new PoolingLayer(param)); #ifdef USE_CUDNN } else if (engine == PoolingParameter_Engine_CUDNN) { - PoolingParameter p_param = param.pooling_param(); - if (p_param.pad() || p_param.pad_h() || p_param.pad_w() || - param.top_size() > 1) { - LOG(INFO) << "CUDNN does not support padding or multiple tops. " + if (param.top_size() > 1) { + LOG(INFO) << "cuDNN does not support multiple tops. " << "Using Caffe's own pooling layer."; return shared_ptr >(new PoolingLayer(param)); } @@ -70,6 +68,43 @@ shared_ptr > GetPoolingLayer(const LayerParameter& param) { REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer); +// Get LRN layer according to engine +template +shared_ptr > GetLRNLayer(const LayerParameter& param) { + LRNParameter_Engine engine = param.lrn_param().engine(); + + if (engine == LRNParameter_Engine_DEFAULT) { +#ifdef USE_CUDNN + engine = LRNParameter_Engine_CUDNN; +#else + engine = LRNParameter_Engine_CAFFE; +#endif + } + + if (engine == LRNParameter_Engine_CAFFE) { + return shared_ptr >(new LRNLayer(param)); +#ifdef USE_CUDNN + } else if (engine == LRNParameter_Engine_CUDNN) { + LRNParameter lrn_param = param.lrn_param(); + + if (lrn_param.norm_region() ==LRNParameter_NormRegion_WITHIN_CHANNEL) { + return shared_ptr >(new CuDNNLCNLayer(param)); + } else { + // local size is too big to be handled through cuDNN + if (param.lrn_param().local_size() > CUDNN_LRN_MAX_N) { + return shared_ptr >(new LRNLayer(param)); + } else { + return shared_ptr >(new CuDNNLRNLayer(param)); + } + } +#endif + } else { + LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + } +} + +REGISTER_LAYER_CREATOR(LRN, GetLRNLayer); + // Get relu layer according to engine. 
template shared_ptr > GetReLULayer(const LayerParameter& param) { diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 3514fe2a..d7b1e0d6 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -1,4 +1,5 @@ #ifdef USE_CUDNN +#include #include #include "caffe/filler.hpp" @@ -24,13 +25,38 @@ void CuDNNConvolutionLayer::LayerSetUp( // Initialize CUDA streams and cuDNN. stream_ = new cudaStream_t[this->group_ * CUDNN_STREAMS_PER_GROUP]; handle_ = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP]; + + // Initialize algorithm arrays + fwd_algo_ = new cudnnConvolutionFwdAlgo_t[bottom.size()]; + bwd_filter_algo_= new cudnnConvolutionBwdFilterAlgo_t[bottom.size()]; + bwd_data_algo_ = new cudnnConvolutionBwdDataAlgo_t[bottom.size()]; + + // initialize size arrays + workspace_fwd_sizes_ = new size_t[bottom.size()]; + workspace_bwd_filter_sizes_ = new size_t[bottom.size()]; + workspace_bwd_data_sizes_ = new size_t[bottom.size()]; + + // workspace data workspaceSizeInBytes = 0; - workspace = NULL; + workspaceData = NULL; + workspace = new void*[this->group_ * CUDNN_STREAMS_PER_GROUP]; + + for (size_t i = 0; i < bottom.size(); ++i) { + // initialize all to default algorithms + fwd_algo_[i] = (cudnnConvolutionFwdAlgo_t)0; + bwd_filter_algo_[i] = (cudnnConvolutionBwdFilterAlgo_t)0; + bwd_data_algo_[i] = (cudnnConvolutionBwdDataAlgo_t)0; + // default algorithms don't require workspace + workspace_fwd_sizes_[i] = 0; + workspace_bwd_data_sizes_[i] = 0; + workspace_bwd_filter_sizes_[i] = 0; + } for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) { CUDA_CHECK(cudaStreamCreate(&stream_[g])); CUDNN_CHECK(cudnnCreate(&handle_[g])); CUDNN_CHECK(cudnnSetStream(handle_[g], stream_[g])); + workspace[g] = NULL; } // Set the indexing parameters. 
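For intuition on the workspace sizing performed in the Reshape() hunk that follows: a single buffer is shared by all groups and streams, sized as the largest per-operation workspace multiplied by group_ * CUDNN_STREAMS_PER_GROUP. The numbers below are invented purely for illustration, assuming CUDNN_STREAMS_PER_GROUP is 3:

    // Suppose the largest forward / backward-data / backward-filter workspace
    // over all bottom blobs is 3 MB and group_ == 2; then
    //   total_max_workspace = 3 MB * (2 * 3) = 18 MB
    // and workspace[g] simply aliases offset g * 3 MB inside that one allocation.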
@@ -86,6 +112,10 @@ void CuDNNConvolutionLayer::Reshape( const int stride_h = stride_data[0]; const int stride_w = stride_data[1]; + // Specify workspace limit for kernels directly until we have a + // planning strategy and a rewrite of Caffe's GPU memory mangagement + size_t workspace_limit_bytes = 8*1024*1024; + for (int i = 0; i < bottom.size(); i++) { cudnn::setTensor4dDesc(&bottom_descs_[i], this->num_, @@ -98,7 +128,104 @@ void CuDNNConvolutionLayer::Reshape( this->num_output_ * this->out_spatial_dim_, this->out_spatial_dim_, width_out, 1); cudnn::setConvolutionDesc(&conv_descs_[i], bottom_descs_[i], - filter_desc_, pad_h, pad_w, stride_h, stride_w); + filter_desc_, pad_h, pad_w, + stride_h, stride_w); + + // choose forward and backward algorithms + workspace(s) + CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0], + bottom_descs_[i], + filter_desc_, + conv_descs_[i], + top_descs_[i], + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_limit_bytes, + &fwd_algo_[i])); + + CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0], + bottom_descs_[i], + filter_desc_, + conv_descs_[i], + top_descs_[i], + fwd_algo_[i], + &(workspace_fwd_sizes_[i]))); + + // choose backward algorithm for filter + CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle_[0], + bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + workspace_limit_bytes, &bwd_filter_algo_[i]) ); + + // get workspace for backwards filter algorithm + CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0], + bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_, + bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i])); + + // choose backward algo for data + CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle_[0], + filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i], + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + workspace_limit_bytes, &bwd_data_algo_[i])); + + // get workspace size + CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0], + filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i], + bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) ); + } + + // reduce over all workspace sizes to get a maximum to allocate / reallocate + size_t total_workspace_fwd = 0; + size_t total_workspace_bwd_data = 0; + size_t total_workspace_bwd_filter = 0; + + for (size_t i = 0; i < bottom.size(); i++) { + total_workspace_fwd = std::max(total_workspace_fwd, + workspace_fwd_sizes_[i]); + total_workspace_bwd_data = std::max(total_workspace_bwd_data, + workspace_bwd_data_sizes_[i]); + total_workspace_bwd_filter = std::max(total_workspace_bwd_filter, + workspace_bwd_filter_sizes_[i]); + } + // get max over all operations + size_t max_workspace = std::max(total_workspace_fwd, + total_workspace_bwd_data); + max_workspace = std::max(max_workspace, total_workspace_bwd_filter); + // ensure all groups have enough workspace + size_t total_max_workspace = max_workspace * + (this->group_ * CUDNN_STREAMS_PER_GROUP); + + // this is the total amount of storage needed over all groups + streams + if (total_max_workspace > workspaceSizeInBytes) { + LOG(INFO) << "Reallocating workspace storage: " << total_max_workspace; + workspaceSizeInBytes = total_max_workspace; + + // free the existing workspace and allocate a new (larger) one + cudaFree(this->workspaceData); + + cudaError_t err = cudaMalloc(&(this->workspaceData), workspaceSizeInBytes); + if (err != cudaSuccess) { + // force zero memory path + 
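        // i.e. fall back to the default algorithms (noted above in LayerSetUp as
        // requiring no auxiliary workspace) and zero out every recorded size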
for (int i = 0; i < bottom.size(); i++) { + workspace_fwd_sizes_[i] = 0; + workspace_bwd_filter_sizes_[i] = 0; + workspace_bwd_data_sizes_[i] = 0; + fwd_algo_[i] = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; + bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; + bwd_data_algo_[i] = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; + } + + // NULL out all workspace pointers + for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) { + workspace[g] = NULL; + } + // NULL out underlying data + workspaceData = NULL; + workspaceSizeInBytes = 0; + } + + // if we succeed in the allocation, set pointer aliases for workspaces + for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) { + workspace[g] = reinterpret_cast(workspaceData) + g*max_workspace; + } } // Tensor descriptor for bias. @@ -128,8 +255,15 @@ CuDNNConvolutionLayer::~CuDNNConvolutionLayer() { cudnnDestroy(handle_[g]); } + cudaFree(workspaceData); delete [] stream_; delete [] handle_; + delete [] fwd_algo_; + delete [] bwd_filter_algo_; + delete [] bwd_data_algo_; + delete [] workspace_fwd_sizes_; + delete [] workspace_bwd_data_sizes_; + delete [] workspace_bwd_filter_sizes_; } INSTANTIATE_CLASS(CuDNNConvolutionLayer); diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 69115202..e88e4dd3 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -14,11 +14,6 @@ __global__ void sync_conv_groups() { } template void CuDNNConvolutionLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int kernel_h = kernel_shape_data[0]; - const int kernel_w = kernel_shape_data[1]; - const size_t workspace_limit_bytes = - kernel_h * kernel_w * this->channels_ * sizeof(int) + 1; const Dtype* weight = this->blobs_[0]->gpu_data(); for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); @@ -26,52 +21,13 @@ void CuDNNConvolutionLayer::Forward_gpu( // Forward through cuDNN in parallel over groups. for (int g = 0; g < this->group_; g++) { - cudnnConvolutionFwdAlgo_t algo; - - // pick the convolution algorithm - // TODO(shelhamer) this should be done during reshape - // TODO(shelhamer) the choice of automatic or manual algorithm picking - // should be exposed in proto - CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[g], - bottom_descs_[i], - filter_desc_, - conv_descs_[i], - top_descs_[i], - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, - workspace_limit_bytes, // memoryLimitInBytes, - &algo)); - - // get minimum size of the workspace needed for the desired algorithm - size_t workspaceSizeInBytes_temp = 0; - - CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[g], - bottom_descs_[i], - filter_desc_, - conv_descs_[i], - top_descs_[i], - algo, - &workspaceSizeInBytes_temp)); - - if (workspaceSizeInBytes_temp > workspaceSizeInBytes) { - workspaceSizeInBytes = workspaceSizeInBytes_temp; - // free the existing workspace and allocate a new (larger) one - cudaFree(this->workspace); - cudaError_t err = cudaMalloc(&(this->workspace), workspaceSizeInBytes); - if (err != cudaSuccess) { - // force zero memory path - algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - workspace = NULL; - workspaceSizeInBytes = 0; - } - } - // Filters. 
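      // Note: algorithm choice and workspace sizing now happen once in Reshape(),
      // so the call below just reuses fwd_algo_[i] and this group's workspace slice.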
CUDNN_CHECK(cudnnConvolutionForward(handle_[g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, filter_desc_, weight + this->weight_offset_ * g, conv_descs_[i], - algo, workspace, workspaceSizeInBytes, + fwd_algo_[i], workspace[g], workspace_fwd_sizes_[i], cudnn::dataType::zero, top_descs_[i], top_data + top_offset_ * g)); @@ -101,10 +57,12 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, if (this->param_propagate_down_[0]) { weight = this->blobs_[0]->gpu_data(); weight_diff = this->blobs_[0]->mutable_gpu_diff(); + caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); } Dtype* bias_diff = NULL; if (this->bias_term_ && this->param_propagate_down_[1]) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff); } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->gpu_diff(); @@ -122,11 +80,14 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, // Gradient w.r.t. weights. if (this->param_propagate_down_[0]) { const Dtype* bottom_data = bottom[i]->gpu_data(); - CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g], + CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3( + handle_[1*this->group_ + g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], + bwd_filter_algo_[i], workspace[1*this->group_ + g], + workspace_bwd_filter_sizes_[i], cudnn::dataType::one, filter_desc_, weight_diff + this->weight_offset_ * g)); } @@ -137,11 +98,14 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, weight = this->blobs_[0]->gpu_data(); } Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); - CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], + CUDNN_CHECK(cudnnConvolutionBackwardData_v3( + handle_[2*this->group_ + g], cudnn::dataType::one, filter_desc_, weight + this->weight_offset_ * g, top_descs_[i], top_diff + top_offset_ * g, conv_descs_[i], + bwd_data_algo_[i], workspace[2*this->group_ + g], + workspace_bwd_data_sizes_[i], cudnn::dataType::zero, bottom_descs_[i], bottom_diff + bottom_offset_ * g)); } diff --git a/src/caffe/layers/cudnn_lcn_layer.cpp b/src/caffe/layers/cudnn_lcn_layer.cpp new file mode 100644 index 00000000..866d810b --- /dev/null +++ b/src/caffe/layers/cudnn_lcn_layer.cpp @@ -0,0 +1,77 @@ +#ifdef USE_CUDNN +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void CuDNNLCNLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + LRNLayer::LayerSetUp(bottom, top); + + CUDNN_CHECK(cudnnCreate(&handle_)); + CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_)); + cudnn::createTensor4dDesc(&bottom_desc_); + cudnn::createTensor4dDesc(&top_desc_); + + // create a LRN handle + handles_setup_ = true; + + size_ = this->layer_param().lrn_param().local_size(); + pre_pad_ = (size_ - 1) / 2; + alpha_ = this->layer_param().lrn_param().alpha(); + beta_ = this->layer_param().lrn_param().beta(); + k_ = this->layer_param().lrn_param().k(); +} + +template +void CuDNNLCNLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + LRNLayer::Reshape(bottom, top); + cudnn::setTensor4dDesc(&bottom_desc_, bottom[0]->num(), + this->channels_, this->height_, this->width_); + cudnn::setTensor4dDesc(&top_desc_, bottom[0]->num(), + this->channels_, this->height_, this->width_); + 
CUDNN_CHECK(cudnnSetLRNDescriptor(norm_desc_, size_, alpha_, beta_, k_)); + + // allocate / reallocate tempData buffers + size_t totalSizeInBytes = sizeof(Dtype)*bottom[0]->num()* \ + this->channels_*this->height_*this->width_; + + if (totalSizeInBytes > tempDataSize) { + tempDataSize = totalSizeInBytes; + + cudaFree(tempData1); + cudaFree(tempData2); + + // allocate new buffers + CUDA_CHECK(cudaMalloc(&tempData1, totalSizeInBytes)); + CUDA_CHECK(cudaMalloc(&tempData2, totalSizeInBytes)); + } +} + +template +CuDNNLCNLayer::~CuDNNLCNLayer() { + // Check that handles have been setup before destroying. + if (!handles_setup_) { return; } + + cudnnDestroyTensorDescriptor(bottom_desc_); + cudnnDestroyTensorDescriptor(top_desc_); + + // destroy LRN handle + cudnnDestroy(handle_); + + // free temp buffers + cudaFree(tempData1); + cudaFree(tempData2); +} + +INSTANTIATE_CLASS(CuDNNLCNLayer); + +} // namespace caffe +#endif diff --git a/src/caffe/layers/cudnn_lcn_layer.cu b/src/caffe/layers/cudnn_lcn_layer.cu new file mode 100644 index 00000000..c07ade72 --- /dev/null +++ b/src/caffe/layers/cudnn_lcn_layer.cu @@ -0,0 +1,50 @@ +#ifdef USE_CUDNN +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void CuDNNLCNLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + + CUDNN_CHECK(cudnnDivisiveNormalizationForward( + handle_, norm_desc_, CUDNN_DIVNORM_PRECOMPUTED_MEANS, + cudnn::dataType::one, + bottom_desc_, bottom_data, + NULL, // srcMeansData + this->tempData1, this->tempData2, + cudnn::dataType::zero, + top_desc_, top_data) ); +} + +template +void CuDNNLCNLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* top_data = top[0]->gpu_data(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + CUDNN_CHECK(cudnnDivisiveNormalizationBackward( + handle_, norm_desc_, CUDNN_DIVNORM_PRECOMPUTED_MEANS, + cudnn::dataType::one, + bottom_desc_, bottom_data, + NULL, top_diff, // NULL - srcMeansData + this->tempData1, this->tempData2, + cudnn::dataType::zero, + bottom_desc_, bottom_diff, + NULL) ); +} + +INSTANTIATE_LAYER_GPU_FUNCS(CuDNNLCNLayer); + +} // namespace caffe +#endif diff --git a/src/caffe/layers/cudnn_lrn_layer.cpp b/src/caffe/layers/cudnn_lrn_layer.cpp new file mode 100644 index 00000000..6e992149 --- /dev/null +++ b/src/caffe/layers/cudnn_lrn_layer.cpp @@ -0,0 +1,57 @@ +#ifdef USE_CUDNN +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void CuDNNLRNLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + LRNLayer::LayerSetUp(bottom, top); + + CUDNN_CHECK(cudnnCreate(&handle_)); + CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_)); + cudnn::createTensor4dDesc(&bottom_desc_); + cudnn::createTensor4dDesc(&top_desc_); + + // create a LRN handle + handles_setup_ = true; + + size_ = this->layer_param().lrn_param().local_size(); + alpha_ = this->layer_param().lrn_param().alpha(); + beta_ = this->layer_param().lrn_param().beta(); + k_ = this->layer_param().lrn_param().k(); +} + +template +void 
CuDNNLRNLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + LRNLayer::Reshape(bottom, top); + cudnn::setTensor4dDesc(&bottom_desc_, bottom[0]->num(), + this->channels_, this->height_, this->width_); + cudnn::setTensor4dDesc(&top_desc_, bottom[0]->num(), + this->channels_, this->height_, this->width_); + CUDNN_CHECK(cudnnSetLRNDescriptor(norm_desc_, size_, alpha_, beta_, k_)); +} + +template +CuDNNLRNLayer::~CuDNNLRNLayer() { + // Check that handles have been setup before destroying. + if (!handles_setup_) { return; } + + cudnnDestroyTensorDescriptor(bottom_desc_); + cudnnDestroyTensorDescriptor(top_desc_); + + // destroy LRN handle + cudnnDestroy(handle_); +} + +INSTANTIATE_CLASS(CuDNNLRNLayer); + +} // namespace caffe +#endif diff --git a/src/caffe/layers/cudnn_lrn_layer.cu b/src/caffe/layers/cudnn_lrn_layer.cu new file mode 100644 index 00000000..f9923033 --- /dev/null +++ b/src/caffe/layers/cudnn_lrn_layer.cu @@ -0,0 +1,48 @@ +#ifdef USE_CUDNN +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void CuDNNLRNLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + + CUDNN_CHECK(cudnnLRNCrossChannelForward( + handle_, norm_desc_, CUDNN_LRN_CROSS_CHANNEL_DIM1, + cudnn::dataType::one, + bottom_desc_, bottom_data, + cudnn::dataType::zero, + top_desc_, top_data) ); +} + +template +void CuDNNLRNLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* top_data = top[0]->gpu_data(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + CUDNN_CHECK(cudnnLRNCrossChannelBackward( + handle_, norm_desc_, CUDNN_LRN_CROSS_CHANNEL_DIM1, + cudnn::dataType::one, + top_desc_, top_data, + top_desc_, top_diff, + bottom_desc_, bottom_data, + cudnn::dataType::zero, + bottom_desc_, bottom_diff) ); +} + +INSTANTIATE_LAYER_GPU_FUNCS(CuDNNLRNLayer); + +}; // namespace caffe + +#endif diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 36c1ace4..d18a04ef 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -254,6 +254,5 @@ STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward); #endif INSTANTIATE_CLASS(LRNLayer); -REGISTER_LAYER_CLASS(LRN); } // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f52c941b..af01b472 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -721,6 +721,12 @@ message LRNParameter { } optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; optional float k = 5 [default = 1.]; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = DEFAULT]; } message MemoryDataParameter { diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp index c4e2f8ea..78cf2d9d 100644 --- a/src/caffe/test/test_lrn_layer.cpp +++ b/src/caffe/test/test_lrn_layer.cpp @@ -246,5 +246,201 @@ TYPED_TEST(LRNLayerTest, TestGradientWithinChannel) { this->blob_top_vec_); } +#ifdef USE_CUDNN +template +class CuDNNLRNLayerTest : public GPUDeviceTest { + protected: + CuDNNLRNLayerTest() + : epsilon_(Dtype(1e-5)), + blob_bottom_(new Blob()), + blob_top_(new Blob()) {} + virtual void SetUp() { 
+ Caffe::set_random_seed(1701); + blob_bottom_->Reshape(2, 7, 3, 3); + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~CuDNNLRNLayerTest() { delete blob_bottom_; delete blob_top_; } + void ReferenceLRNForward(const Blob& blob_bottom, + const LayerParameter& layer_param, Blob* blob_top); + + Dtype epsilon_; + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +template +void CuDNNLRNLayerTest::ReferenceLRNForward( + const Blob& blob_bottom, const LayerParameter& layer_param, + Blob* blob_top) { + typedef TypeParam Dtype; + blob_top->Reshape(blob_bottom.num(), blob_bottom.channels(), + blob_bottom.height(), blob_bottom.width()); + Dtype* top_data = blob_top->mutable_cpu_data(); + LRNParameter lrn_param = layer_param.lrn_param(); + Dtype alpha = lrn_param.alpha(); + Dtype beta = lrn_param.beta(); + int size = lrn_param.local_size(); + switch (lrn_param.norm_region()) { + case LRNParameter_NormRegion_ACROSS_CHANNELS: + for (int n = 0; n < blob_bottom.num(); ++n) { + for (int c = 0; c < blob_bottom.channels(); ++c) { + for (int h = 0; h < blob_bottom.height(); ++h) { + for (int w = 0; w < blob_bottom.width(); ++w) { + int c_start = c - (size - 1) / 2; + int c_end = min(c_start + size, blob_bottom.channels()); + c_start = max(c_start, 0); + Dtype scale = 1.; + for (int i = c_start; i < c_end; ++i) { + Dtype value = blob_bottom.data_at(n, i, h, w); + scale += value * value * alpha / size; + } + *(top_data + blob_top->offset(n, c, h, w)) = + blob_bottom.data_at(n, c, h, w) / pow(scale, beta); + } + } + } + } + break; + case LRNParameter_NormRegion_WITHIN_CHANNEL: + for (int n = 0; n < blob_bottom.num(); ++n) { + for (int c = 0; c < blob_bottom.channels(); ++c) { + for (int h = 0; h < blob_bottom.height(); ++h) { + int h_start = h - (size - 1) / 2; + int h_end = min(h_start + size, blob_bottom.height()); + h_start = max(h_start, 0); + for (int w = 0; w < blob_bottom.width(); ++w) { + Dtype scale = 1.; + int w_start = w - (size - 1) / 2; + int w_end = min(w_start + size, blob_bottom.width()); + w_start = max(w_start, 0); + for (int nh = h_start; nh < h_end; ++nh) { + for (int nw = w_start; nw < w_end; ++nw) { + Dtype value = blob_bottom.data_at(n, c, nh, nw); + scale += value * value * alpha / (size * size); + } + } + *(top_data + blob_top->offset(n, c, h, w)) = + blob_bottom.data_at(n, c, h, w) / pow(scale, beta); + } + } + } + } + break; + default: + LOG(FATAL) << "Unknown normalization region."; + } +} + +TYPED_TEST_CASE(CuDNNLRNLayerTest, TestDtypes); + +TYPED_TEST(CuDNNLRNLayerTest, TestForwardAcrossChannelsCuDNN) { + // typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CuDNNLRNLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + Blob top_reference; + this->ReferenceLRNForward(*(this->blob_bottom_), layer_param, + &top_reference); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i], + this->epsilon_); + } +} + +TYPED_TEST(CuDNNLRNLayerTest, TestForwardAcrossChannelsLargeRegionCuDNN) { + typedef TypeParam Dtype; + LayerParameter layer_param; + layer_param.mutable_lrn_param()->set_local_size(15); + CuDNNLRNLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, 
this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + Blob top_reference; + this->ReferenceLRNForward(*(this->blob_bottom_), layer_param, + &top_reference); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i], + this->epsilon_); + } +} + +TYPED_TEST(CuDNNLRNLayerTest, TestGradientAcrossChannelsCuDNN) { + typedef TypeParam Dtype; + LayerParameter layer_param; + CuDNNLRNLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_top_->count(); ++i) { + this->blob_top_->mutable_cpu_diff()[i] = 1.; + } + vector propagate_down(this->blob_bottom_vec_.size(), true); + layer.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(CuDNNLRNLayerTest, TestForwardWithinChannel) { + typedef TypeParam Dtype; + LayerParameter layer_param; + layer_param.mutable_lrn_param()->set_norm_region( + LRNParameter_NormRegion_WITHIN_CHANNEL); + layer_param.mutable_lrn_param()->set_local_size(3); + CuDNNLCNLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + Blob top_reference; + this->ReferenceLRNForward(*(this->blob_bottom_), layer_param, + &top_reference); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i], + this->epsilon_); + } +} + +TYPED_TEST(CuDNNLRNLayerTest, TestGradientWithinChannel) { + typedef TypeParam Dtype; + LayerParameter layer_param; + layer_param.mutable_lrn_param()->set_norm_region( + LRNParameter_NormRegion_WITHIN_CHANNEL); + layer_param.mutable_lrn_param()->set_local_size(3); + CuDNNLCNLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_top_->count(); ++i) { + this->blob_top_->mutable_cpu_diff()[i] = 1.; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(CuDNNLRNLayerTest, TestGradientAcrossChannelsLargeRegionCuDNN) { + typedef TypeParam Dtype; + LayerParameter layer_param; + layer_param.mutable_lrn_param()->set_local_size(15); + CuDNNLRNLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_top_->count(); ++i) { + this->blob_top_->mutable_cpu_diff()[i] = 1.; + } + vector propagate_down(this->blob_bottom_vec_.size(), true); + layer.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +#endif } // namespace caffe From 1e75fb922f968a92071232c7e6b3332475141d47 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 16 Oct 2015 16:33:06 -0700 Subject: [PATCH 134/223] rearrange upgrade helpers order from general helpers to specific upgrades in chronological order. 
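With the general-purpose entry points grouped at the top of the header, the usual call path is easier to follow. A minimal sketch of that path (the file name and the use of main() are illustrative, not part of this patch):

    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/upgrade_proto.hpp"

    int main() {
      caffe::NetParameter net_param;
      // Parses the prototxt, then runs UpgradeNetAsNeeded() to rewrite any
      // deprecated V0/V1-era fields in place before the net is constructed.
      caffe::ReadNetParamsFromTextFileOrDie("deploy.prototxt", &net_param);
      return 0;
    }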
--- include/caffe/util/upgrade_proto.hpp | 18 ++-- src/caffe/util/upgrade_proto.cpp | 122 +++++++++++++-------------- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/include/caffe/util/upgrade_proto.hpp b/include/caffe/util/upgrade_proto.hpp index c1f21a0d..6a141843 100644 --- a/include/caffe/util/upgrade_proto.hpp +++ b/include/caffe/util/upgrade_proto.hpp @@ -10,6 +10,15 @@ namespace caffe { // Return true iff the net is not the current version. bool NetNeedsUpgrade(const NetParameter& net_param); +// Check for deprecations and upgrade the NetParameter as needed. +bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param); + +// Read parameters from a file into a NetParameter proto message. +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param); +void ReadNetParamsFromBinaryFileOrDie(const string& param_file, + NetParameter* param); + // Return true iff any layer contains parameters specified using // deprecated V0LayerParameter. bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param); @@ -50,15 +59,6 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type); -// Check for deprecations and upgrade the NetParameter as needed. -bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param); - -// Read parameters from a file into a NetParameter proto message. -void ReadNetParamsFromTextFileOrDie(const string& param_file, - NetParameter* param); -void ReadNetParamsFromBinaryFileOrDie(const string& param_file, - NetParameter* param); - } // namespace caffe #endif // CAFFE_UTIL_UPGRADE_PROTO_H_ diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index ac379e50..6eae9fec 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -16,6 +16,67 @@ bool NetNeedsUpgrade(const NetParameter& net_param) { return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param); } +bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { + bool success = true; + if (NetNeedsV0ToV1Upgrade(*param)) { + // NetParameter was specified using the old style (V0LayerParameter); try to + // upgrade it. + LOG(INFO) << "Attempting to upgrade input file specified using deprecated " + << "V0LayerParameter: " << param_file; + NetParameter original_param(*param); + if (!UpgradeV0Net(original_param, param)) { + success = false; + LOG(ERROR) << "Warning: had one or more problems upgrading " + << "V0NetParameter to NetParameter (see above); continuing anyway."; + } else { + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "V0LayerParameter"; + } + LOG(WARNING) << "Note that future Caffe releases will not support " + << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for " + << "prototxt and ./build/tools/upgrade_net_proto_binary for model " + << "weights upgrade this and any other net protos to the new format."; + } + // NetParameter uses old style data transformation fields; try to upgrade it. 
+ if (NetNeedsDataUpgrade(*param)) { + LOG(INFO) << "Attempting to upgrade input file specified using deprecated " + << "transformation parameters: " << param_file; + UpgradeNetDataTransformation(param); + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "data transformation parameters."; + LOG(WARNING) << "Note that future Caffe releases will only support " + << "transform_param messages for transformation fields."; + } + if (NetNeedsV1ToV2Upgrade(*param)) { + LOG(INFO) << "Attempting to upgrade input file specified using deprecated " + << "V1LayerParameter: " << param_file; + NetParameter original_param(*param); + if (!UpgradeV1Net(original_param, param)) { + success = false; + LOG(ERROR) << "Warning: had one or more problems upgrading " + << "V1LayerParameter (see above); continuing anyway."; + } else { + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "V1LayerParameter"; + } + } + return success; +} + +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param) { + CHECK(ReadProtoFromTextFile(param_file, param)) + << "Failed to parse NetParameter file: " << param_file; + UpgradeNetAsNeeded(param_file, param); +} + +void ReadNetParamsFromBinaryFileOrDie(const string& param_file, + NetParameter* param) { + CHECK(ReadProtoFromBinaryFile(param_file, param)) + << "Failed to parse NetParameter file: " << param_file; + UpgradeNetAsNeeded(param_file, param); +} + bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) { for (int i = 0; i < net_param.layers_size(); ++i) { if (net_param.layers(i).has_layer()) { @@ -583,53 +644,6 @@ void UpgradeNetDataTransformation(NetParameter* net_param) { } } -bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { - bool success = true; - if (NetNeedsV0ToV1Upgrade(*param)) { - // NetParameter was specified using the old style (V0LayerParameter); try to - // upgrade it. - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "V0LayerParameter: " << param_file; - NetParameter original_param(*param); - if (!UpgradeV0Net(original_param, param)) { - success = false; - LOG(ERROR) << "Warning: had one or more problems upgrading " - << "V0NetParameter to NetParameter (see above); continuing anyway."; - } else { - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "V0LayerParameter"; - } - LOG(WARNING) << "Note that future Caffe releases will not support " - << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for " - << "prototxt and ./build/tools/upgrade_net_proto_binary for model " - << "weights upgrade this and any other net protos to the new format."; - } - // NetParameter uses old style data transformation fields; try to upgrade it. 
- if (NetNeedsDataUpgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "transformation parameters: " << param_file; - UpgradeNetDataTransformation(param); - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "data transformation parameters."; - LOG(WARNING) << "Note that future Caffe releases will only support " - << "transform_param messages for transformation fields."; - } - if (NetNeedsV1ToV2Upgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "V1LayerParameter: " << param_file; - NetParameter original_param(*param); - if (!UpgradeV1Net(original_param, param)) { - success = false; - LOG(ERROR) << "Warning: had one or more problems upgrading " - << "V1LayerParameter (see above); continuing anyway."; - } else { - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "V1LayerParameter"; - } - } - return success; -} - bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { bool is_fully_compatible = true; if (v1_net_param.layer_size() > 0) { @@ -923,18 +937,4 @@ const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) { } } -void ReadNetParamsFromTextFileOrDie(const string& param_file, - NetParameter* param) { - CHECK(ReadProtoFromTextFile(param_file, param)) - << "Failed to parse NetParameter file: " << param_file; - UpgradeNetAsNeeded(param_file, param); -} - -void ReadNetParamsFromBinaryFileOrDie(const string& param_file, - NetParameter* param) { - CHECK(ReadProtoFromBinaryFile(param_file, param)) - << "Failed to parse NetParameter file: " << param_file; - UpgradeNetAsNeeded(param_file, param); -} - } // namespace caffe From e5a74b282efb2293a05d91635a5b26837adc2aa3 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Fri, 16 Oct 2015 21:19:59 -0700 Subject: [PATCH 135/223] Test reading and writing mean proto in matlab --- matlab/+caffe/+test/test_io.m | 18 ++++++++++++++++++ matlab/+caffe/run_tests.m | 3 ++- 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 matlab/+caffe/+test/test_io.m diff --git a/matlab/+caffe/+test/test_io.m b/matlab/+caffe/+test/test_io.m new file mode 100644 index 00000000..2c34bd1e --- /dev/null +++ b/matlab/+caffe/+test/test_io.m @@ -0,0 +1,18 @@ +classdef test_io < matlab.unittest.TestCase + methods (Test) + function test_read_write_mean(self) + % randomly generate mean data + width = 200; + height = 300; + channels = 3; + mean_data_write = 255 * rand(width, height, channels, 'single'); + % write mean data to binary proto + mean_proto_file = tempname(); + caffe.io.write_mean(mean_data_write, mean_proto_file); + % read mean data from saved binary proto and test whether they are equal + mean_data_read = caffe.io.read_mean(mean_proto_file); + self.verifyEqual(mean_data_write, mean_data_read) + delete(mean_proto_file); + end + end +end diff --git a/matlab/+caffe/run_tests.m b/matlab/+caffe/run_tests.m index 93896855..6dbf6b23 100644 --- a/matlab/+caffe/run_tests.m +++ b/matlab/+caffe/run_tests.m @@ -11,7 +11,8 @@ % put all test cases here results = [... run(caffe.test.test_net) ... - run(caffe.test.test_solver) ]; + run(caffe.test.test_solver) ... 
+ run(caffe.test.test_io) ]; % reset caffe after testing caffe.reset_all(); From b822a702d19d4fbebbc91198a991f91c34e60650 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Thu, 24 Sep 2015 17:11:07 -0700 Subject: [PATCH 136/223] Split solver code into one file per solver class --- include/caffe/sgd_solvers.hpp | 142 +++ include/caffe/solver.hpp | 158 +--- python/caffe/_caffe.cpp | 1 + src/caffe/solver.cpp | 811 ------------------ src/caffe/solver_factory.cpp | 32 + src/caffe/solvers/adadelta_solver.cpp | 155 ++++ src/caffe/solvers/adagrad_solver.cpp | 88 ++ src/caffe/solvers/adam_solver.cpp | 112 +++ src/caffe/solvers/nesterov_solver.cpp | 70 ++ src/caffe/solvers/rmsprop_solver.cpp | 84 ++ src/caffe/solvers/sgd_solver.cpp | 347 ++++++++ src/caffe/test/test_gradient_based_solver.cpp | 2 +- src/caffe/test/test_solver.cpp | 1 + 13 files changed, 1038 insertions(+), 965 deletions(-) create mode 100644 include/caffe/sgd_solvers.hpp create mode 100644 src/caffe/solver_factory.cpp create mode 100644 src/caffe/solvers/adadelta_solver.cpp create mode 100644 src/caffe/solvers/adagrad_solver.cpp create mode 100644 src/caffe/solvers/adam_solver.cpp create mode 100644 src/caffe/solvers/nesterov_solver.cpp create mode 100644 src/caffe/solvers/rmsprop_solver.cpp create mode 100644 src/caffe/solvers/sgd_solver.cpp diff --git a/include/caffe/sgd_solvers.hpp b/include/caffe/sgd_solvers.hpp new file mode 100644 index 00000000..6bf1d70c --- /dev/null +++ b/include/caffe/sgd_solvers.hpp @@ -0,0 +1,142 @@ +#ifndef CAFFE_SGD_SOLVERS_HPP_ +#define CAFFE_SGD_SOLVERS_HPP_ + +#include +#include + +#include "caffe/solver.hpp" + +namespace caffe { + +/** + * @brief Optimizes the parameters of a Net using + * stochastic gradient descent (SGD) with momentum. + */ +template +class SGDSolver : public Solver { + public: + explicit SGDSolver(const SolverParameter& param) + : Solver(param) { PreSolve(); } + explicit SGDSolver(const string& param_file) + : Solver(param_file) { PreSolve(); } + + const vector > >& history() { return history_; } + + protected: + void PreSolve(); + Dtype GetLearningRate(); + virtual void ApplyUpdate(); + virtual void Normalize(int param_id); + virtual void Regularize(int param_id); + virtual void ComputeUpdateValue(int param_id, Dtype rate); + virtual void ClipGradients(); + virtual void SnapshotSolverState(const string& model_filename); + virtual void SnapshotSolverStateToBinaryProto(const string& model_filename); + virtual void SnapshotSolverStateToHDF5(const string& model_filename); + virtual void RestoreSolverStateFromHDF5(const string& state_file); + virtual void RestoreSolverStateFromBinaryProto(const string& state_file); + // history maintains the historical momentum data. + // update maintains update related data and is not needed in snapshots. 
+ // temp maintains other information that might be needed in computation + // of gradients/updates and is not needed in snapshots + vector > > history_, update_, temp_; + + DISABLE_COPY_AND_ASSIGN(SGDSolver); +}; + +template +class NesterovSolver : public SGDSolver { + public: + explicit NesterovSolver(const SolverParameter& param) + : SGDSolver(param) {} + explicit NesterovSolver(const string& param_file) + : SGDSolver(param_file) {} + + protected: + virtual void ComputeUpdateValue(int param_id, Dtype rate); + + DISABLE_COPY_AND_ASSIGN(NesterovSolver); +}; + +template +class AdaGradSolver : public SGDSolver { + public: + explicit AdaGradSolver(const SolverParameter& param) + : SGDSolver(param) { constructor_sanity_check(); } + explicit AdaGradSolver(const string& param_file) + : SGDSolver(param_file) { constructor_sanity_check(); } + + protected: + virtual void ComputeUpdateValue(int param_id, Dtype rate); + void constructor_sanity_check() { + CHECK_EQ(0, this->param_.momentum()) + << "Momentum cannot be used with AdaGrad."; + } + + DISABLE_COPY_AND_ASSIGN(AdaGradSolver); +}; + + +template +class RMSPropSolver : public SGDSolver { + public: + explicit RMSPropSolver(const SolverParameter& param) + : SGDSolver(param) { constructor_sanity_check(); } + explicit RMSPropSolver(const string& param_file) + : SGDSolver(param_file) { constructor_sanity_check(); } + + protected: + virtual void ComputeUpdateValue(int param_id, Dtype rate); + void constructor_sanity_check() { + CHECK_EQ(0, this->param_.momentum()) + << "Momentum cannot be used with RMSProp."; + CHECK_GE(this->param_.rms_decay(), 0) + << "rms_decay should lie between 0 and 1."; + CHECK_LT(this->param_.rms_decay(), 1) + << "rms_decay should lie between 0 and 1."; + } + + DISABLE_COPY_AND_ASSIGN(RMSPropSolver); +}; + +template +class AdaDeltaSolver : public SGDSolver { + public: + explicit AdaDeltaSolver(const SolverParameter& param) + : SGDSolver(param) { AdaDeltaPreSolve(); } + explicit AdaDeltaSolver(const string& param_file) + : SGDSolver(param_file) { AdaDeltaPreSolve(); } + + protected: + void AdaDeltaPreSolve(); + virtual void ComputeUpdateValue(int param_id, Dtype rate); + + DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); +}; + +/** + * @brief AdamSolver, an algorithm for first-order gradient-based optimization + * of stochastic objective functions, based on adaptive estimates of + * lower-order moments. Described in [1]. + * + * [1] D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization." + * arXiv preprint arXiv:1412.6980v8 (2014). + */ +template +class AdamSolver : public SGDSolver { + public: + explicit AdamSolver(const SolverParameter& param) + : SGDSolver(param) { AdamPreSolve();} + explicit AdamSolver(const string& param_file) + : SGDSolver(param_file) { AdamPreSolve(); } + + protected: + void AdamPreSolve(); + virtual void ComputeUpdateValue(int param_id, Dtype rate); + + DISABLE_COPY_AND_ASSIGN(AdamSolver); +}; + +} // namespace caffe + +#endif // CAFFE_SGD_SOLVERS_HPP_ diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 2ecf539b..a045ccf2 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -1,5 +1,5 @@ -#ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_ -#define CAFFE_OPTIMIZATION_SOLVER_HPP_ +#ifndef CAFFE_SOLVER_HPP_ +#define CAFFE_SOLVER_HPP_ #include #include #include @@ -148,158 +148,10 @@ class WorkerSolver : public Solver { } }; -/** - * @brief Optimizes the parameters of a Net using - * stochastic gradient descent (SGD) with momentum. 
- */ -template -class SGDSolver : public Solver { - public: - explicit SGDSolver(const SolverParameter& param) - : Solver(param) { PreSolve(); } - explicit SGDSolver(const string& param_file) - : Solver(param_file) { PreSolve(); } - - const vector > >& history() { return history_; } - - protected: - void PreSolve(); - Dtype GetLearningRate(); - virtual void ApplyUpdate(); - virtual void Normalize(int param_id); - virtual void Regularize(int param_id); - virtual void ComputeUpdateValue(int param_id, Dtype rate); - virtual void ClipGradients(); - virtual void SnapshotSolverState(const string& model_filename); - virtual void SnapshotSolverStateToBinaryProto(const string& model_filename); - virtual void SnapshotSolverStateToHDF5(const string& model_filename); - virtual void RestoreSolverStateFromHDF5(const string& state_file); - virtual void RestoreSolverStateFromBinaryProto(const string& state_file); - // history maintains the historical momentum data. - // update maintains update related data and is not needed in snapshots. - // temp maintains other information that might be needed in computation - // of gradients/updates and is not needed in snapshots - vector > > history_, update_, temp_; - - DISABLE_COPY_AND_ASSIGN(SGDSolver); -}; - +// The solver factory function template -class NesterovSolver : public SGDSolver { - public: - explicit NesterovSolver(const SolverParameter& param) - : SGDSolver(param) {} - explicit NesterovSolver(const string& param_file) - : SGDSolver(param_file) {} - - protected: - virtual void ComputeUpdateValue(int param_id, Dtype rate); - - DISABLE_COPY_AND_ASSIGN(NesterovSolver); -}; - -template -class AdaGradSolver : public SGDSolver { - public: - explicit AdaGradSolver(const SolverParameter& param) - : SGDSolver(param) { constructor_sanity_check(); } - explicit AdaGradSolver(const string& param_file) - : SGDSolver(param_file) { constructor_sanity_check(); } - - protected: - virtual void ComputeUpdateValue(int param_id, Dtype rate); - void constructor_sanity_check() { - CHECK_EQ(0, this->param_.momentum()) - << "Momentum cannot be used with AdaGrad."; - } - - DISABLE_COPY_AND_ASSIGN(AdaGradSolver); -}; - - -template -class RMSPropSolver : public SGDSolver { - public: - explicit RMSPropSolver(const SolverParameter& param) - : SGDSolver(param) { constructor_sanity_check(); } - explicit RMSPropSolver(const string& param_file) - : SGDSolver(param_file) { constructor_sanity_check(); } - - protected: - virtual void ComputeUpdateValue(int param_id, Dtype rate); - void constructor_sanity_check() { - CHECK_EQ(0, this->param_.momentum()) - << "Momentum cannot be used with RMSProp."; - CHECK_GE(this->param_.rms_decay(), 0) - << "rms_decay should lie between 0 and 1."; - CHECK_LT(this->param_.rms_decay(), 1) - << "rms_decay should lie between 0 and 1."; - } - - DISABLE_COPY_AND_ASSIGN(RMSPropSolver); -}; - -template -class AdaDeltaSolver : public SGDSolver { - public: - explicit AdaDeltaSolver(const SolverParameter& param) - : SGDSolver(param) { AdaDeltaPreSolve(); } - explicit AdaDeltaSolver(const string& param_file) - : SGDSolver(param_file) { AdaDeltaPreSolve(); } - - protected: - void AdaDeltaPreSolve(); - virtual void ComputeUpdateValue(int param_id, Dtype rate); - - DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver); -}; - -/** - * @brief AdamSolver, an algorithm for first-order gradient-based optimization - * of stochastic objective functions, based on adaptive estimates of - * lower-order moments. Described in [1]. - * - * [1] D. P. Kingma and J. L. 
Ba, "ADAM: A Method for Stochastic Optimization." - * arXiv preprint arXiv:1412.6980v8 (2014). - */ -template -class AdamSolver : public SGDSolver { - public: - explicit AdamSolver(const SolverParameter& param) - : SGDSolver(param) { AdamPreSolve();} - explicit AdamSolver(const string& param_file) - : SGDSolver(param_file) { AdamPreSolve(); } - - protected: - void AdamPreSolve(); - virtual void ComputeUpdateValue(int param_id, Dtype rate); - - DISABLE_COPY_AND_ASSIGN(AdamSolver); -}; - -template -Solver* GetSolver(const SolverParameter& param) { - SolverParameter_SolverType type = param.solver_type(); - - switch (type) { - case SolverParameter_SolverType_SGD: - return new SGDSolver(param); - case SolverParameter_SolverType_NESTEROV: - return new NesterovSolver(param); - case SolverParameter_SolverType_ADAGRAD: - return new AdaGradSolver(param); - case SolverParameter_SolverType_RMSPROP: - return new RMSPropSolver(param); - case SolverParameter_SolverType_ADADELTA: - return new AdaDeltaSolver(param); - case SolverParameter_SolverType_ADAM: - return new AdamSolver(param); - default: - LOG(FATAL) << "Unknown SolverType: " << type; - } - return (Solver*) NULL; -} +Solver* GetSolver(const SolverParameter& param); } // namespace caffe -#endif // CAFFE_OPTIMIZATION_SOLVER_HPP_ +#endif // CAFFE_SOLVER_HPP_ diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index ccd5776a..0e38dee7 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -16,6 +16,7 @@ #include "caffe/caffe.hpp" #include "caffe/python_layer.hpp" +#include "caffe/sgd_solvers.hpp" // Temporary solution for numpy < 1.7 versions: old macro, no promises. // You're strongly advised to upgrade to >= 1.7. diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 12c13dd8..016a0288 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -1,18 +1,11 @@ #include -#include #include #include -#include "hdf5.h" -#include "hdf5_hl.h" - -#include "caffe/net.hpp" -#include "caffe/proto/caffe.pb.h" #include "caffe/solver.hpp" #include "caffe/util/hdf5.hpp" #include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/util/upgrade_proto.hpp" namespace caffe { @@ -492,810 +485,6 @@ void Solver::Restore(const char* state_file) { } } -// Return the current learning rate. The currently implemented learning rate -// policies are as follows: -// - fixed: always return base_lr. -// - step: return base_lr * gamma ^ (floor(iter / step)) -// - exp: return base_lr * gamma ^ iter -// - inv: return base_lr * (1 + gamma * iter) ^ (- power) -// - multistep: similar to step but it allows non uniform steps defined by -// stepvalue -// - poly: the effective learning rate follows a polynomial decay, to be -// zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) -// - sigmoid: the effective learning rate follows a sigmod decay -// return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) -// -// where base_lr, max_iter, gamma, step, stepvalue and power are defined -// in the solver parameter protocol buffer, and iter is the current iteration. 
-template -Dtype SGDSolver::GetLearningRate() { - Dtype rate; - const string& lr_policy = this->param_.lr_policy(); - if (lr_policy == "fixed") { - rate = this->param_.base_lr(); - } else if (lr_policy == "step") { - this->current_step_ = this->iter_ / this->param_.stepsize(); - rate = this->param_.base_lr() * - pow(this->param_.gamma(), this->current_step_); - } else if (lr_policy == "exp") { - rate = this->param_.base_lr() * pow(this->param_.gamma(), this->iter_); - } else if (lr_policy == "inv") { - rate = this->param_.base_lr() * - pow(Dtype(1) + this->param_.gamma() * this->iter_, - - this->param_.power()); - } else if (lr_policy == "multistep") { - if (this->current_step_ < this->param_.stepvalue_size() && - this->iter_ >= this->param_.stepvalue(this->current_step_)) { - this->current_step_++; - LOG(INFO) << "MultiStep Status: Iteration " << - this->iter_ << ", step = " << this->current_step_; - } - rate = this->param_.base_lr() * - pow(this->param_.gamma(), this->current_step_); - } else if (lr_policy == "poly") { - rate = this->param_.base_lr() * pow(Dtype(1.) - - (Dtype(this->iter_) / Dtype(this->param_.max_iter())), - this->param_.power()); - } else if (lr_policy == "sigmoid") { - rate = this->param_.base_lr() * (Dtype(1.) / - (Dtype(1.) + exp(-this->param_.gamma() * (Dtype(this->iter_) - - Dtype(this->param_.stepsize()))))); - } else { - LOG(FATAL) << "Unknown learning rate policy: " << lr_policy; - } - return rate; -} - -template -void SGDSolver::PreSolve() { - // Initialize the history - const vector*>& net_params = this->net_->learnable_params(); - history_.clear(); - update_.clear(); - temp_.clear(); - for (int i = 0; i < net_params.size(); ++i) { - const vector& shape = net_params[i]->shape(); - history_.push_back(shared_ptr >(new Blob(shape))); - update_.push_back(shared_ptr >(new Blob(shape))); - temp_.push_back(shared_ptr >(new Blob(shape))); - } -} - -template -void SGDSolver::ClipGradients() { - const Dtype clip_gradients = this->param_.clip_gradients(); - if (clip_gradients < 0) { return; } - const vector*>& net_params = this->net_->learnable_params(); - Dtype sumsq_diff = 0; - for (int i = 0; i < net_params.size(); ++i) { - sumsq_diff += net_params[i]->sumsq_diff(); - } - const Dtype l2norm_diff = std::sqrt(sumsq_diff); - if (l2norm_diff > clip_gradients) { - Dtype scale_factor = clip_gradients / l2norm_diff; - LOG(INFO) << "Gradient clipping: scaling down gradients (L2 norm " - << l2norm_diff << " > " << clip_gradients << ") " - << "by scale factor " << scale_factor; - for (int i = 0; i < net_params.size(); ++i) { - net_params[i]->scale_diff(scale_factor); - } - } -} - -template -void SGDSolver::ApplyUpdate() { - CHECK(Caffe::root_solver()); - Dtype rate = GetLearningRate(); - if (this->param_.display() && this->iter_ % this->param_.display() == 0) { - LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; - } - ClipGradients(); - for (int param_id = 0; param_id < this->net_->learnable_params().size(); - ++param_id) { - Normalize(param_id); - Regularize(param_id); - ComputeUpdateValue(param_id, rate); - } - this->net_->Update(); -} - -template -void SGDSolver::Normalize(int param_id) { - if (this->param_.iter_size() == 1) { return; } - // Scale gradient to counterbalance accumulation. - const vector*>& net_params = this->net_->learnable_params(); - const Dtype accum_normalization = Dtype(1.) 
/ this->param_.iter_size(); - switch (Caffe::mode()) { - case Caffe::CPU: { - caffe_scal(net_params[param_id]->count(), accum_normalization, - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - caffe_gpu_scal(net_params[param_id]->count(), accum_normalization, - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void SGDSolver::Regularize(int param_id) { - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_weight_decay = - this->net_->params_weight_decay(); - Dtype weight_decay = this->param_.weight_decay(); - string regularization_type = this->param_.regularization_type(); - Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; - switch (Caffe::mode()) { - case Caffe::CPU: { - if (local_decay) { - if (regularization_type == "L2") { - // add weight decay - caffe_axpy(net_params[param_id]->count(), - local_decay, - net_params[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - } else if (regularization_type == "L1") { - caffe_cpu_sign(net_params[param_id]->count(), - net_params[param_id]->cpu_data(), - temp_[param_id]->mutable_cpu_data()); - caffe_axpy(net_params[param_id]->count(), - local_decay, - temp_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - } else { - LOG(FATAL) << "Unknown regularization type: " << regularization_type; - } - } - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - if (local_decay) { - if (regularization_type == "L2") { - // add weight decay - caffe_gpu_axpy(net_params[param_id]->count(), - local_decay, - net_params[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - } else if (regularization_type == "L1") { - caffe_gpu_sign(net_params[param_id]->count(), - net_params[param_id]->gpu_data(), - temp_[param_id]->mutable_gpu_data()); - caffe_gpu_axpy(net_params[param_id]->count(), - local_decay, - temp_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - } else { - LOG(FATAL) << "Unknown regularization type: " << regularization_type; - } - } -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void SGDSolver::ComputeUpdateValue(int param_id, Dtype rate) { - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - Dtype momentum = this->param_.momentum(); - Dtype local_rate = rate * net_params_lr[param_id]; - // Compute the update to history, then copy it to the parameter diff. 
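  // In formula form (matching the axpby/copy calls below):
  //   history = momentum * history + local_rate * diff;  diff = history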
- switch (Caffe::mode()) { - case Caffe::CPU: { - caffe_cpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->cpu_diff(), momentum, - history_[param_id]->mutable_cpu_data()); - caffe_copy(net_params[param_id]->count(), - history_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), momentum, - history_[param_id]->mutable_gpu_data()); - caffe_copy(net_params[param_id]->count(), - history_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void SGDSolver::SnapshotSolverState(const string& model_filename) { - switch (this->param_.snapshot_format()) { - case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: - SnapshotSolverStateToBinaryProto(model_filename); - break; - case caffe::SolverParameter_SnapshotFormat_HDF5: - SnapshotSolverStateToHDF5(model_filename); - break; - default: - LOG(FATAL) << "Unsupported snapshot format."; - } -} - -template -void SGDSolver::SnapshotSolverStateToBinaryProto( - const string& model_filename) { - SolverState state; - state.set_iter(this->iter_); - state.set_learned_net(model_filename); - state.set_current_step(this->current_step_); - state.clear_history(); - for (int i = 0; i < history_.size(); ++i) { - // Add history - BlobProto* history_blob = state.add_history(); - history_[i]->ToProto(history_blob); - } - string snapshot_filename = Solver::SnapshotFilename(".solverstate"); - LOG(INFO) - << "Snapshotting solver state to binary proto file " << snapshot_filename; - WriteProtoToBinaryFile(state, snapshot_filename.c_str()); -} - -template -void SGDSolver::SnapshotSolverStateToHDF5( - const string& model_filename) { - string snapshot_filename = - Solver::SnapshotFilename(".solverstate.h5"); - LOG(INFO) << "Snapshotting solver state to HDF5 file " << snapshot_filename; - hid_t file_hid = H5Fcreate(snapshot_filename.c_str(), H5F_ACC_TRUNC, - H5P_DEFAULT, H5P_DEFAULT); - CHECK_GE(file_hid, 0) - << "Couldn't open " << snapshot_filename << " to save solver state."; - hdf5_save_int(file_hid, "iter", this->iter_); - hdf5_save_string(file_hid, "learned_net", model_filename); - hdf5_save_int(file_hid, "current_step", this->current_step_); - hid_t history_hid = H5Gcreate2(file_hid, "history", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); - CHECK_GE(history_hid, 0) - << "Error saving solver state to " << snapshot_filename << "."; - for (int i = 0; i < history_.size(); ++i) { - ostringstream oss; - oss << i; - hdf5_save_nd_dataset(history_hid, oss.str(), *history_[i]); - } - H5Gclose(history_hid); - H5Fclose(file_hid); -} - -template -void SGDSolver::RestoreSolverStateFromBinaryProto( - const string& state_file) { - SolverState state; - ReadProtoFromBinaryFile(state_file, &state); - this->iter_ = state.iter(); - if (state.has_learned_net()) { - NetParameter net_param; - ReadNetParamsFromBinaryFileOrDie(state.learned_net().c_str(), &net_param); - this->net_->CopyTrainedLayersFrom(net_param); - } - this->current_step_ = state.current_step(); - CHECK_EQ(state.history_size(), history_.size()) - << "Incorrect length of history blobs."; - LOG(INFO) << "SGDSolver: restoring history"; - for (int i = 0; i < history_.size(); ++i) { - history_[i]->FromProto(state.history(i)); - } -} - -template -void SGDSolver::RestoreSolverStateFromHDF5(const string& 
state_file) { - hid_t file_hid = H5Fopen(state_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); - CHECK_GE(file_hid, 0) << "Couldn't open solver state file " << state_file; - this->iter_ = hdf5_load_int(file_hid, "iter"); - if (H5LTfind_dataset(file_hid, "learned_net")) { - string learned_net = hdf5_load_string(file_hid, "learned_net"); - this->net_->CopyTrainedLayersFrom(learned_net); - } - this->current_step_ = hdf5_load_int(file_hid, "current_step"); - hid_t history_hid = H5Gopen2(file_hid, "history", H5P_DEFAULT); - CHECK_GE(history_hid, 0) << "Error reading history from " << state_file; - int state_history_size = hdf5_get_num_links(history_hid); - CHECK_EQ(state_history_size, history_.size()) - << "Incorrect length of history blobs."; - for (int i = 0; i < history_.size(); ++i) { - ostringstream oss; - oss << i; - hdf5_load_nd_dataset(history_hid, oss.str().c_str(), 0, - kMaxBlobAxes, history_[i].get()); - } - H5Gclose(history_hid); - H5Fclose(file_hid); -} - -template -void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { - CHECK(Caffe::root_solver()); - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - Dtype momentum = this->param_.momentum(); - Dtype local_rate = rate * net_params_lr[param_id]; - switch (Caffe::mode()) { - case Caffe::CPU: { - // save history momentum for stepping back - caffe_copy(net_params[param_id]->count(), - this->history_[param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - // update history - caffe_cpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->cpu_diff(), momentum, - this->history_[param_id]->mutable_cpu_data()); - - // compute update: step back then over step - caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, - this->history_[param_id]->cpu_data(), -momentum, - this->update_[param_id]->mutable_cpu_data()); - - // copy - caffe_copy(net_params[param_id]->count(), - this->update_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - // save history momentum for stepping back - caffe_copy(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // update history - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), momentum, - this->history_[param_id]->mutable_gpu_data()); - - // compute update: step back then over step - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, - this->history_[param_id]->gpu_data(), -momentum, - this->update_[param_id]->mutable_gpu_data()); - - // copy - caffe_copy(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { - CHECK(Caffe::root_solver()); - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - Dtype delta = this->param_.delta(); - Dtype local_rate = rate * net_params_lr[param_id]; - switch (Caffe::mode()) { - case Caffe::CPU: { - // compute square of gradient in update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history - caffe_add(net_params[param_id]->count(), - 
this->update_[param_id]->cpu_data(), - this->history_[param_id]->cpu_data(), - this->history_[param_id]->mutable_cpu_data()); - - // prepare update - caffe_powx(net_params[param_id]->count(), - this->history_[param_id]->cpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_cpu_data()); - - caffe_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_cpu_data()); - - caffe_div(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), - this->update_[param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - // scale and copy - caffe_cpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->cpu_data(), Dtype(0), - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history - caffe_gpu_add(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - this->history_[param_id]->gpu_data(), - this->history_[param_id]->mutable_gpu_data()); - - // prepare update - caffe_gpu_powx(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_div(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), - this->update_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // scale and copy - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->gpu_data(), Dtype(0), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - - // get the learning rate - Dtype delta = this->param_.delta(); - Dtype rms_decay = this->param_.rms_decay(); - Dtype local_rate = rate * net_params_lr[param_id]; - - switch (Caffe::mode()) { - case Caffe::CPU: - // compute square of gradient in update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history - caffe_cpu_axpby(net_params[param_id] -> count(), - Dtype(1-rms_decay), this->update_[param_id]->cpu_data(), - rms_decay, this->history_[param_id]-> mutable_cpu_data()); - - // prepare update - caffe_powx(net_params[param_id]->count(), - this->history_[param_id]->cpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_cpu_data()); - - caffe_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_cpu_data()); - - caffe_div(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - // scale and copy - caffe_cpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->cpu_data(), Dtype(0), - net_params[param_id]->mutable_cpu_diff()); - break; - case Caffe::GPU: -#ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - 
- // update history - caffe_gpu_axpby(net_params[param_id] -> count(), - Dtype(1-rms_decay), this->update_[param_id]->gpu_data(), - rms_decay, this->history_[param_id]-> mutable_gpu_data()); - - // prepare update - caffe_gpu_powx(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_div(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->gpu_data(), Dtype(0), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void AdaDeltaSolver::AdaDeltaPreSolve() { - // Add the extra history entries for AdaDelta after those from - // SGDSolver::PreSolve - const vector*>& net_params = this->net_->learnable_params(); - for (int i = 0; i < net_params.size(); ++i) { - const vector& shape = net_params[i]->shape(); - this->history_.push_back( - shared_ptr >(new Blob(shape))); - } -} - -template -void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - Dtype delta = this->param_.delta(); - Dtype momentum = this->param_.momentum(); - Dtype local_rate = rate * net_params_lr[param_id]; - size_t update_history_offset = net_params.size(); - switch (Caffe::mode()) { - case Caffe::CPU: { - // compute square of gradient in update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history of gradients - caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->cpu_data(), momentum, - this->history_[param_id]->mutable_cpu_data()); - - // add delta to history to guard against dividing by zero later - caffe_set(net_params[param_id]->count(), delta, - this->temp_[param_id]->mutable_cpu_data()); - - caffe_add(net_params[param_id]->count(), - this->temp_[param_id]->cpu_data(), - this->history_[update_history_offset + param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - caffe_add(net_params[param_id]->count(), - this->temp_[param_id]->cpu_data(), - this->history_[param_id]->cpu_data(), - this->temp_[param_id]->mutable_cpu_data()); - - // divide history of updates by history of gradients - caffe_div(net_params[param_id]->count(), - this->update_[param_id]->cpu_data(), - this->temp_[param_id]->cpu_data(), - this->update_[param_id]->mutable_cpu_data()); - - // jointly compute the RMS of both for update and gradient history - caffe_powx(net_params[param_id]->count(), - this->update_[param_id]->cpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_cpu_data()); - - // compute the update - caffe_mul(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), - this->update_[param_id]->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - - // compute square of update - caffe_powx(net_params[param_id]->count(), - net_params[param_id]->cpu_diff(), Dtype(2), - this->update_[param_id]->mutable_cpu_data()); - - // update history of updates - caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - 
this->update_[param_id]->cpu_data(), momentum, - this->history_[update_history_offset + param_id]->mutable_cpu_data()); - - // apply learning rate - caffe_cpu_scale(net_params[param_id]->count(), local_rate, - net_params[param_id]->cpu_diff(), - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of gradients - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[param_id]->mutable_gpu_data()); - - // add delta to history to guard against dividing by zero later - caffe_gpu_set(net_params[param_id]->count(), delta, - this->temp_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[update_history_offset + param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[param_id]->gpu_data(), - this->temp_[param_id]->mutable_gpu_data()); - - // divide history of updates by history of gradients - caffe_gpu_div(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - this->temp_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // jointly compute the RMS of both for update and gradient history - caffe_gpu_powx(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - // compute the update and copy to net_diff - caffe_gpu_mul(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), - this->update_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - - // compute square of update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of updates - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[update_history_offset + param_id]->mutable_gpu_data()); - - // apply learning rate - caffe_gpu_scale(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - -template -void AdamSolver::AdamPreSolve() { - // Add the extra history entries for Adam after those from - // SGDSolver::PreSolve - const vector*>& net_params = this->net_->learnable_params(); - for (int i = 0; i < net_params.size(); ++i) { - const vector& shape = net_params[i]->shape(); - this->history_.push_back( - shared_ptr >(new Blob(shape))); - } -} - -template -void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { - const vector*>& net_params = this->net_->learnable_params(); - const vector& net_params_lr = this->net_->params_lr(); - Dtype local_rate = rate * net_params_lr[param_id]; - const Dtype beta1 = this->param_.momentum(); - const Dtype beta2 = this->param_.momentum2(); - - // we create aliases for convenience - size_t update_history_offset = net_params.size(); - Blob* val_m = this->history_[param_id].get(); - Blob* val_v = this->history_[param_id + update_history_offset].get(); - Blob* val_t = 
this->temp_[param_id].get(); - - const int t = this->iter_ + 1; - const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) / - (Dtype(1.) - pow(beta1, t)); - const int N = net_params[param_id]->count(); - const Dtype eps_hat = this->param_.delta(); - - switch (Caffe::mode()) { - case Caffe::CPU: { - // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t - caffe_cpu_axpby(N, Dtype(1)-beta1, - net_params[param_id]->cpu_diff(), beta1, - val_m->mutable_cpu_data()); - - // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 - caffe_mul(N, - net_params[param_id]->cpu_diff(), - net_params[param_id]->cpu_diff(), - val_t->mutable_cpu_data()); - caffe_cpu_axpby(N, Dtype(1)-beta2, - val_t->cpu_data(), beta2, - val_v->mutable_cpu_data()); - - // set update - caffe_powx(N, - val_v->cpu_data(), Dtype(0.5), - val_t->mutable_cpu_data()); - caffe_add_scalar(N, eps_hat, val_t->mutable_cpu_data()); - caffe_div(N, - val_m->cpu_data(), - val_t->cpu_data(), - val_t->mutable_cpu_data()); - - caffe_cpu_scale(N, local_rate*correction, - val_t->cpu_data(), - net_params[param_id]->mutable_cpu_diff()); - break; - } - case Caffe::GPU: { -#ifndef CPU_ONLY - // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t - caffe_gpu_axpby(N, Dtype(1)-beta1, - net_params[param_id]->gpu_diff(), beta1, - val_m->mutable_gpu_data()); - - // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 - caffe_gpu_mul(N, - net_params[param_id]->gpu_diff(), - net_params[param_id]->gpu_diff(), - val_t->mutable_gpu_data()); - caffe_gpu_axpby(N, Dtype(1)-beta2, - val_t->gpu_data(), beta2, - val_v->mutable_gpu_data()); - - // set update - caffe_gpu_powx(N, - val_v->gpu_data(), Dtype(0.5), - val_t->mutable_gpu_data()); - caffe_gpu_add_scalar(N, eps_hat, - val_t->mutable_gpu_data()); - caffe_gpu_div(N, - val_m->gpu_data(), - val_t->gpu_data(), - val_t->mutable_gpu_data()); - - caffe_gpu_scale(N, local_rate*correction, - val_t->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - default: - LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); - } -} - INSTANTIATE_CLASS(Solver); -INSTANTIATE_CLASS(SGDSolver); -INSTANTIATE_CLASS(NesterovSolver); -INSTANTIATE_CLASS(AdaGradSolver); -INSTANTIATE_CLASS(RMSPropSolver); -INSTANTIATE_CLASS(AdaDeltaSolver); -INSTANTIATE_CLASS(AdamSolver); } // namespace caffe diff --git a/src/caffe/solver_factory.cpp b/src/caffe/solver_factory.cpp new file mode 100644 index 00000000..f78fab28 --- /dev/null +++ b/src/caffe/solver_factory.cpp @@ -0,0 +1,32 @@ +#include "caffe/solver.hpp" +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +Solver* GetSolver(const SolverParameter& param) { + SolverParameter_SolverType type = param.solver_type(); + + switch (type) { + case SolverParameter_SolverType_SGD: + return new SGDSolver(param); + case SolverParameter_SolverType_NESTEROV: + return new NesterovSolver(param); + case SolverParameter_SolverType_ADAGRAD: + return new AdaGradSolver(param); + case SolverParameter_SolverType_RMSPROP: + return new RMSPropSolver(param); + case SolverParameter_SolverType_ADADELTA: + return new AdaDeltaSolver(param); + case SolverParameter_SolverType_ADAM: + return new AdamSolver(param); + default: + LOG(FATAL) << "Unknown SolverType: " << type; + } + return (Solver*) NULL; +} + +template Solver* GetSolver(const SolverParameter& param); +template Solver* GetSolver(const SolverParameter& param); + +} // namespace caffe diff --git a/src/caffe/solvers/adadelta_solver.cpp b/src/caffe/solvers/adadelta_solver.cpp new file mode 100644 index 00000000..45cd4eb2 --- 
/dev/null +++ b/src/caffe/solvers/adadelta_solver.cpp @@ -0,0 +1,155 @@ +#include + +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +void AdaDeltaSolver::AdaDeltaPreSolve() { + // Add the extra history entries for AdaDelta after those from + // SGDSolver::PreSolve + const vector*>& net_params = this->net_->learnable_params(); + for (int i = 0; i < net_params.size(); ++i) { + const vector& shape = net_params[i]->shape(); + this->history_.push_back( + shared_ptr >(new Blob(shape))); + } +} + +template +void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype delta = this->param_.delta(); + Dtype momentum = this->param_.momentum(); + Dtype local_rate = rate * net_params_lr[param_id]; + size_t update_history_offset = net_params.size(); + switch (Caffe::mode()) { + case Caffe::CPU: { + // compute square of gradient in update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history of gradients + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[param_id]->mutable_cpu_data()); + + // add delta to history to guard against dividing by zero later + caffe_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[update_history_offset + param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + caffe_add(net_params[param_id]->count(), + this->temp_[param_id]->cpu_data(), + this->history_[param_id]->cpu_data(), + this->temp_[param_id]->mutable_cpu_data()); + + // divide history of updates by history of gradients + caffe_div(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), + this->temp_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_powx(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_cpu_data()); + + // compute the update + caffe_mul(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), + this->update_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + + // compute square of update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history of updates + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->cpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_cpu_data()); + + // apply learning rate + caffe_cpu_scale(net_params[param_id]->count(), local_rate, + net_params[param_id]->cpu_diff(), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + // compute square of gradient in update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history of gradients + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[param_id]->mutable_gpu_data()); + + // add delta to history to guard against dividing by zero later 
+ caffe_gpu_set(net_params[param_id]->count(), delta, + this->temp_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[update_history_offset + param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_add(net_params[param_id]->count(), + this->temp_[param_id]->gpu_data(), + this->history_[param_id]->gpu_data(), + this->temp_[param_id]->mutable_gpu_data()); + + // divide history of updates by history of gradients + caffe_gpu_div(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), + this->temp_[param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + // jointly compute the RMS of both for update and gradient history + caffe_gpu_powx(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_gpu_data()); + + // compute the update and copy to net_diff + caffe_gpu_mul(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), + this->update_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + + // compute square of update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history of updates + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, + this->update_[param_id]->gpu_data(), momentum, + this->history_[update_history_offset + param_id]->mutable_gpu_data()); + + // apply learning rate + caffe_gpu_scale(net_params[param_id]->count(), local_rate, + net_params[param_id]->gpu_diff(), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +INSTANTIATE_CLASS(AdaDeltaSolver); + +} // namespace caffe diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp new file mode 100644 index 00000000..627d816a --- /dev/null +++ b/src/caffe/solvers/adagrad_solver.cpp @@ -0,0 +1,88 @@ +#include + +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { + CHECK(Caffe::root_solver()); + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype delta = this->param_.delta(); + Dtype local_rate = rate * net_params_lr[param_id]; + switch (Caffe::mode()) { + case Caffe::CPU: { + // compute square of gradient in update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history + caffe_add(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), + this->history_[param_id]->cpu_data(), + this->history_[param_id]->mutable_cpu_data()); + + // prepare update + caffe_powx(net_params[param_id]->count(), + this->history_[param_id]->cpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_cpu_data()); + + caffe_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_cpu_data()); + + caffe_div(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), + this->update_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // scale and copy + caffe_cpu_axpby(net_params[param_id]->count(), local_rate, + this->update_[param_id]->cpu_data(), Dtype(0), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef 
CPU_ONLY + // compute square of gradient in update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history + caffe_gpu_add(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), + this->history_[param_id]->gpu_data(), + this->history_[param_id]->mutable_gpu_data()); + + // prepare update + caffe_gpu_powx(net_params[param_id]->count(), + this->history_[param_id]->gpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_div(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), + this->update_[param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + // scale and copy + caffe_gpu_axpby(net_params[param_id]->count(), local_rate, + this->update_[param_id]->gpu_data(), Dtype(0), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +INSTANTIATE_CLASS(AdaGradSolver); + +} // namespace caffe diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp new file mode 100644 index 00000000..8c334f66 --- /dev/null +++ b/src/caffe/solvers/adam_solver.cpp @@ -0,0 +1,112 @@ +#include + +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +void AdamSolver::AdamPreSolve() { + // Add the extra history entries for Adam after those from + // SGDSolver::PreSolve + const vector*>& net_params = this->net_->learnable_params(); + for (int i = 0; i < net_params.size(); ++i) { + const vector& shape = net_params[i]->shape(); + this->history_.push_back( + shared_ptr >(new Blob(shape))); + } +} + +template +void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype local_rate = rate * net_params_lr[param_id]; + const Dtype beta1 = this->param_.momentum(); + const Dtype beta2 = this->param_.momentum2(); + + // we create aliases for convenience + size_t update_history_offset = net_params.size(); + Blob* val_m = this->history_[param_id].get(); + Blob* val_v = this->history_[param_id + update_history_offset].get(); + Blob* val_t = this->temp_[param_id].get(); + + const int t = this->iter_ + 1; + const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) / + (Dtype(1.) 
- pow(beta1, t)); + const int N = net_params[param_id]->count(); + const Dtype eps_hat = this->param_.delta(); + + switch (Caffe::mode()) { + case Caffe::CPU: { + // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t + caffe_cpu_axpby(N, Dtype(1)-beta1, + net_params[param_id]->cpu_diff(), beta1, + val_m->mutable_cpu_data()); + + // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 + caffe_mul(N, + net_params[param_id]->cpu_diff(), + net_params[param_id]->cpu_diff(), + val_t->mutable_cpu_data()); + caffe_cpu_axpby(N, Dtype(1)-beta2, + val_t->cpu_data(), beta2, + val_v->mutable_cpu_data()); + + // set update + caffe_powx(N, + val_v->cpu_data(), Dtype(0.5), + val_t->mutable_cpu_data()); + caffe_add_scalar(N, eps_hat, val_t->mutable_cpu_data()); + caffe_div(N, + val_m->cpu_data(), + val_t->cpu_data(), + val_t->mutable_cpu_data()); + + caffe_cpu_scale(N, local_rate*correction, + val_t->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t + caffe_gpu_axpby(N, Dtype(1)-beta1, + net_params[param_id]->gpu_diff(), beta1, + val_m->mutable_gpu_data()); + + // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 + caffe_gpu_mul(N, + net_params[param_id]->gpu_diff(), + net_params[param_id]->gpu_diff(), + val_t->mutable_gpu_data()); + caffe_gpu_axpby(N, Dtype(1)-beta2, + val_t->gpu_data(), beta2, + val_v->mutable_gpu_data()); + + // set update + caffe_gpu_powx(N, + val_v->gpu_data(), Dtype(0.5), + val_t->mutable_gpu_data()); + caffe_gpu_add_scalar(N, eps_hat, + val_t->mutable_gpu_data()); + caffe_gpu_div(N, + val_m->gpu_data(), + val_t->gpu_data(), + val_t->mutable_gpu_data()); + + caffe_gpu_scale(N, local_rate*correction, + val_t->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +INSTANTIATE_CLASS(AdamSolver); + +} // namespace caffe diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp new file mode 100644 index 00000000..8135ee2c --- /dev/null +++ b/src/caffe/solvers/nesterov_solver.cpp @@ -0,0 +1,70 @@ +#include + +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { + CHECK(Caffe::root_solver()); + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype momentum = this->param_.momentum(); + Dtype local_rate = rate * net_params_lr[param_id]; + switch (Caffe::mode()) { + case Caffe::CPU: { + // save history momentum for stepping back + caffe_copy(net_params[param_id]->count(), + this->history_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // update history + caffe_cpu_axpby(net_params[param_id]->count(), local_rate, + net_params[param_id]->cpu_diff(), momentum, + this->history_[param_id]->mutable_cpu_data()); + + // compute update: step back then over step + caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, + this->history_[param_id]->cpu_data(), -momentum, + this->update_[param_id]->mutable_cpu_data()); + + // copy + caffe_copy(net_params[param_id]->count(), + this->update_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + // save history momentum for stepping back + caffe_copy(net_params[param_id]->count(), + this->history_[param_id]->gpu_data(), + 
this->update_[param_id]->mutable_gpu_data()); + + // update history + caffe_gpu_axpby(net_params[param_id]->count(), local_rate, + net_params[param_id]->gpu_diff(), momentum, + this->history_[param_id]->mutable_gpu_data()); + + // compute update: step back then over step + caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, + this->history_[param_id]->gpu_data(), -momentum, + this->update_[param_id]->mutable_gpu_data()); + + // copy + caffe_copy(net_params[param_id]->count(), + this->update_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +INSTANTIATE_CLASS(NesterovSolver); + +} // namespace caffe diff --git a/src/caffe/solvers/rmsprop_solver.cpp b/src/caffe/solvers/rmsprop_solver.cpp new file mode 100644 index 00000000..96d1b3dd --- /dev/null +++ b/src/caffe/solvers/rmsprop_solver.cpp @@ -0,0 +1,84 @@ +#include + +#include "caffe/sgd_solvers.hpp" + +namespace caffe { + +template +void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + + // get the learning rate + Dtype delta = this->param_.delta(); + Dtype rms_decay = this->param_.rms_decay(); + Dtype local_rate = rate * net_params_lr[param_id]; + + switch (Caffe::mode()) { + case Caffe::CPU: + // compute square of gradient in update + caffe_powx(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), Dtype(2), + this->update_[param_id]->mutable_cpu_data()); + + // update history + caffe_cpu_axpby(net_params[param_id] -> count(), + Dtype(1-rms_decay), this->update_[param_id]->cpu_data(), + rms_decay, this->history_[param_id]-> mutable_cpu_data()); + + // prepare update + caffe_powx(net_params[param_id]->count(), + this->history_[param_id]->cpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_cpu_data()); + + caffe_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_cpu_data()); + + caffe_div(net_params[param_id]->count(), + net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(), + this->update_[param_id]->mutable_cpu_data()); + + // scale and copy + caffe_cpu_axpby(net_params[param_id]->count(), local_rate, + this->update_[param_id]->cpu_data(), Dtype(0), + net_params[param_id]->mutable_cpu_diff()); + break; + case Caffe::GPU: +#ifndef CPU_ONLY + // compute square of gradient in update + caffe_gpu_powx(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), Dtype(2), + this->update_[param_id]->mutable_gpu_data()); + + // update history + caffe_gpu_axpby(net_params[param_id] -> count(), + Dtype(1-rms_decay), this->update_[param_id]->gpu_data(), + rms_decay, this->history_[param_id]-> mutable_gpu_data()); + + // prepare update + caffe_gpu_powx(net_params[param_id]->count(), + this->history_[param_id]->gpu_data(), Dtype(0.5), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_div(net_params[param_id]->count(), + net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(), + this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_axpby(net_params[param_id]->count(), local_rate, + this->update_[param_id]->gpu_data(), Dtype(0), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Unknown caffe mode: " << 
Caffe::mode(); + } +} + +INSTANTIATE_CLASS(RMSPropSolver); + +} // namespace caffe diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp new file mode 100644 index 00000000..89ef5ec4 --- /dev/null +++ b/src/caffe/solvers/sgd_solver.cpp @@ -0,0 +1,347 @@ +#include +#include + +#include "caffe/sgd_solvers.hpp" +#include "caffe/util/hdf5.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" + +namespace caffe { + +// Return the current learning rate. The currently implemented learning rate +// policies are as follows: +// - fixed: always return base_lr. +// - step: return base_lr * gamma ^ (floor(iter / step)) +// - exp: return base_lr * gamma ^ iter +// - inv: return base_lr * (1 + gamma * iter) ^ (- power) +// - multistep: similar to step but it allows non uniform steps defined by +// stepvalue +// - poly: the effective learning rate follows a polynomial decay, to be +// zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) +// - sigmoid: the effective learning rate follows a sigmod decay +// return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) +// +// where base_lr, max_iter, gamma, step, stepvalue and power are defined +// in the solver parameter protocol buffer, and iter is the current iteration. +template +Dtype SGDSolver::GetLearningRate() { + Dtype rate; + const string& lr_policy = this->param_.lr_policy(); + if (lr_policy == "fixed") { + rate = this->param_.base_lr(); + } else if (lr_policy == "step") { + this->current_step_ = this->iter_ / this->param_.stepsize(); + rate = this->param_.base_lr() * + pow(this->param_.gamma(), this->current_step_); + } else if (lr_policy == "exp") { + rate = this->param_.base_lr() * pow(this->param_.gamma(), this->iter_); + } else if (lr_policy == "inv") { + rate = this->param_.base_lr() * + pow(Dtype(1) + this->param_.gamma() * this->iter_, + - this->param_.power()); + } else if (lr_policy == "multistep") { + if (this->current_step_ < this->param_.stepvalue_size() && + this->iter_ >= this->param_.stepvalue(this->current_step_)) { + this->current_step_++; + LOG(INFO) << "MultiStep Status: Iteration " << + this->iter_ << ", step = " << this->current_step_; + } + rate = this->param_.base_lr() * + pow(this->param_.gamma(), this->current_step_); + } else if (lr_policy == "poly") { + rate = this->param_.base_lr() * pow(Dtype(1.) - + (Dtype(this->iter_) / Dtype(this->param_.max_iter())), + this->param_.power()); + } else if (lr_policy == "sigmoid") { + rate = this->param_.base_lr() * (Dtype(1.) / + (Dtype(1.) 
+ exp(-this->param_.gamma() * (Dtype(this->iter_) - + Dtype(this->param_.stepsize()))))); + } else { + LOG(FATAL) << "Unknown learning rate policy: " << lr_policy; + } + return rate; +} + +template +void SGDSolver::PreSolve() { + // Initialize the history + const vector*>& net_params = this->net_->learnable_params(); + history_.clear(); + update_.clear(); + temp_.clear(); + for (int i = 0; i < net_params.size(); ++i) { + const vector& shape = net_params[i]->shape(); + history_.push_back(shared_ptr >(new Blob(shape))); + update_.push_back(shared_ptr >(new Blob(shape))); + temp_.push_back(shared_ptr >(new Blob(shape))); + } +} + +template +void SGDSolver::ClipGradients() { + const Dtype clip_gradients = this->param_.clip_gradients(); + if (clip_gradients < 0) { return; } + const vector*>& net_params = this->net_->learnable_params(); + Dtype sumsq_diff = 0; + for (int i = 0; i < net_params.size(); ++i) { + sumsq_diff += net_params[i]->sumsq_diff(); + } + const Dtype l2norm_diff = std::sqrt(sumsq_diff); + if (l2norm_diff > clip_gradients) { + Dtype scale_factor = clip_gradients / l2norm_diff; + LOG(INFO) << "Gradient clipping: scaling down gradients (L2 norm " + << l2norm_diff << " > " << clip_gradients << ") " + << "by scale factor " << scale_factor; + for (int i = 0; i < net_params.size(); ++i) { + net_params[i]->scale_diff(scale_factor); + } + } +} + +template +void SGDSolver::ApplyUpdate() { + CHECK(Caffe::root_solver()); + Dtype rate = GetLearningRate(); + if (this->param_.display() && this->iter_ % this->param_.display() == 0) { + LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; + } + ClipGradients(); + for (int param_id = 0; param_id < this->net_->learnable_params().size(); + ++param_id) { + Normalize(param_id); + Regularize(param_id); + ComputeUpdateValue(param_id, rate); + } + this->net_->Update(); +} + +template +void SGDSolver::Normalize(int param_id) { + if (this->param_.iter_size() == 1) { return; } + // Scale gradient to counterbalance accumulation. + const vector*>& net_params = this->net_->learnable_params(); + const Dtype accum_normalization = Dtype(1.) 
/ this->param_.iter_size(); + switch (Caffe::mode()) { + case Caffe::CPU: { + caffe_scal(net_params[param_id]->count(), accum_normalization, + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + caffe_gpu_scal(net_params[param_id]->count(), accum_normalization, + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +template +void SGDSolver::Regularize(int param_id) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_weight_decay = + this->net_->params_weight_decay(); + Dtype weight_decay = this->param_.weight_decay(); + string regularization_type = this->param_.regularization_type(); + Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; + switch (Caffe::mode()) { + case Caffe::CPU: { + if (local_decay) { + if (regularization_type == "L2") { + // add weight decay + caffe_axpy(net_params[param_id]->count(), + local_decay, + net_params[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + } else if (regularization_type == "L1") { + caffe_cpu_sign(net_params[param_id]->count(), + net_params[param_id]->cpu_data(), + temp_[param_id]->mutable_cpu_data()); + caffe_axpy(net_params[param_id]->count(), + local_decay, + temp_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + } else { + LOG(FATAL) << "Unknown regularization type: " << regularization_type; + } + } + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + if (local_decay) { + if (regularization_type == "L2") { + // add weight decay + caffe_gpu_axpy(net_params[param_id]->count(), + local_decay, + net_params[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + } else if (regularization_type == "L1") { + caffe_gpu_sign(net_params[param_id]->count(), + net_params[param_id]->gpu_data(), + temp_[param_id]->mutable_gpu_data()); + caffe_gpu_axpy(net_params[param_id]->count(), + local_decay, + temp_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); + } else { + LOG(FATAL) << "Unknown regularization type: " << regularization_type; + } + } +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +template +void SGDSolver::ComputeUpdateValue(int param_id, Dtype rate) { + const vector*>& net_params = this->net_->learnable_params(); + const vector& net_params_lr = this->net_->params_lr(); + Dtype momentum = this->param_.momentum(); + Dtype local_rate = rate * net_params_lr[param_id]; + // Compute the update to history, then copy it to the parameter diff. 
+ switch (Caffe::mode()) { + case Caffe::CPU: { + caffe_cpu_axpby(net_params[param_id]->count(), local_rate, + net_params[param_id]->cpu_diff(), momentum, + history_[param_id]->mutable_cpu_data()); + caffe_copy(net_params[param_id]->count(), + history_[param_id]->cpu_data(), + net_params[param_id]->mutable_cpu_diff()); + break; + } + case Caffe::GPU: { +#ifndef CPU_ONLY + caffe_gpu_axpby(net_params[param_id]->count(), local_rate, + net_params[param_id]->gpu_diff(), momentum, + history_[param_id]->mutable_gpu_data()); + caffe_copy(net_params[param_id]->count(), + history_[param_id]->gpu_data(), + net_params[param_id]->mutable_gpu_diff()); +#else + NO_GPU; +#endif + break; + } + default: + LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); + } +} + +template +void SGDSolver::SnapshotSolverState(const string& model_filename) { + switch (this->param_.snapshot_format()) { + case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: + SnapshotSolverStateToBinaryProto(model_filename); + break; + case caffe::SolverParameter_SnapshotFormat_HDF5: + SnapshotSolverStateToHDF5(model_filename); + break; + default: + LOG(FATAL) << "Unsupported snapshot format."; + } +} + +template +void SGDSolver::SnapshotSolverStateToBinaryProto( + const string& model_filename) { + SolverState state; + state.set_iter(this->iter_); + state.set_learned_net(model_filename); + state.set_current_step(this->current_step_); + state.clear_history(); + for (int i = 0; i < history_.size(); ++i) { + // Add history + BlobProto* history_blob = state.add_history(); + history_[i]->ToProto(history_blob); + } + string snapshot_filename = Solver::SnapshotFilename(".solverstate"); + LOG(INFO) + << "Snapshotting solver state to binary proto file " << snapshot_filename; + WriteProtoToBinaryFile(state, snapshot_filename.c_str()); +} + +template +void SGDSolver::SnapshotSolverStateToHDF5( + const string& model_filename) { + string snapshot_filename = + Solver::SnapshotFilename(".solverstate.h5"); + LOG(INFO) << "Snapshotting solver state to HDF5 file " << snapshot_filename; + hid_t file_hid = H5Fcreate(snapshot_filename.c_str(), H5F_ACC_TRUNC, + H5P_DEFAULT, H5P_DEFAULT); + CHECK_GE(file_hid, 0) + << "Couldn't open " << snapshot_filename << " to save solver state."; + hdf5_save_int(file_hid, "iter", this->iter_); + hdf5_save_string(file_hid, "learned_net", model_filename); + hdf5_save_int(file_hid, "current_step", this->current_step_); + hid_t history_hid = H5Gcreate2(file_hid, "history", H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + CHECK_GE(history_hid, 0) + << "Error saving solver state to " << snapshot_filename << "."; + for (int i = 0; i < history_.size(); ++i) { + ostringstream oss; + oss << i; + hdf5_save_nd_dataset(history_hid, oss.str(), *history_[i]); + } + H5Gclose(history_hid); + H5Fclose(file_hid); +} + +template +void SGDSolver::RestoreSolverStateFromBinaryProto( + const string& state_file) { + SolverState state; + ReadProtoFromBinaryFile(state_file, &state); + this->iter_ = state.iter(); + if (state.has_learned_net()) { + NetParameter net_param; + ReadNetParamsFromBinaryFileOrDie(state.learned_net().c_str(), &net_param); + this->net_->CopyTrainedLayersFrom(net_param); + } + this->current_step_ = state.current_step(); + CHECK_EQ(state.history_size(), history_.size()) + << "Incorrect length of history blobs."; + LOG(INFO) << "SGDSolver: restoring history"; + for (int i = 0; i < history_.size(); ++i) { + history_[i]->FromProto(state.history(i)); + } +} + +template +void SGDSolver::RestoreSolverStateFromHDF5(const string& 
state_file) { + hid_t file_hid = H5Fopen(state_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + CHECK_GE(file_hid, 0) << "Couldn't open solver state file " << state_file; + this->iter_ = hdf5_load_int(file_hid, "iter"); + if (H5LTfind_dataset(file_hid, "learned_net")) { + string learned_net = hdf5_load_string(file_hid, "learned_net"); + this->net_->CopyTrainedLayersFrom(learned_net); + } + this->current_step_ = hdf5_load_int(file_hid, "current_step"); + hid_t history_hid = H5Gopen2(file_hid, "history", H5P_DEFAULT); + CHECK_GE(history_hid, 0) << "Error reading history from " << state_file; + int state_history_size = hdf5_get_num_links(history_hid); + CHECK_EQ(state_history_size, history_.size()) + << "Incorrect length of history blobs."; + for (int i = 0; i < history_.size(); ++i) { + ostringstream oss; + oss << i; + hdf5_load_nd_dataset(history_hid, oss.str().c_str(), 0, + kMaxBlobAxes, history_[i].get()); + } + H5Gclose(history_hid); + H5Fclose(file_hid); +} + +INSTANTIATE_CLASS(SGDSolver); + +} // namespace caffe diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 7ad7467f..1767ad3f 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -10,7 +10,7 @@ #include "caffe/common.hpp" #include "caffe/parallel.hpp" #include "caffe/proto/caffe.pb.h" -#include "caffe/solver.hpp" +#include "caffe/sgd_solvers.hpp" #include "caffe/util/io.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp index ceabc9cd..b1816426 100644 --- a/src/caffe/test/test_solver.cpp +++ b/src/caffe/test/test_solver.cpp @@ -7,6 +7,7 @@ #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/sgd_solvers.hpp" #include "caffe/solver.hpp" #include "caffe/test/test_caffe_main.hpp" From 0eea815ad6fa3313888b6229499a237820258deb Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Thu, 24 Sep 2015 19:40:45 -0700 Subject: [PATCH 137/223] Change solver type to string and provide solver registry --- include/caffe/caffe.hpp | 1 + include/caffe/sgd_solvers.hpp | 6 + include/caffe/solver.hpp | 9 +- include/caffe/solver_factory.hpp | 137 ++++++++++++++++++ src/caffe/proto/caffe.proto | 27 ++-- src/caffe/solver_factory.cpp | 32 ---- src/caffe/solvers/adadelta_solver.cpp | 1 + src/caffe/solvers/adagrad_solver.cpp | 1 + src/caffe/solvers/adam_solver.cpp | 1 + src/caffe/solvers/nesterov_solver.cpp | 1 + src/caffe/solvers/rmsprop_solver.cpp | 1 + src/caffe/solvers/sgd_solver.cpp | 1 + src/caffe/test/test_gradient_based_solver.cpp | 54 ++----- src/caffe/test/test_solver_factory.cpp | 50 +++++++ tools/caffe.cpp | 2 +- 15 files changed, 233 insertions(+), 91 deletions(-) create mode 100644 include/caffe/solver_factory.hpp delete mode 100644 src/caffe/solver_factory.cpp create mode 100644 src/caffe/test/test_solver_factory.cpp diff --git a/include/caffe/caffe.hpp b/include/caffe/caffe.hpp index 68a5e1d1..bd772830 100644 --- a/include/caffe/caffe.hpp +++ b/include/caffe/caffe.hpp @@ -13,6 +13,7 @@ #include "caffe/parallel.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/solver.hpp" +#include "caffe/solver_factory.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" diff --git a/include/caffe/sgd_solvers.hpp b/include/caffe/sgd_solvers.hpp index 6bf1d70c..1fc52d87 100644 --- a/include/caffe/sgd_solvers.hpp +++ b/include/caffe/sgd_solvers.hpp @@ -19,6 +19,7 @@ class SGDSolver : public 
Solver { : Solver(param) { PreSolve(); } explicit SGDSolver(const string& param_file) : Solver(param_file) { PreSolve(); } + virtual inline const char* type() const { return "SGD"; } const vector > >& history() { return history_; } @@ -51,6 +52,7 @@ class NesterovSolver : public SGDSolver { : SGDSolver(param) {} explicit NesterovSolver(const string& param_file) : SGDSolver(param_file) {} + virtual inline const char* type() const { return "Nesterov"; } protected: virtual void ComputeUpdateValue(int param_id, Dtype rate); @@ -65,6 +67,7 @@ class AdaGradSolver : public SGDSolver { : SGDSolver(param) { constructor_sanity_check(); } explicit AdaGradSolver(const string& param_file) : SGDSolver(param_file) { constructor_sanity_check(); } + virtual inline const char* type() const { return "AdaGrad"; } protected: virtual void ComputeUpdateValue(int param_id, Dtype rate); @@ -84,6 +87,7 @@ class RMSPropSolver : public SGDSolver { : SGDSolver(param) { constructor_sanity_check(); } explicit RMSPropSolver(const string& param_file) : SGDSolver(param_file) { constructor_sanity_check(); } + virtual inline const char* type() const { return "RMSProp"; } protected: virtual void ComputeUpdateValue(int param_id, Dtype rate); @@ -106,6 +110,7 @@ class AdaDeltaSolver : public SGDSolver { : SGDSolver(param) { AdaDeltaPreSolve(); } explicit AdaDeltaSolver(const string& param_file) : SGDSolver(param_file) { AdaDeltaPreSolve(); } + virtual inline const char* type() const { return "AdaDelta"; } protected: void AdaDeltaPreSolve(); @@ -129,6 +134,7 @@ class AdamSolver : public SGDSolver { : SGDSolver(param) { AdamPreSolve();} explicit AdamSolver(const string& param_file) : SGDSolver(param_file) { AdamPreSolve(); } + virtual inline const char* type() const { return "Adam"; } protected: void AdamPreSolve(); diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index a045ccf2..298a68f3 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -5,6 +5,7 @@ #include #include "caffe/net.hpp" +#include "caffe/solver_factory.hpp" namespace caffe { @@ -83,6 +84,10 @@ class Solver { } void CheckSnapshotWritePermissions(); + /** + * @brief Returns the solver type. + */ + virtual inline const char* type() const { return ""; } protected: // Make and apply the update value for the current iteration. @@ -148,10 +153,6 @@ class WorkerSolver : public Solver { } }; -// The solver factory function -template -Solver* GetSolver(const SolverParameter& param); - } // namespace caffe #endif // CAFFE_SOLVER_HPP_ diff --git a/include/caffe/solver_factory.hpp b/include/caffe/solver_factory.hpp new file mode 100644 index 00000000..cfff721a --- /dev/null +++ b/include/caffe/solver_factory.hpp @@ -0,0 +1,137 @@ +/** + * @brief A solver factory that allows one to register solvers, similar to + * layer factory. During runtime, registered solvers could be called by passing + * a SolverParameter protobuffer to the CreateSolver function: + * + * SolverRegistry::CreateSolver(param); + * + * There are two ways to register a solver. Assuming that we have a solver like: + * + * template + * class MyAwesomeSolver : public Solver { + * // your implementations + * }; + * + * and its type is its C++ class name, but without the "Solver" at the end + * ("MyAwesomeSolver" -> "MyAwesome"). 
+ * + * If the solver is going to be created simply by its constructor, in your c++ + * file, add the following line: + * + * REGISTER_SOLVER_CLASS(MyAwesome); + * + * Or, if the solver is going to be created by another creator function, in the + * format of: + * + * template + * Solver GetMyAwesomeSolver(const SolverParameter& param) { + * // your implementation + * } + * + * then you can register the creator function instead, like + * + * REGISTER_SOLVER_CREATOR(MyAwesome, GetMyAwesomeSolver) + * + * Note that each solver type should only be registered once. + */ + +#ifndef CAFFE_SOLVER_FACTORY_H_ +#define CAFFE_SOLVER_FACTORY_H_ + +#include +#include +#include + +#include "caffe/common.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template +class Solver; + +template +class SolverRegistry { + public: + typedef Solver* (*Creator)(const SolverParameter&); + typedef std::map CreatorRegistry; + + static CreatorRegistry& Registry() { + static CreatorRegistry* g_registry_ = new CreatorRegistry(); + return *g_registry_; + } + + // Adds a creator. + static void AddCreator(const string& type, Creator creator) { + CreatorRegistry& registry = Registry(); + CHECK_EQ(registry.count(type), 0) + << "Solver type " << type << " already registered."; + registry[type] = creator; + } + + // Get a solver using a SolverParameter. + static Solver* CreateSolver(const SolverParameter& param) { + const string& type = param.type(); + CreatorRegistry& registry = Registry(); + CHECK_EQ(registry.count(type), 1) << "Unknown solver type: " << type + << " (known types: " << SolverTypeListString() << ")"; + return registry[type](param); + } + + static vector SolverTypeList() { + CreatorRegistry& registry = Registry(); + vector solver_types; + for (typename CreatorRegistry::iterator iter = registry.begin(); + iter != registry.end(); ++iter) { + solver_types.push_back(iter->first); + } + return solver_types; + } + + private: + // Solver registry should never be instantiated - everything is done with its + // static variables. + SolverRegistry() {} + + static string SolverTypeListString() { + vector solver_types = SolverTypeList(); + string solver_types_str; + for (vector::iterator iter = solver_types.begin(); + iter != solver_types.end(); ++iter) { + if (iter != solver_types.begin()) { + solver_types_str += ", "; + } + solver_types_str += *iter; + } + return solver_types_str; + } +}; + + +template +class SolverRegisterer { + public: + SolverRegisterer(const string& type, + Solver* (*creator)(const SolverParameter&)) { + // LOG(INFO) << "Registering solver type: " << type; + SolverRegistry::AddCreator(type, creator); + } +}; + + +#define REGISTER_SOLVER_CREATOR(type, creator) \ + static SolverRegisterer g_creator_f_##type(#type, creator); \ + static SolverRegisterer g_creator_d_##type(#type, creator) \ + +#define REGISTER_SOLVER_CLASS(type) \ + template \ + Solver* Creator_##type##Solver( \ + const SolverParameter& param) \ + { \ + return new type##Solver(param); \ + } \ + REGISTER_SOLVER_CREATOR(type, Creator_##type##Solver) + +} // namespace caffe + +#endif // CAFFE_SOLVER_FACTORY_H_ diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 4794991f..76c869c1 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -98,7 +98,7 @@ message NetParameter { // NOTE // Update the next available ID when you add a new SolverParameter field. 
// -// SolverParameter next available ID: 40 (last added: momentum2) +// SolverParameter next available ID: 41 (last added: type) message SolverParameter { ////////////////////////////////////////////////////////////////////////////// // Specifying the train and test networks @@ -209,16 +209,9 @@ message SolverParameter { // (and by default) initialize using a seed derived from the system clock. optional int64 random_seed = 20 [default = -1]; - // Solver type - enum SolverType { - SGD = 0; - NESTEROV = 1; - ADAGRAD = 2; - RMSPROP = 3; - ADADELTA = 4; - ADAM = 5; - } - optional SolverType solver_type = 30 [default = SGD]; + // type of the solver + optional string type = 40 [default = "SGD"]; + // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam optional float delta = 31 [default = 1e-8]; // parameters for the Adam solver @@ -234,6 +227,18 @@ message SolverParameter { // If false, don't save a snapshot after training finishes. optional bool snapshot_after_train = 28 [default = true]; + + // DEPRECATED: old solver enum types, use string instead + enum SolverType { + SGD = 0; + NESTEROV = 1; + ADAGRAD = 2; + RMSPROP = 3; + ADADELTA = 4; + ADAM = 5; + } + // DEPRECATED: use type instead of solver_type + optional SolverType solver_type = 30 [default = SGD]; } // A message that stores the solver snapshots diff --git a/src/caffe/solver_factory.cpp b/src/caffe/solver_factory.cpp deleted file mode 100644 index f78fab28..00000000 --- a/src/caffe/solver_factory.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "caffe/solver.hpp" -#include "caffe/sgd_solvers.hpp" - -namespace caffe { - -template -Solver* GetSolver(const SolverParameter& param) { - SolverParameter_SolverType type = param.solver_type(); - - switch (type) { - case SolverParameter_SolverType_SGD: - return new SGDSolver(param); - case SolverParameter_SolverType_NESTEROV: - return new NesterovSolver(param); - case SolverParameter_SolverType_ADAGRAD: - return new AdaGradSolver(param); - case SolverParameter_SolverType_RMSPROP: - return new RMSPropSolver(param); - case SolverParameter_SolverType_ADADELTA: - return new AdaDeltaSolver(param); - case SolverParameter_SolverType_ADAM: - return new AdamSolver(param); - default: - LOG(FATAL) << "Unknown SolverType: " << type; - } - return (Solver*) NULL; -} - -template Solver* GetSolver(const SolverParameter& param); -template Solver* GetSolver(const SolverParameter& param); - -} // namespace caffe diff --git a/src/caffe/solvers/adadelta_solver.cpp b/src/caffe/solvers/adadelta_solver.cpp index 45cd4eb2..a37899eb 100644 --- a/src/caffe/solvers/adadelta_solver.cpp +++ b/src/caffe/solvers/adadelta_solver.cpp @@ -151,5 +151,6 @@ void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { } INSTANTIATE_CLASS(AdaDeltaSolver); +REGISTER_SOLVER_CLASS(AdaDelta); } // namespace caffe diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp index 627d816a..5e406326 100644 --- a/src/caffe/solvers/adagrad_solver.cpp +++ b/src/caffe/solvers/adagrad_solver.cpp @@ -84,5 +84,6 @@ void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { } INSTANTIATE_CLASS(AdaGradSolver); +REGISTER_SOLVER_CLASS(AdaGrad); } // namespace caffe diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp index 8c334f66..cb0fbfe2 100644 --- a/src/caffe/solvers/adam_solver.cpp +++ b/src/caffe/solvers/adam_solver.cpp @@ -108,5 +108,6 @@ void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { } INSTANTIATE_CLASS(AdamSolver); +REGISTER_SOLVER_CLASS(Adam); 
} // namespace caffe diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp index 8135ee2c..34bf01eb 100644 --- a/src/caffe/solvers/nesterov_solver.cpp +++ b/src/caffe/solvers/nesterov_solver.cpp @@ -66,5 +66,6 @@ void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { } INSTANTIATE_CLASS(NesterovSolver); +REGISTER_SOLVER_CLASS(Nesterov); } // namespace caffe diff --git a/src/caffe/solvers/rmsprop_solver.cpp b/src/caffe/solvers/rmsprop_solver.cpp index 96d1b3dd..c6247676 100644 --- a/src/caffe/solvers/rmsprop_solver.cpp +++ b/src/caffe/solvers/rmsprop_solver.cpp @@ -80,5 +80,6 @@ void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { } INSTANTIATE_CLASS(RMSPropSolver); +REGISTER_SOLVER_CLASS(RMSProp); } // namespace caffe diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp index 89ef5ec4..32bf19b1 100644 --- a/src/caffe/solvers/sgd_solver.cpp +++ b/src/caffe/solvers/sgd_solver.cpp @@ -343,5 +343,6 @@ void SGDSolver::RestoreSolverStateFromHDF5(const string& state_file) { } INSTANTIATE_CLASS(SGDSolver); +REGISTER_SOLVER_CLASS(SGD); } // namespace caffe diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 1767ad3f..84c6747f 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -47,7 +47,6 @@ class GradientBasedSolverTest : public MultiDeviceTest { // Test data: check out generate_sample_data.py in the same directory. string* input_file_; - virtual SolverParameter_SolverType solver_type() = 0; virtual void InitSolver(const SolverParameter& param) = 0; virtual void InitSolverFromProtoString(const string& proto) { @@ -290,8 +289,8 @@ class GradientBasedSolverTest : public MultiDeviceTest { ((i == D) ? bias.cpu_data()[0] : weights.cpu_data()[i]); // Finally, compute update. const vector > >& history = solver_->history(); - if (solver_type() != SolverParameter_SolverType_ADADELTA - && solver_type() != SolverParameter_SolverType_ADAM) { + if (solver_->type() != string("AdaDelta") + && solver_->type() != string("Adam")) { ASSERT_EQ(2, history.size()); // 1 blob for weights, 1 for bias } else { ASSERT_EQ(4, history.size()); // additional blobs for update history @@ -300,26 +299,19 @@ class GradientBasedSolverTest : public MultiDeviceTest { const Dtype history_value = (i == D) ? history[1]->cpu_data()[0] : history[0]->cpu_data()[i]; const Dtype temp = momentum * history_value; - switch (solver_type()) { - case SolverParameter_SolverType_SGD: + if (solver_->type() == string("SGD")) { update_value += temp; - break; - case SolverParameter_SolverType_NESTEROV: + } else if (solver_->type() == string("Nesterov")) { update_value += temp; // step back then over-step update_value = (1 + momentum) * update_value - temp; - break; - case SolverParameter_SolverType_ADAGRAD: + } else if (solver_->type() == string("AdaGrad")) { update_value /= std::sqrt(history_value + grad * grad) + delta_; - break; - case SolverParameter_SolverType_RMSPROP: { + } else if (solver_->type() == string("RMSProp")) { const Dtype rms_decay = 0.95; update_value /= std::sqrt(rms_decay*history_value + grad * grad * (1 - rms_decay)) + delta_; - } - break; - case SolverParameter_SolverType_ADADELTA: - { + } else if (solver_->type() == string("AdaDelta")) { const Dtype update_history_value = (i == D) ? 
history[1 + num_param_blobs]->cpu_data()[0] : history[0 + num_param_blobs]->cpu_data()[i]; @@ -330,9 +322,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { // not actually needed, just here for illustrative purposes // const Dtype weighted_update_average = // momentum * update_history_value + (1 - momentum) * (update_value); - break; - } - case SolverParameter_SolverType_ADAM: { + } else if (solver_->type() == string("Adam")) { const Dtype momentum2 = 0.999; const Dtype m = history_value; const Dtype v = (i == D) ? @@ -344,10 +334,8 @@ class GradientBasedSolverTest : public MultiDeviceTest { std::sqrt(Dtype(1) - pow(momentum2, num_iters)) / (Dtype(1.) - pow(momentum, num_iters)); update_value = alpha_t * val_m / (std::sqrt(val_v) + delta_); - break; - } - default: - LOG(FATAL) << "Unknown solver type: " << solver_type(); + } else { + LOG(FATAL) << "Unknown solver type: " << solver_->type(); } if (i == D) { updated_bias.mutable_cpu_diff()[0] = update_value; @@ -392,7 +380,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { EXPECT_NEAR(expected_updated_bias, solver_updated_bias, error_margin); // Check the solver's history -- should contain the previous update value. - if (solver_type() == SolverParameter_SolverType_SGD) { + if (solver_->type() == string("SGD")) { const vector > >& history = solver_->history(); ASSERT_EQ(2, history.size()); for (int i = 0; i < D; ++i) { @@ -581,10 +569,6 @@ class SGDSolverTest : public GradientBasedSolverTest { virtual void InitSolver(const SolverParameter& param) { this->solver_.reset(new SGDSolver(param)); } - - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_SGD; - } }; TYPED_TEST_CASE(SGDSolverTest, TestDtypesAndDevices); @@ -721,9 +705,6 @@ class AdaGradSolverTest : public GradientBasedSolverTest { virtual void InitSolver(const SolverParameter& param) { this->solver_.reset(new AdaGradSolver(param)); } - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_ADAGRAD; - } }; TYPED_TEST_CASE(AdaGradSolverTest, TestDtypesAndDevices); @@ -824,9 +805,6 @@ class NesterovSolverTest : public GradientBasedSolverTest { virtual void InitSolver(const SolverParameter& param) { this->solver_.reset(new NesterovSolver(param)); } - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_NESTEROV; - } }; TYPED_TEST_CASE(NesterovSolverTest, TestDtypesAndDevices); @@ -960,10 +938,6 @@ class AdaDeltaSolverTest : public GradientBasedSolverTest { virtual void InitSolver(const SolverParameter& param) { this->solver_.reset(new AdaDeltaSolver(param)); } - - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_ADADELTA; - } }; TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices); @@ -1098,9 +1072,6 @@ class AdamSolverTest : public GradientBasedSolverTest { new_param.set_momentum2(momentum2); this->solver_.reset(new AdamSolver(new_param)); } - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_ADAM; - } }; TYPED_TEST_CASE(AdamSolverTest, TestDtypesAndDevices); @@ -1201,9 +1172,6 @@ class RMSPropSolverTest : public GradientBasedSolverTest { new_param.set_rms_decay(rms_decay); this->solver_.reset(new RMSPropSolver(new_param)); } - virtual SolverParameter_SolverType solver_type() { - return SolverParameter_SolverType_RMSPROP; - } }; TYPED_TEST_CASE(RMSPropSolverTest, TestDtypesAndDevices); diff --git a/src/caffe/test/test_solver_factory.cpp b/src/caffe/test/test_solver_factory.cpp new file 
mode 100644 index 00000000..eef5290f --- /dev/null +++ b/src/caffe/test/test_solver_factory.cpp @@ -0,0 +1,50 @@ +#include +#include + +#include "boost/scoped_ptr.hpp" +#include "google/protobuf/text_format.h" +#include "gtest/gtest.h" + +#include "caffe/common.hpp" +#include "caffe/solver.hpp" +#include "caffe/solver_factory.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +template +class SolverFactoryTest : public MultiDeviceTest { + protected: + SolverParameter simple_solver_param() { + const string solver_proto = + "train_net_param { " + " layer { " + " name: 'data' type: 'DummyData' top: 'data' " + " dummy_data_param { shape { dim: 1 } } " + " } " + "} "; + SolverParameter solver_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + solver_proto, &solver_param)); + return solver_param; + } +}; + +TYPED_TEST_CASE(SolverFactoryTest, TestDtypesAndDevices); + +TYPED_TEST(SolverFactoryTest, TestCreateSolver) { + typedef typename TypeParam::Dtype Dtype; + typename SolverRegistry::CreatorRegistry& registry = + SolverRegistry::Registry(); + shared_ptr > solver; + SolverParameter solver_param = this->simple_solver_param(); + for (typename SolverRegistry::CreatorRegistry::iterator iter = + registry.begin(); iter != registry.end(); ++iter) { + solver_param.set_type(iter->first); + solver.reset(SolverRegistry::CreateSolver(solver_param)); + EXPECT_EQ(iter->first, solver->type()); + } +} + +} // namespace caffe diff --git a/tools/caffe.cpp b/tools/caffe.cpp index e3f684b5..1cb6ad89 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -194,7 +194,7 @@ int train() { GetRequestedAction(FLAGS_sighup_effect)); shared_ptr > - solver(caffe::GetSolver(solver_param)); + solver(caffe::SolverRegistry::CreateSolver(solver_param)); solver->SetActionFunction(signal_handler.GetActionFunction()); From c1f7fe1cffa4388886b735f49cd915fad905fca4 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sat, 26 Sep 2015 11:47:02 -0700 Subject: [PATCH 138/223] Add automatic upgrade for solver type --- include/caffe/caffe.hpp | 1 + include/caffe/util/upgrade_proto.hpp | 12 +++++ matlab/+caffe/private/caffe_.cpp | 5 +- python/caffe/_caffe.cpp | 4 +- src/caffe/solver.cpp | 2 +- src/caffe/test/test_upgrade_proto.cpp | 61 ++++++++++++++++++++++ src/caffe/util/upgrade_proto.cpp | 74 +++++++++++++++++++++++++++ tools/caffe.cpp | 2 +- tools/upgrade_solver_proto_text.cpp | 50 ++++++++++++++++++ 9 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 tools/upgrade_solver_proto_text.cpp diff --git a/include/caffe/caffe.hpp b/include/caffe/caffe.hpp index bd772830..a339efba 100644 --- a/include/caffe/caffe.hpp +++ b/include/caffe/caffe.hpp @@ -16,6 +16,7 @@ #include "caffe/solver_factory.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" #include "caffe/vision_layers.hpp" #endif // CAFFE_CAFFE_HPP_ diff --git a/include/caffe/util/upgrade_proto.hpp b/include/caffe/util/upgrade_proto.hpp index 6a141843..c94bb3ca 100644 --- a/include/caffe/util/upgrade_proto.hpp +++ b/include/caffe/util/upgrade_proto.hpp @@ -59,6 +59,18 @@ bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type); +// Return true iff the solver contains any old solver_type specified as enums +bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param); + +bool UpgradeSolverType(SolverParameter* solver_param); + +// Check for deprecations and upgrade the SolverParameter as needed. 
+bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param); + +// Read parameters from a file into a SolverParameter proto message. +void ReadSolverParamsFromTextFileOrDie(const string& param_file, + SolverParameter* param); + } // namespace caffe #endif // CAFFE_UTIL_UPGRADE_PROTO_H_ diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp index 7883f79e..1641e14b 100644 --- a/matlab/+caffe/private/caffe_.cpp +++ b/matlab/+caffe/private/caffe_.cpp @@ -188,7 +188,10 @@ static void get_solver(MEX_ARGS) { "Usage: caffe_('get_solver', solver_file)"); char* solver_file = mxArrayToString(prhs[0]); mxCHECK_FILE_EXIST(solver_file); - shared_ptr > solver(new caffe::SGDSolver(solver_file)); + SolverParameter solver_param; + ReadSolverParamsFromTextFileOrDie(solver_file, &solver_param); + shared_ptr > solver( + SolverRegistry::CreateSolver(solver_param)); solvers_.push_back(solver); plhs[0] = ptr_to_handle >(solver.get()); mxFree(solver_file); diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 0e38dee7..8687dd87 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -134,8 +134,8 @@ void Net_SetInputArrays(Net* net, bp::object data_obj, Solver* GetSolverFromFile(const string& filename) { SolverParameter param; - ReadProtoFromTextFileOrDie(filename, ¶m); - return GetSolver(param); + ReadSolverParamsFromTextFileOrDie(filename, ¶m); + return SolverRegistry::CreateSolver(param); } struct NdarrayConverterGenerator { diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 016a0288..d3bc7361 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -36,7 +36,7 @@ Solver::Solver(const string& param_file, const Solver* root_solver) : net_(), callbacks_(), root_solver_(root_solver), requested_early_exit_(false) { SolverParameter param; - ReadProtoFromTextFileOrDie(param_file, ¶m); + ReadSolverParamsFromTextFileOrDie(param_file, ¶m); Init(param); } diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index ee05b151..df9aeb62 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -2928,4 +2928,65 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { } } #endif // USE_OPENCV + +class SolverTypeUpgradeTest : public ::testing::Test { + protected: + void RunSolverTypeUpgradeTest( + const string& input_param_string, const string& output_param_string) { + // Test upgrading old solver_type field (enum) to new type field (string) + SolverParameter input_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + input_param_string, &input_param)); + SolverParameter expected_output_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + output_param_string, &expected_output_param)); + SolverParameter actual_output_param = input_param; + UpgradeSolverType(&actual_output_param); + EXPECT_EQ(expected_output_param.DebugString(), + actual_output_param.DebugString()); + } +}; + +TEST_F(SolverTypeUpgradeTest, TestSimple) { + const char* old_type_vec[6] = { "SGD", "ADAGRAD", "NESTEROV", "RMSPROP", + "ADADELTA", "ADAM" }; + const char* new_type_vec[6] = { "SGD", "AdaGrad", "Nesterov", "RMSProp", + "AdaDelta", "Adam" }; + for (int i = 0; i < 6; ++i) { + const string& input_proto = + "net: 'examples/mnist/lenet_train_test.prototxt' " + "test_iter: 100 " + "test_interval: 500 " + "base_lr: 0.01 " + "momentum: 0.0 " + "weight_decay: 0.0005 " + "lr_policy: 'inv' " + "gamma: 0.0001 " + "power: 0.75 " + "display: 100 " + "max_iter: 10000 " + "snapshot: 5000 " 
+ "snapshot_prefix: 'examples/mnist/lenet_rmsprop' " + "solver_mode: GPU " + "solver_type: " + std::string(old_type_vec[i]) + " "; + const string& expected_output_proto = + "net: 'examples/mnist/lenet_train_test.prototxt' " + "test_iter: 100 " + "test_interval: 500 " + "base_lr: 0.01 " + "momentum: 0.0 " + "weight_decay: 0.0005 " + "lr_policy: 'inv' " + "gamma: 0.0001 " + "power: 0.75 " + "display: 100 " + "max_iter: 10000 " + "snapshot: 5000 " + "snapshot_prefix: 'examples/mnist/lenet_rmsprop' " + "solver_mode: GPU " + "type: '" + std::string(new_type_vec[i]) + "' "; + this->RunSolverTypeUpgradeTest(input_proto, expected_output_proto); + } +} + } // NOLINT(readability/fn_size) // namespace caffe diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index 6eae9fec..ff3f8ffc 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -937,4 +937,78 @@ const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) { } } +// Return true iff the solver contains any old solver_type specified as enums +bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) { + if (solver_param.has_solver_type()) { + return true; + } + return false; +} + +bool UpgradeSolverType(SolverParameter* solver_param) { + CHECK(!solver_param->has_solver_type() || !solver_param->has_type()) + << "Failed to upgrade solver: old solver_type field (enum) and new type " + << "field (string) cannot be both specified in solver proto text."; + if (solver_param->has_solver_type()) { + string type; + switch (solver_param->solver_type()) { + case SolverParameter_SolverType_SGD: + type = "SGD"; + break; + case SolverParameter_SolverType_NESTEROV: + type = "Nesterov"; + break; + case SolverParameter_SolverType_ADAGRAD: + type = "AdaGrad"; + break; + case SolverParameter_SolverType_RMSPROP: + type = "RMSProp"; + break; + case SolverParameter_SolverType_ADADELTA: + type = "AdaDelta"; + break; + case SolverParameter_SolverType_ADAM: + type = "Adam"; + break; + default: + LOG(FATAL) << "Unknown SolverParameter solver_type: " << type; + } + solver_param->set_type(type); + solver_param->clear_solver_type(); + } else { + LOG(ERROR) << "Warning: solver type already up to date. "; + return false; + } + return true; +} + +// Check for deprecations and upgrade the SolverParameter as needed. +bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param) { + bool success = true; + // Try to upgrade old style solver_type enum fields into new string type + if (SolverNeedsTypeUpgrade(*param)) { + LOG(INFO) << "Attempting to upgrade input file specified using deprecated " + << "'solver_type' field (enum)': " << param_file; + if (!UpgradeSolverType(param)) { + success = false; + LOG(ERROR) << "Warning: had one or more problems upgrading " + << "SolverType (see above)."; + } else { + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "'solver_type' field (enum) to 'type' field (string)."; + LOG(WARNING) << "Note that future Caffe releases will only support " + << "'type' field (string) for a solver's type."; + } + } + return success; +} + +// Read parameters from a file into a SolverParameter proto message. 
+void ReadSolverParamsFromTextFileOrDie(const string& param_file, + SolverParameter* param) { + CHECK(ReadProtoFromTextFile(param_file, param)) + << "Failed to parse SolverParameter file: " << param_file; + UpgradeSolverAsNeeded(param_file, param); +} + } // namespace caffe diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 1cb6ad89..305cfc36 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -157,7 +157,7 @@ int train() { "but not both."; caffe::SolverParameter solver_param; - caffe::ReadProtoFromTextFileOrDie(FLAGS_solver, &solver_param); + caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param); // If the gpus flag is not provided, allow the mode and device to be set // in the solver prototxt. diff --git a/tools/upgrade_solver_proto_text.cpp b/tools/upgrade_solver_proto_text.cpp new file mode 100644 index 00000000..7130232a --- /dev/null +++ b/tools/upgrade_solver_proto_text.cpp @@ -0,0 +1,50 @@ +// This is a script to upgrade old solver prototxts to the new format. +// Usage: +// upgrade_solver_proto_text old_solver_proto_file_in solver_proto_file_out + +#include +#include // NOLINT(readability/streams) +#include // NOLINT(readability/streams) +#include + +#include "caffe/caffe.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" + +using std::ofstream; + +using namespace caffe; // NOLINT(build/namespaces) + +int main(int argc, char** argv) { + ::google::InitGoogleLogging(argv[0]); + if (argc != 3) { + LOG(ERROR) << "Usage: upgrade_solver_proto_text " + << "old_solver_proto_file_in solver_proto_file_out"; + return 1; + } + + SolverParameter solver_param; + string input_filename(argv[1]); + if (!ReadProtoFromTextFile(input_filename, &solver_param)) { + LOG(ERROR) << "Failed to parse input text file as SolverParameter: " + << input_filename; + return 2; + } + bool need_upgrade = SolverNeedsTypeUpgrade(solver_param); + bool success = true; + if (need_upgrade) { + success = UpgradeSolverAsNeeded(input_filename, &solver_param); + if (!success) { + LOG(ERROR) << "Encountered error(s) while upgrading prototxt; " + << "see details above."; + } + } else { + LOG(ERROR) << "File already in latest proto format: " << input_filename; + } + + // Save new format prototxt. 
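For callers, the practical effect can be sketched as follows, assuming an old-style prototxt that still specifies solver_type: RMSPROP (the file name is illustrative): loading upgrades the message in memory, so the registry lookup by string type keeps working.

    SolverParameter param;
    // Rewrites solver_type: RMSPROP (enum) into type: "RMSProp" (string).
    ReadSolverParamsFromTextFileOrDie("old_rmsprop_solver.prototxt", &param);
    CHECK_EQ(string("RMSProp"), param.type());
    shared_ptr<Solver<float> > solver(
        SolverRegistry<float>::CreateSolver(param));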
+ WriteProtoToTextFile(solver_param, argv[2]); + + LOG(ERROR) << "Wrote upgraded SolverParameter text proto to " << argv[2]; + return !success; +} From 9563537e86363fac2768200f5748000ec6b3a911 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sat, 26 Sep 2015 11:47:32 -0700 Subject: [PATCH 139/223] Update examples and docs --- docs/tutorial/solver.md | 28 +++++++++---------- examples/mnist/lenet_adadelta_solver.prototxt | 2 +- examples/mnist/lenet_solver_adam.prototxt | 2 +- examples/mnist/lenet_solver_rmsprop.prototxt | 2 +- ...mnist_autoencoder_solver_adadelta.prototxt | 2 +- .../mnist_autoencoder_solver_adagrad.prototxt | 2 +- ...mnist_autoencoder_solver_nesterov.prototxt | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/tutorial/solver.md b/docs/tutorial/solver.md index b150f648..b719f715 100644 --- a/docs/tutorial/solver.md +++ b/docs/tutorial/solver.md @@ -8,12 +8,12 @@ The responsibilities of learning are divided between the Solver for overseeing t The Caffe solvers are: -- Stochastic Gradient Descent (`SGD`), -- AdaDelta (`ADADELTA`), -- Adaptive Gradient (`ADAGRAD`), -- Adam (`ADAM`), -- Nesterov's Accelerated Gradient (`NESTEROV`) and -- RMSprop (`RMSPROP`) +- Stochastic Gradient Descent (`type: "SGD"`), +- AdaDelta (`type: "AdaDelta"`), +- Adaptive Gradient (`type: "AdaGrad"`), +- Adam (`type: "Adam"`), +- Nesterov's Accelerated Gradient (`type: "Nesterov"`) and +- RMSprop (`type: "RMSProp"`) The solver @@ -51,7 +51,7 @@ The parameter update $$\Delta W$$ is formed by the solver from the error gradien ### SGD -**Stochastic gradient descent** (`solver_type: SGD`) updates the weights $$ W $$ by a linear combination of the negative gradient $$ \nabla L(W) $$ and the previous weight update $$ V_t $$. +**Stochastic gradient descent** (`type: "SGD"`) updates the weights $$ W $$ by a linear combination of the negative gradient $$ \nabla L(W) $$ and the previous weight update $$ V_t $$. The **learning rate** $$ \alpha $$ is the weight of the negative gradient. The **momentum** $$ \mu $$ is the weight of the previous update. @@ -113,7 +113,7 @@ If learning diverges (e.g., you start to see very large or `NaN` or `inf` loss v ### AdaDelta -The **AdaDelta** (`solver_type: ADADELTA`) method (M. Zeiler [1]) is a "robust learning rate method". It is a gradient-based optimization method (like SGD). The update formulas are +The **AdaDelta** (`type: "AdaDelta"`) method (M. Zeiler [1]) is a "robust learning rate method". It is a gradient-based optimization method (like SGD). The update formulas are $$ \begin{align} @@ -125,7 +125,7 @@ E[g^2]_t &= \delta{E[g^2]_{t-1} } + (1-\delta)g_{t}^2 \end{align} $$ -and +and $$ (W_{t+1})_i = @@ -139,7 +139,7 @@ $$ ### AdaGrad -The **adaptive gradient** (`solver_type: ADAGRAD`) method (Duchi et al. [1]) is a gradient-based optimization method (like SGD) that attempts to "find needles in haystacks in the form of very predictive but rarely seen features," in Duchi et al.'s words. +The **adaptive gradient** (`type: "AdaGrad"`) method (Duchi et al. [1]) is a gradient-based optimization method (like SGD) that attempts to "find needles in haystacks in the form of very predictive but rarely seen features," in Duchi et al.'s words. 
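The arithmetic behind the formulas that follow can be sketched per coefficient; h is the running sum of squared gradients for one weight and delta is the stability term, so this is an illustration of the rule rather than Caffe's vectorized implementation.

    float h = 0;  // accumulated squared gradient for this weight
    // Each iteration, given gradient g of weight w:
    h += g * g;
    w -= base_lr * g / (std::sqrt(h) + delta);  // per-weight effective rate shrinks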
Given the update information from all previous iterations $$ \left( \nabla L(W) \right)_{t'} $$ for $$ t' \in \{1, 2, ..., t\} $$, the update formulas proposed by [1] are as follows, specified for each component $$i$$ of the weights $$W$$: @@ -159,7 +159,7 @@ Note that in practice, for weights $$ W \in \mathcal{R}^d $$, AdaGrad implementa ### Adam -The **Adam** (`solver_type: ADAM`), proposed in Kingma et al. [1], is a gradient-based optimization method (like SGD). This includes an "adaptive moment estimation" ($$m_t, v_t$$) and can be regarded as a generalization of AdaGrad. The update formulas are +The **Adam** (`type: "Adam"`), proposed in Kingma et al. [1], is a gradient-based optimization method (like SGD). This includes an "adaptive moment estimation" ($$m_t, v_t$$) and can be regarded as a generalization of AdaGrad. The update formulas are $$ (m_t)_i = \beta_1 (m_{t-1})_i + (1-\beta_1)(\nabla L(W_t))_i,\\ @@ -181,7 +181,7 @@ Kingma et al. [1] proposed to use $$\beta_1 = 0.9, \beta_2 = 0.999, \varepsilon ### NAG -**Nesterov's accelerated gradient** (`solver_type: NESTEROV`) was proposed by Nesterov [1] as an "optimal" method of convex optimization, achieving a convergence rate of $$ \mathcal{O}(1/t^2) $$ rather than the $$ \mathcal{O}(1/t) $$. +**Nesterov's accelerated gradient** (`type: "Nesterov"`) was proposed by Nesterov [1] as an "optimal" method of convex optimization, achieving a convergence rate of $$ \mathcal{O}(1/t^2) $$ rather than the $$ \mathcal{O}(1/t) $$. Though the required assumptions to achieve the $$ \mathcal{O}(1/t^2) $$ convergence typically will not hold for deep networks trained with Caffe (e.g., due to non-smoothness and non-convexity), in practice NAG can be a very effective method for optimizing certain types of deep learning architectures, as demonstrated for deep MNIST autoencoders by Sutskever et al. [2]. The weight update formulas look very similar to the SGD updates given above: @@ -206,10 +206,10 @@ What distinguishes the method from SGD is the weight setting $$ W $$ on which we ### RMSprop -The **RMSprop** (`solver_type: RMSPROP`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are +The **RMSprop** (`type: "RMSProp"`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). 
The update formulas are $$ -(v_t)_i = +(v_t)_i = \begin{cases} (v_{t-1})_i + \delta, &(\nabla L(W_t))_i(\nabla L(W_{t-1}))_i > 0\\ (v_{t-1})_i \cdot (1-\delta), & \text{else} diff --git a/examples/mnist/lenet_adadelta_solver.prototxt b/examples/mnist/lenet_adadelta_solver.prototxt index 776d1e06..16176c0f 100644 --- a/examples/mnist/lenet_adadelta_solver.prototxt +++ b/examples/mnist/lenet_adadelta_solver.prototxt @@ -20,5 +20,5 @@ snapshot: 5000 snapshot_prefix: "examples/mnist/lenet_adadelta" # solver mode: CPU or GPU solver_mode: GPU -solver_type: ADADELTA +type: "AdaDelta" delta: 1e-6 diff --git a/examples/mnist/lenet_solver_adam.prototxt b/examples/mnist/lenet_solver_adam.prototxt index d22c5718..4b5336b1 100644 --- a/examples/mnist/lenet_solver_adam.prototxt +++ b/examples/mnist/lenet_solver_adam.prototxt @@ -22,5 +22,5 @@ max_iter: 10000 snapshot: 5000 snapshot_prefix: "examples/mnist/lenet" # solver mode: CPU or GPU -solver_type: ADAM +type: "Adam" solver_mode: GPU diff --git a/examples/mnist/lenet_solver_rmsprop.prototxt b/examples/mnist/lenet_solver_rmsprop.prototxt index 74dadc51..924b72d3 100644 --- a/examples/mnist/lenet_solver_rmsprop.prototxt +++ b/examples/mnist/lenet_solver_rmsprop.prototxt @@ -23,5 +23,5 @@ snapshot: 5000 snapshot_prefix: "examples/mnist/lenet_rmsprop" # solver mode: CPU or GPU solver_mode: GPU -solver_type: RMSPROP +type: "RMSProp" rms_decay: 0.98 diff --git a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt index 065647df..26c4084a 100644 --- a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt +++ b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt @@ -16,4 +16,4 @@ snapshot: 10000 snapshot_prefix: "examples/mnist/mnist_autoencoder_adadelta_train" # solver mode: CPU or GPU solver_mode: GPU -solver_type: ADADELTA +type: "AdaDelta" diff --git a/examples/mnist/mnist_autoencoder_solver_adagrad.prototxt b/examples/mnist/mnist_autoencoder_solver_adagrad.prototxt index cc0ed9e3..065cdb20 100644 --- a/examples/mnist/mnist_autoencoder_solver_adagrad.prototxt +++ b/examples/mnist/mnist_autoencoder_solver_adagrad.prototxt @@ -14,4 +14,4 @@ snapshot: 10000 snapshot_prefix: "examples/mnist/mnist_autoencoder_adagrad_train" # solver mode: CPU or GPU solver_mode: GPU -solver_type: ADAGRAD +type: "AdaGrad" diff --git a/examples/mnist/mnist_autoencoder_solver_nesterov.prototxt b/examples/mnist/mnist_autoencoder_solver_nesterov.prototxt index 2a59fd45..c95e3fe7 100644 --- a/examples/mnist/mnist_autoencoder_solver_nesterov.prototxt +++ b/examples/mnist/mnist_autoencoder_solver_nesterov.prototxt @@ -17,4 +17,4 @@ snapshot_prefix: "examples/mnist/mnist_autoencoder_nesterov_train" momentum: 0.95 # solver mode: CPU or GPU solver_mode: GPU -solver_type: NESTEROV +type: "Nesterov" From 6f8370a1f3917b525e60896586cac41bb829ac2b Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 16 Oct 2015 17:32:27 -0700 Subject: [PATCH 140/223] clean up logging for Net init - condense conditions by `LOG_IF` - only log memory use once after all tops --- src/caffe/net.cpp | 182 +++++++++++++++++++--------------------------- 1 file changed, 76 insertions(+), 106 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index ebb8b5d2..1ad93e6a 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -46,10 +46,9 @@ void Net::Init(const NetParameter& in_param) { // the current NetState. 
NetParameter filtered_param; FilterNet(in_param, &filtered_param); - if (Caffe::root_solver()) { - LOG(INFO) << "Initializing net from parameters: " << std::endl - << filtered_param.DebugString(); - } + LOG_IF(INFO, Caffe::root_solver()) + << "Initializing net from parameters: " << std::endl + << filtered_param.DebugString(); // Create a copy of filtered_param with splits added where necessary. NetParameter param; InsertSplits(filtered_param, ¶m); @@ -73,8 +72,6 @@ void Net::Init(const NetParameter& in_param) { const int layer_id = -1; // inputs have fake layer ID -1 AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx); } - DLOG_IF(INFO, Caffe::root_solver()) - << "Memory required for data: " << memory_used_ * sizeof(Dtype); // For each layer, set up its input and output bottom_vecs_.resize(param.layer_size()); top_vecs_.resize(param.layer_size()); @@ -106,9 +103,8 @@ void Net::Init(const NetParameter& in_param) { layers_.push_back(LayerRegistry::CreateLayer(layer_param)); } layer_names_.push_back(layer_param.name()); - if (Caffe::root_solver()) { - LOG(INFO) << "Creating Layer " << layer_param.name(); - } + LOG_IF(INFO, Caffe::root_solver()) + << "Creating Layer " << layer_param.name(); bool need_backward = false; // Figure out this layer's input and output @@ -151,29 +147,23 @@ void Net::Init(const NetParameter& in_param) { } else { layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); } - if (Caffe::root_solver()) { - LOG(INFO) << "Setting up " << layer_names_[layer_id]; - } + LOG_IF(INFO, Caffe::root_solver()) + << "Setting up " << layer_names_[layer_id]; for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); } blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - if (Caffe::root_solver()) { - LOG(INFO) << "Top shape: " - << top_vecs_[layer_id][top_id]->shape_string(); - } + LOG_IF(INFO, Caffe::root_solver()) + << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); if (layer->loss(top_id)) { - if (Caffe::root_solver()) { - LOG(INFO) << " with loss weight " << layer->loss(top_id); - } + LOG_IF(INFO, Caffe::root_solver()) + << " with loss weight " << layer->loss(top_id); } memory_used_ += top_vecs_[layer_id][top_id]->count(); } - if (Caffe::root_solver()) { - DLOG(INFO) << "Memory required for data: " - << memory_used_ * sizeof(Dtype); - } + LOG_IF(INFO, Caffe::root_solver()) + << "Memory required for data: " << memory_used_ * sizeof(Dtype); const int param_size = layer_param.param_size(); const int num_param_blobs = layers_[layer_id]->blobs().size(); CHECK_LE(param_size, num_param_blobs) @@ -231,14 +221,12 @@ void Net::Init(const NetParameter& in_param) { } } if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; } - if (layer_need_backward_[layer_id]) { - if (Caffe::root_solver()) { + if (Caffe::root_solver()) { + if (layer_need_backward_[layer_id]) { LOG(INFO) << layer_names_[layer_id] << " needs backward computation."; - } - } else { - if (Caffe::root_solver()) { + } else { LOG(INFO) << layer_names_[layer_id] - << " does not need backward computation."; + << " does not need backward computation."; } } for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size(); @@ -279,9 +267,8 @@ void Net::Init(const NetParameter& in_param) { // In the end, all remaining blobs are considered output blobs. 
for (set::iterator it = available_blobs.begin(); it != available_blobs.end(); ++it) { - if (Caffe::root_solver()) { - LOG(INFO) << "This network produces output " << *it; - } + LOG_IF(INFO, Caffe::root_solver()) + << "This network produces output " << *it; net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); net_output_blob_indices_.push_back(blob_name_to_idx[*it]); } @@ -293,10 +280,7 @@ void Net::Init(const NetParameter& in_param) { } ShareWeights(); debug_info_ = param.debug_info(); - if (Caffe::root_solver()) { - LOG(INFO) << "Network initialization done."; - LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); - } + LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done."; } template @@ -335,33 +319,30 @@ bool Net::StateMeetsRule(const NetState& state, // Check whether the rule is broken due to phase. if (rule.has_phase()) { if (rule.phase() != state.phase()) { - if (Caffe::root_solver()) { - LOG(INFO) << "The NetState phase (" << state.phase() - << ") differed from the phase (" << rule.phase() - << ") specified by a rule in layer " << layer_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << "The NetState phase (" << state.phase() + << ") differed from the phase (" << rule.phase() + << ") specified by a rule in layer " << layer_name; return false; } } // Check whether the rule is broken due to min level. if (rule.has_min_level()) { if (state.level() < rule.min_level()) { - if (Caffe::root_solver()) { - LOG(INFO) << "The NetState level (" << state.level() - << ") is above the min_level (" << rule.min_level() - << ") specified by a rule in layer " << layer_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << "The NetState level (" << state.level() + << ") is above the min_level (" << rule.min_level() + << ") specified by a rule in layer " << layer_name; return false; } } // Check whether the rule is broken due to max level. 
if (rule.has_max_level()) { if (state.level() > rule.max_level()) { - if (Caffe::root_solver()) { - LOG(INFO) << "The NetState level (" << state.level() - << ") is above the max_level (" << rule.max_level() - << ") specified by a rule in layer " << layer_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << "The NetState level (" << state.level() + << ") is above the max_level (" << rule.max_level() + << ") specified by a rule in layer " << layer_name; return false; } } @@ -374,10 +355,9 @@ bool Net::StateMeetsRule(const NetState& state, if (rule.stage(i) == state.stage(j)) { has_stage = true; } } if (!has_stage) { - if (Caffe::root_solver()) { - LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i) - << "' specified by a rule in layer " << layer_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << "The NetState did not contain stage '" << rule.stage(i) + << "' specified by a rule in layer " << layer_name; return false; } } @@ -390,10 +370,9 @@ bool Net::StateMeetsRule(const NetState& state, if (rule.not_stage(i) == state.stage(j)) { has_stage = true; } } if (has_stage) { - if (Caffe::root_solver()) { - LOG(INFO) << "The NetState contained a not_stage '" << rule.not_stage(i) - << "' specified by a rule in layer " << layer_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << "The NetState contained a not_stage '" << rule.not_stage(i) + << "' specified by a rule in layer " << layer_name; return false; } } @@ -415,9 +394,8 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, if (blob_name_to_idx && layer_param && layer_param->bottom_size() > top_id && blob_name == layer_param->bottom(top_id)) { // In-place computation - if (Caffe::root_solver()) { - LOG(INFO) << layer_param->name() << " -> " << blob_name << " (in-place)"; - } + LOG_IF(INFO, Caffe::root_solver()) + << layer_param->name() << " -> " << blob_name << " (in-place)"; top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get()); top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]); } else if (blob_name_to_idx && @@ -473,9 +451,8 @@ int Net::AppendBottom(const NetParameter& param, const int layer_id, << layer_param.name() << "', bottom index " << bottom_id << ")"; } const int blob_id = (*blob_name_to_idx)[blob_name]; - if (Caffe::root_solver()) { - LOG(INFO) << layer_names_[layer_id] << " <- " << blob_name; - } + LOG_IF(INFO, Caffe::root_solver()) + << layer_names_[layer_id] << " <- " << blob_name; bottom_vecs_[layer_id].push_back(blobs_[blob_id].get()); bottom_id_vecs_[layer_id].push_back(blob_id); available_blobs->erase(blob_name); @@ -672,10 +649,9 @@ void Net::InputDebugInfo(const int input_id) { const Blob& blob = *net_input_blobs_[input_id]; const string& blob_name = blob_names_[net_input_blob_indices_[input_id]]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Forward] " - << "Input " << blob_name << " data: " << data_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Forward] " + << "Input " << blob_name << " data: " << data_abs_val_mean; } template @@ -684,12 +660,11 @@ void Net::ForwardDebugInfo(const int layer_id) { const Blob& blob = *top_vecs_[layer_id][top_id]; const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Forward] " - << "Layer " << layer_names_[layer_id] - << ", top blob " << blob_name - << " data: " << data_abs_val_mean; - } + LOG_IF(INFO, 
Caffe::root_solver()) + << " [Forward] " + << "Layer " << layer_names_[layer_id] + << ", top blob " << blob_name + << " data: " << data_abs_val_mean; } for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) { @@ -697,12 +672,11 @@ void Net::ForwardDebugInfo(const int layer_id) { const int net_param_id = param_id_vecs_[layer_id][param_id]; const string& blob_name = param_display_names_[net_param_id]; const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Forward] " - << "Layer " << layer_names_[layer_id] - << ", param blob " << blob_name - << " data: " << data_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Forward] " + << "Layer " << layer_names_[layer_id] + << ", param blob " << blob_name + << " data: " << data_abs_val_mean; } } @@ -714,24 +688,22 @@ void Net::BackwardDebugInfo(const int layer_id) { const Blob& blob = *bottom_vec[bottom_id]; const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Backward] " - << "Layer " << layer_names_[layer_id] - << ", bottom blob " << blob_name - << " diff: " << diff_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Backward] " + << "Layer " << layer_names_[layer_id] + << ", bottom blob " << blob_name + << " diff: " << diff_abs_val_mean; } for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) { if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; } const Blob& blob = *layers_[layer_id]->blobs()[param_id]; const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Backward] " - << "Layer " << layer_names_[layer_id] - << ", param blob " << param_id - << " diff: " << diff_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Backward] " + << "Layer " << layer_names_[layer_id] + << ", param blob " << param_id + << " diff: " << diff_abs_val_mean; } } @@ -744,22 +716,20 @@ void Net::UpdateDebugInfo(const int param_id) { const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); if (param_owner < 0) { const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - if (Caffe::root_solver()) { - LOG(INFO) << " [Update] Layer " << layer_name - << ", param " << param_display_name - << " data: " << data_abs_val_mean - << "; diff: " << diff_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Update] Layer " << layer_name + << ", param " << param_display_name + << " data: " << data_abs_val_mean + << "; diff: " << diff_abs_val_mean; } else { const string& owner_layer_name = layer_names_[param_layer_indices_[param_owner].first]; - if (Caffe::root_solver()) { - LOG(INFO) << " [Update] Layer " << layer_name - << ", param blob " << param_display_name - << " (owned by layer " << owner_layer_name << ", " << "param " - << param_display_names_[param_owners_[param_id]] << ")" - << " diff: " << diff_abs_val_mean; - } + LOG_IF(INFO, Caffe::root_solver()) + << " [Update] Layer " << layer_name + << ", param blob " << param_display_name + << " (owned by layer " << owner_layer_name << ", " << "param " + << param_display_names_[param_owners_[param_id]] << ")" + << " diff: " << diff_abs_val_mean; } } From 4c93b3dc555891ae0ad75092b6c0f77508740ecf Mon Sep 17 00:00:00 2001 From: Mausoom Sarkar Date: Tue, 13 Oct 2015 18:35:32 +0530 Subject: [PATCH 141/223] Moved the loop inside PReLUParamBackward to do the reduction inside 
the kernel Now PReLU backward is taking the same time as forward Code cleanup Removed unnecessary code Fixed indent merge if(channed_shared_) --- src/caffe/layers/prelu_layer.cu | 44 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu index e1f20048..1225334f 100644 --- a/src/caffe/layers/prelu_layer.cu +++ b/src/caffe/layers/prelu_layer.cu @@ -31,10 +31,15 @@ __global__ void PReLUBackward(const int n, const int channels, const int dim, // CUDA kernel for element-wise parameter backward template -__global__ void PReLUParamBackward(const int n, const Dtype* in_diff, +__global__ void PReLUParamBackward(const int n, + const int rows, const int rowPitch, const Dtype* in_diff, const Dtype* in_data, Dtype* out_diff) { CUDA_KERNEL_LOOP(index, n) { out_diff[index] = in_diff[index] * in_data[index] * (in_data[index] <= 0); + for ( int k = 1; k < rows; k++ ) { + out_diff[index] += in_diff[index + k*rowPitch] + * in_data[index + k*rowPitch] * (in_data[index + k*rowPitch] <= 0); + } } } @@ -82,29 +87,24 @@ void PReLULayer::Backward_gpu(const vector*>& top, if (this->param_propagate_down_[0]) { Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff(); int cdim = channels * dim; - Dtype dsum = 0.; - for (int n = 0; n < bottom[0]->num(); ++n) { - // compute element-wise diff - // NOLINT_NEXT_LINE(whitespace/operators) - PReLUParamBackward<<>>( - cdim, top_diff + top[0]->offset(n), - bottom_data + bottom[0]->offset(n), - backward_buff_.mutable_gpu_diff()); - CUDA_POST_KERNEL_CHECK; - if (channel_shared_) { - Dtype d; - caffe_gpu_dot(channels * dim, backward_buff_.gpu_diff(), - multiplier_.gpu_data(), &d); - dsum += d; - } else { - caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., - backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1., - slope_diff); - } - } + + // compute element-wise diff + // NOLINT_NEXT_LINE(whitespace/operators) + PReLUParamBackward<<>>( + cdim, bottom[0]->num(), top[0]->offset(1), top_diff , + bottom_data , + backward_buff_.mutable_gpu_diff()); + CUDA_POST_KERNEL_CHECK; if (channel_shared_) { + Dtype dsum; + caffe_gpu_dot(channels * dim, backward_buff_.gpu_diff(), + multiplier_.gpu_data(), &dsum); caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(dsum), slope_diff); + } else { + caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., + backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1., + slope_diff); } } // Propagate to bottom From a7d84f3c7e2db7f400c933349edcd4bcf46903b8 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Mon, 19 Oct 2015 18:19:38 +0800 Subject: [PATCH 142/223] Qualify messages issued by CMake when CUDA is unavailable --- cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index d68d7bfb..2005b992 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -55,9 +55,9 @@ endif() include(cmake/Cuda.cmake) if(NOT HAVE_CUDA) if(CPU_ONLY) - message("-- CUDA is disabled. Building without it...") + message(STATUS "-- CUDA is disabled. Building without it...") else() - message("-- CUDA is not detected by cmake. Building without it...") + message(WARNING "-- CUDA is not detected by cmake. Building without it...") endif() # TODO: remove this not cross platform define in future. Use caffe_config.h instead. 
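The speedup in the PReLU change above comes from collapsing the per-image loop of kernel launches into a single launch: each thread owns one channel-by-spatial position and walks the batch itself. The per-thread accumulation it performs amounts to the following sketch, where rowPitch is top[0]->offset(1), the stride between consecutive images.

    // index ranges over channels * dim; k ranges over the images in the batch.
    Dtype sum = 0;
    for (int k = 0; k < num; ++k) {
      const int off = index + k * rowPitch;
      sum += in_diff[off] * in_data[off] * (in_data[off] <= 0);
    }
    out_diff[index] = sum;  // then reduced over dim (gemv) or all of cdim (dot)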
From 52429c77cb84b06bf7564f5df619f9f489fe5f72 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 19 Oct 2015 11:36:38 -0700 Subject: [PATCH 143/223] installation questions -> caffe-users --- INSTALL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 42fcf027..05c714db 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -3,5 +3,5 @@ See http://caffe.berkeleyvision.org/installation.html for the latest installation instructions. -Check the issue tracker in case you need help: -https://github.com/BVLC/caffe/issues +Check the users group in case you need help: +https://groups.google.com/forum/#!forum/caffe-users From 2aabba4f8e33a1d0d474a17fff445e9d12201be4 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 19 Oct 2015 11:39:29 -0700 Subject: [PATCH 144/223] [docs] cuDNN v3 compatible --- docs/installation.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 89a8c71c..cce7ec35 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -30,13 +30,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) +* cuDNN for GPU acceleration (v3) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. For now cuDNN v1 is integrated but see [PR #1731](https://github.com/BVLC/caffe/pull/1731) for v2. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v3; older versions are supported in older Caffe. **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. 
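As a rough sketch of what the CPU_ONLY switch means at the source level (the exact macros live in the device abstraction headers and are not shown here), GPU-only calls are simply compiled out and the CPU path is taken unconditionally:

    int device_count = 0;
    #ifdef CPU_ONLY
      LOG(INFO) << "Caffe was built without CUDA; staying in CPU mode.";
    #else
      CUDA_CHECK(cudaGetDeviceCount(&device_count));
      LOG(INFO) << "Found " << device_count << " CUDA device(s).";
    #endif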
From 33a8ba64145e308aefefd5997d06ad53038c4f21 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 16 Oct 2015 17:13:24 -0700 Subject: [PATCH 145/223] [test] drop bogus OpenCV guard for layer type --- src/caffe/test/test_upgrade_proto.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index df9aeb62..23deddd4 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -2892,7 +2892,6 @@ TEST_F(NetUpgradeTest, TestImageNet) { this->RunV1UpgradeTest(expected_v1_proto, expected_v2_proto); } // NOLINT(readability/fn_size) -#ifdef USE_OPENCV TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { LayerParameter layer_param; shared_ptr > layer; @@ -2927,7 +2926,6 @@ TEST_F(NetUpgradeTest, TestUpgradeV1LayerType) { EXPECT_EQ(v2_layer_type, layer->type()); } } -#endif // USE_OPENCV class SolverTypeUpgradeTest : public ::testing::Test { protected: From 1caaf38370a6dd1bd7bc91fe3b5242ae63be6a22 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Mon, 19 Oct 2015 18:13:57 +0800 Subject: [PATCH 146/223] Endorse CMP0046, CMP0054 Set policies to NEW to silence warnings in CMake 3.02 and later. --- CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37f937fe..82742daf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,10 @@ cmake_minimum_required(VERSION 2.8.7) +if(POLICY CMP0046) + cmake_policy(SET CMP0046 NEW) +endif() +if(POLICY CMP0054) + cmake_policy(SET CMP0054 NEW) +endif() # ---[ Caffe project project(Caffe C CXX) @@ -66,8 +72,10 @@ add_subdirectory(docs) add_custom_target(lint COMMAND ${CMAKE_COMMAND} -P ${PROJECT_SOURCE_DIR}/cmake/lint.cmake) # ---[ pytest target -add_custom_target(pytest COMMAND python${python_version} -m unittest discover -s caffe/test WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/python ) -add_dependencies(pytest pycaffe) +if(BUILD_python) + add_custom_target(pytest COMMAND python${python_version} -m unittest discover -s caffe/test WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/python ) + add_dependencies(pytest pycaffe) +endif() # ---[ Configuration summary caffe_print_configuration_summary() From 93212e61aa9382762954a01c62f9f0a96d9ff00d Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Sun, 18 Oct 2015 16:52:19 +0900 Subject: [PATCH 147/223] Move HDF5 defines to data_layers header --- include/caffe/data_layers.hpp | 3 +++ include/caffe/neuron_layers.hpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 90fd0d19..aa0ab7df 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -17,6 +17,9 @@ #include "caffe/util/blocking_queue.hpp" #include "caffe/util/db.hpp" +#define HDF5_DATA_DATASET_NAME "data" +#define HDF5_DATA_LABEL_NAME "label" + namespace caffe { /** diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index c2e0774a..4fa330ec 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -10,9 +10,6 @@ #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" -#define HDF5_DATA_DATASET_NAME "data" -#define HDF5_DATA_LABEL_NAME "label" - namespace caffe { /** From 80d045263f26c41a1e886906a30d649a5c812038 Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Mon, 19 Oct 2015 00:58:55 +0900 Subject: [PATCH 148/223] Clean redundant/unnecessary headers --- include/caffe/blob.hpp | 1 - include/caffe/common_layers.hpp | 5 ----- 
include/caffe/filler.hpp | 1 - include/caffe/layer.hpp | 2 +- include/caffe/loss_layers.hpp | 1 - include/caffe/syncedmem.hpp | 1 - include/caffe/util/blocking_queue.hpp | 2 -- include/caffe/util/io.hpp | 1 - src/caffe/layers/absval_layer.cpp | 1 - src/caffe/layers/absval_layer.cu | 3 +-- src/caffe/layers/accuracy_layer.cpp | 5 +---- src/caffe/layers/argmax_layer.cpp | 3 +-- src/caffe/layers/base_conv_layer.cpp | 1 - src/caffe/layers/base_data_layer.cpp | 3 --- src/caffe/layers/batch_reindex_layer.cpp | 3 +-- src/caffe/layers/batch_reindex_layer.cu | 3 +-- src/caffe/layers/bnll_layer.cpp | 3 +-- src/caffe/layers/bnll_layer.cu | 3 +-- src/caffe/layers/concat_layer.cpp | 3 +-- src/caffe/layers/concat_layer.cu | 3 +-- src/caffe/layers/contrastive_loss_layer.cpp | 2 -- src/caffe/layers/contrastive_loss_layer.cu | 4 +--- src/caffe/layers/conv_layer.cpp | 4 ---- src/caffe/layers/conv_layer.cu | 4 ---- src/caffe/layers/cudnn_conv_layer.cpp | 4 ---- src/caffe/layers/cudnn_conv_layer.cu | 4 ---- src/caffe/layers/cudnn_lcn_layer.cpp | 4 ---- src/caffe/layers/cudnn_lcn_layer.cu | 4 ---- src/caffe/layers/cudnn_lrn_layer.cpp | 4 ---- src/caffe/layers/cudnn_lrn_layer.cu | 4 ---- src/caffe/layers/cudnn_pooling_layer.cpp | 4 ---- src/caffe/layers/cudnn_pooling_layer.cu | 4 ---- src/caffe/layers/cudnn_relu_layer.cpp | 2 -- src/caffe/layers/cudnn_relu_layer.cu | 2 -- src/caffe/layers/cudnn_sigmoid_layer.cpp | 2 -- src/caffe/layers/cudnn_sigmoid_layer.cu | 2 -- src/caffe/layers/cudnn_softmax_layer.cpp | 4 ---- src/caffe/layers/cudnn_softmax_layer.cu | 4 ---- src/caffe/layers/cudnn_tanh_layer.cpp | 4 +--- src/caffe/layers/cudnn_tanh_layer.cu | 4 +--- src/caffe/layers/data_layer.cpp | 4 ---- src/caffe/layers/deconv_layer.cpp | 4 ---- src/caffe/layers/deconv_layer.cu | 4 ---- src/caffe/layers/dropout_layer.cpp | 5 +---- src/caffe/layers/dropout_layer.cu | 7 +------ src/caffe/layers/dummy_data_layer.cpp | 3 +-- src/caffe/layers/eltwise_layer.cpp | 3 +-- src/caffe/layers/eltwise_layer.cu | 3 +-- src/caffe/layers/embed_layer.cpp | 3 --- src/caffe/layers/embed_layer.cu | 3 --- src/caffe/layers/euclidean_loss_layer.cpp | 4 +--- src/caffe/layers/euclidean_loss_layer.cu | 4 +--- src/caffe/layers/exp_layer.cpp | 4 +--- src/caffe/layers/exp_layer.cu | 4 +--- src/caffe/layers/filter_layer.cpp | 4 +--- src/caffe/layers/filter_layer.cu | 3 +-- src/caffe/layers/flatten_layer.cpp | 4 +--- src/caffe/layers/hdf5_data_layer.cpp | 1 - src/caffe/layers/hdf5_data_layer.cu | 3 --- src/caffe/layers/hdf5_output_layer.cpp | 5 +---- src/caffe/layers/hdf5_output_layer.cu | 5 +---- src/caffe/layers/hinge_loss_layer.cpp | 6 +----- src/caffe/layers/im2col_layer.cpp | 2 -- src/caffe/layers/im2col_layer.cu | 2 -- src/caffe/layers/image_data_layer.cpp | 1 - src/caffe/layers/infogain_loss_layer.cpp | 5 +---- src/caffe/layers/inner_product_layer.cpp | 5 +---- src/caffe/layers/inner_product_layer.cu | 5 +---- src/caffe/layers/log_layer.cpp | 2 -- src/caffe/layers/log_layer.cu | 2 -- src/caffe/layers/loss_layer.cpp | 8 +------- src/caffe/layers/lrn_layer.cpp | 1 - src/caffe/layers/lrn_layer.cu | 1 - src/caffe/layers/memory_data_layer.cpp | 2 -- src/caffe/layers/multinomial_logistic_loss_layer.cpp | 5 +---- src/caffe/layers/mvn_layer.cpp | 2 -- src/caffe/layers/mvn_layer.cu | 2 -- src/caffe/layers/neuron_layer.cpp | 3 +-- src/caffe/layers/pooling_layer.cpp | 3 --- src/caffe/layers/pooling_layer.cu | 1 - src/caffe/layers/power_layer.cpp | 4 +--- src/caffe/layers/power_layer.cu | 4 +--- src/caffe/layers/prelu_layer.cpp | 3 +-- 
src/caffe/layers/prelu_layer.cu | 3 +-- src/caffe/layers/reduction_layer.cpp | 5 +---- src/caffe/layers/reduction_layer.cu | 4 +--- src/caffe/layers/relu_layer.cpp | 3 +-- src/caffe/layers/relu_layer.cu | 3 +-- src/caffe/layers/reshape_layer.cpp | 1 - src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp | 5 +---- src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu | 5 +---- src/caffe/layers/sigmoid_layer.cpp | 4 +--- src/caffe/layers/sigmoid_layer.cu | 4 +--- src/caffe/layers/silence_layer.cpp | 1 - src/caffe/layers/silence_layer.cu | 1 - src/caffe/layers/slice_layer.cpp | 3 +-- src/caffe/layers/slice_layer.cu | 3 +-- src/caffe/layers/softmax_layer.cpp | 3 +-- src/caffe/layers/softmax_layer.cu | 3 +-- src/caffe/layers/softmax_loss_layer.cpp | 4 +--- src/caffe/layers/softmax_loss_layer.cu | 3 +-- src/caffe/layers/split_layer.cpp | 3 +-- src/caffe/layers/split_layer.cu | 3 +-- src/caffe/layers/spp_layer.cpp | 5 ----- src/caffe/layers/tanh_layer.cpp | 4 +--- src/caffe/layers/tanh_layer.cu | 4 +--- src/caffe/layers/threshold_layer.cpp | 3 +-- src/caffe/layers/threshold_layer.cu | 4 +--- src/caffe/layers/tile_layer.cpp | 1 - src/caffe/layers/tile_layer.cu | 1 - src/caffe/layers/window_data_layer.cpp | 2 -- src/caffe/parallel.cpp | 1 - src/caffe/syncedmem.cpp | 2 -- src/caffe/test/test_accuracy_layer.cpp | 4 +--- src/caffe/test/test_argmax_layer.cpp | 2 +- src/caffe/test/test_batch_reindex_layer.cpp | 3 +-- src/caffe/test/test_blob.cpp | 1 - src/caffe/test/test_common.cpp | 2 -- src/caffe/test/test_concat_layer.cpp | 3 +-- src/caffe/test/test_contrastive_loss_layer.cpp | 4 +--- src/caffe/test/test_convolution_layer.cpp | 1 - src/caffe/test/test_deconvolution_layer.cpp | 1 - src/caffe/test/test_eltwise_layer.cpp | 2 +- src/caffe/test/test_embed_layer.cpp | 3 +-- src/caffe/test/test_euclidean_loss_layer.cpp | 4 +--- src/caffe/test/test_filler.cpp | 2 -- src/caffe/test/test_filter_layer.cpp | 4 +--- src/caffe/test/test_flatten_layer.cpp | 3 +-- src/caffe/test/test_hdf5_output_layer.cpp | 2 +- src/caffe/test/test_hdf5data_layer.cpp | 3 +-- src/caffe/test/test_hinge_loss_layer.cpp | 4 +--- src/caffe/test/test_im2col_kernel.cu | 1 - src/caffe/test/test_im2col_layer.cpp | 1 - src/caffe/test/test_image_data_layer.cpp | 2 +- src/caffe/test/test_infogain_loss_layer.cpp | 3 --- src/caffe/test/test_inner_product_layer.cpp | 3 +-- src/caffe/test/test_lrn_layer.cpp | 1 - src/caffe/test/test_math_functions.cpp | 2 -- src/caffe/test/test_maxpool_dropout_layers.cpp | 1 - src/caffe/test/test_multinomial_logistic_loss_layer.cpp | 5 +---- src/caffe/test/test_mvn_layer.cpp | 2 -- src/caffe/test/test_neuron_layer.cpp | 4 ++-- src/caffe/test/test_pooling_layer.cpp | 1 - src/caffe/test/test_power_layer.cpp | 2 +- src/caffe/test/test_random_number_generator.cpp | 1 - src/caffe/test/test_reduction_layer.cpp | 3 +-- src/caffe/test/test_reshape_layer.cpp | 1 - src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp | 4 +--- src/caffe/test/test_slice_layer.cpp | 3 +-- src/caffe/test/test_softmax_layer.cpp | 3 +-- src/caffe/test/test_softmax_with_loss_layer.cpp | 4 +--- src/caffe/test/test_split_layer.cpp | 3 +-- src/caffe/test/test_spp_layer.cpp | 2 -- src/caffe/test/test_stochastic_pooling.cpp | 1 - src/caffe/test/test_syncedmem.cpp | 1 - src/caffe/test/test_tanh_layer.cpp | 2 +- src/caffe/test/test_threshold_layer.cpp | 2 +- src/caffe/test/test_tile_layer.cpp | 3 +-- src/caffe/test/test_upgrade_proto.cpp | 1 - src/caffe/test/test_util_blas.cpp | 2 -- src/caffe/util/im2col.cpp | 3 --- src/caffe/util/im2col.cu | 3 --- 
src/caffe/util/math_functions.cu | 2 -- 163 files changed, 86 insertions(+), 392 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index fea5117e..af360ac2 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -8,7 +8,6 @@ #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/syncedmem.hpp" -#include "caffe/util/math_functions.hpp" const int kMaxBlobAxes = 32; diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 21a27d75..95358d4c 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -1,16 +1,11 @@ #ifndef CAFFE_COMMON_LAYERS_HPP_ #define CAFFE_COMMON_LAYERS_HPP_ -#include #include #include #include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/data_layers.hpp" #include "caffe/layer.hpp" -#include "caffe/loss_layers.hpp" -#include "caffe/neuron_layers.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 888f4a4b..dad9ad46 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -8,7 +8,6 @@ #include #include "caffe/blob.hpp" -#include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index a0d1d4ec..10f353f9 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -9,7 +9,7 @@ #include "caffe/common.hpp" #include "caffe/layer_factory.hpp" #include "caffe/proto/caffe.pb.h" -#include "caffe/util/device_alternate.hpp" +#include "caffe/util/math_functions.hpp" /** Forward declare boost::thread instead of including boost/thread.hpp diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 8d41af34..d08ad9b6 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -6,7 +6,6 @@ #include #include "caffe/blob.hpp" -#include "caffe/common.hpp" #include "caffe/layer.hpp" #include "caffe/neuron_layers.hpp" #include "caffe/proto/caffe.pb.h" diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 3d92a0ea..38ee4664 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -4,7 +4,6 @@ #include #include "caffe/common.hpp" -#include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/include/caffe/util/blocking_queue.hpp b/include/caffe/util/blocking_queue.hpp index 955e12cc..d3de2e59 100644 --- a/include/caffe/util/blocking_queue.hpp +++ b/include/caffe/util/blocking_queue.hpp @@ -4,8 +4,6 @@ #include #include -#include "caffe/common.hpp" - namespace caffe { template diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 6070b4c7..d6cfa442 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -6,7 +6,6 @@ #include "google/protobuf/message.h" -#include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" diff --git a/src/caffe/layers/absval_layer.cpp b/src/caffe/layers/absval_layer.cpp index 5ce28c9e..7e552352 100644 --- a/src/caffe/layers/absval_layer.cpp +++ b/src/caffe/layers/absval_layer.cpp @@ -1,6 +1,5 @@ #include -#include "caffe/layer.hpp" #include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/absval_layer.cu b/src/caffe/layers/absval_layer.cu index bb310e1a..b5a6c25a 100644 --- a/src/caffe/layers/absval_layer.cu +++ b/src/caffe/layers/absval_layer.cu @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" 
+#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index e2d8d9f8..ae2df1f1 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -1,12 +1,9 @@ -#include #include #include #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index 0c0a932d..44df8d4e 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -3,8 +3,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/common_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index c6b47550..316cb0fd 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -2,7 +2,6 @@ #include #include "caffe/filler.hpp" -#include "caffe/layer.hpp" #include "caffe/util/im2col.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index b90bd4e0..d77f91c9 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -1,10 +1,7 @@ #include -#include #include #include "caffe/data_layers.hpp" -#include "caffe/net.hpp" -#include "caffe/util/io.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_reindex_layer.cpp b/src/caffe/layers/batch_reindex_layer.cpp index 3bf757c7..3d3ce32c 100644 --- a/src/caffe/layers/batch_reindex_layer.cpp +++ b/src/caffe/layers/batch_reindex_layer.cpp @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_reindex_layer.cu b/src/caffe/layers/batch_reindex_layer.cu index c418cab9..0b5ccf09 100644 --- a/src/caffe/layers/batch_reindex_layer.cu +++ b/src/caffe/layers/batch_reindex_layer.cu @@ -2,9 +2,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp index 9ba0ea9a..1e422a54 100644 --- a/src/caffe/layers/bnll_layer.cpp +++ b/src/caffe/layers/bnll_layer.cpp @@ -1,8 +1,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/bnll_layer.cu b/src/caffe/layers/bnll_layer.cu index d963d068..3e328ef7 100644 --- a/src/caffe/layers/bnll_layer.cu +++ b/src/caffe/layers/bnll_layer.cu @@ -1,8 +1,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 86b500de..14cbfb11 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff 
--git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 617701e2..e1e9449e 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 25e16781..74002087 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -1,9 +1,7 @@ #include #include -#include "caffe/layer.hpp" #include "caffe/loss_layers.hpp" -#include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index 93123931..ee278407 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -1,10 +1,8 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index fb50bb09..efd69d45 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -1,9 +1,5 @@ #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index b429d2b4..a534b356 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -1,9 +1,5 @@ #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index d7b1e0d6..8b61249a 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -2,10 +2,6 @@ #include #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index e88e4dd3..63b6ab9c 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lcn_layer.cpp b/src/caffe/layers/cudnn_lcn_layer.cpp index 866d810b..4c700786 100644 --- a/src/caffe/layers/cudnn_lcn_layer.cpp +++ b/src/caffe/layers/cudnn_lcn_layer.cpp @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lcn_layer.cu b/src/caffe/layers/cudnn_lcn_layer.cu index c07ade72..e79c7458 100644 --- a/src/caffe/layers/cudnn_lcn_layer.cu +++ b/src/caffe/layers/cudnn_lcn_layer.cu @@ -1,10 +1,6 
@@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lrn_layer.cpp b/src/caffe/layers/cudnn_lrn_layer.cpp index 6e992149..a03db3bd 100644 --- a/src/caffe/layers/cudnn_lrn_layer.cpp +++ b/src/caffe/layers/cudnn_lrn_layer.cpp @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lrn_layer.cu b/src/caffe/layers/cudnn_lrn_layer.cu index f9923033..327e44b4 100644 --- a/src/caffe/layers/cudnn_lrn_layer.cu +++ b/src/caffe/layers/cudnn_lrn_layer.cu @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_pooling_layer.cpp b/src/caffe/layers/cudnn_pooling_layer.cpp index c92c4e47..5f995d45 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cpp +++ b/src/caffe/layers/cudnn_pooling_layer.cpp @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu index a952b855..9aa39ed8 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cu +++ b/src/caffe/layers/cudnn_pooling_layer.cu @@ -1,10 +1,6 @@ #ifdef USE_CUDNN #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index 759d8398..e6b6d5a9 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -1,8 +1,6 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 21d14857..2a53a49b 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -1,8 +1,6 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index 32637873..4b489fa5 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -1,8 +1,6 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu index 7a06cf72..9de5c742 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -1,8 +1,6 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index 77a3225a..f5cd0450 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ 
b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -1,12 +1,8 @@ #ifdef USE_CUDNN -#include -#include #include #include "thrust/device_vector.h" -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_softmax_layer.cu b/src/caffe/layers/cudnn_softmax_layer.cu index a9e2fcef..c270202f 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cu +++ b/src/caffe/layers/cudnn_softmax_layer.cu @@ -1,12 +1,8 @@ #ifdef USE_CUDNN -#include -#include #include #include "thrust/device_vector.h" -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index 376faad3..46296818 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -1,9 +1,7 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index d287f6fe..84f784b3 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -1,9 +1,7 @@ #ifdef USE_CUDNN -#include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 71f8cb09..49ac858e 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -3,15 +3,11 @@ #endif // USE_OPENCV #include -#include #include -#include "caffe/common.hpp" #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/benchmark.hpp" -#include "caffe/util/io.hpp" namespace caffe { diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index 91aabb31..5038b638 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -1,9 +1,5 @@ #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 5dbdcc31..0e8e2ede 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -1,9 +1,5 @@ #include -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index ec1256fd..eb7a8a9a 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -2,11 +2,8 @@ #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/syncedmem.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index f9ea04f4..028fc026 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -1,12 +1,7 @@ -#include -#include #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/syncedmem.hpp" +#include "caffe/neuron_layers.hpp" #include 
"caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/dummy_data_layer.cpp b/src/caffe/layers/dummy_data_layer.cpp index 6b0d6174..ab0478c8 100644 --- a/src/caffe/layers/dummy_data_layer.cpp +++ b/src/caffe/layers/dummy_data_layer.cpp @@ -1,8 +1,7 @@ #include +#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index a8070073..7924fbee 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -1,9 +1,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/eltwise_layer.cu b/src/caffe/layers/eltwise_layer.cu index 2247870d..01404209 100644 --- a/src/caffe/layers/eltwise_layer.cu +++ b/src/caffe/layers/eltwise_layer.cu @@ -1,9 +1,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/embed_layer.cpp b/src/caffe/layers/embed_layer.cpp index be6b2cd2..52704a06 100644 --- a/src/caffe/layers/embed_layer.cpp +++ b/src/caffe/layers/embed_layer.cpp @@ -1,10 +1,7 @@ #include -#include "caffe/blob.hpp" -#include "caffe/common.hpp" #include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu index 62a4db81..cd4b40f5 100644 --- a/src/caffe/layers/embed_layer.cu +++ b/src/caffe/layers/embed_layer.cu @@ -1,10 +1,7 @@ #include -#include "caffe/blob.hpp" -#include "caffe/common.hpp" #include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/layer.hpp" #include "caffe/util/gpu_util.cuh" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp index 80efa31b..7338953d 100644 --- a/src/caffe/layers/euclidean_loss_layer.cpp +++ b/src/caffe/layers/euclidean_loss_layer.cpp @@ -1,9 +1,7 @@ #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/euclidean_loss_layer.cu b/src/caffe/layers/euclidean_loss_layer.cu index 5b1de3ad..1aa79bd5 100644 --- a/src/caffe/layers/euclidean_loss_layer.cu +++ b/src/caffe/layers/euclidean_loss_layer.cu @@ -1,9 +1,7 @@ #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/exp_layer.cpp b/src/caffe/layers/exp_layer.cpp index c7e7c60c..f85692d6 100644 --- a/src/caffe/layers/exp_layer.cpp +++ b/src/caffe/layers/exp_layer.cpp @@ -1,9 +1,7 @@ -#include #include -#include "caffe/layer.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/exp_layer.cu b/src/caffe/layers/exp_layer.cu index 2d75d8dd..9e24bbee 100644 --- a/src/caffe/layers/exp_layer.cu +++ b/src/caffe/layers/exp_layer.cu @@ -1,9 +1,7 @@ 
-#include #include -#include "caffe/layer.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/filter_layer.cpp b/src/caffe/layers/filter_layer.cpp index be1db32d..e8b62a5d 100644 --- a/src/caffe/layers/filter_layer.cpp +++ b/src/caffe/layers/filter_layer.cpp @@ -1,9 +1,7 @@ -#include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/filter_layer.cu b/src/caffe/layers/filter_layer.cu index cf929eee..746e91c9 100644 --- a/src/caffe/layers/filter_layer.cu +++ b/src/caffe/layers/filter_layer.cu @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index f7e5c9c2..d831fb5c 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -1,8 +1,6 @@ #include -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/common_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 8ced5103..c765fa02 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -15,7 +15,6 @@ #include "stdint.h" #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/hdf5.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 5e3e4ced..6ac499c6 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -4,15 +4,12 @@ TODO: */ #include -#include #include #include "hdf5.h" #include "hdf5_hl.h" #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_output_layer.cpp b/src/caffe/layers/hdf5_output_layer.cpp index 56788c21..dbde65da 100644 --- a/src/caffe/layers/hdf5_output_layer.cpp +++ b/src/caffe/layers/hdf5_output_layer.cpp @@ -3,11 +3,8 @@ #include "hdf5.h" #include "hdf5_hl.h" -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/layer.hpp" +#include "caffe/data_layers.hpp" #include "caffe/util/hdf5.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_output_layer.cu b/src/caffe/layers/hdf5_output_layer.cu index eb6d0e47..ca8f2616 100644 --- a/src/caffe/layers/hdf5_output_layer.cu +++ b/src/caffe/layers/hdf5_output_layer.cu @@ -3,10 +3,7 @@ #include "hdf5.h" #include "hdf5_hl.h" -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/data_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/hinge_loss_layer.cpp b/src/caffe/layers/hinge_loss_layer.cpp index a2fb2a18..a88c8775 100644 --- a/src/caffe/layers/hinge_loss_layer.cpp +++ b/src/caffe/layers/hinge_loss_layer.cpp @@ -1,12 +1,8 @@ #include -#include -#include #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 
595c9dbb..f3b0f710 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -1,7 +1,5 @@ #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" #include "caffe/util/im2col.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index cd507623..4633628b 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -1,7 +1,5 @@ #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" #include "caffe/util/im2col.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 3d2190f8..9a7df5a7 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -8,7 +8,6 @@ #include #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/infogain_loss_layer.cpp b/src/caffe/layers/infogain_loss_layer.cpp index a1e0b40d..88bd8aaf 100644 --- a/src/caffe/layers/infogain_loss_layer.cpp +++ b/src/caffe/layers/infogain_loss_layer.cpp @@ -1,12 +1,9 @@ #include -#include #include #include -#include "caffe/layer.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 83c3235e..274744ea 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -1,11 +1,8 @@ #include -#include "caffe/blob.hpp" -#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index c0ebd2c4..e91e94fc 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ b/src/caffe/layers/inner_product_layer.cu @@ -1,11 +1,8 @@ #include -#include "caffe/blob.hpp" -#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/log_layer.cpp b/src/caffe/layers/log_layer.cpp index 55a227f6..a1876b9d 100644 --- a/src/caffe/layers/log_layer.cpp +++ b/src/caffe/layers/log_layer.cpp @@ -1,7 +1,5 @@ -#include #include -#include "caffe/layer.hpp" #include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/log_layer.cu b/src/caffe/layers/log_layer.cu index 847c86cd..055b713b 100644 --- a/src/caffe/layers/log_layer.cu +++ b/src/caffe/layers/log_layer.cu @@ -1,7 +1,5 @@ -#include #include -#include "caffe/layer.hpp" #include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index 3496a5c2..c10466db 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -1,12 +1,6 @@ -#include -#include -#include #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" namespace caffe { diff --git 
a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index d18a04ef..cc561811 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -1,6 +1,5 @@ #include -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index 001b3c34..4523d410 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -1,6 +1,5 @@ #include -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index 2370aa04..13a3d9f6 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -5,8 +5,6 @@ #include #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" namespace caffe { diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp index 4267a594..59745923 100644 --- a/src/caffe/layers/multinomial_logistic_loss_layer.cpp +++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp @@ -1,12 +1,9 @@ #include -#include #include #include -#include "caffe/layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index 61c2141e..0e730144 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -1,8 +1,6 @@ -#include #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index 5cbb112d..b7e3b3ce 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -1,8 +1,6 @@ -#include #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/neuron_layer.cpp b/src/caffe/layers/neuron_layer.cpp index ba67b438..1dcb2c06 100644 --- a/src/caffe/layers/neuron_layer.cpp +++ b/src/caffe/layers/neuron_layer.cpp @@ -1,7 +1,6 @@ #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index c8d41499..3a7de42c 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -2,9 +2,6 @@ #include #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index ca4b13f7..5e94ce2b 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -2,7 +2,6 @@ #include #include -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" diff --git a/src/caffe/layers/power_layer.cpp b/src/caffe/layers/power_layer.cpp index 4fe34c49..6304fadd 100644 --- a/src/caffe/layers/power_layer.cpp +++ b/src/caffe/layers/power_layer.cpp @@ -1,9 +1,7 @@ -#include #include -#include "caffe/layer.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" 
-#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/power_layer.cu b/src/caffe/layers/power_layer.cu index 90d94405..680faad4 100644 --- a/src/caffe/layers/power_layer.cu +++ b/src/caffe/layers/power_layer.cu @@ -1,9 +1,7 @@ -#include #include -#include "caffe/layer.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/prelu_layer.cpp b/src/caffe/layers/prelu_layer.cpp index 81831755..b5a294e1 100644 --- a/src/caffe/layers/prelu_layer.cpp +++ b/src/caffe/layers/prelu_layer.cpp @@ -2,8 +2,7 @@ #include #include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu index 1225334f..992cd885 100644 --- a/src/caffe/layers/prelu_layer.cu +++ b/src/caffe/layers/prelu_layer.cu @@ -1,8 +1,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/reduction_layer.cpp b/src/caffe/layers/reduction_layer.cpp index 8ae6329e..6b7925e3 100644 --- a/src/caffe/layers/reduction_layer.cpp +++ b/src/caffe/layers/reduction_layer.cpp @@ -1,10 +1,7 @@ -#include -#include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/reduction_layer.cu b/src/caffe/layers/reduction_layer.cu index 2dbd3bc9..a9a8c8d9 100644 --- a/src/caffe/layers/reduction_layer.cu +++ b/src/caffe/layers/reduction_layer.cu @@ -1,9 +1,7 @@ -#include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index cc00319a..93d09026 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -1,8 +1,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index b8924c85..c18ab61f 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -1,8 +1,7 @@ #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index ffe970f2..8659049b 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -1,7 +1,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index cc236fe1..98588637 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -1,10 +1,7 @@ -#include -#include #include -#include "caffe/layer.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 547fa80c..48dbec41 100644 
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -1,10 +1,7 @@ -#include -#include #include -#include "caffe/layer.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp index 48c38490..d4a3f877 100644 --- a/src/caffe/layers/sigmoid_layer.cpp +++ b/src/caffe/layers/sigmoid_layer.cpp @@ -1,9 +1,7 @@ -#include #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu index e1af0657..5730636e 100644 --- a/src/caffe/layers/sigmoid_layer.cu +++ b/src/caffe/layers/sigmoid_layer.cu @@ -1,9 +1,7 @@ -#include #include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/silence_layer.cpp b/src/caffe/layers/silence_layer.cpp index 7e70ab43..3974f5d4 100644 --- a/src/caffe/layers/silence_layer.cpp +++ b/src/caffe/layers/silence_layer.cpp @@ -1,7 +1,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/silence_layer.cu b/src/caffe/layers/silence_layer.cu index 34faef22..c49ecb23 100644 --- a/src/caffe/layers/silence_layer.cu +++ b/src/caffe/layers/silence_layer.cu @@ -1,7 +1,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index 0a059ae8..f368a249 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -1,9 +1,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index e8dc6cd9..d555f7d0 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp index 04712c9e..8ae7d49c 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -1,9 +1,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index 1f9c3a41..a620fcc8 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -4,9 +4,8 @@ #include "thrust/device_vector.h" -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index ba312f67..dee50ac6 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -2,10 +2,8 @@ #include #include -#include 
"caffe/layer.hpp" -#include "caffe/layer_factory.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 7e0f3da4..42e91fa9 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -2,9 +2,8 @@ #include #include -#include "caffe/layer.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index 272cb59c..5333e578 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/split_layer.cu b/src/caffe/layers/split_layer.cu index a4f5df26..73d04c98 100644 --- a/src/caffe/layers/split_layer.cu +++ b/src/caffe/layers/split_layer.cu @@ -1,8 +1,7 @@ #include -#include "caffe/layer.hpp" +#include "caffe/common_layers.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/spp_layer.cpp b/src/caffe/layers/spp_layer.cpp index d7622910..2ef4ac7a 100644 --- a/src/caffe/layers/spp_layer.cpp +++ b/src/caffe/layers/spp_layer.cpp @@ -1,11 +1,6 @@ #include -#include #include -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/syncedmem.hpp" -#include "caffe/util/math_functions.hpp" #include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp index ee5ed773..9d1cac76 100644 --- a/src/caffe/layers/tanh_layer.cpp +++ b/src/caffe/layers/tanh_layer.cpp @@ -1,11 +1,9 @@ // TanH neuron activation function layer. // Adapted from ReLU layer code written by Yangqing Jia -#include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu index ccd6e63e..d87bccec 100644 --- a/src/caffe/layers/tanh_layer.cu +++ b/src/caffe/layers/tanh_layer.cu @@ -1,11 +1,9 @@ // TanH neuron activation function layer. 
// Adapted from ReLU layer code written by Yangqing Jia -#include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/threshold_layer.cpp b/src/caffe/layers/threshold_layer.cpp index 2365e7b9..d6514736 100644 --- a/src/caffe/layers/threshold_layer.cpp +++ b/src/caffe/layers/threshold_layer.cpp @@ -1,7 +1,6 @@ #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/threshold_layer.cu b/src/caffe/layers/threshold_layer.cu index bfa7f159..1cd62d99 100644 --- a/src/caffe/layers/threshold_layer.cu +++ b/src/caffe/layers/threshold_layer.cu @@ -1,8 +1,6 @@ -#include #include -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/tile_layer.cpp b/src/caffe/layers/tile_layer.cpp index f55008cc..581546c4 100644 --- a/src/caffe/layers/tile_layer.cpp +++ b/src/caffe/layers/tile_layer.cpp @@ -1,7 +1,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/tile_layer.cu b/src/caffe/layers/tile_layer.cu index 7fd3bc47..fdf96090 100644 --- a/src/caffe/layers/tile_layer.cu +++ b/src/caffe/layers/tile_layer.cu @@ -1,7 +1,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index f8db61c9..3f937bc9 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -12,9 +12,7 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" -#include "caffe/common.hpp" #include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index a6d154e1..9abc92b6 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include #include diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 632bf1f1..ec4665ec 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -1,5 +1,3 @@ -#include - #include "caffe/common.hpp" #include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index ef0e57a3..5960a666 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include "gtest/gtest.h" @@ -8,8 +6,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/util/rng.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp index bbf19099..f3f2094e 100644 --- a/src/caffe/test/test_argmax_layer.cpp +++ b/src/caffe/test/test_argmax_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git 
a/src/caffe/test/test_batch_reindex_layer.cpp b/src/caffe/test/test_batch_reindex_layer.cpp index 985db343..17e47f05 100644 --- a/src/caffe/test/test_batch_reindex_layer.cpp +++ b/src/caffe/test/test_batch_reindex_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index 7da6423b..a9d7d519 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_common.cpp b/src/caffe/test/test_common.cpp index b3a61b0f..58ae5c60 100644 --- a/src/caffe/test/test_common.cpp +++ b/src/caffe/test/test_common.cpp @@ -1,5 +1,3 @@ -#include - #include "gtest/gtest.h" #include "caffe/common.hpp" diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index ccd97eb1..8ba51f4f 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp b/src/caffe/test/test_contrastive_loss_layer.cpp index 1e9447cb..592997e4 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -1,7 +1,5 @@ #include #include -#include -#include #include #include "gtest/gtest.h" @@ -9,7 +7,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index 9df979a2..b4747357 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_deconvolution_layer.cpp b/src/caffe/test/test_deconvolution_layer.cpp index 770e7b27..b473dbb9 100644 --- a/src/caffe/test/test_deconvolution_layer.cpp +++ b/src/caffe/test/test_deconvolution_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp index 8031f6e9..3b56c5ca 100644 --- a/src/caffe/test/test_eltwise_layer.cpp +++ b/src/caffe/test/test_eltwise_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp index 7a4fb980..0f4caf15 100644 --- a/src/caffe/test/test_embed_layer.cpp +++ b/src/caffe/test/test_embed_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" 
#include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_euclidean_loss_layer.cpp b/src/caffe/test/test_euclidean_loss_layer.cpp index 1949742b..9dc14de4 100644 --- a/src/caffe/test/test_euclidean_loss_layer.cpp +++ b/src/caffe/test/test_euclidean_loss_layer.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include "gtest/gtest.h" @@ -8,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_filler.cpp b/src/caffe/test/test_filler.cpp index 728b8dc5..26e9b217 100644 --- a/src/caffe/test/test_filler.cpp +++ b/src/caffe/test/test_filler.cpp @@ -1,5 +1,3 @@ -#include - #include "gtest/gtest.h" #include "caffe/filler.hpp" diff --git a/src/caffe/test/test_filter_layer.cpp b/src/caffe/test/test_filter_layer.cpp index c641b6ef..a2d0c293 100644 --- a/src/caffe/test/test_filter_layer.cpp +++ b/src/caffe/test/test_filter_layer.cpp @@ -1,13 +1,11 @@ -#include -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_flatten_layer.cpp b/src/caffe/test/test_flatten_layer.cpp index 7b6757cb..5d1caac2 100644 --- a/src/caffe/test/test_flatten_layer.cpp +++ b/src/caffe/test/test_flatten_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index b56277b5..adc27df4 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -5,10 +5,10 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/data_layers.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/hdf5.hpp" #include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index c9b027f8..7169e7bf 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -5,9 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/filler.hpp" +#include "caffe/data_layers.hpp" #include "caffe/proto/caffe.pb.h" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_hinge_loss_layer.cpp b/src/caffe/test/test_hinge_loss_layer.cpp index b6a99022..dfdd01d0 100644 --- a/src/caffe/test/test_hinge_loss_layer.cpp +++ b/src/caffe/test/test_hinge_loss_layer.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include "gtest/gtest.h" @@ -8,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include 
"caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index f0b75fcc..bafcacf7 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 293aa262..ec055b20 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index 481fcef7..77690245 100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -7,10 +7,10 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_infogain_loss_layer.cpp b/src/caffe/test/test_infogain_loss_layer.cpp index 7ec2f807..b2a6754f 100644 --- a/src/caffe/test/test_infogain_loss_layer.cpp +++ b/src/caffe/test/test_infogain_loss_layer.cpp @@ -1,6 +1,3 @@ -#include -#include -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_inner_product_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp index fbf0c851..1ad2c97e 100644 --- a/src/caffe/test/test_inner_product_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp index 78cf2d9d..bd1c4fe8 100644 --- a/src/caffe/test/test_lrn_layer.cpp +++ b/src/caffe/test/test_lrn_layer.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index a095b544..fbee3f9c 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -1,8 +1,6 @@ #include // for uint32_t & uint64_t #include -#include #include // for std::fabs -#include // for rand_r #include "gtest/gtest.h" diff --git a/src/caffe/test/test_maxpool_dropout_layers.cpp b/src/caffe/test/test_maxpool_dropout_layers.cpp index 611d9790..8fc944f3 100644 --- a/src/caffe/test/test_maxpool_dropout_layers.cpp +++ b/src/caffe/test/test_maxpool_dropout_layers.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp index b2db984f..0404aa25 100644 --- a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp +++ b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp @@ -1,6 +1,3 @@ -#include -#include -#include #include #include "gtest/gtest.h" @@ -8,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_mvn_layer.cpp 
b/src/caffe/test/test_mvn_layer.cpp index be23d86e..e9a7d54c 100644 --- a/src/caffe/test/test_mvn_layer.cpp +++ b/src/caffe/test/test_mvn_layer.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include "caffe/blob.hpp" diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index c6e4d27b..b333fdee 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "google/protobuf/text_format.h" @@ -7,8 +6,9 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_pooling_layer.cpp b/src/caffe/test/test_pooling_layer.cpp index 69f2d5c1..9e986e66 100644 --- a/src/caffe/test/test_pooling_layer.cpp +++ b/src/caffe/test/test_pooling_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_power_layer.cpp b/src/caffe/test/test_power_layer.cpp index 76c9e857..1041ddd4 100644 --- a/src/caffe/test/test_power_layer.cpp +++ b/src/caffe/test/test_power_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_random_number_generator.cpp b/src/caffe/test/test_random_number_generator.cpp index 98424c06..833b0047 100644 --- a/src/caffe/test/test_random_number_generator.cpp +++ b/src/caffe/test/test_random_number_generator.cpp @@ -1,5 +1,4 @@ #include -#include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_reduction_layer.cpp b/src/caffe/test/test_reduction_layer.cpp index f568a180..a8d43727 100644 --- a/src/caffe/test/test_reduction_layer.cpp +++ b/src/caffe/test/test_reduction_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 9d08ec60..e0f4ba42 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp index e5737e43..b4f831c8 100644 --- a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include "gtest/gtest.h" @@ -8,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 2d2d0fdc..45fbcffd 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" 
#include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_softmax_layer.cpp b/src/caffe/test/test_softmax_layer.cpp index 996da4b8..4b01f5cf 100644 --- a/src/caffe/test/test_softmax_layer.cpp +++ b/src/caffe/test/test_softmax_layer.cpp @@ -1,13 +1,12 @@ #include -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_softmax_with_loss_layer.cpp b/src/caffe/test/test_softmax_with_loss_layer.cpp index 1498d5c5..0ae4cd68 100644 --- a/src/caffe/test/test_softmax_with_loss_layer.cpp +++ b/src/caffe/test/test_softmax_with_loss_layer.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include "boost/scoped_ptr.hpp" @@ -9,7 +7,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/loss_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp index be5204bf..e27e355c 100644 --- a/src/caffe/test/test_split_layer.cpp +++ b/src/caffe/test/test_split_layer.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -7,10 +6,10 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/insert_splits.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_spp_layer.cpp b/src/caffe/test/test_spp_layer.cpp index b2585f1a..1b48a842 100644 --- a/src/caffe/test/test_spp_layer.cpp +++ b/src/caffe/test/test_spp_layer.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_stochastic_pooling.cpp b/src/caffe/test/test_stochastic_pooling.cpp index f84464c3..5a412bd4 100644 --- a/src/caffe/test/test_stochastic_pooling.cpp +++ b/src/caffe/test/test_stochastic_pooling.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_syncedmem.cpp b/src/caffe/test/test_syncedmem.cpp index b946233d..16dfb582 100644 --- a/src/caffe/test/test_syncedmem.cpp +++ b/src/caffe/test/test_syncedmem.cpp @@ -1,4 +1,3 @@ -#include #include #include "gtest/gtest.h" diff --git a/src/caffe/test/test_tanh_layer.cpp b/src/caffe/test/test_tanh_layer.cpp index 5dc92832..f31579ca 100644 --- a/src/caffe/test/test_tanh_layer.cpp +++ b/src/caffe/test/test_tanh_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_threshold_layer.cpp b/src/caffe/test/test_threshold_layer.cpp index 05ce8212..903a9bc8 100644 --- a/src/caffe/test/test_threshold_layer.cpp +++ b/src/caffe/test/test_threshold_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include 
"caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/neuron_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_tile_layer.cpp b/src/caffe/test/test_tile_layer.cpp index 540aac3c..5c459604 100644 --- a/src/caffe/test/test_tile_layer.cpp +++ b/src/caffe/test/test_tile_layer.cpp @@ -1,12 +1,11 @@ -#include #include #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index 23deddd4..9dcc2aa5 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/caffe/test/test_util_blas.cpp b/src/caffe/test/test_util_blas.cpp index 8770f309..9ee8818f 100644 --- a/src/caffe/test/test_util_blas.cpp +++ b/src/caffe/test/test_util_blas.cpp @@ -1,7 +1,5 @@ #ifndef CPU_ONLY // CPU-GPU test -#include - #include "gtest/gtest.h" #include "caffe/blob.hpp" diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 09da23d4..27e5b7c0 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -1,6 +1,3 @@ -#include -#include -#include #include #include "caffe/util/im2col.hpp" diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index 451097f8..49354ab7 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -1,7 +1,4 @@ #include -#include -#include -#include #include "caffe/common.hpp" #include "caffe/util/im2col.hpp" diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 2631a074..e4d0c4b0 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -4,8 +4,6 @@ #include #include -#include -#include #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" From 2f05b03371e5936a478c7ad2946d0cd7c013920c Mon Sep 17 00:00:00 2001 From: Dmytro Mishkin Date: Wed, 25 Feb 2015 17:00:22 +0200 Subject: [PATCH 149/223] Added batch normalization layer with test and examples --- .../cifar10_full_sigmoid_solver.prototxt | 28 ++ .../cifar10_full_sigmoid_solver_bn.prototxt | 28 ++ .../cifar10_full_sigmoid_train_test.prototxt | 212 +++++++++++ ...ifar10_full_sigmoid_train_test_bn.prototxt | 284 ++++++++++++++ examples/cifar10/train_full_sigmoid.sh | 7 + examples/cifar10/train_full_sigmoid_bn.sh | 7 + include/caffe/common_layers.hpp | 50 ++- src/caffe/layers/batch_norm_layer.cpp | 351 ++++++++++++++++++ src/caffe/layers/batch_norm_layer.cu | 228 ++++++++++++ src/caffe/test/test_batch_norm_layer.cpp | 90 +++++ 10 files changed, 1284 insertions(+), 1 deletion(-) create mode 100644 examples/cifar10/cifar10_full_sigmoid_solver.prototxt create mode 100644 examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt create mode 100644 examples/cifar10/cifar10_full_sigmoid_train_test.prototxt create mode 100644 examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt create mode 100755 examples/cifar10/train_full_sigmoid.sh create mode 100755 examples/cifar10/train_full_sigmoid_bn.sh create mode 100644 src/caffe/layers/batch_norm_layer.cpp create mode 100644 src/caffe/layers/batch_norm_layer.cu create mode 100644 src/caffe/test/test_batch_norm_layer.cpp diff --git a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt 
b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt new file mode 100644 index 00000000..7dd3ecb9 --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt @@ -0,0 +1,28 @@ +# reduce learning rate after 120 epochs (60000 iters) by a factor of 10 +# then another factor of 10 after 10 more epochs (5000 iters) + +# The train/test net protocol buffer definition +net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations, +# covering the full 10,000 testing images. +test_iter: 10 +# Carry out testing every 1000 training iterations. +test_interval: 1000 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.001 +momentum: 0.9 +#weight_decay: 0.004 +# The learning rate policy +lr_policy: "step" +gamma: 1 +stepsize: 5000 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 60000 +# snapshot intermediate results +snapshot: 10000 +snapshot_prefix: "examples/cifar10_full_sigmoid" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt new file mode 100644 index 00000000..a57b280f --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt @@ -0,0 +1,28 @@ +# reduce learning rate after 120 epochs (60000 iters) by a factor of 10 +# then another factor of 10 after 10 more epochs (5000 iters) + +# The train/test net protocol buffer definition +net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations, +# covering the full 10,000 testing images. +test_iter: 10 +# Carry out testing every 1000 training iterations. +test_interval: 1000 +# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.001 +momentum: 0.9 +#weight_decay: 0.004 +# The learning rate policy +lr_policy: "step" +gamma: 1 +stepsize: 5000 +# Display every 200 iterations +display: 100 +# The maximum number of iterations +max_iter: 60000 +# snapshot intermediate results +snapshot: 10000 +snapshot_prefix: "examples/cifar10_full_sigmoid_bn" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt new file mode 100644 index 00000000..6f5bf26b --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt @@ -0,0 +1,212 @@ +name: "CIFAR10_full" +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_train_lmdb" + batch_size: 111 + backend: LMDB + } +} +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_test_lmdb" + batch_size: 1000 + backend: LMDB + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} + + + +layer { + name: "Sigmoid1" + type: "Sigmoid" + bottom: "pool1" + top: "Sigmoid1" +} + +layer { + name: "conv2" + type: "Convolution" + bottom: "Sigmoid1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} + + +layer { + name: "Sigmoid2" + type: "Sigmoid" + bottom: "conv2" + top: "Sigmoid2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "Sigmoid2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } + +} + +layer { + name: "Sigmoid3" + type: "Sigmoid" + bottom: "conv3" + top: "Sigmoid3" +} + +layer { + name: "pool3" + type: "Pooling" + bottom: "Sigmoid3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 250 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "ip1" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip1" + bottom: "label" + top: "loss" +} diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt new file mode 
100644 index 00000000..85c2dffe --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt @@ -0,0 +1,284 @@ +name: "CIFAR10_full" +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_train_lmdb" + batch_size: 111 + backend: LMDB + } +} +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_test_lmdb" + batch_size: 1000 + backend: LMDB + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "bn1" + type: "BatchNorm" + bottom: "pool1" + top: "bn1" + bn_param { + scale_filler { + type: "constant" + value: 1 + } + shift_filler { + type: "constant" + value: 0.001 + } + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } +} + +layer { + name: "Sigmoid1" + type: "Sigmoid" + bottom: "bn1" + top: "Sigmoid1" +} + +layer { + name: "conv2" + type: "Convolution" + bottom: "Sigmoid1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} + + + +layer { + name: "bn2" + type: "BatchNorm" + bottom: "conv2" + top: "bn2" + bn_param { + scale_filler { + type: "constant" + value: 1 + } + shift_filler { + type: "constant" + value: 0.001 + } + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } +} +layer { + name: "Sigmoid2" + type: "Sigmoid" + bottom: "bn2" + top: "Sigmoid2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "Sigmoid2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } + +} + + +layer { + name: "bn3" + type: "BatchNorm" + bottom: "conv3" + top: "bn3" + bn_param { + scale_filler { + type: "constant" + value: 1 + } + shift_filler { + type: "constant" + value: 0.001 + } + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } + param { + lr_mult: 1.00001 + decay_mult: 0 + } +} +layer { + name: "Sigmoid3" + type: "Sigmoid" + bottom: "bn3" + top: "Sigmoid3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "Sigmoid3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 250 + } + param { + lr_mult: 0.2 + decay_mult: 0 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + 
name: "accuracy" + type: "Accuracy" + bottom: "ip1" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip1" + bottom: "label" + top: "loss" +} diff --git a/examples/cifar10/train_full_sigmoid.sh b/examples/cifar10/train_full_sigmoid.sh new file mode 100755 index 00000000..9cff06d3 --- /dev/null +++ b/examples/cifar10/train_full_sigmoid.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train \ + --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt + diff --git a/examples/cifar10/train_full_sigmoid_bn.sh b/examples/cifar10/train_full_sigmoid_bn.sh new file mode 100755 index 00000000..011387c9 --- /dev/null +++ b/examples/cifar10/train_full_sigmoid_bn.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train \ + --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt + diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 21a27d75..09605db9 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -78,6 +78,55 @@ class ArgMaxLayer : public Layer { int axis_; }; +/** +* @brief Batch Normalization per-channel with scale & shift linear transform. +* +*/ +template +class BatchNormLayer : public Layer { + public: + explicit BatchNormLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "BN"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + // spatial mean & variance + Blob spatial_mean_, spatial_variance_; + // batch mean & variance + Blob batch_mean_, batch_variance_; + // buffer blob + Blob buffer_blob_; + + Blob x_norm_; + // x_sum_multiplier is used to carry out sum using BLAS + Blob spatial_sum_multiplier_, batch_sum_multiplier_; + + // dimension + int N_; + int C_; + int H_; + int W_; + // eps + Dtype var_eps_; +}; + /** * @brief Index into the input blob along its first axis. * @@ -146,7 +195,6 @@ class BatchReindexLayer : public Layer { const Dtype* ridx_data); }; - /** * @brief Takes at least two Blob%s and concatenates them along either the num * or channel dimension, outputting the result. 
diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp new file mode 100644 index 00000000..8dea3493 --- /dev/null +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -0,0 +1,351 @@ +#include +#include + +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + template + void BatchNormLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + + x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + + // Figure out the dimensions + N_ = bottom[0]->num(); + C_ = bottom[0]->channels(); + H_ = bottom[0]->height(); + W_ = bottom[0]->width(); + + // mean + spatial_mean_.Reshape(N_, C_, 1, 1); + batch_mean_.Reshape(1, C_, 1, 1); + // variance + spatial_variance_.Reshape(N_, C_, 1, 1); + batch_variance_.Reshape(1, C_, 1, 1); + // buffer blod + buffer_blob_.Reshape(N_, C_, H_, W_); + + // fill spatial multiplier + spatial_sum_multiplier_.Reshape(1, 1, H_, W_); + Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data(); + caffe_set(spatial_sum_multiplier_.count(), Dtype(1), + spatial_multipl_data); + caffe_set(spatial_sum_multiplier_.count(), Dtype(0), + spatial_sum_multiplier_.mutable_cpu_diff()); + // fill batch multiplier + batch_sum_multiplier_.Reshape(N_, 1, 1, 1); + Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data(); + caffe_set(batch_sum_multiplier_.count(), Dtype(1), + batch_multiplier_data); + caffe_set(batch_sum_multiplier_.count(), Dtype(0), + batch_sum_multiplier_.mutable_cpu_diff()); + this->param_propagate_down_.resize(this->blobs_.size(), true); + } + template + void BatchNormLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not " + "allow in-place computation."; + + top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + + x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + // Figure out the dimensions + N_ = bottom[0]->num(); + C_ = bottom[0]->channels(); + H_ = bottom[0]->height(); + W_ = bottom[0]->width(); + var_eps_ = 1e-9; + + // mean + spatial_mean_.Reshape(N_, C_, 1, 1); + batch_mean_.Reshape(1, C_, 1, 1); + // variance + spatial_variance_.Reshape(N_, C_, 1, 1); + batch_variance_.Reshape(1, C_, 1, 1); + // buffer blod + buffer_blob_.Reshape(N_, C_, H_, W_); + + // fill spatial multiplier + spatial_sum_multiplier_.Reshape(1, 1, H_, W_); + Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data(); + caffe_set(spatial_sum_multiplier_.count(), Dtype(1), + spatial_multipl_data); + caffe_set(spatial_sum_multiplier_.count(), Dtype(0), + spatial_sum_multiplier_.mutable_cpu_diff()); + + // fill batch multiplier + batch_sum_multiplier_.Reshape(N_, 1, 1, 1); + Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data(); + caffe_set(batch_sum_multiplier_.count(), Dtype(1), + batch_multiplier_data); + caffe_set(batch_sum_multiplier_.count(), Dtype(0), + batch_sum_multiplier_.mutable_cpu_diff()); + + // Check if we need to set up the weights + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(2); + + // fill scale with scale_filler + this->blobs_[0].reset(new Blob(1, C_, 1, 1)); + 
caffe_set(this->blobs_[0]->count(), Dtype(1), + this->blobs_[0]->mutable_cpu_data()); + + // fill shift with shift_filler + this->blobs_[1].reset(new Blob(1, C_, 1, 1)); + caffe_set(this->blobs_[1]->count(), Dtype(0), + this->blobs_[1]->mutable_cpu_data()); + } // parameter initialization + this->param_propagate_down_.resize(this->blobs_.size(), true); + } + + template + void BatchNormLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + const Dtype* const_top_data = top[0]->cpu_data(); + + const Dtype* scale_data = this->blobs_[0]->cpu_data(); + const Dtype* shift_data = this->blobs_[1]->cpu_data(); + + // put the squares of bottom into buffer_blob_ + caffe_powx(bottom[0]->count(), bottom_data, Dtype(2), + buffer_blob_.mutable_cpu_data()); + + // computes variance using var(X) = E(X^2) - (EX)^2 + // EX across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1. / (H_ * W_)), bottom_data, + spatial_sum_multiplier_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + // EX across batch + caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_), + spatial_mean_.cpu_data(), + batch_sum_multiplier_.cpu_data(), Dtype(0), + batch_mean_.mutable_cpu_data()); + + // E(X^2) across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + spatial_variance_.mutable_cpu_data()); + // E(X^2) across batch + caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_), + spatial_variance_.cpu_data(), + batch_sum_multiplier_.cpu_data(), Dtype(0), + batch_variance_.mutable_cpu_data()); + + caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2), + buffer_blob_.mutable_cpu_data()); // (EX)^2 + caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(), + buffer_blob_.cpu_data(), + batch_variance_.mutable_cpu_data()); // variance + + // do mean and variance normalization + // subtract mean + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, + C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), + batch_mean_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(-1), + spatial_mean_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + + caffe_add(buffer_blob_.count(), bottom_data, + buffer_blob_.cpu_data(), top_data); + + // normalize variance + caffe_add_scalar(batch_variance_.count(), var_eps_, + batch_variance_.mutable_cpu_data()); + caffe_powx(batch_variance_.count(), + batch_variance_.cpu_data(), Dtype(0.5), + batch_variance_.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, + C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), + batch_variance_.cpu_data(), Dtype(0), + spatial_variance_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_ * C_, H_ * W_, 1, Dtype(1), + spatial_variance_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + + caffe_div(buffer_blob_.count(), const_top_data, + buffer_blob_.cpu_data(), top_data); + + // Saving x_norm + caffe_copy(buffer_blob_.count(), const_top_data, + x_norm_.mutable_cpu_data()); + // scale + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0), + spatial_variance_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + 
spatial_variance_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + caffe_mul(buffer_blob_.count(), top_data, + buffer_blob_.cpu_data(), top_data); + + // shift + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0), + spatial_mean_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_ * C_, H_ * W_, 1, Dtype(1), + spatial_mean_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + caffe_add(buffer_blob_.count(), const_top_data, + buffer_blob_.cpu_data(), top_data); + } + + template + void BatchNormLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + + Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff(); + Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff(); + const Dtype* scale_data = this->blobs_[0]->cpu_data(); + +// Propagate layer to parameters + // gradient w.r.t. scale + caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), + top_diff, buffer_blob_.mutable_cpu_data()); + // EX across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, + H_ * W_, Dtype(1), buffer_blob_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + spatial_variance_.mutable_cpu_diff()); + // EX across batch + caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_variance_.cpu_diff(), + batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff); + + // gradient w.r.t. shift + // EX across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, + H_ * W_, Dtype(1), top_diff, + spatial_sum_multiplier_.cpu_data(), + Dtype(0), spatial_mean_.mutable_cpu_diff()); + // EX across batch + caffe_cpu_gemv(CblasTrans, N_, C_, + Dtype(1), spatial_mean_.cpu_diff(), + batch_sum_multiplier_.cpu_data(), + Dtype(0), shift_diff); + +// Propagate down + + // put scale * top_diff to buffer_blob_ + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0), + spatial_variance_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_variance_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(), + buffer_blob_.mutable_cpu_data()); + + // use new top diff for computation + caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), + buffer_blob_.cpu_data(), bottom_diff); + // EX across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1), bottom_diff, + spatial_sum_multiplier_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + // EX across batch + caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_mean_.cpu_data(), + batch_sum_multiplier_.cpu_data(), Dtype(0), + batch_mean_.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), + batch_mean_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_mean_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + bottom_diff); + + caffe_mul(buffer_blob_.count(), + x_norm_.cpu_data(), bottom_diff, bottom_diff); + + // EX across spatial + caffe_cpu_gemv(CblasNoTrans, N_ * C_, + H_ * W_, Dtype(1), 
buffer_blob_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + // EX across batch + caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_mean_.cpu_data(), + batch_sum_multiplier_.cpu_data(), Dtype(0), + batch_mean_.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), + batch_mean_.cpu_data(), Dtype(0), + spatial_mean_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_ * C_, H_ * W_, 1, Dtype(1), + spatial_mean_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff); + + caffe_cpu_axpby(buffer_blob_.count(), Dtype(1), + buffer_blob_.cpu_data(), Dtype(-1. / (N_ * H_ * W_)), + bottom_diff); + + // put the squares of bottom into buffer_blob_ +// caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2), +// buffer_blob_.mutable_cpu_data()); + + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_, C_, 1, Dtype(1), + batch_sum_multiplier_.cpu_data(), + batch_variance_.cpu_data(), Dtype(0), + spatial_variance_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, + N_ * C_, H_ * W_, 1, Dtype(1), + spatial_variance_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), Dtype(0), + buffer_blob_.mutable_cpu_data()); + + caffe_div(buffer_blob_.count(), bottom_diff, + buffer_blob_.cpu_data(), bottom_diff); + } +#ifdef CPU_ONLY +STUB_GPU(BatchNormLayer); +#endif + + INSTANTIATE_CLASS(BatchNormLayer); + REGISTER_LAYER_CLASS(BatchNorm); +} // namespace caffe + diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu new file mode 100644 index 00000000..e87f8c62 --- /dev/null +++ b/src/caffe/layers/batch_norm_layer.cu @@ -0,0 +1,228 @@ +#include +#include + +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + template + void BatchNormLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* const_top_data = top[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data(); + Dtype* buffer_data = buffer_blob_.mutable_gpu_data(); + const Dtype* const_buffer_data = buffer_blob_.gpu_data(); + + + // put the squares of bottom into buffer_blob_ + caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2), + buffer_blob_.mutable_gpu_data()); + + // computes variance using var(X) = E(X^2) - (EX)^2 + // EX across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1. / (H_ * W_)), + bottom_data, spatial_sum_multiplier_.gpu_data(), + Dtype(0), spatial_mean_data); + // EX across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_), + spatial_mean_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + batch_mean_.mutable_gpu_data()); + + // E(X^2) across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1. / (H_ * W_)), buffer_data, + spatial_sum_multiplier_.gpu_data(), Dtype(0), + spatial_variance_.mutable_gpu_data()); + // E(X^2) across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. 
/ N_), + spatial_variance_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + batch_variance_.mutable_gpu_data()); + + caffe_gpu_powx(batch_mean_.count(), batch_mean_.gpu_data(), + Dtype(2), buffer_blob_.mutable_gpu_data()); // (EX)^2 + caffe_gpu_sub(batch_mean_.count(), batch_variance_.gpu_data(), + buffer_data, batch_variance_.mutable_gpu_data()); // variance + + // do mean and variance normalization + // subtract mean + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), batch_mean_.gpu_data(), Dtype(0), + spatial_mean_data); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, + 1, -Dtype(1), + spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0), + buffer_blob_.mutable_gpu_data()); + + caffe_gpu_add(buffer_blob_.count(), bottom_data, buffer_data, top_data); + + // normalize variance + caffe_gpu_add_scalar(batch_variance_.count(), var_eps_, + batch_variance_.mutable_gpu_data()); + caffe_gpu_powx(batch_variance_.count(), batch_variance_.gpu_data(), + Dtype(0.5), batch_variance_.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0), + spatial_variance_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), + Dtype(0), buffer_blob_.mutable_gpu_data()); + + caffe_gpu_div(buffer_blob_.count(), top_data, buffer_data, top_data); + + // Saving x_norm + caffe_copy(top[0]->count(), const_top_data, x_norm_.mutable_gpu_data()); + + // scale + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(), + Dtype(0), spatial_variance_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), + Dtype(0), buffer_blob_.mutable_gpu_data()); + + caffe_gpu_mul(buffer_blob_.count(), top_data, buffer_data, top_data); + + // shift + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), + this->blobs_[1]->gpu_data(), Dtype(0), + spatial_mean_data); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1, + Dtype(1), + spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0), + buffer_blob_.mutable_gpu_data()); + caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data); + } + + template + void BatchNormLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* top_data = top[0]->gpu_data(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const Dtype* const_bottom_diff = bottom[0]->gpu_diff(); + Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data(); + Dtype* buffer_data = buffer_blob_.mutable_gpu_data(); + const Dtype* const_buffer_data = buffer_blob_.gpu_data(); + + // Propage to layer params + // gradient w.r.t. 
scale + caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), + top_diff, buffer_blob_.mutable_gpu_data()); + // EX across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), + buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0), + spatial_variance_.mutable_gpu_data()); + // EX across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_variance_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + this->blobs_[0]->mutable_gpu_diff()); + + // gradient w.r.t. shift + // EX across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), + top_diff, spatial_sum_multiplier_.gpu_data(), + Dtype(0), spatial_mean_data); + // EX across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_mean_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + this->blobs_[1]->mutable_gpu_diff()); + + // Propagate down + // scale top diff + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(), + Dtype(0), spatial_variance_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), + Dtype(0), + buffer_blob_.mutable_gpu_data()); + caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data, + buffer_blob_.mutable_gpu_data()); + + // use new top diff for computation + caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), + buffer_data, bottom_diff); + // EX across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, + Dtype(1), bottom_diff, + spatial_sum_multiplier_.gpu_data(), Dtype(0), spatial_mean_data); + // EX across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_mean_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + batch_mean_.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), + batch_mean_.gpu_data(), Dtype(0), + spatial_mean_data); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), spatial_mean_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), Dtype(0), + bottom_diff); + + caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), + bottom_diff, bottom_diff); + + // EX across spatial + caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), + buffer_data, spatial_sum_multiplier_.gpu_data(), + Dtype(0), spatial_mean_data); + + // EX across batch + caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), + spatial_mean_.gpu_data(), + batch_sum_multiplier_.gpu_data(), Dtype(0), + batch_mean_.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, + C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), + batch_mean_.gpu_data(), Dtype(0), + spatial_mean_data); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), + Dtype(1), + bottom_diff); + + caffe_gpu_axpby(buffer_blob_.count(), Dtype(1), buffer_data, + Dtype(-1. 
/ (N_ * H_ * W_)), + bottom_diff); + + // put the squares of bottom into buffer_blob_ +// caffe_gpu_powx(buffer_blob_.count(), bottom_data, Dtype(2), +// buffer_blob_.mutable_gpu_data()); + + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), + batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0), + spatial_variance_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, + H_ * W_, 1, Dtype(1), + spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), + Dtype(0), + buffer_blob_.mutable_gpu_data()); + + caffe_gpu_div(buffer_blob_.count(), const_bottom_diff, + const_buffer_data, bottom_diff); + } + + INSTANTIATE_LAYER_GPU_FUNCS(BatchNormLayer); +} // namespace caffe + diff --git a/src/caffe/test/test_batch_norm_layer.cpp b/src/caffe/test/test_batch_norm_layer.cpp new file mode 100644 index 00000000..704efd5d --- /dev/null +++ b/src/caffe/test/test_batch_norm_layer.cpp @@ -0,0 +1,90 @@ +#include +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +#define BATCH_SIZE 2 +#define INPUT_DATA_SIZE 3 + +namespace caffe { + + template + class BatchNormLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + BatchNormLayerTest() + : blob_bottom_(new Blob(5, 2, 3, 4)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~BatchNormLayerTest() { delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + }; + + TYPED_TEST_CASE(BatchNormLayerTest, TestDtypesAndDevices); + + TYPED_TEST(BatchNormLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + + BatchNormLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + + // Test mean + int num = this->blob_bottom_->num(); + int channels = this->blob_bottom_->channels(); + int height = this->blob_bottom_->height(); + int width = this->blob_bottom_->width(); + + for (int j = 0; j < channels; ++j) { + Dtype sum = 0, var = 0; + for (int i = 0; i < num; ++i) { + for ( int k = 0; k < height; ++k ) { + for ( int l = 0; l < width; ++l ) { + Dtype data = this->blob_top_->data_at(i, j, k, l); + Dtype bottom_data = this->blob_bottom_->data_at(i, j, k, l); + sum += data; + var += data * data; + } + } + } + sum /= height * width * num; + var /= height * width * num; + + const Dtype kErrorBound = 0.001; + // expect zero mean + EXPECT_NEAR(0, sum, kErrorBound); + // expect unit variance + EXPECT_NEAR(1, var, kErrorBound); + } + } + + TYPED_TEST(BatchNormLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + + BatchNormLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-4); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); + } + +} // namespace caffe From a52ee656a589313901560c87b65a570ee41c9fee Mon Sep 17 00:00:00 2001 From: Carl Doersch Date: Tue, 6 Oct 2015 14:19:59 -0700 Subject: [PATCH 150/223] Cleanup batch norm layer, include global stats 
computation --- .../cifar10_full_sigmoid_train_test.prototxt | 4 +- ...ifar10_full_sigmoid_train_test_bn.prototxt | 90 +-- include/caffe/common_layers.hpp | 64 ++- src/caffe/layers/batch_norm_layer.cpp | 535 +++++++----------- src/caffe/layers/batch_norm_layer.cu | 365 +++++------- src/caffe/proto/caffe.proto | 15 +- src/caffe/test/test_batch_norm_layer.cpp | 45 +- 7 files changed, 486 insertions(+), 632 deletions(-) diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt index 6f5bf26b..fba69b81 100644 --- a/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt +++ b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt @@ -176,10 +176,10 @@ layer { top: "ip1" param { lr_mult: 1 - decay_mult: 250 + decay_mult: 0 } param { - lr_mult: 0.2 + lr_mult: 2 decay_mult: 0 } inner_product_param { diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt index 85c2dffe..1a810751 100644 --- a/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt +++ b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt @@ -12,7 +12,7 @@ layer { } data_param { source: "examples/cifar10/cifar10_train_lmdb" - batch_size: 111 + batch_size: 100 backend: LMDB } } @@ -41,21 +41,16 @@ layer { param { lr_mult: 1 } - param { - lr_mult: 2 - } convolution_param { num_output: 32 pad: 2 kernel_size: 5 stride: 1 + bias_term: false weight_filler { type: "gaussian" std: 0.0001 } - bias_filler { - type: "constant" - } } } layer { @@ -75,23 +70,14 @@ layer { type: "BatchNorm" bottom: "pool1" top: "bn1" - bn_param { - scale_filler { - type: "constant" - value: 1 - } - shift_filler { - type: "constant" - value: 0.001 - } + param { + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } } @@ -110,50 +96,35 @@ layer { param { lr_mult: 1 } - param { - lr_mult: 2 - } convolution_param { num_output: 32 pad: 2 kernel_size: 5 stride: 1 + bias_term: false weight_filler { type: "gaussian" std: 0.01 } - bias_filler { - type: "constant" - } } } - - layer { name: "bn2" type: "BatchNorm" bottom: "conv2" top: "bn2" - bn_param { - scale_filler { - type: "constant" - value: 1 - } - shift_filler { - type: "constant" - value: 0.001 - } + param { + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } } + layer { name: "Sigmoid2" type: "Sigmoid" @@ -176,53 +147,38 @@ layer { type: "Convolution" bottom: "pool2" top: "conv3" + param { + lr_mult: 1 + } convolution_param { num_output: 64 pad: 2 kernel_size: 5 stride: 1 + bias_term: false weight_filler { type: "gaussian" std: 0.01 } - bias_filler { - type: "constant" - } - } - param { - lr_mult: 1 } - param { - lr_mult: 1 - } - } - layer { name: "bn3" type: "BatchNorm" bottom: "conv3" top: "bn3" - bn_param { - scale_filler { - type: "constant" - value: 1 - } - shift_filler { - type: "constant" - value: 0.001 - } + param { + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } param { - lr_mult: 1.00001 - decay_mult: 0 + lr_mult: 0 } } + layer { name: "Sigmoid3" type: "Sigmoid" @@ -248,10 +204,10 @@ layer { top: "ip1" param { lr_mult: 1 - decay_mult: 250 + decay_mult: 1 } param { - lr_mult: 0.2 + lr_mult: 1 decay_mult: 0 } inner_product_param { diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 09605db9..da38f122 100644 --- 
a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -79,9 +79,35 @@ class ArgMaxLayer : public Layer { }; /** -* @brief Batch Normalization per-channel with scale & shift linear transform. -* -*/ + * @brief Normalizes the input to have 0-mean and/or unit (1) variance across + * the batch. + * + * This layer computes Batch Normalization described in [1]. For + * each channel in the data (i.e. axis 1), it subtracts the mean and divides + * by the variance, where both statistics are computed across both spatial + * dimensions and across the different examples in the batch. + * + * By default, during training time, the network is computing global mean/ + * variance statistics via a running average, which is then used at test + * time to allow deterministic outputs for each input. You can manually + * toggle whether the network is accumulating or using the statistics via the + * use_global_stats option. IMPORTANT: for this feature to work, you MUST + * set the learning rate to zero for all three parameter blobs, i.e., + * param {lr_mult: 0} three times in the layer definition. + * + * Note that the original paper also included a per-channel learned bias and + * scaling factor. It is possible (though a bit cumbersome) to implement + * this in caffe using a single-channel DummyDataLayer filled with zeros, + * followed by a Convolution layer with output the same size as the current. + * This produces a channel-specific value that can be added or multiplied by + * the BatchNorm layer's output. + * + * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network + * Training by Reducing Internal Covariate Shift." arXiv preprint + * arXiv:1502.03167 (2015). + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
+ */ template class BatchNormLayer : public Layer { public: @@ -89,11 +115,10 @@ class BatchNormLayer : public Layer { : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual void Reshape(const vector*>& bottom, const vector*>& top); - virtual inline const char* type() const { return "BN"; } + virtual inline const char* type() const { return "BatchNorm"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } @@ -105,26 +130,19 @@ class BatchNormLayer : public Layer { virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - // spatial mean & variance - Blob spatial_mean_, spatial_variance_; - // batch mean & variance - Blob batch_mean_, batch_variance_; - // buffer blob - Blob buffer_blob_; + const vector& propagate_down, const vector*>& bottom); - Blob x_norm_; - // x_sum_multiplier is used to carry out sum using BLAS - Blob spatial_sum_multiplier_, batch_sum_multiplier_; + Blob mean_, variance_, temp_, x_norm_; + bool use_global_stats_; + Dtype moving_average_fraction_; + int channels_; + Dtype eps_; - // dimension - int N_; - int C_; - int H_; - int W_; - // eps - Dtype var_eps_; + // extra temporarary variables is used to carry out sums/broadcasting + // using BLAS + Blob batch_sum_multiplier_; + Blob num_by_chans_; + Blob spatial_sum_multiplier_; }; /** diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index 8dea3493..94c2b96b 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -2,350 +2,235 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/filler.hpp" #include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { - template - void BatchNormLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - - x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - - // Figure out the dimensions - N_ = bottom[0]->num(); - C_ = bottom[0]->channels(); - H_ = bottom[0]->height(); - W_ = bottom[0]->width(); - // mean - spatial_mean_.Reshape(N_, C_, 1, 1); - batch_mean_.Reshape(1, C_, 1, 1); - // variance - spatial_variance_.Reshape(N_, C_, 1, 1); - batch_variance_.Reshape(1, C_, 1, 1); - // buffer blod - buffer_blob_.Reshape(N_, C_, H_, W_); - - // fill spatial multiplier - spatial_sum_multiplier_.Reshape(1, 1, H_, W_); - Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data(); - caffe_set(spatial_sum_multiplier_.count(), Dtype(1), - spatial_multipl_data); - caffe_set(spatial_sum_multiplier_.count(), Dtype(0), - spatial_sum_multiplier_.mutable_cpu_diff()); - // fill batch multiplier - batch_sum_multiplier_.Reshape(N_, 1, 1, 1); - Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data(); - caffe_set(batch_sum_multiplier_.count(), Dtype(1), - batch_multiplier_data); - caffe_set(batch_sum_multiplier_.count(), Dtype(0), - batch_sum_multiplier_.mutable_cpu_diff()); - this->param_propagate_down_.resize(this->blobs_.size(), true); - } - template - void BatchNormLayer::LayerSetUp(const vector*>& bottom, +template +void BatchNormLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - CHECK_NE(top[0], bottom[0]) << this->type() << " 
Layer does not " - "allow in-place computation."; - - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - - x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - // Figure out the dimensions - N_ = bottom[0]->num(); - C_ = bottom[0]->channels(); - H_ = bottom[0]->height(); - W_ = bottom[0]->width(); - var_eps_ = 1e-9; - - // mean - spatial_mean_.Reshape(N_, C_, 1, 1); - batch_mean_.Reshape(1, C_, 1, 1); - // variance - spatial_variance_.Reshape(N_, C_, 1, 1); - batch_variance_.Reshape(1, C_, 1, 1); - // buffer blod - buffer_blob_.Reshape(N_, C_, H_, W_); - - // fill spatial multiplier - spatial_sum_multiplier_.Reshape(1, 1, H_, W_); - Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data(); - caffe_set(spatial_sum_multiplier_.count(), Dtype(1), - spatial_multipl_data); - caffe_set(spatial_sum_multiplier_.count(), Dtype(0), - spatial_sum_multiplier_.mutable_cpu_diff()); - - // fill batch multiplier - batch_sum_multiplier_.Reshape(N_, 1, 1, 1); - Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data(); - caffe_set(batch_sum_multiplier_.count(), Dtype(1), - batch_multiplier_data); - caffe_set(batch_sum_multiplier_.count(), Dtype(0), - batch_sum_multiplier_.mutable_cpu_diff()); - - // Check if we need to set up the weights - if (this->blobs_.size() > 0) { - LOG(INFO) << "Skipping parameter initialization"; - } else { - this->blobs_.resize(2); - - // fill scale with scale_filler - this->blobs_[0].reset(new Blob(1, C_, 1, 1)); - caffe_set(this->blobs_[0]->count(), Dtype(1), - this->blobs_[0]->mutable_cpu_data()); - - // fill shift with shift_filler - this->blobs_[1].reset(new Blob(1, C_, 1, 1)); - caffe_set(this->blobs_[1]->count(), Dtype(0), - this->blobs_[1]->mutable_cpu_data()); - } // parameter initialization - this->param_propagate_down_.resize(this->blobs_.size(), true); + BatchNormParameter param = this->layer_param_.batch_norm_param(); + moving_average_fraction_ = param.moving_average_fraction(); + use_global_stats_ = this->phase_ == TEST; + if (param.has_use_global_stats()) + use_global_stats_ = param.use_global_stats(); + if (bottom[0]->num_axes() == 1) + channels_ = 1; + else + channels_ = bottom[0]->shape(1); + eps_ = param.eps(); + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(3); + vector sz; + sz.push_back(channels_); + this->blobs_[0].reset(new Blob(sz)); + this->blobs_[1].reset(new Blob(sz)); + sz[0]=1; + this->blobs_[2].reset(new Blob(sz)); + for (int i = 0; i < 3; ++i) { + caffe_set(this->blobs_[i]->count(), Dtype(0), + this->blobs_[i]->mutable_cpu_data()); + } } +} - template - void BatchNormLayer::Forward_cpu(const vector*>& bottom, +template +void BatchNormLayer::Reshape(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const Dtype* const_top_data = top[0]->cpu_data(); - - const Dtype* scale_data = this->blobs_[0]->cpu_data(); - const Dtype* shift_data = this->blobs_[1]->cpu_data(); - - // put the squares of bottom into buffer_blob_ - caffe_powx(bottom[0]->count(), bottom_data, Dtype(2), - buffer_blob_.mutable_cpu_data()); + if (bottom[0]->num_axes() >= 1) + CHECK_EQ(bottom[0]->shape(1), channels_); + top[0]->ReshapeLike(*bottom[0]); + + vector sz; + sz.push_back(channels_); + mean_.Reshape(sz); + variance_.Reshape(sz); + temp_.ReshapeLike(*bottom[0]); + 
x_norm_.ReshapeLike(*bottom[0]); + sz[0]=bottom[0]->shape(0); + batch_sum_multiplier_.Reshape(sz); + + int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); + if (spatial_sum_multiplier_.num_axes() == 0 || + spatial_sum_multiplier_.shape(0) != spatial_dim) { + sz[0] = spatial_dim; + spatial_sum_multiplier_.Reshape(sz); + Dtype* multiplier_data = spatial_sum_multiplier_.mutable_cpu_data(); + caffe_set(spatial_sum_multiplier_.count(), Dtype(1), multiplier_data); + } + int numbychans = channels_*bottom[0]->shape(0); + if (num_by_chans_.num_axes() == 0 || + num_by_chans_.shape(0) != numbychans) { + sz[0] = numbychans; + num_by_chans_.Reshape(sz); + caffe_set(batch_sum_multiplier_.count(), Dtype(1), + batch_sum_multiplier_.mutable_cpu_data()); + } +} + +template +void BatchNormLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + int num = bottom[0]->shape(0); + int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_); + + // elementwise square + caffe_powx(bottom[0]->count(), bottom_data, Dtype(2), + temp_.mutable_cpu_data()); + + if (use_global_stats_) { + // use the stored mean/variance estimates. TODO(cdoersch): allow an option + // to use an unbiased variance estimate, like the paper does. + const Dtype scale_factor = 1 / this->blobs_[2]->cpu_data()[0]; + caffe_cpu_scale(variance_.count(), scale_factor, + this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data()); + caffe_cpu_scale(variance_.count(), scale_factor, + this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data()); + } else { // computes variance using var(X) = E(X^2) - (EX)^2 - // EX across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1. / (H_ * W_)), bottom_data, - spatial_sum_multiplier_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - // EX across batch - caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_), - spatial_mean_.cpu_data(), - batch_sum_multiplier_.cpu_data(), Dtype(0), - batch_mean_.mutable_cpu_data()); - - // E(X^2) across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - spatial_variance_.mutable_cpu_data()); - // E(X^2) across batch - caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. 
/ N_), - spatial_variance_.cpu_data(), - batch_sum_multiplier_.cpu_data(), Dtype(0), - batch_variance_.mutable_cpu_data()); - - caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2), - buffer_blob_.mutable_cpu_data()); // (EX)^2 - caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(), - buffer_blob_.cpu_data(), - batch_variance_.mutable_cpu_data()); // variance - - // do mean and variance normalization - // subtract mean - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, - C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), - batch_mean_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(-1), - spatial_mean_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - - caffe_add(buffer_blob_.count(), bottom_data, - buffer_blob_.cpu_data(), top_data); - - // normalize variance - caffe_add_scalar(batch_variance_.count(), var_eps_, - batch_variance_.mutable_cpu_data()); - caffe_powx(batch_variance_.count(), - batch_variance_.cpu_data(), Dtype(0.5), - batch_variance_.mutable_cpu_data()); - - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, - C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), - batch_variance_.cpu_data(), Dtype(0), - spatial_variance_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_ * C_, H_ * W_, 1, Dtype(1), - spatial_variance_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - - caffe_div(buffer_blob_.count(), const_top_data, - buffer_blob_.cpu_data(), top_data); - - // Saving x_norm - caffe_copy(buffer_blob_.count(), const_top_data, - x_norm_.mutable_cpu_data()); - // scale - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0), - spatial_variance_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - caffe_mul(buffer_blob_.count(), top_data, - buffer_blob_.cpu_data(), top_data); - - // shift - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0), - spatial_mean_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_ * C_, H_ * W_, 1, Dtype(1), - spatial_mean_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - caffe_add(buffer_blob_.count(), const_top_data, - buffer_blob_.cpu_data(), top_data); + caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, + 1. / (num * spatial_dim), bottom_data, + spatial_sum_multiplier_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., + mean_.mutable_cpu_data()); + caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, + 1. 
/ (num * spatial_dim), temp_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., + variance_.mutable_cpu_data()); + this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_; + this->blobs_[2]->mutable_cpu_data()[0] += 1; + caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(), + moving_average_fraction_, this->blobs_[0]->mutable_cpu_data()); + Dtype m = Dtype(bottom[0]->count()/channels_); + caffe_cpu_axpby(variance_.count(), m/(m-1), variance_.cpu_data(), + moving_average_fraction_, this->blobs_[1]->mutable_cpu_data()); } + // elementwise square of mean + caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2), + temp_.mutable_cpu_data()); - template - void BatchNormLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - - Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff(); - Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff(); - const Dtype* scale_data = this->blobs_[0]->cpu_data(); - -// Propagate layer to parameters - // gradient w.r.t. scale - caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), - top_diff, buffer_blob_.mutable_cpu_data()); - // EX across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, - H_ * W_, Dtype(1), buffer_blob_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - spatial_variance_.mutable_cpu_diff()); - // EX across batch - caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_variance_.cpu_diff(), - batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff); - - // gradient w.r.t. 
shift - // EX across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, - H_ * W_, Dtype(1), top_diff, - spatial_sum_multiplier_.cpu_data(), - Dtype(0), spatial_mean_.mutable_cpu_diff()); - // EX across batch - caffe_cpu_gemv(CblasTrans, N_, C_, - Dtype(1), spatial_mean_.cpu_diff(), - batch_sum_multiplier_.cpu_data(), - Dtype(0), shift_diff); + caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(), + variance_.mutable_cpu_data()); // variance -// Propagate down + // normalize variance + caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data()); + caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), + variance_.mutable_cpu_data()); - // put scale * top_diff to buffer_blob_ - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0), - spatial_variance_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(), - buffer_blob_.mutable_cpu_data()); - - // use new top diff for computation - caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), - buffer_blob_.cpu_data(), bottom_diff); - // EX across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1), bottom_diff, - spatial_sum_multiplier_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - // EX across batch - caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_mean_.cpu_data(), - batch_sum_multiplier_.cpu_data(), Dtype(0), - batch_mean_.mutable_cpu_data()); - - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), - batch_mean_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_mean_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - bottom_diff); - - caffe_mul(buffer_blob_.count(), - x_norm_.cpu_data(), bottom_diff, bottom_diff); - - // EX across spatial - caffe_cpu_gemv(CblasNoTrans, N_ * C_, - H_ * W_, Dtype(1), buffer_blob_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - // EX across batch - caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_mean_.cpu_data(), - batch_sum_multiplier_.cpu_data(), Dtype(0), - batch_mean_.mutable_cpu_data()); - - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), - batch_mean_.cpu_data(), Dtype(0), - spatial_mean_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_ * C_, H_ * W_, 1, Dtype(1), - spatial_mean_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff); - - caffe_cpu_axpby(buffer_blob_.count(), Dtype(1), - buffer_blob_.cpu_data(), Dtype(-1. 
/ (N_ * H_ * W_)), - bottom_diff); - - // put the squares of bottom into buffer_blob_ -// caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2), -// buffer_blob_.mutable_cpu_data()); + // do mean and variance normalization + if (bottom[0] != top[0]) { + caffe_copy(bottom[0]->count(), bottom_data, top_data); + } + // subtract mean + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, -1, num_by_chans_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 1., top_data); + // replicate variance to input size + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, 1., num_by_chans_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data()); + caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data); + // TODO(cdoersch): The caching is only needed because later in-place layers + // might clobber the data. Can we skip this if they won't? + caffe_copy(x_norm_.count(), top_data, + x_norm_.mutable_cpu_data()); +} + +template +void BatchNormLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + CHECK(!use_global_stats_); + const Dtype* top_diff; + if (bottom[0] != top[0]) { + top_diff = top[0]->cpu_diff(); + } else { + caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff()); + top_diff = x_norm_.cpu_diff(); + } + const Dtype* top_data = x_norm_.cpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + int num = bottom[0]->shape()[0]; + int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_); + // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then + // + // dE(Y)/dX = + // (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y) + // ./ sqrt(var(X) + eps) + // + // where \cdot and ./ are hadamard product and elementwise division, + // respectively, dE/dY is the top diff, and mean/var/sum are all computed + // along all dimensions except the channels dimension. In the above + // equation, the operations allow for expansion (i.e. broadcast) along all + // dimensions except the channels dimension where required. 
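+  // Concretely, with m = num * spatial_dim elements per channel, the above
+  // expands per element i of a given channel to
+  //
+  //   dE/dX_i = ( dE/dY_i - (1/m) * \sum_j dE/dY_j
+  //               - Y_i * (1/m) * \sum_j dE/dY_j \cdot Y_j ) / sqrt(var(X) + eps)
+  //
+  // where the sums run over the m elements of that channel. The extra mean
+  // terms appear because mean(X) and var(X) are themselves functions of X;
+  // the gemv/gemm calls below compute these channel-wise sums and broadcast
+  // them back to the full input shape.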
+ + // sum(dE/dY \cdot Y) + caffe_mul(temp_.count(), top_data, top_diff, bottom_diff); + caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1., + bottom_diff, spatial_sum_multiplier_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., + mean_.mutable_cpu_data()); + + // reshape (broadcast) the above + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, 1., num_by_chans_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 0., bottom_diff); + + // sum(dE/dY \cdot Y) \cdot Y + caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff); + + // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y + caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1., + top_diff, spatial_sum_multiplier_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., + mean_.mutable_cpu_data()); + // reshape (broadcast) the above to make + // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num * channels_, + spatial_dim, 1, 1., num_by_chans_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 1., bottom_diff); + + // dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y + caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, + Dtype(-1. / (num * spatial_dim)), bottom_diff); + + // note: temp_ still contains sqrt(var(X)+eps), computed during the forward + // pass. 
+ caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff); +} - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_, C_, 1, Dtype(1), - batch_sum_multiplier_.cpu_data(), - batch_variance_.cpu_data(), Dtype(0), - spatial_variance_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - N_ * C_, H_ * W_, 1, Dtype(1), - spatial_variance_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), Dtype(0), - buffer_blob_.mutable_cpu_data()); - caffe_div(buffer_blob_.count(), bottom_diff, - buffer_blob_.cpu_data(), bottom_diff); - } #ifdef CPU_ONLY STUB_GPU(BatchNormLayer); #endif - INSTANTIATE_CLASS(BatchNormLayer); - REGISTER_LAYER_CLASS(BatchNorm); +INSTANTIATE_CLASS(BatchNormLayer); +REGISTER_LAYER_CLASS(BatchNorm); } // namespace caffe - diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu index e87f8c62..cd8924a4 100644 --- a/src/caffe/layers/batch_norm_layer.cu +++ b/src/caffe/layers/batch_norm_layer.cu @@ -2,227 +2,166 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/filler.hpp" #include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { - template - void BatchNormLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - const Dtype* const_top_data = top[0]->gpu_data(); - Dtype* top_data = top[0]->mutable_gpu_data(); - Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data(); - Dtype* buffer_data = buffer_blob_.mutable_gpu_data(); - const Dtype* const_buffer_data = buffer_blob_.gpu_data(); - - - // put the squares of bottom into buffer_blob_ - caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2), - buffer_blob_.mutable_gpu_data()); +template +void BatchNormLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + int num = bottom[0]->shape(0); + int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); + + // elementwise square + caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2), + temp_.mutable_gpu_data()); + + if (use_global_stats_) { + // use the stored mean/variance estimates. TODO(cdoersch): allow an option + // to use an unbiased variance estimate, like the paper does. + const Dtype scale_factor = 1 / this->blobs_[2]->cpu_data()[0]; + caffe_gpu_scale(variance_.count(), scale_factor, + this->blobs_[0]->gpu_data(), mean_.mutable_gpu_data()); + caffe_gpu_scale(variance_.count(), scale_factor, + this->blobs_[1]->gpu_data(), variance_.mutable_gpu_data()); + } else { // computes variance using var(X) = E(X^2) - (EX)^2 - // EX across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1. / (H_ * W_)), - bottom_data, spatial_sum_multiplier_.gpu_data(), - Dtype(0), spatial_mean_data); - // EX across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_), - spatial_mean_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - batch_mean_.mutable_gpu_data()); - - // E(X^2) across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1. / (H_ * W_)), buffer_data, - spatial_sum_multiplier_.gpu_data(), Dtype(0), - spatial_variance_.mutable_gpu_data()); - // E(X^2) across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. 
/ N_), - spatial_variance_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - batch_variance_.mutable_gpu_data()); - - caffe_gpu_powx(batch_mean_.count(), batch_mean_.gpu_data(), - Dtype(2), buffer_blob_.mutable_gpu_data()); // (EX)^2 - caffe_gpu_sub(batch_mean_.count(), batch_variance_.gpu_data(), - buffer_data, batch_variance_.mutable_gpu_data()); // variance - - // do mean and variance normalization - // subtract mean - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), batch_mean_.gpu_data(), Dtype(0), - spatial_mean_data); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, - 1, -Dtype(1), - spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0), - buffer_blob_.mutable_gpu_data()); - - caffe_gpu_add(buffer_blob_.count(), bottom_data, buffer_data, top_data); - - // normalize variance - caffe_gpu_add_scalar(batch_variance_.count(), var_eps_, - batch_variance_.mutable_gpu_data()); - caffe_gpu_powx(batch_variance_.count(), batch_variance_.gpu_data(), - Dtype(0.5), batch_variance_.mutable_gpu_data()); - - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0), - spatial_variance_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), - Dtype(0), buffer_blob_.mutable_gpu_data()); - - caffe_gpu_div(buffer_blob_.count(), top_data, buffer_data, top_data); - - // Saving x_norm - caffe_copy(top[0]->count(), const_top_data, x_norm_.mutable_gpu_data()); - - // scale - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(), - Dtype(0), spatial_variance_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), - Dtype(0), buffer_blob_.mutable_gpu_data()); - - caffe_gpu_mul(buffer_blob_.count(), top_data, buffer_data, top_data); - - // shift - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), - this->blobs_[1]->gpu_data(), Dtype(0), - spatial_mean_data); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1, - Dtype(1), - spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0), - buffer_blob_.mutable_gpu_data()); - caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data); + caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, + 1. / (num * spatial_dim), bottom_data, + spatial_sum_multiplier_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., + mean_.mutable_gpu_data()); + caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, + 1. 
/ (num * spatial_dim), temp_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., + variance_.mutable_gpu_data()); + this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_; + this->blobs_[2]->mutable_cpu_data()[0] += 1; + caffe_gpu_axpby(mean_.count(), Dtype(1), mean_.gpu_data(), + moving_average_fraction_, this->blobs_[0]->mutable_gpu_data()); + Dtype m = Dtype(bottom[0]->count()/channels_); + caffe_gpu_axpby(variance_.count(), m/(m-1), variance_.gpu_data(), + moving_average_fraction_, this->blobs_[1]->mutable_gpu_data()); } + // elementwise square of mean + caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2), + temp_.mutable_gpu_data()); + + caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(), + variance_.mutable_gpu_data()); // variance + + // normalize variance + caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data()); + caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), + variance_.mutable_gpu_data()); - template - void BatchNormLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); - const Dtype* top_data = top[0]->gpu_data(); - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - const Dtype* const_bottom_diff = bottom[0]->gpu_diff(); - Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data(); - Dtype* buffer_data = buffer_blob_.mutable_gpu_data(); - const Dtype* const_buffer_data = buffer_blob_.gpu_data(); - - // Propage to layer params - // gradient w.r.t. scale - caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), - top_diff, buffer_blob_.mutable_gpu_data()); - // EX across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), - buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0), - spatial_variance_.mutable_gpu_data()); - // EX across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_variance_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - this->blobs_[0]->mutable_gpu_diff()); - - // gradient w.r.t. 
shift - // EX across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), - top_diff, spatial_sum_multiplier_.gpu_data(), - Dtype(0), spatial_mean_data); - // EX across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_mean_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - this->blobs_[1]->mutable_gpu_diff()); - - // Propagate down - // scale top diff - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(), - Dtype(0), spatial_variance_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), - Dtype(0), - buffer_blob_.mutable_gpu_data()); - caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data, - buffer_blob_.mutable_gpu_data()); - - // use new top diff for computation - caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), - buffer_data, bottom_diff); - // EX across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, - Dtype(1), bottom_diff, - spatial_sum_multiplier_.gpu_data(), Dtype(0), spatial_mean_data); - // EX across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_mean_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - batch_mean_.mutable_gpu_data()); - - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), - batch_mean_.gpu_data(), Dtype(0), - spatial_mean_data); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), spatial_mean_.gpu_data(), - spatial_sum_multiplier_.gpu_data(), Dtype(0), - bottom_diff); - - caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(), - bottom_diff, bottom_diff); - - // EX across spatial - caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1), - buffer_data, spatial_sum_multiplier_.gpu_data(), - Dtype(0), spatial_mean_data); - - // EX across batch - caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1), - spatial_mean_.gpu_data(), - batch_sum_multiplier_.gpu_data(), Dtype(0), - batch_mean_.mutable_gpu_data()); - - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, - C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), - batch_mean_.gpu_data(), Dtype(0), - spatial_mean_data); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), - Dtype(1), - bottom_diff); - - caffe_gpu_axpby(buffer_blob_.count(), Dtype(1), buffer_data, - Dtype(-1. 
/ (N_ * H_ * W_)), - bottom_diff); - - // put the squares of bottom into buffer_blob_ -// caffe_gpu_powx(buffer_blob_.count(), bottom_data, Dtype(2), -// buffer_blob_.mutable_gpu_data()); - - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1), - batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0), - spatial_variance_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, - H_ * W_, 1, Dtype(1), - spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(), - Dtype(0), - buffer_blob_.mutable_gpu_data()); - - caffe_gpu_div(buffer_blob_.count(), const_bottom_diff, - const_buffer_data, bottom_diff); + // do mean and variance normalization + if (bottom[0] != top[0]) { + caffe_copy(bottom[0]->count(), bottom_data, top_data); } + // subtract mean + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, -1, num_by_chans_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 1., top_data); + // replicate variance to input size + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.gpu_data(), variance_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, 1., num_by_chans_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 0., temp_.mutable_gpu_data()); + caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data); + // TODO(cdoersch): The caching is only needed because later in-place layers + // might clobber the data. Can we skip this if they won't? + caffe_copy(x_norm_.count(), top_data, + x_norm_.mutable_gpu_data()); +} + +template +void BatchNormLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + CHECK(!use_global_stats_); + const Dtype* top_diff; + if (bottom[0] != top[0]) { + top_diff = top[0]->gpu_diff(); + } else { + caffe_copy(x_norm_.count(), top[0]->gpu_diff(), x_norm_.mutable_gpu_diff()); + top_diff = x_norm_.gpu_diff(); + } + const Dtype* top_data = x_norm_.gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + int num = bottom[0]->shape()[0]; + int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); + // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then + // + // dE(Y)/dX = + // (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y) + // ./ sqrt(var(X) + eps) + // + // where \cdot and ./ are hadamard product and elementwise division, + // respectively, dE/dY is the top diff, and mean/var/sum are all computed + // along all dimensions except the channels dimension. In the above + // equation, the operations allow for expansion (i.e. broadcast) along all + // dimensions except the channels dimension where required. 
+ + // sum(dE/dY \cdot Y) + caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff); + caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1., + bottom_diff, spatial_sum_multiplier_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., + mean_.mutable_gpu_data()); + + // reshape (broadcast) the above + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, 1., num_by_chans_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 0., bottom_diff); + + // sum(dE/dY \cdot Y) \cdot Y + caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff); + + // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y + caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1., + top_diff, spatial_sum_multiplier_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemv(CblasTrans, num, channels_, 1., + num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., + mean_.mutable_gpu_data()); + // reshape (broadcast) the above to make + // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num * channels_, + spatial_dim, 1, 1., num_by_chans_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 1., bottom_diff); + + // dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y + caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff, + Dtype(-1. / (num * spatial_dim)), bottom_diff); + + // note: temp_ still contains sqrt(var(X)+eps), computed during the forward + // pass. + caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff); +} + +INSTANTIATE_LAYER_GPU_FUNCS(BatchNormLayer); - INSTANTIATE_LAYER_GPU_FUNCS(BatchNormLayer); -} // namespace caffe +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index a8747c12..99dd3c90 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -301,7 +301,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 139 (last added: tile_param) +// LayerParameter next available layer-specific ID: 140 (last added: batch_norm_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -350,6 +350,7 @@ message LayerParameter { // The default for the engine is set by the ENGINE switch at compile-time. optional AccuracyParameter accuracy_param = 102; optional ArgMaxParameter argmax_param = 103; + optional BatchNormParameter batch_norm_param = 139; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -461,6 +462,18 @@ message ConcatParameter { optional uint32 concat_dim = 1 [default = 1]; } +message BatchNormParameter { + // If false, accumulate global mean/variance values via a moving average. If + // true, use those accumulated values instead of computing mean/variance + // across the batch. + optional bool use_global_stats = 1; + // How much does the moving average decay each iteration? 
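+  // Concretely, each training iteration updates the stored statistics S as
+  //   S <- moving_average_fraction * S + batch_statistic
+  // (the variance gets an unbiased m/(m-1) correction), and a separate scalar
+  // blob accumulates the matching normalization factor, so the estimate used
+  // when use_global_stats is true is S divided by that factor.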
+ optional float moving_average_fraction = 2 [default = .999]; + // Small value to add to the variance estimate so that we don't divide by + // zero. + optional float eps = 3 [default = 1e-5]; +} + message ContrastiveLossParameter { // margin for dissimilar pair optional float margin = 1 [default = 1.0]; diff --git a/src/caffe/test/test_batch_norm_layer.cpp b/src/caffe/test/test_batch_norm_layer.cpp index 704efd5d..22b9667f 100644 --- a/src/caffe/test/test_batch_norm_layer.cpp +++ b/src/caffe/test/test_batch_norm_layer.cpp @@ -60,7 +60,50 @@ namespace caffe { for ( int k = 0; k < height; ++k ) { for ( int l = 0; l < width; ++l ) { Dtype data = this->blob_top_->data_at(i, j, k, l); - Dtype bottom_data = this->blob_bottom_->data_at(i, j, k, l); + sum += data; + var += data * data; + } + } + } + sum /= height * width * num; + var /= height * width * num; + + const Dtype kErrorBound = 0.001; + // expect zero mean + EXPECT_NEAR(0, sum, kErrorBound); + // expect unit variance + EXPECT_NEAR(1, var, kErrorBound); + } + } + + TYPED_TEST(BatchNormLayerTest, TestForwardInplace) { + typedef typename TypeParam::Dtype Dtype; + Blob blob_inplace(5, 2, 3, 4); + vector*> blob_bottom_vec; + vector*> blob_top_vec; + LayerParameter layer_param; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(&blob_inplace); + blob_bottom_vec.push_back(&blob_inplace); + blob_top_vec.push_back(&blob_inplace); + + BatchNormLayer layer(layer_param); + layer.SetUp(blob_bottom_vec, blob_top_vec); + layer.Forward(blob_bottom_vec, blob_top_vec); + + // Test mean + int num = blob_inplace.num(); + int channels = blob_inplace.channels(); + int height = blob_inplace.height(); + int width = blob_inplace.width(); + + for (int j = 0; j < channels; ++j) { + Dtype sum = 0, var = 0; + for (int i = 0; i < num; ++i) { + for ( int k = 0; k < height; ++k ) { + for ( int l = 0; l < width; ++l ) { + Dtype data = blob_inplace.data_at(i, j, k, l); sum += data; var += data * data; } From 09b8738d73ebc37dda09e8c6dd05e35609999c77 Mon Sep 17 00:00:00 2001 From: Rodrigo Benenson Date: Thu, 22 Oct 2015 18:18:08 +0200 Subject: [PATCH 151/223] diff.ndim != 4 is outdated this code seems not to apply to the caffe head. ``` if diff.ndim != 4: raise Exception('{} diff is not 4-d'.format(top)) ``` --- python/caffe/pycaffe.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 8ea24da4..7bd4f411 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -146,8 +146,6 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs): # Set top diffs according to defined shapes and make arrays single and # C-contiguous as Caffe expects. for top, diff in kwargs.iteritems(): - if diff.ndim != 4: - raise Exception('{} diff is not 4-d'.format(top)) if diff.shape[0] != self.blobs[top].num: raise Exception('Diff is not batch sized') self.blobs[top].diff[...] = diff From 3e5f49435f95de57bffbde53d745dcb4a8f1f870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20L=C3=B8ve=20Selvik?= Date: Tue, 13 Oct 2015 16:32:52 +0200 Subject: [PATCH 152/223] Add opencv_imgcodecs to library path in Makefile Project does not compile without opencv_imgcodecs in the library path if you're using OpenCV 3. This introduces a OPENCV_VERSION flag in Makefile.config that includes the library if set to 3. 
(Trying to include it with OpenCV 2 also breaks the build) --- Makefile | 7 ++++++- Makefile.config.example | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5fb6394e..43cb15fe 100644 --- a/Makefile +++ b/Makefile @@ -184,7 +184,12 @@ ifeq ($(USE_LMDB), 1) LIBRARIES += lmdb endif ifeq ($(USE_OPENCV), 1) - LIBRARIES += opencv_core opencv_highgui opencv_imgproc + LIBRARIES += opencv_core opencv_highgui opencv_imgproc + + ifeq ($(OPENCV_VERSION), 3) + LIBRARIES += opencv_imgcodecs + endif + endif PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare diff --git a/Makefile.config.example b/Makefile.config.example index a20bad2f..8e2c4fb4 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -12,6 +12,9 @@ # USE_LMDB := 0 # USE_OPENCV := 0 +# Uncomment if you're using OpenCV 3 +# OPENCV_VERSION := 3 + # To customize your choice of compiler, uncomment and set the following. # N.B. the default for Linux is g++ and the default for OSX is clang++ # CUSTOM_CXX := g++ From 9898794172b7def7a91d925d97e11dd0878ddb61 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 22 Oct 2015 19:12:48 -0700 Subject: [PATCH 153/223] cuDNN: only log conv workspace in debug mode --- src/caffe/layers/cudnn_conv_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 8b61249a..c82cb7ef 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -191,7 +191,7 @@ void CuDNNConvolutionLayer::Reshape( // this is the total amount of storage needed over all groups + streams if (total_max_workspace > workspaceSizeInBytes) { - LOG(INFO) << "Reallocating workspace storage: " << total_max_workspace; + DLOG(INFO) << "Reallocating workspace storage: " << total_max_workspace; workspaceSizeInBytes = total_max_workspace; // free the existing workspace and allocate a new (larger) one From 8e455850bf398dd16dffa5e7591480d013b8e573 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Tue, 27 Oct 2015 22:49:28 -0700 Subject: [PATCH 154/223] CuDNNConvolutionLayer accumulate gradients --- src/caffe/layers/cudnn_conv_layer.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 63b6ab9c..f2df4aa5 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -53,12 +53,10 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, if (this->param_propagate_down_[0]) { weight = this->blobs_[0]->gpu_data(); weight_diff = this->blobs_[0]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); } Dtype* bias_diff = NULL; if (this->bias_term_ && this->param_propagate_down_[1]) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff); } for (int i = 0; i < top.size(); ++i) { const Dtype* top_diff = top[i]->gpu_diff(); From 5925fa8ed94e9af02449f853b06f252ac5f4c364 Mon Sep 17 00:00:00 2001 From: Kai Li <1196594711@qq.com> Date: Fri, 30 Oct 2015 00:46:08 +0800 Subject: [PATCH 155/223] Update plot_training_log.py.example I find there is no plot_log.sh file --- tools/extra/plot_training_log.py.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index b6fda54e..4d3ed0d1 100755 --- a/tools/extra/plot_training_log.py.example +++ 
b/tools/extra/plot_training_log.py.example @@ -150,7 +150,7 @@ Be warned that the fields in the training log may change in the future. You had better check the data files and change the mapping from field name to field index in create_field_index before designing your own plots. Usage: - ./plot_log.sh chart_type[0-%s] /where/to/save.png /path/to/first.log ... + ./plot_training_log.py chart_type[0-%s] /where/to/save.png /path/to/first.log ... Notes: 1. Supporting multiple logs. 2. Log file name must end with the lower-cased "%s". From 54f0c08ca144c498c835baa017887a64bc8fbbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scheibe?= Date: Tue, 3 Nov 2015 19:27:07 +0100 Subject: [PATCH 156/223] fix detect.py (invalid model path) --- python/detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/detect.py b/python/detect.py index 691098f5..1aba964a 100755 --- a/python/detect.py +++ b/python/detect.py @@ -46,7 +46,7 @@ def main(argv): parser.add_argument( "--model_def", default=os.path.join(pycaffe_dir, - "../models/bvlc_reference_caffenet/deploy.prototxt.prototxt"), + "../models/bvlc_reference_caffenet/deploy.prototxt"), help="Model definition file." ) parser.add_argument( From b2339716fd3c6ebb050be0241ba2a34f804ae904 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Tue, 3 Nov 2015 14:42:24 -0800 Subject: [PATCH 157/223] TravisCI: wget cmake with --no-check-certificate ``` --2015-11-03 22:31:11-- http://www.cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh Resolving www.cmake.org (www.cmake.org)... 66.194.253.19 Connecting to www.cmake.org (www.cmake.org)|66.194.253.19|:80... connected. HTTP request sent, awaiting response... 301 Moved Permanently Location: http://cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh [following] --2015-11-03 22:31:11-- http://cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh Resolving cmake.org (cmake.org)... 66.194.253.19 Connecting to cmake.org (cmake.org)|66.194.253.19|:80... connected. HTTP request sent, awaiting response... 301 Moved Permanently Location: https://cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh [following] --2015-11-03 22:31:11-- https://cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh Connecting to cmake.org (cmake.org)|66.194.253.19|:443... connected. ERROR: no certificate subject alternative name matches requested host name `cmake.org'. To connect to cmake.org insecurely, use `--no-check-certificate'. ``` --- scripts/travis/travis_install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/travis_install.sh b/scripts/travis/travis_install.sh index d6c6e228..d78c4d2f 100755 --- a/scripts/travis/travis_install.sh +++ b/scripts/travis/travis_install.sh @@ -23,7 +23,7 @@ apt-get install \ # Caffe requires a minimum CMake version of 2.8.8. 
if $WITH_CMAKE; then # cmake 3 will make sure that the python interpreter and libraries match - wget http://www.cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh -O cmake3.sh + wget --no-check-certificate http://www.cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh -O cmake3.sh chmod +x cmake3.sh ./cmake3.sh --prefix=/usr/ --skip-license --exclude-subdir fi From 5196926a7cca1a85aecbd97e78452352fc5d2b3d Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 4 Nov 2015 22:10:25 -0800 Subject: [PATCH 158/223] [travis] fix boost/python3 conda conflict --- scripts/travis/travis_install.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/travis/travis_install.sh b/scripts/travis/travis_install.sh index d78c4d2f..432c81dc 100755 --- a/scripts/travis/travis_install.sh +++ b/scripts/travis/travis_install.sh @@ -70,6 +70,10 @@ if [ ! -d $CONDA_DIR ]; then ./miniconda.sh -b -p $CONDA_DIR conda update --yes conda + # The version of boost we're using for Python 3 depends on 3.4 for now. + if [ "$PYTHON_VERSION" -eq "3" ]; then + conda install --yes python=3.4 + fi conda install --yes numpy scipy matplotlib scikit-image pip # Let conda install boost (so that boost_python matches) conda install --yes -c https://conda.binstar.org/menpo boost=1.56.0 From 4137c093bd6ca018c5953a1e069069ab96f4f91d Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 4 Nov 2015 23:54:41 -0800 Subject: [PATCH 159/223] [style] fix whitespace in travis_install.sh --- scripts/travis/travis_install.sh | 54 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/scripts/travis/travis_install.sh b/scripts/travis/travis_install.sh index 432c81dc..d18dc223 100755 --- a/scripts/travis/travis_install.sh +++ b/scripts/travis/travis_install.sh @@ -61,39 +61,39 @@ rm -f $LMDB_FILE # than using pip for everything). export PATH=$CONDA_DIR/bin:$PATH if [ ! -d $CONDA_DIR ]; then - if [ "$PYTHON_VERSION" -eq "3" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - else - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - fi - chmod +x miniconda.sh - ./miniconda.sh -b -p $CONDA_DIR - - conda update --yes conda - # The version of boost we're using for Python 3 depends on 3.4 for now. - if [ "$PYTHON_VERSION" -eq "3" ]; then - conda install --yes python=3.4 - fi - conda install --yes numpy scipy matplotlib scikit-image pip - # Let conda install boost (so that boost_python matches) - conda install --yes -c https://conda.binstar.org/menpo boost=1.56.0 + if [ "$PYTHON_VERSION" -eq "3" ]; then + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + else + wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh + fi + chmod +x miniconda.sh + ./miniconda.sh -b -p $CONDA_DIR + + conda update --yes conda + # The version of boost we're using for Python 3 depends on 3.4 for now. + if [ "$PYTHON_VERSION" -eq "3" ]; then + conda install --yes python=3.4 + fi + conda install --yes numpy scipy matplotlib scikit-image pip + # Let conda install boost (so that boost_python matches) + conda install --yes -c https://conda.binstar.org/menpo boost=1.56.0 fi # install protobuf 3 (just use the miniconda3 directory to avoid having to setup the path again) if [ "$PYTHON_VERSION" -eq "3" ] && [ ! -e "$CONDA_DIR/bin/protoc" ]; then - pushd . 
- wget https://github.com/google/protobuf/archive/v3.0.0-alpha-3.1.tar.gz -O protobuf-3.tar.gz - tar -C /tmp -xzvf protobuf-3.tar.gz - cd /tmp/protobuf-3*/ - ./autogen.sh - ./configure --prefix=$CONDA_DIR - $MAKE - $MAKE install - popd + pushd . + wget https://github.com/google/protobuf/archive/v3.0.0-alpha-3.1.tar.gz -O protobuf-3.tar.gz + tar -C /tmp -xzvf protobuf-3.tar.gz + cd /tmp/protobuf-3*/ + ./autogen.sh + ./configure --prefix=$CONDA_DIR + $MAKE + $MAKE install + popd fi if [ "$PYTHON_VERSION" -eq "3" ]; then - pip install --pre protobuf + pip install --pre protobuf else - pip install protobuf + pip install protobuf fi From bc1aa41af7d0ba46da4d7c71fc9109baea651ce0 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 4 Nov 2015 20:48:43 -0800 Subject: [PATCH 160/223] remove dead cpp code for number of CUDA threads __CUDA_ARCH__ is not defined in host code; the #if was vacuous and misleading. --- include/caffe/util/device_alternate.hpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index 6ea595db..e3fe4fe2 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -81,14 +81,8 @@ namespace caffe { const char* cublasGetErrorString(cublasStatus_t error); const char* curandGetErrorString(curandStatus_t error); -// CUDA: thread number configuration. -// Use 1024 threads per block, which requires cuda sm_2x or above, -// or fall back to attempt compatibility (best of luck to you). -#if __CUDA_ARCH__ >= 200 - const int CAFFE_CUDA_NUM_THREADS = 1024; -#else - const int CAFFE_CUDA_NUM_THREADS = 512; -#endif +// CUDA: use 512 threads per block +const int CAFFE_CUDA_NUM_THREADS = 512; // CUDA: number of blocks for threads. inline int CAFFE_GET_BLOCKS(const int N) { From 32dc03f14c36d1df46f37a7d13ad528e52c6f786 Mon Sep 17 00:00:00 2001 From: ernest-tg Date: Thu, 5 Nov 2015 15:47:28 +0100 Subject: [PATCH 161/223] Correct transposition & channel_swap in deprocess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deprocess( ) function should invert the preprocess( ) function, however it only worked when the permutation of your channel_swap is of order 2 and the permutation of your transpose were of order 3. This is usually the case, which is why this bug went unnoticed for a long time. To reproduce it (on former version), try to preprocess and then deprocess with transformer.set_transpose('data', (0,2,1)) (or (1,0,2) or (2,1,0)) Or with transformer.set_channel_swap('data', (2,0,1)) (or (1,2,0) ) Indeed, we had L152 (in preprocess) caffe_in = caffe_in[channel_swap, :, :] L181 (in deprocess) decaf_in = decaf_in[channel_swap, :, :] So we applied [channel_swap,:,:] twice to the initial data => not always the identity L154 (in preprocess) caffe_in = caffe_in.transpose(transpose) L183 (in deprocess) decaf_in = decaf_in.transpose([transpose[t] for t in transpose]) The transposition [transpose[t] for t in transpose] is (tranpsose)² so we applied transpose[t] three times which is not always the identity. 
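A quick NumPy check of the fix (the 3-channel array and swap order below are made up
for illustration):

```
import numpy as np

channel_swap = (2, 0, 1)
x = np.arange(3 * 2 * 2).reshape(3, 2, 2)

swapped = x[list(channel_swap), :, :]          # what preprocess() does
# applying the same swap again does not undo it for this permutation
assert not np.array_equal(swapped[list(channel_swap), :, :], x)
# np.argsort(channel_swap) is the inverse permutation, so deprocess() recovers x
assert np.array_equal(swapped[np.argsort(channel_swap), :, :], x)
```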
--- python/caffe/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index 11c84260..14942bed 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -178,9 +178,9 @@ def deprocess(self, in_, data): if raw_scale is not None: decaf_in /= raw_scale if channel_swap is not None: - decaf_in = decaf_in[channel_swap, :, :] + decaf_in = decaf_in[np.argsort(channel_swap), :, :] if transpose is not None: - decaf_in = decaf_in.transpose([transpose[t] for t in transpose]) + decaf_in = decaf_in.transpose(np.argsort(transpose)) return decaf_in def set_transpose(self, in_, order): From 987b3d8794e3fe27b4402d52fb3921555104b451 Mon Sep 17 00:00:00 2001 From: Tim Meinhardt Date: Fri, 6 Nov 2015 14:51:46 +0100 Subject: [PATCH 162/223] Fix ArgMaxLayer::Reshape for any num of bottom axes --- include/caffe/common_layers.hpp | 14 +++++++------- src/caffe/layers/argmax_layer.cpp | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 72f39ee0..d42d15c4 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -53,8 +53,8 @@ class ArgMaxLayer : public Layer { * -# @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x @f$ * @param top output Blob vector (length 1) - * -# @f$ (N \times 1 \times K \times 1) @f$ or, if out_max_val - * @f$ (N \times 2 \times K \times 1) @f$ unless axis set than e.g. + * -# @f$ (N \times 1 \times K) @f$ or, if out_max_val + * @f$ (N \times 2 \times K) @f$ unless axis set than e.g. * @f$ (N \times K \times H \times W) @f$ if axis == 1 * the computed outputs @f$ * y_n = \arg\max\limits_i x_{ni} @@ -81,13 +81,13 @@ class ArgMaxLayer : public Layer { * each channel in the data (i.e. axis 1), it subtracts the mean and divides * by the variance, where both statistics are computed across both spatial * dimensions and across the different examples in the batch. - * + * * By default, during training time, the network is computing global mean/ * variance statistics via a running average, which is then used at test * time to allow deterministic outputs for each input. You can manually * toggle whether the network is accumulating or using the statistics via the * use_global_stats option. IMPORTANT: for this feature to work, you MUST - * set the learning rate to zero for all three parameter blobs, i.e., + * set the learning rate to zero for all three parameter blobs, i.e., * param {lr_mult: 0} three times in the layer definition. * * Note that the original paper also included a per-channel learned bias and @@ -96,10 +96,10 @@ class ArgMaxLayer : public Layer { * followed by a Convolution layer with output the same size as the current. * This produces a channel-specific value that can be added or multiplied by * the BatchNorm layer's output. - * + * * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network - * Training by Reducing Internal Covariate Shift." arXiv preprint - * arXiv:1502.03167 (2015). + * Training by Reducing Internal Covariate Shift." arXiv preprint + * arXiv:1502.03167 (2015). * * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
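 *
 * A minimal usage sketch in a layer definition (layer and blob names here are
 * illustrative only):
 *
 *   layer {
 *     name: "conv1_bn"
 *     type: "BatchNorm"
 *     bottom: "conv1"
 *     top: "conv1"
 *     param { lr_mult: 0 }
 *     param { lr_mult: 0 }
 *     param { lr_mult: 0 }
 *   }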
*/ diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index 44df8d4e..354d83f7 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -32,7 +32,9 @@ void ArgMaxLayer::LayerSetUp(const vector*>& bottom, template void ArgMaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - std::vector shape(bottom[0]->num_axes(), 1); + int num_top_axes = bottom[0]->num_axes(); + if ( num_top_axes < 3 ) num_top_axes = 3; + std::vector shape(num_top_axes, 1); if (has_axis_) { // Produces max_ind or max_val per axis shape = bottom[0]->shape(); From 0f1e4e5ddd884325df82db00ae0fc531481e9c60 Mon Sep 17 00:00:00 2001 From: Shandy Brown Date: Fri, 6 Nov 2015 20:01:57 -0800 Subject: [PATCH 163/223] Add a -c to wget so that it continues interrupted downloads This would've saved me an overnight download (slow connection here) I tested it, and it worked for me. --- data/ilsvrc12/get_ilsvrc_aux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/ilsvrc12/get_ilsvrc_aux.sh b/data/ilsvrc12/get_ilsvrc_aux.sh index b9b85d21..90935f25 100755 --- a/data/ilsvrc12/get_ilsvrc_aux.sh +++ b/data/ilsvrc12/get_ilsvrc_aux.sh @@ -12,7 +12,7 @@ cd $DIR echo "Downloading..." -wget http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz +wget -c http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz echo "Unzipping..." From c42eb9c4f7d18f1ba16d1d5cb0646296679d936c Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Sun, 8 Nov 2015 18:50:29 +0800 Subject: [PATCH 164/223] GetDB must return a value As noted by @danst18, when USE_LEVELDB and USE_LMDB are disabled, a compiler error is issued since GetDB no longer returns a value. At runtime a fatal error would be issued anyways. However to help users who don't need a DB backend, NULL should be returned here. --- src/caffe/util/db.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/caffe/util/db.cpp b/src/caffe/util/db.cpp index ccda054d..d0a2b0b5 100644 --- a/src/caffe/util/db.cpp +++ b/src/caffe/util/db.cpp @@ -33,6 +33,7 @@ DB* GetDB(const string& backend) { } #endif // USE_LMDB LOG(FATAL) << "Unknown database backend"; + return NULL; } } // namespace db From 0eea94a0e02dd6d28175538a29720456f5213da9 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sun, 8 Nov 2015 11:20:32 -0800 Subject: [PATCH 165/223] display 'ignore source layer' when initializing from existing parameters This helps in the case to see which layer is initialized from existing parameters, and which layer is ignored. This helps identify the cases where the user types a error mismatch layer name. 
--- src/caffe/net.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 1ad93e6a..05bee798 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -745,7 +745,7 @@ void Net::ShareTrainedLayersWith(const Net* other) { ++target_layer_id; } if (target_layer_id == layer_names_.size()) { - DLOG(INFO) << "Ignoring source layer " << source_layer_name; + LOG(INFO) << "Ignoring source layer " << source_layer_name; continue; } DLOG(INFO) << "Copying source layer " << source_layer_name; @@ -813,7 +813,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { ++target_layer_id; } if (target_layer_id == layer_names_.size()) { - DLOG(INFO) << "Ignoring source layer " << source_layer_name; + LOG(INFO) << "Ignoring source layer " << source_layer_name; continue; } DLOG(INFO) << "Copying source layer " << source_layer_name; @@ -868,7 +868,7 @@ void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { for (int i = 0; i < num_layers; ++i) { string source_layer_name = hdf5_get_name_by_idx(data_hid, i); if (!layer_names_index_.count(source_layer_name)) { - DLOG(INFO) << "Ignoring source layer " << source_layer_name; + LOG(INFO) << "Ignoring source layer " << source_layer_name; continue; } int target_layer_id = layer_names_index_[source_layer_name]; From 50a44b05f87d6c5b734e2b172f5120898c6e3e47 Mon Sep 17 00:00:00 2001 From: panmari Date: Fri, 6 Nov 2015 12:53:16 +0100 Subject: [PATCH 166/223] Switched order of two layers for simpler diff with untuned file Untuned file is in models/bvlc_reference_caffenet/train_val.prototxt. --- models/finetune_flickr_style/train_val.prototxt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/models/finetune_flickr_style/train_val.prototxt b/models/finetune_flickr_style/train_val.prototxt index 848a426c..985353be 100644 --- a/models/finetune_flickr_style/train_val.prototxt +++ b/models/finetune_flickr_style/train_val.prototxt @@ -369,13 +369,6 @@ layer { } } } -layer { - name: "loss" - type: "SoftmaxWithLoss" - bottom: "fc8_flickr" - bottom: "label" - top: "loss" -} layer { name: "accuracy" type: "Accuracy" @@ -386,3 +379,10 @@ layer { phase: TEST } } +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "fc8_flickr" + bottom: "label" + top: "loss" +} From 96e95fb24dc53bed1e46f2404a2f79f1cf870472 Mon Sep 17 00:00:00 2001 From: Gustav Larsson Date: Mon, 9 Nov 2015 14:32:37 -0600 Subject: [PATCH 167/223] DOC: Fix consistent typo in contrastive loss If a pair is similar, it should take the squared distance and not the distance. This is clearly what the code is doing. --- include/caffe/loss_layers.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index d08ad9b6..1591c0fe 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -132,7 +132,7 @@ class LossLayer : public Layer { /** * @brief Computes the contrastive loss @f$ - * E = \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d + + * E = \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + * \left(1-y\right) \max \left(margin-d, 0\right)^2 * @f$ where @f$ * d = \left| \left| a_n - b_n \right| \right|_2 @f$. 
This can be @@ -148,7 +148,7 @@ class LossLayer : public Layer { * @param top output Blob vector (length 1) * -# @f$ (1 \times 1 \times 1 \times 1) @f$ * the computed contrastive loss: @f$ E = - * \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d + + * \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + * \left(1-y\right) \max \left(margin-d, 0\right)^2 * @f$ where @f$ * d = \left| \left| a_n - b_n \right| \right|_2 @f$. From 29f6670f11c4ac505cad0f779430dea01358c025 Mon Sep 17 00:00:00 2001 From: Tea Date: Sat, 7 Nov 2015 14:09:59 +0800 Subject: [PATCH 168/223] Replace unistd functions with cross platform counterparts --- Makefile | 2 +- cmake/Dependencies.cmake | 2 +- include/caffe/util/io.hpp | 30 +++++++++++------------------- src/caffe/test/test_benchmark.cpp | 6 +++--- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index 4a1d41d5..f5dbf432 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ ifneq ($(CPU_ONLY), 1) LIBRARIES := cudart cublas curand endif -LIBRARIES += glog gflags protobuf boost_system m hdf5_hl hdf5 +LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5 # handle IO dependencies USE_LEVELDB ?= 1 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 5651e2b0..51a803c1 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -2,7 +2,7 @@ set(Caffe_LINKER_LIBS "") # ---[ Boost -find_package(Boost 1.46 REQUIRED COMPONENTS system thread) +find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) include_directories(SYSTEM ${Boost_INCLUDE_DIR}) list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index d6cfa442..6b733254 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -1,7 +1,7 @@ #ifndef CAFFE_UTIL_IO_H_ #define CAFFE_UTIL_IO_H_ -#include +#include #include #include "google/protobuf/message.h" @@ -12,31 +12,23 @@ namespace caffe { using ::google::protobuf::Message; +using ::boost::filesystem::path; inline void MakeTempFilename(string* temp_filename) { temp_filename->clear(); - *temp_filename = "/tmp/caffe_test.XXXXXX"; - char* temp_filename_cstr = new char[temp_filename->size() + 1]; - // NOLINT_NEXT_LINE(runtime/printf) - strcpy(temp_filename_cstr, temp_filename->c_str()); - int fd = mkstemp(temp_filename_cstr); - CHECK_GE(fd, 0) << "Failed to open a temporary file at: " << *temp_filename; - close(fd); - *temp_filename = temp_filename_cstr; - delete[] temp_filename_cstr; + const path& model = boost::filesystem::temp_directory_path() + /"caffe_test.%%%%%%"; + *temp_filename = boost::filesystem::unique_path(model).string(); } inline void MakeTempDir(string* temp_dirname) { temp_dirname->clear(); - *temp_dirname = "/tmp/caffe_test.XXXXXX"; - char* temp_dirname_cstr = new char[temp_dirname->size() + 1]; - // NOLINT_NEXT_LINE(runtime/printf) - strcpy(temp_dirname_cstr, temp_dirname->c_str()); - char* mkdtemp_result = mkdtemp(temp_dirname_cstr); - CHECK(mkdtemp_result != NULL) - << "Failed to create a temporary directory at: " << *temp_dirname; - *temp_dirname = temp_dirname_cstr; - delete[] temp_dirname_cstr; + const path& model = boost::filesystem::temp_directory_path() + /"caffe_test.%%%%%%"; + const path& dir = boost::filesystem::unique_path(model).string(); + bool directoryCreated = boost::filesystem::create_directory(dir); + CHECK(directoryCreated); + *temp_dirname = dir.string(); } bool ReadProtoFromTextFile(const char* filename, Message* proto); diff --git 
a/src/caffe/test/test_benchmark.cpp b/src/caffe/test/test_benchmark.cpp index 43aaa639..b03fdf69 100644 --- a/src/caffe/test/test_benchmark.cpp +++ b/src/caffe/test/test_benchmark.cpp @@ -1,4 +1,4 @@ -#include // for usleep +#include #include "gtest/gtest.h" @@ -64,7 +64,7 @@ TYPED_TEST(BenchmarkTest, TestTimerMilliSeconds) { EXPECT_FALSE(timer.running()); EXPECT_FALSE(timer.has_run_at_least_once()); timer.Start(); - usleep(300 * 1000); + boost::this_thread::sleep(boost::posix_time::milliseconds(300)); EXPECT_GE(timer.MilliSeconds(), 300 - kMillisecondsThreshold); EXPECT_LE(timer.MilliSeconds(), 300 + kMillisecondsThreshold); EXPECT_TRUE(timer.initted()); @@ -79,7 +79,7 @@ TYPED_TEST(BenchmarkTest, TestTimerSeconds) { EXPECT_FALSE(timer.running()); EXPECT_FALSE(timer.has_run_at_least_once()); timer.Start(); - usleep(300 * 1000); + boost::this_thread::sleep(boost::posix_time::milliseconds(300)); EXPECT_GE(timer.Seconds(), 0.3 - kMillisecondsThreshold / 1000.); EXPECT_LE(timer.Seconds(), 0.3 + kMillisecondsThreshold / 1000.); EXPECT_TRUE(timer.initted()); From f9970c83264b43722bd9f97376580cc3dbf61227 Mon Sep 17 00:00:00 2001 From: gdh1995 Date: Tue, 10 Nov 2015 22:41:55 +0800 Subject: [PATCH 169/223] fix a bug that time duration may be 0 when downloading model binary --- scripts/download_model_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_model_binary.py b/scripts/download_model_binary.py index 03a50f67..66f72f24 100755 --- a/scripts/download_model_binary.py +++ b/scripts/download_model_binary.py @@ -18,7 +18,7 @@ def reporthook(count, block_size, total_size): if count == 0: start_time = time.time() return - duration = time.time() - start_time + duration = (time.time() - start_time) or 0.01 progress_size = int(count * block_size) speed = int(progress_size / (1024 * duration)) percent = int(count * block_size * 100 / total_size) From 9ff2baf8e06e4809ad668e5c355ad76c36d9674d Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Thu, 12 Nov 2015 15:32:28 +0800 Subject: [PATCH 170/223] Remove un-necessary includes --- src/caffe/parallel.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 9abc92b6..62f5d738 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -3,9 +3,6 @@ #endif #include #include -#include -#include -#include #include #include From 3682fde8a9a4a7b20e6ceb2d95a9abeab5227561 Mon Sep 17 00:00:00 2001 From: Tea Date: Thu, 12 Nov 2015 15:55:06 +0800 Subject: [PATCH 171/223] Functions shall return a value in syncedmem --- src/caffe/syncedmem.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index ec4665ec..4d356417 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -97,6 +97,7 @@ const void* SyncedMemory::gpu_data() { return (const void*)gpu_ptr_; #else NO_GPU; + return NULL; #endif } @@ -133,6 +134,7 @@ void* SyncedMemory::mutable_gpu_data() { return gpu_ptr_; #else NO_GPU; + return NULL; #endif } From dc48870d7f8e823138594c794789ac3156cc0798 Mon Sep 17 00:00:00 2001 From: Benedikt Wilbertz Date: Wed, 30 Sep 2015 23:02:34 +0200 Subject: [PATCH 172/223] Fix loss of last iteration when average_loss > 1 refactor duplicate code into separate update function for smoothed loss fix naming convention --- include/caffe/solver.hpp | 3 +++ src/caffe/solver.cpp | 37 ++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/include/caffe/solver.hpp 
b/include/caffe/solver.hpp index 26b8e8e2..38259eda 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -107,6 +107,7 @@ class Solver { virtual void RestoreSolverStateFromHDF5(const string& state_file) = 0; virtual void RestoreSolverStateFromBinaryProto(const string& state_file) = 0; void DisplayOutputBlobs(const int net_id); + void UpdateSmoothedLoss(Dtype loss, int start_iter, int average_loss); SolverParameter param_; int iter_; @@ -114,6 +115,8 @@ class Solver { shared_ptr > net_; vector > > test_nets_; vector callbacks_; + vector losses_; + Dtype smoothed_loss_; // The root solver that holds root nets (actually containing shared layers) // in data parallelism diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index d3bc7361..5b31c7d8 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -195,8 +195,8 @@ void Solver::Step(int iters) { const int start_iter = iter_; const int stop_iter = iter_ + iters; int average_loss = this->param_.average_loss(); - vector losses; - Dtype smoothed_loss = 0; + losses_.clear(); + smoothed_loss_ = 0; while (iter_ < stop_iter) { // zero-init the params @@ -223,18 +223,10 @@ void Solver::Step(int iters) { } loss /= param_.iter_size(); // average the loss across iterations for smoothed reporting - if (losses.size() < average_loss) { - losses.push_back(loss); - int size = losses.size(); - smoothed_loss = (smoothed_loss * (size - 1) + loss) / size; - } else { - int idx = (iter_ - start_iter) % average_loss; - smoothed_loss += (loss - losses[idx]) / average_loss; - losses[idx] = loss; - } + UpdateSmoothedLoss(loss, start_iter, average_loss); if (display) { LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_ - << ", loss = " << smoothed_loss; + << ", loss = " << smoothed_loss_; const vector*>& result = net_->output_blobs(); int score_index = 0; for (int j = 0; j < result.size(); ++j) { @@ -297,6 +289,7 @@ void Solver::Solve(const char* resume_file) { // For a network that is trained by the solver, no bottom or top vecs // should be given, and we will just provide dummy vecs. + int start_iter = iter_; Step(param_.max_iter() - iter_); // If we haven't already, save a snapshot after optimization, unless // overridden by setting snapshot_after_train := false @@ -315,9 +308,13 @@ void Solver::Solve(const char* resume_file) { // updated the parameters "max_iter" times -- this final pass is only done to // display the loss, which is computed in the forward pass. 
if (param_.display() && iter_ % param_.display() == 0) { + int average_loss = this->param_.average_loss(); Dtype loss; net_->ForwardPrefilled(&loss); - LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss; + + UpdateSmoothedLoss(loss, start_iter, average_loss); + + LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_; } if (param_.test_interval() && iter_ % param_.test_interval() == 0) { TestAll(); @@ -485,6 +482,20 @@ void Solver::Restore(const char* state_file) { } } +template +void Solver::UpdateSmoothedLoss(Dtype loss, int start_iter, + int average_loss) { + if (losses_.size() < average_loss) { + losses_.push_back(loss); + int size = losses_.size(); + smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size; + } else { + int idx = (iter_ - start_iter) % average_loss; + smoothed_loss_ += (loss - losses_[idx]) / average_loss; + losses_[idx] = loss; + } +} + INSTANTIATE_CLASS(Solver); } // namespace caffe From 0ad1d8ab3f8e3d0bd4d9a7e8b65c7a5f9f28d60a Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Sat, 7 Nov 2015 12:49:15 +0900 Subject: [PATCH 173/223] Update computation of variance and global stats in BatchNormLayer --- src/caffe/layers/batch_norm_layer.cpp | 55 +++++++++++++------------- src/caffe/layers/batch_norm_layer.cu | 56 ++++++++++++++------------- 2 files changed, 57 insertions(+), 54 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index 94c2b96b..5eba25e9 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -2,7 +2,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { @@ -80,20 +79,21 @@ void BatchNormLayer::Forward_cpu(const vector*>& bottom, int num = bottom[0]->shape(0); int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_); - // elementwise square - caffe_powx(bottom[0]->count(), bottom_data, Dtype(2), - temp_.mutable_cpu_data()); + if (bottom[0] != top[0]) { + caffe_copy(bottom[0]->count(), bottom_data, top_data); + } if (use_global_stats_) { // use the stored mean/variance estimates. TODO(cdoersch): allow an option // to use an unbiased variance estimate, like the paper does. - const Dtype scale_factor = 1 / this->blobs_[2]->cpu_data()[0]; + const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ? + 0 : 1 / this->blobs_[2]->cpu_data()[0]; caffe_cpu_scale(variance_.count(), scale_factor, this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data()); caffe_cpu_scale(variance_.count(), scale_factor, this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data()); } else { - // computes variance using var(X) = E(X^2) - (EX)^2 + // compute mean caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. 
/ (num * spatial_dim), bottom_data, spatial_sum_multiplier_.cpu_data(), 0., @@ -101,44 +101,45 @@ void BatchNormLayer::Forward_cpu(const vector*>& bottom, caffe_cpu_gemv(CblasTrans, num, channels_, 1., num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data()); + } + + // subtract mean + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0., + num_by_chans_.mutable_cpu_data()); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, -1, num_by_chans_.cpu_data(), + spatial_sum_multiplier_.cpu_data(), 1., top_data); + + if (!use_global_stats_) { + // compute variance using var(X) = E((X-EX)^2) + caffe_powx(top[0]->count(), top_data, Dtype(2), + temp_.mutable_cpu_data()); // (X-EX)^2 caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. / (num * spatial_dim), temp_.cpu_data(), spatial_sum_multiplier_.cpu_data(), 0., num_by_chans_.mutable_cpu_data()); caffe_cpu_gemv(CblasTrans, num, channels_, 1., num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0., - variance_.mutable_cpu_data()); + variance_.mutable_cpu_data()); // E((X_EX)^2) + + // compute and save moving average this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_; this->blobs_[2]->mutable_cpu_data()[0] += 1; caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(), moving_average_fraction_, this->blobs_[0]->mutable_cpu_data()); - Dtype m = Dtype(bottom[0]->count()/channels_); - caffe_cpu_axpby(variance_.count(), m/(m-1), variance_.cpu_data(), - moving_average_fraction_, this->blobs_[1]->mutable_cpu_data()); + int m = bottom[0]->count()/channels_; + Dtype bias_correction_factor = m > 1 ? Dtype(m)/(m-1) : 1; + caffe_cpu_axpby(variance_.count(), bias_correction_factor, + variance_.cpu_data(), moving_average_fraction_, + this->blobs_[1]->mutable_cpu_data()); } - // elementwise square of mean - caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2), - temp_.mutable_cpu_data()); - - caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(), - variance_.mutable_cpu_data()); // variance // normalize variance caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data()); caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), variance_.mutable_cpu_data()); - // do mean and variance normalization - if (bottom[0] != top[0]) { - caffe_copy(bottom[0]->count(), bottom_data, top_data); - } - // subtract mean - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, - batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0., - num_by_chans_.mutable_cpu_data()); - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, - spatial_dim, 1, -1, num_by_chans_.cpu_data(), - spatial_sum_multiplier_.cpu_data(), 1., top_data); // replicate variance to input size caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0., diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu index cd8924a4..921a58f0 100644 --- a/src/caffe/layers/batch_norm_layer.cu +++ b/src/caffe/layers/batch_norm_layer.cu @@ -2,7 +2,6 @@ #include #include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { @@ -15,20 +14,22 @@ void BatchNormLayer::Forward_gpu(const vector*>& bottom, int num = bottom[0]->shape(0); int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); - // elementwise square - caffe_gpu_powx(bottom[0]->count(), 
bottom_data, Dtype(2), - temp_.mutable_gpu_data()); + if (bottom[0] != top[0]) { + caffe_copy(bottom[0]->count(), bottom_data, top_data); + } + if (use_global_stats_) { // use the stored mean/variance estimates. TODO(cdoersch): allow an option // to use an unbiased variance estimate, like the paper does. - const Dtype scale_factor = 1 / this->blobs_[2]->cpu_data()[0]; + const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ? + 0 : 1 / this->blobs_[2]->cpu_data()[0]; caffe_gpu_scale(variance_.count(), scale_factor, this->blobs_[0]->gpu_data(), mean_.mutable_gpu_data()); caffe_gpu_scale(variance_.count(), scale_factor, this->blobs_[1]->gpu_data(), variance_.mutable_gpu_data()); } else { - // computes variance using var(X) = E(X^2) - (EX)^2 + // compute mean caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. / (num * spatial_dim), bottom_data, spatial_sum_multiplier_.gpu_data(), 0., @@ -36,44 +37,45 @@ void BatchNormLayer::Forward_gpu(const vector*>& bottom, caffe_gpu_gemv(CblasTrans, num, channels_, 1., num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., mean_.mutable_gpu_data()); + } + + // subtract mean + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, + batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0., + num_by_chans_.mutable_gpu_data()); + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, + spatial_dim, 1, -1, num_by_chans_.gpu_data(), + spatial_sum_multiplier_.gpu_data(), 1., top_data); + + if (!use_global_stats_) { + // compute variance using var(X) = E((X-EX)^2) + caffe_gpu_powx(top[0]->count(), top_data, Dtype(2), + temp_.mutable_gpu_data()); // (X-EX)^2 caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. / (num * spatial_dim), temp_.gpu_data(), spatial_sum_multiplier_.gpu_data(), 0., num_by_chans_.mutable_gpu_data()); caffe_gpu_gemv(CblasTrans, num, channels_, 1., num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., - variance_.mutable_gpu_data()); + variance_.mutable_gpu_data()); // E((X_EX)^2) + + // compute and save moving average this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_; this->blobs_[2]->mutable_cpu_data()[0] += 1; caffe_gpu_axpby(mean_.count(), Dtype(1), mean_.gpu_data(), moving_average_fraction_, this->blobs_[0]->mutable_gpu_data()); - Dtype m = Dtype(bottom[0]->count()/channels_); - caffe_gpu_axpby(variance_.count(), m/(m-1), variance_.gpu_data(), - moving_average_fraction_, this->blobs_[1]->mutable_gpu_data()); + int m = bottom[0]->count()/channels_; + Dtype bias_correction_factor = m > 1 ? 
Dtype(m)/(m-1) : 1; + caffe_gpu_axpby(variance_.count(), bias_correction_factor, + variance_.gpu_data(), moving_average_fraction_, + this->blobs_[1]->mutable_gpu_data()); } - // elementwise square of mean - caffe_gpu_powx(mean_.count(), mean_.gpu_data(), Dtype(2), - temp_.mutable_gpu_data()); - - caffe_gpu_sub(mean_.count(), variance_.gpu_data(), temp_.gpu_data(), - variance_.mutable_gpu_data()); // variance // normalize variance caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data()); caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), variance_.mutable_gpu_data()); - // do mean and variance normalization - if (bottom[0] != top[0]) { - caffe_copy(bottom[0]->count(), bottom_data, top_data); - } - // subtract mean - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, - batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0., - num_by_chans_.mutable_gpu_data()); - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, channels_ * num, - spatial_dim, 1, -1, num_by_chans_.gpu_data(), - spatial_sum_multiplier_.gpu_data(), 1., top_data); // replicate variance to input size caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1, batch_sum_multiplier_.gpu_data(), variance_.gpu_data(), 0., From f6e582a38deee8db0904460cbf7aaeb143c682f5 Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Fri, 13 Nov 2015 02:20:02 +0900 Subject: [PATCH 174/223] Make backward pass work when global stats is active for BatchNormLayer including minor code cleaning --- src/caffe/layers/batch_norm_layer.cpp | 10 ++++++---- src/caffe/layers/batch_norm_layer.cu | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index 5eba25e9..b5c91b5e 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -84,8 +84,7 @@ void BatchNormLayer::Forward_cpu(const vector*>& bottom, } if (use_global_stats_) { - // use the stored mean/variance estimates. TODO(cdoersch): allow an option - // to use an unbiased variance estimate, like the paper does. + // use the stored mean/variance estimates. const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ? 0 : 1 / this->blobs_[2]->cpu_data()[0]; caffe_cpu_scale(variance_.count(), scale_factor, @@ -158,7 +157,6 @@ template void BatchNormLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - CHECK(!use_global_stats_); const Dtype* top_diff; if (bottom[0] != top[0]) { top_diff = top[0]->cpu_diff(); @@ -166,8 +164,12 @@ void BatchNormLayer::Backward_cpu(const vector*>& top, caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff()); top_diff = x_norm_.cpu_diff(); } - const Dtype* top_data = x_norm_.cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + if (use_global_stats_) { + caffe_div(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff); + return; + } + const Dtype* top_data = x_norm_.cpu_data(); int num = bottom[0]->shape()[0]; int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_); // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu index 921a58f0..2a6cac54 100644 --- a/src/caffe/layers/batch_norm_layer.cu +++ b/src/caffe/layers/batch_norm_layer.cu @@ -20,8 +20,7 @@ void BatchNormLayer::Forward_gpu(const vector*>& bottom, if (use_global_stats_) { - // use the stored mean/variance estimates. 
TODO(cdoersch): allow an option - // to use an unbiased variance estimate, like the paper does. + // use the stored mean/variance estimates. const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ? 0 : 1 / this->blobs_[2]->cpu_data()[0]; caffe_gpu_scale(variance_.count(), scale_factor, @@ -94,7 +93,6 @@ template void BatchNormLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { - CHECK(!use_global_stats_); const Dtype* top_diff; if (bottom[0] != top[0]) { top_diff = top[0]->gpu_diff(); @@ -102,8 +100,12 @@ void BatchNormLayer::Backward_gpu(const vector*>& top, caffe_copy(x_norm_.count(), top[0]->gpu_diff(), x_norm_.mutable_gpu_diff()); top_diff = x_norm_.gpu_diff(); } - const Dtype* top_data = x_norm_.gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + if (use_global_stats_) { + caffe_gpu_div(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff); + return; + } + const Dtype* top_data = x_norm_.gpu_data(); int num = bottom[0]->shape()[0]; int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then From d81ffbff8cda56de1fe6c41b7156d781f775c7b3 Mon Sep 17 00:00:00 2001 From: Adam Siembida Date: Thu, 12 Nov 2015 16:03:41 -0500 Subject: [PATCH 175/223] Add parentheses to backward_{cpu,gpu} method. --- docs/tutorial/forward_backward.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/forward_backward.md b/docs/tutorial/forward_backward.md index a645f002..528b993b 100644 --- a/docs/tutorial/forward_backward.md +++ b/docs/tutorial/forward_backward.md @@ -29,7 +29,7 @@ The backward pass begins with the loss and computes the gradient with respect to These computations follow immediately from defining the model: Caffe plans and carries out the forward and backward passes for you. - The `Net::Forward()` and `Net::Backward()` methods carry out the respective passes while `Layer::Forward()` and `Layer::Backward()` compute each step. -- Every layer type has `forward_{cpu,gpu}()` and `backward_{cpu,gpu}` methods to compute its steps according to the mode of computation. A layer may only implement CPU or GPU mode due to constraints or convenience. +- Every layer type has `forward_{cpu,gpu}()` and `backward_{cpu,gpu}()` methods to compute its steps according to the mode of computation. A layer may only implement CPU or GPU mode due to constraints or convenience. The [Solver](solver.html) optimizes a model by first calling forward to yield the output and loss, then calling backward to generate the gradient of the model, and then incorporating the gradient into a weight update that attempts to minimize the loss. Division of labor between the Solver, Net, and Layer keep Caffe modular and open to development. From a6f14f6e3d03caf8242ed5aa7e224a9ea8ef740d Mon Sep 17 00:00:00 2001 From: Balint Cristian Date: Fri, 13 Nov 2015 13:58:49 +0200 Subject: [PATCH 176/223] Display and store cuDNN version numbers during cmake. 
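The version is read out of cudnn.h, where cuDNN v3 and later define CUDNN_MAJOR, CUDNN_MINOR and CUDNN_PATCHLEVEL; the regular expressions added below extract exactly those macros. A minimal sketch (assuming a cuDNN v3+ installation) that prints the same numbers, plus the version of the library loaded at run time:

#include <cudnn.h>
#include <cstdio>

int main() {
  // Compile-time version macros, the same tokens the CMake regexes parse.
  std::printf("cudnn.h version: %d.%d.%d\n",
              CUDNN_MAJOR, CUDNN_MINOR, CUDNN_PATCHLEVEL);
  // Version of libcudnn actually loaded at run time.
  std::printf("libcudnn version: %d\n", static_cast<int>(cudnnGetVersion()));
  return 0;
}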
--- cmake/Cuda.cmake | 33 +++++++++++++++++++++++++++++++-- cmake/Summary.cmake | 2 +- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index 98aef268..286a4280 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -183,12 +183,41 @@ function(detect_cuDNN) set(HAVE_CUDNN TRUE PARENT_SCOPE) set(CUDNN_FOUND TRUE PARENT_SCOPE) + file(READ ${CUDNN_INCLUDE}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) + + # cuDNN v3 and beyond + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_VERSION_MAJOR "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_VERSION_MINOR "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_VERSION_PATCH "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") + + if(NOT CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "???") + else() + set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") + endif() + + message(STATUS "Found cuDNN: ver. ${CUDNN_VERSION} found (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") + + string(COMPARE LESS "${CUDNN_VERSION_MAJOR}" 3 cuDNNVersionIncompatible) + if(cuDNNVersionIncompatible) + message(FATAL_ERROR "cuDNN version >3 is required.") + endif() + + set(CUDNN_VERSION "${CUDNN_VERSION}" PARENT_SCOPE) mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) - message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") + endif() endfunction() - ################################################################################################ ### Non macro section ################################################################################################ diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 6984f417..557a6f04 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -142,7 +142,7 @@ function(caffe_print_configuration_summary) caffe_status(" Target GPU(s) : ${CUDA_ARCH_NAME}" ) caffe_status(" GPU arch(s) : ${NVCC_FLAGS_EXTRA_readable}") if(USE_CUDNN) - caffe_status(" cuDNN : " HAVE_CUDNN THEN "Yes" ELSE "Not found") + caffe_status(" cuDNN : " HAVE_CUDNN THEN "Yes (ver. ${CUDNN_VERSION})" ELSE "Not found") else() caffe_status(" cuDNN : Disabled") endif() From a29c2f7a0ff2ff4278a2e498f0b686b5d5cb88cd Mon Sep 17 00:00:00 2001 From: Alex Lee Date: Sat, 14 Nov 2015 12:49:05 -0800 Subject: [PATCH 177/223] Fix outs and diffs being overwritten in forward_backward_all. --- python/caffe/pycaffe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 7bd4f411..31dc702f 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -216,9 +216,9 @@ def _Net_forward_backward_all(self, blobs=None, diffs=None, **kwargs): batch_blobs = self.forward(blobs=blobs, **fb) batch_diffs = self.backward(diffs=diffs, **bb) for out, out_blobs in batch_blobs.iteritems(): - all_outs[out].extend(out_blobs) + all_outs[out].extend(out_blobs.copy()) for diff, out_diffs in batch_diffs.iteritems(): - all_diffs[diff].extend(out_diffs) + all_diffs[diff].extend(out_diffs.copy()) # Package in ndarray. 
for out, diff in zip(all_outs, all_diffs): all_outs[out] = np.asarray(all_outs[out]) From c4190a56ab62b1a63c1c55bcef3860701a322bed Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 18 Nov 2015 10:38:32 -0800 Subject: [PATCH 178/223] Skip python layer tests if WITH_PYTHON_LAYER unset --- python/caffe/test/test_python_layer.py | 2 ++ python/caffe/test/test_python_layer_with_param_str.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index 8ed86655..e46b7118 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -77,6 +77,8 @@ def parameter_net_file(): return f.name +@unittest.skipIf('Python' not in caffe.layer_type_list(), + 'Caffe built without Python layer support') class TestPythonLayer(unittest.TestCase): def setUp(self): net_file = python_net_file() diff --git a/python/caffe/test/test_python_layer_with_param_str.py b/python/caffe/test/test_python_layer_with_param_str.py index 3d0f107b..c36048ae 100644 --- a/python/caffe/test/test_python_layer_with_param_str.py +++ b/python/caffe/test/test_python_layer_with_param_str.py @@ -38,6 +38,8 @@ def python_param_net_file(): return f.name +@unittest.skipIf('Python' not in caffe.layer_type_list(), + 'Caffe built without Python layer support') class TestLayerWithParam(unittest.TestCase): def setUp(self): net_file = python_param_net_file() From 1b0716cfd761cec547c85b19fc8f6f971e9236ac Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Thu, 19 Nov 2015 10:05:48 -0800 Subject: [PATCH 179/223] Fix MaxTopBlobs in Accuracy Layer Fix the typo "MaxTopBlos" to "MaxTopBlobs". This typo causes maximum top number to be incorrect. --- include/caffe/loss_layers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 1591c0fe..e2e3e48c 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -42,7 +42,7 @@ class AccuracyLayer : public Layer { // If there are two top blobs, then the second blob will contain // accuracies per class. virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlos() const { return 2; } + virtual inline int MaxTopBlobs() const { return 2; } protected: /** From 41d0c77e5849f97744a3ca5933fd20887bb97f43 Mon Sep 17 00:00:00 2001 From: Tea Date: Thu, 12 Nov 2015 15:15:22 +0800 Subject: [PATCH 180/223] Convert std::max args to Dtype --- include/caffe/test/test_gradient_check_util.hpp | 5 +++-- src/caffe/layers/contrastive_loss_layer.cpp | 3 ++- src/caffe/test/test_contrastive_loss_layer.cpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp index 25f35d15..b25a8487 100644 --- a/include/caffe/test/test_gradient_check_util.hpp +++ b/include/caffe/test/test_gradient_check_util.hpp @@ -169,8 +169,9 @@ void GradientChecker::CheckGradientSingle(Layer* layer, || fabs(feature) > kink_ + kink_range_) { // We check relative accuracy, but for too small values, we threshold // the scale factor by 1. 
- Dtype scale = std::max( - std::max(fabs(computed_gradient), fabs(estimated_gradient)), 1.); + Dtype scale = std::max( + std::max(fabs(computed_gradient), fabs(estimated_gradient)), + Dtype(1.)); EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale) << "debug: (top_id, top_data_id, blob_id, feat_id)=" << top_id << "," << top_data_id << "," << blob_id << "," << feat_id diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 74002087..45facd4a 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -51,7 +51,8 @@ void ContrastiveLossLayer::Forward_cpu( if (legacy_version) { loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0)); } else { - Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), 0.0); + Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), + Dtype(0.0)); loss += dist*dist; } } diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp b/src/caffe/test/test_contrastive_loss_layer.cpp index 592997e4..95901f14 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -77,7 +77,7 @@ TYPED_TEST(ContrastiveLossLayerTest, TestForward) { if (this->blob_bottom_y_->cpu_data()[i]) { // similar pairs loss += dist_sq; } else { - Dtype dist = std::max(margin - sqrt(dist_sq), 0.0); + Dtype dist = std::max(margin - sqrt(dist_sq), 0.0); loss += dist*dist; } } From 23e4e4621b0199684d6d7a8535fb7628f5609952 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Fri, 20 Nov 2015 16:36:29 +0800 Subject: [PATCH 181/223] Function must return a value Currently compilation will fail with some compilers when LevelDB and LMDB are disabled. Very similar to a recently fixed issue. --- src/caffe/util/db.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/caffe/util/db.cpp b/src/caffe/util/db.cpp index d0a2b0b5..7f22509b 100644 --- a/src/caffe/util/db.cpp +++ b/src/caffe/util/db.cpp @@ -18,6 +18,7 @@ DB* GetDB(DataParameter::DB backend) { #endif // USE_LMDB default: LOG(FATAL) << "Unknown database backend"; + return NULL; } } From e09329077d7612d7d1a185ea120be6be91bf03d2 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Fri, 20 Nov 2015 16:52:25 +0800 Subject: [PATCH 182/223] Exclude core.hpp when building without OpenCV --- src/caffe/util/io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index f2b1dd98..835d2d4e 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -2,8 +2,8 @@ #include #include #include -#include #ifdef USE_OPENCV +#include #include #include #include From 8b2aa7093cba002a5f286d47658de72a961d1299 Mon Sep 17 00:00:00 2001 From: Carl Doersch Date: Fri, 6 Nov 2015 14:41:30 -0800 Subject: [PATCH 183/223] Better normalization options for SoftmaxWithLoss layer. 
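The patch adds a NormalizationMode enum (FULL, VALID, BATCH_SIZE, NONE) to LossParameter and deprecates the old boolean normalize flag. A minimal sketch of selecting a mode through the generated protobuf accessors (the layer name and ignore_label value are only illustrative):

#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::LayerParameter layer_param;
  layer_param.set_name("loss");
  layer_param.set_type("SoftmaxWithLoss");
  caffe::LossParameter* loss_param = layer_param.mutable_loss_param();
  // Normalize only over outputs that do not carry the ignore label.
  loss_param->set_ignore_label(255);
  loss_param->set_normalization(caffe::LossParameter_NormalizationMode_VALID);
  return 0;
}

In a prototxt the equivalent setting is loss_param { ignore_label: 255 normalization: VALID }.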
--- include/caffe/loss_layers.hpp | 11 +++-- src/caffe/layers/softmax_loss_layer.cpp | 54 +++++++++++++++++++------ src/caffe/layers/softmax_loss_layer.cu | 32 ++++++++------- src/caffe/proto/caffe.proto | 24 +++++++++-- 4 files changed, 89 insertions(+), 32 deletions(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index d08ad9b6..d6569c4a 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -747,6 +747,12 @@ class SoftmaxWithLossLayer : public LossLayer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + /// Read the normalization mode parameter and compute the normalizer based + /// on the blob size. If normalization_mode is VALID, the count of valid + /// outputs will be read from valid_count, unless it is -1 in which case + /// all outputs are assumed to be valid. + virtual Dtype get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count); /// The internal SoftmaxLayer used to map predictions to a distribution. shared_ptr > softmax_layer_; @@ -760,9 +766,8 @@ class SoftmaxWithLossLayer : public LossLayer { bool has_ignore_label_; /// The label indicating that an instance should be ignored. int ignore_label_; - /// Whether to normalize the loss by the total number of values present - /// (otherwise just by the batch size). - bool normalize_; + /// How to normalize the output loss. + LossParameter_NormalizationMode normalization_; int softmax_axis_, outer_num_, inner_num_; }; diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index dee50ac6..3cdef82a 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -25,7 +25,14 @@ void SoftmaxWithLossLayer::LayerSetUp( if (has_ignore_label_) { ignore_label_ = this->layer_param_.loss_param().ignore_label(); } - normalize_ = this->layer_param_.loss_param().normalize(); + if (!this->layer_param_.loss_param().has_normalization() && + this->layer_param_.loss_param().has_normalize()) { + normalization_ = this->layer_param_.loss_param().normalize() ? + LossParameter_NormalizationMode_VALID : + LossParameter_NormalizationMode_BATCH_SIZE; + } else { + normalization_ = this->layer_param_.loss_param().normalization(); + } } template @@ -48,6 +55,36 @@ void SoftmaxWithLossLayer::Reshape( } } +template +Dtype SoftmaxWithLossLayer::get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count) { + Dtype normalizer; + switch (normalization_mode) { + case LossParameter_NormalizationMode_FULL: + normalizer = Dtype(outer_num_ * inner_num_); + break; + case LossParameter_NormalizationMode_VALID: + if (valid_count == -1) { + normalizer = Dtype(outer_num_ * inner_num_); + } else { + normalizer = Dtype(valid_count); + } + break; + case LossParameter_NormalizationMode_BATCH_SIZE: + normalizer = Dtype(outer_num_); + break; + case LossParameter_NormalizationMode_NONE: + normalizer = Dtype(1); + break; + default: + LOG(FATAL) << "Unknown normalization mode: " + << LossParameter_NormalizationMode_Name(normalization_mode); + } + // Some users will have no labels for some examples in order to 'turn off' a + // particular loss in a multi-task setup. The max prevents NaNs in that case. 
+ return std::max(Dtype(1.0), normalizer); +} + template void SoftmaxWithLossLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { @@ -71,11 +108,7 @@ void SoftmaxWithLossLayer::Forward_cpu( ++count; } } - if (normalize_) { - top[0]->mutable_cpu_data()[0] = loss / count; - } else { - top[0]->mutable_cpu_data()[0] = loss / outer_num_; - } + top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count); if (top.size() == 2) { top[1]->ShareData(prob_); } @@ -109,12 +142,9 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, } } // Scale gradient - const Dtype loss_weight = top[0]->cpu_diff()[0]; - if (normalize_) { - caffe_scal(prob_.count(), loss_weight / count, bottom_diff); - } else { - caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); - } + Dtype loss_weight = top[0]->cpu_diff()[0] / + get_normalizer(normalization_, count); + caffe_scal(prob_.count(), loss_weight, bottom_diff); } } diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 42e91fa9..4753a1ec 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -49,14 +49,15 @@ void SoftmaxWithLossLayer::Forward_gpu( outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); Dtype loss; caffe_gpu_asum(nthreads, loss_data, &loss); - if (normalize_) { - Dtype count; - caffe_gpu_asum(nthreads, counts, &count); - loss /= count; - } else { - loss /= outer_num_; + Dtype valid_count = -1; + // Only launch another CUDA kernel if we actually need the count of valid + // outputs. + if (normalization_ == LossParameter_NormalizationMode_VALID && + has_ignore_label_) { + caffe_gpu_asum(nthreads, counts, &valid_count); } - top[0]->mutable_cpu_data()[0] = loss; + top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, + valid_count); if (top.size() == 2) { top[1]->ShareData(prob_); } @@ -108,14 +109,17 @@ void SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, SoftmaxLossBackwardGPU<<>>(nthreads, top_data, label, bottom_diff, outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); - const Dtype loss_weight = top[0]->cpu_diff()[0]; - if (normalize_) { - Dtype count; - caffe_gpu_asum(nthreads, counts, &count); - caffe_gpu_scal(prob_.count(), loss_weight / count, bottom_diff); - } else { - caffe_gpu_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); + + Dtype valid_count = -1; + // Only launch another CUDA kernel if we actually need the count of valid + // outputs. + if (normalization_ == LossParameter_NormalizationMode_VALID && + has_ignore_label_) { + caffe_gpu_asum(nthreads, counts, &valid_count); } + const Dtype loss_weight = top[0]->cpu_diff()[0] / + get_normalizer(normalization_, valid_count); + caffe_gpu_scal(prob_.count(), loss_weight , bottom_diff); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 39873cf7..787369f7 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -420,9 +420,27 @@ message TransformationParameter { message LossParameter { // If specified, ignore instances with the given label. optional int32 ignore_label = 1; - // If true, normalize each batch across all instances (including spatial - // dimesions, but not ignored instances); else, divide by batch size only. - optional bool normalize = 2 [default = true]; + // How to normalize the loss for loss layers that aggregate across batches, + // spatial dimensions, or other dimensions. 
Currently only implemented in + // SoftmaxWithLoss layer. + enum NormalizationMode { + // Divide by the number of examples in the batch times spatial dimensions. + // Outputs that receive the ignore label will NOT be ignored in computing + // the normalization factor. + FULL = 0; + // Divide by the total number of output locations that do not take the + // ignore_label. If ignore_label is not set, this behaves like FULL. + VALID = 1; + // Divide by the batch size. + BATCH_SIZE = 2; + // Do not normalize the loss. + NONE = 3; + } + optional NormalizationMode normalization = 3 [default = VALID]; + // Deprecated. Ignored if normalization is specified. If normalization + // is not specified, then setting this to false will be equivalent to + // normalization = BATCH_SIZE to be consistent with previous behavior. + optional bool normalize = 2; } // Messages that store parameters used by individual layer types follow, in From b72b0318e2802785c17be1fe8ed1b6899961df19 Mon Sep 17 00:00:00 2001 From: Tea Date: Tue, 17 Nov 2015 17:05:56 +0800 Subject: [PATCH 184/223] replace snprintf with a C++98 equivalent --- examples/cifar10/convert_cifar_data.cpp | 13 ++++++------- examples/mnist/convert_mnist_data.cpp | 12 +++++------- .../siamese/convert_mnist_siamese_data.cpp | 7 +++---- include/caffe/util/format.hpp | 18 ++++++++++++++++++ src/caffe/solver.cpp | 8 +++----- tools/convert_imageset.cpp | 8 +++----- tools/extract_features.cpp | 11 ++++------- 7 files changed, 42 insertions(+), 35 deletions(-) create mode 100644 include/caffe/util/format.hpp diff --git a/examples/cifar10/convert_cifar_data.cpp b/examples/cifar10/convert_cifar_data.cpp index f4c42e4d..e1b89f42 100644 --- a/examples/cifar10/convert_cifar_data.cpp +++ b/examples/cifar10/convert_cifar_data.cpp @@ -16,6 +16,7 @@ #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" +#include "caffe/util/format.hpp" using caffe::Datum; using boost::scoped_ptr; @@ -52,19 +53,18 @@ void convert_dataset(const string& input_folder, const string& output_folder, for (int fileid = 0; fileid < kCIFARTrainBatches; ++fileid) { // Open files LOG(INFO) << "Training Batch " << fileid + 1; - snprintf(str_buffer, kCIFARImageNBytes, "/data_batch_%d.bin", fileid + 1); - std::ifstream data_file((input_folder + str_buffer).c_str(), + string batchFileName = input_folder + "/data_batch_" + + caffe::format_int(fileid+1) + ".bin"; + std::ifstream data_file(batchFileName.c_str(), std::ios::in | std::ios::binary); CHECK(data_file) << "Unable to open train file #" << fileid + 1; for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) { read_image(&data_file, &label, str_buffer); datum.set_label(label); datum.set_data(str_buffer, kCIFARImageNBytes); - int length = snprintf(str_buffer, kCIFARImageNBytes, "%05d", - fileid * kCIFARBatchSize + itemid); string out; CHECK(datum.SerializeToString(&out)); - txn->Put(string(str_buffer, length), out); + txn->Put(caffe::format_int(fileid * kCIFARBatchSize + itemid, 5), out); } } txn->Commit(); @@ -82,10 +82,9 @@ void convert_dataset(const string& input_folder, const string& output_folder, read_image(&data_file, &label, str_buffer); datum.set_label(label); datum.set_data(str_buffer, kCIFARImageNBytes); - int length = snprintf(str_buffer, kCIFARImageNBytes, "%05d", itemid); string out; CHECK(datum.SerializeToString(&out)); - txn->Put(string(str_buffer, length), out); + txn->Put(caffe::format_int(itemid, 5), out); } txn->Commit(); test_db->Close(); diff --git a/examples/mnist/convert_mnist_data.cpp 
b/examples/mnist/convert_mnist_data.cpp index 8f29bafd..16d28093 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -23,6 +23,7 @@ #include #include "caffe/proto/caffe.pb.h" +#include "caffe/util/format.hpp" #if defined(USE_LEVELDB) && defined(USE_LMDB) @@ -108,8 +109,6 @@ void convert_dataset(const char* image_filename, const char* label_filename, char label; char* pixels = new char[rows * cols]; int count = 0; - const int kMaxKeyLength = 10; - char key_cstr[kMaxKeyLength]; string value; Datum datum; @@ -123,18 +122,17 @@ void convert_dataset(const char* image_filename, const char* label_filename, label_file.read(&label, 1); datum.set_data(pixels, rows*cols); datum.set_label(label); - snprintf(key_cstr, kMaxKeyLength, "%08d", item_id); + string key_str = caffe::format_int(item_id, 8); datum.SerializeToString(&value); - string keystr(key_cstr); // Put in db if (db_backend == "leveldb") { // leveldb - batch->Put(keystr, value); + batch->Put(key_str, value); } else if (db_backend == "lmdb") { // lmdb mdb_data.mv_size = value.size(); mdb_data.mv_data = reinterpret_cast(&value[0]); - mdb_key.mv_size = keystr.size(); - mdb_key.mv_data = reinterpret_cast(&keystr[0]); + mdb_key.mv_size = key_str.size(); + mdb_key.mv_data = reinterpret_cast(&key_str[0]); CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS) << "mdb_put failed"; } else { diff --git a/examples/siamese/convert_mnist_siamese_data.cpp b/examples/siamese/convert_mnist_siamese_data.cpp index ad08036f..928b3fbf 100644 --- a/examples/siamese/convert_mnist_siamese_data.cpp +++ b/examples/siamese/convert_mnist_siamese_data.cpp @@ -13,6 +13,7 @@ #include "stdint.h" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/format.hpp" #include "caffe/util/math_functions.hpp" #ifdef USE_LEVELDB @@ -75,8 +76,6 @@ void convert_dataset(const char* image_filename, const char* label_filename, char label_i; char label_j; char* pixels = new char[2 * rows * cols]; - const int kMaxKeyLength = 10; - char key[kMaxKeyLength]; std::string value; caffe::Datum datum; @@ -99,8 +98,8 @@ void convert_dataset(const char* image_filename, const char* label_filename, datum.set_label(0); } datum.SerializeToString(&value); - snprintf(key, kMaxKeyLength, "%08d", itemid); - db->Put(leveldb::WriteOptions(), std::string(key), value); + std::string key_str = caffe::format_int(itemid, 8); + db->Put(leveldb::WriteOptions(), key_str, value); } delete db; diff --git a/include/caffe/util/format.hpp b/include/caffe/util/format.hpp new file mode 100644 index 00000000..925ad2e0 --- /dev/null +++ b/include/caffe/util/format.hpp @@ -0,0 +1,18 @@ +#ifndef CAFFE_UTIL_FORMAT_H_ +#define CAFFE_UTIL_FORMAT_H_ + +#include // NOLINT(readability/streams) +#include // NOLINT(readability/streams) +#include + +namespace caffe { + +inline std::string format_int(int n, int numberOfLeadingZeros = 0 ) { + std::ostringstream s; + s << std::setw(numberOfLeadingZeros) << std::setfill('0') << n; + return s.str(); +} + +} + +#endif // CAFFE_UTIL_FORMAT_H_ diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index d3bc7361..95d75066 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -4,6 +4,7 @@ #include #include "caffe/solver.hpp" +#include "caffe/util/format.hpp" #include "caffe/util/hdf5.hpp" #include "caffe/util/io.hpp" #include "caffe/util/upgrade_proto.hpp" @@ -448,11 +449,8 @@ void Solver::CheckSnapshotWritePermissions() { template string Solver::SnapshotFilename(const string extension) { - string 
filename(param_.snapshot_prefix()); - const int kBufferSize = 20; - char iter_str_buffer[kBufferSize]; - snprintf(iter_str_buffer, kBufferSize, "_iter_%d", iter_); - return filename + iter_str_buffer + extension; + return param_.snapshot_prefix() + "_iter_" + caffe::format_int(iter_) + + extension; } template diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index e51a2631..9c52bfa0 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -20,6 +20,7 @@ #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" +#include "caffe/util/format.hpp" #include "caffe/util/io.hpp" #include "caffe/util/rng.hpp" @@ -99,8 +100,6 @@ int main(int argc, char** argv) { std::string root_folder(argv[1]); Datum datum; int count = 0; - const int kMaxKeyLength = 256; - char key_cstr[kMaxKeyLength]; int data_size = 0; bool data_size_initialized = false; @@ -131,13 +130,12 @@ int main(int argc, char** argv) { } } // sequential - int length = snprintf(key_cstr, kMaxKeyLength, "%08d_%s", line_id, - lines[line_id].first.c_str()); + string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first; // Put in db string out; CHECK(datum.SerializeToString(&out)); - txn->Put(string(key_cstr, length), out); + txn->Put(key_str, out); if (++count % 1000 == 0) { // Commit db diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 084c9bf8..b94dbb98 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -1,4 +1,3 @@ -#include // for snprintf #include #include @@ -10,6 +9,7 @@ #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" +#include "caffe/util/format.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" @@ -135,8 +135,6 @@ int feature_extraction_pipeline(int argc, char** argv) { LOG(ERROR)<< "Extacting Features"; Datum datum; - const int kMaxKeyStrLength = 100; - char key_str[kMaxKeyStrLength]; std::vector*> input_vec; std::vector image_indices(num_features, 0); for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { @@ -158,11 +156,11 @@ int feature_extraction_pipeline(int argc, char** argv) { for (int d = 0; d < dim_features; ++d) { datum.add_float_data(feature_blob_data[d]); } - int length = snprintf(key_str, kMaxKeyStrLength, "%010d", - image_indices[i]); + string key_str = caffe::format_int(image_indices[i], 10); + string out; CHECK(datum.SerializeToString(&out)); - txns.at(i)->Put(std::string(key_str, length), out); + txns.at(i)->Put(key_str, out); ++image_indices[i]; if (image_indices[i] % 1000 == 0) { txns.at(i)->Commit(); @@ -186,4 +184,3 @@ int feature_extraction_pipeline(int argc, char** argv) { LOG(ERROR)<< "Successfully extracted the features!"; return 0; } - From d3025f5ffb731ef2f7e796f67f6fd6bd43f601b9 Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Wed, 25 Nov 2015 21:02:02 -0800 Subject: [PATCH 185/223] Remove bogus stepearly in MNIST example This `examples/lenet/lenet_stepearly_solver.prototxt` is introduced in #190 by mistake, since stepearly is never actually merged. 
--- .../mnist/lenet_stepearly_solver.prototxt | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 examples/mnist/lenet_stepearly_solver.prototxt diff --git a/examples/mnist/lenet_stepearly_solver.prototxt b/examples/mnist/lenet_stepearly_solver.prototxt deleted file mode 100644 index efc6a335..00000000 --- a/examples/mnist/lenet_stepearly_solver.prototxt +++ /dev/null @@ -1,28 +0,0 @@ -# The training protocol buffer definition -train_net: "lenet_train.prototxt" -# The testing protocol buffer definition -test_net: "lenet_test.prototxt" -# test_iter specifies how many forward passes the test should carry out. -# In the case of MNIST, we have test batch size 100 and 100 test iterations, -# covering the full 10,000 testing images. -test_iter: 100 -# Carry out testing every 500 training iterations. -test_interval: 500 -# The base learning rate, momentum and the weight decay of the network. -base_lr: 0.01 -momentum: 0.9 -weight_decay: 0.0005 -# The learning rate policy -lr_policy: "stepearly" -gamma: 0.9 -stepearly: 1 -# Display every 100 iterations -display: 100 -# The maximum number of iterations -max_iter: 10000 -# snapshot intermediate results -snapshot: 5000 -snapshot_prefix: "lenet" -# solver mode: 0 for CPU and 1 for GPU -solver_mode: 1 -device_id: 1 From 34ee5df55dc11dfc8afff60cf64cd479b639e5a8 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Tue, 24 Nov 2015 14:33:27 +0800 Subject: [PATCH 186/223] Secure implementation of MakeTempDir --- include/caffe/util/io.hpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 6b733254..f9f0f55a 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -9,6 +9,10 @@ #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +#ifndef CAFFE_TMP_DIR_RETRIES +#define CAFFE_TMP_DIR_RETRIES 100 +#endif + namespace caffe { using ::google::protobuf::Message; @@ -23,12 +27,17 @@ inline void MakeTempFilename(string* temp_filename) { inline void MakeTempDir(string* temp_dirname) { temp_dirname->clear(); - const path& model = boost::filesystem::temp_directory_path() - /"caffe_test.%%%%%%"; - const path& dir = boost::filesystem::unique_path(model).string(); - bool directoryCreated = boost::filesystem::create_directory(dir); - CHECK(directoryCreated); - *temp_dirname = dir.string(); + const path& model = + boost::filesystem::temp_directory_path()/"caffe_test.%%%%-%%%%"; + for ( int i = 0; i < CAFFE_TMP_DIR_RETRIES; i++ ) { + const path& dir = boost::filesystem::unique_path(model).string(); + bool done = boost::filesystem::create_directory(dir); + if ( done ) { + *temp_dirname = dir.string(); + return; + } + } + LOG(FATAL) << "Failed to create a temporary directory."; } bool ReadProtoFromTextFile(const char* filename, Message* proto); From 33905d5a8023c3dbac514dac680060dc608145e8 Mon Sep 17 00:00:00 2001 From: Tea Date: Wed, 25 Nov 2015 11:43:45 +0800 Subject: [PATCH 187/223] Secure temporary file creation --- include/caffe/util/io.hpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index f9f0f55a..1a599883 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -2,12 +2,15 @@ #define CAFFE_UTIL_IO_H_ #include +#include +#include // NOLINT(readability/streams) #include #include "google/protobuf/message.h" #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/format.hpp" #ifndef 
CAFFE_TMP_DIR_RETRIES #define CAFFE_TMP_DIR_RETRIES 100 @@ -18,13 +21,6 @@ namespace caffe { using ::google::protobuf::Message; using ::boost::filesystem::path; -inline void MakeTempFilename(string* temp_filename) { - temp_filename->clear(); - const path& model = boost::filesystem::temp_directory_path() - /"caffe_test.%%%%%%"; - *temp_filename = boost::filesystem::unique_path(model).string(); -} - inline void MakeTempDir(string* temp_dirname) { temp_dirname->clear(); const path& model = @@ -40,6 +36,19 @@ inline void MakeTempDir(string* temp_dirname) { LOG(FATAL) << "Failed to create a temporary directory."; } +inline void MakeTempFilename(string* temp_filename) { + static path temp_files_subpath; + static uint64_t next_temp_file = 0; + temp_filename->clear(); + if ( temp_files_subpath.empty() ) { + string path_string=""; + MakeTempDir(&path_string); + temp_files_subpath = path_string; + } + *temp_filename = + (temp_files_subpath/caffe::format_int(next_temp_file++, 9)).string(); +} + bool ReadProtoFromTextFile(const char* filename, Message* proto); inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { From 300f43f3ae6347ac8e01093f9a57ee99e551ed74 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Tue, 10 Nov 2015 00:22:58 -0800 Subject: [PATCH 188/223] dismantle layer headers No more monolithic includes: split layers into their own headers for modular inclusion and build. --- Makefile | 2 +- include/caffe/caffe.hpp | 1 - include/caffe/common_layers.hpp | 816 ------------------ include/caffe/data_layers.hpp | 347 -------- include/caffe/layer_factory.hpp | 1 + include/caffe/layers/absval_layer.hpp | 68 ++ include/caffe/layers/accuracy_layer.hpp | 95 ++ include/caffe/layers/argmax_layer.hpp | 77 ++ include/caffe/layers/base_conv_layer.hpp | 168 ++++ include/caffe/layers/base_data_layer.hpp | 86 ++ include/caffe/layers/batch_norm_layer.hpp | 81 ++ include/caffe/layers/batch_reindex_layer.hpp | 83 ++ include/caffe/layers/bnll_layer.hpp | 70 ++ include/caffe/layers/concat_layer.hpp | 87 ++ .../caffe/layers/contrastive_loss_layer.hpp | 101 +++ include/caffe/layers/conv_layer.hpp | 81 ++ include/caffe/layers/cudnn_conv_layer.hpp | 72 ++ include/caffe/layers/cudnn_lcn_layer.hpp | 49 ++ include/caffe/layers/cudnn_lrn_layer.hpp | 44 + include/caffe/layers/cudnn_pooling_layer.hpp | 49 ++ include/caffe/layers/cudnn_relu_layer.hpp | 45 + include/caffe/layers/cudnn_sigmoid_layer.hpp | 45 + include/caffe/layers/cudnn_softmax_layer.hpp | 45 + include/caffe/layers/cudnn_tanh_layer.hpp | 45 + include/caffe/layers/data_layer.hpp | 39 + include/caffe/layers/deconv_layer.hpp | 51 ++ include/caffe/layers/dropout_layer.hpp | 80 ++ include/caffe/layers/dummy_data_layer.hpp | 49 ++ include/caffe/layers/eltwise_layer.hpp | 51 ++ include/caffe/layers/embed_layer.hpp | 52 ++ include/caffe/layers/euclidean_loss_layer.hpp | 107 +++ include/caffe/layers/exp_layer.hpp | 80 ++ include/caffe/layers/filter_layer.hpp | 77 ++ include/caffe/layers/flatten_layer.hpp | 61 ++ include/caffe/layers/hdf5_data_layer.hpp | 62 ++ include/caffe/layers/hdf5_output_layer.hpp | 64 ++ include/caffe/layers/hinge_loss_layer.hpp | 104 +++ include/caffe/layers/im2col_layer.hpp | 63 ++ include/caffe/layers/image_data_layer.hpp | 47 + include/caffe/layers/infogain_loss_layer.hpp | 110 +++ include/caffe/layers/inner_product_layer.hpp | 51 ++ include/caffe/layers/log_layer.hpp | 82 ++ include/caffe/layers/loss_layer.hpp | 53 ++ include/caffe/layers/lrn_layer.hpp | 94 ++ include/caffe/layers/memory_data_layer.hpp | 63 ++ 
.../multinomial_logistic_loss_layer.hpp | 92 ++ include/caffe/layers/mvn_layer.hpp | 48 ++ include/caffe/layers/neuron_layer.hpp | 32 + include/caffe/layers/pooling_layer.hpp | 60 ++ include/caffe/layers/power_layer.hpp | 89 ++ include/caffe/layers/prelu_layer.hpp | 101 +++ include/caffe/{ => layers}/python_layer.hpp | 0 include/caffe/layers/reduction_layer.hpp | 59 ++ include/caffe/layers/relu_layer.hpp | 85 ++ include/caffe/layers/reshape_layer.hpp | 52 ++ .../sigmoid_cross_entropy_loss_layer.hpp | 110 +++ include/caffe/layers/sigmoid_layer.hpp | 71 ++ include/caffe/layers/silence_layer.hpp | 43 + include/caffe/layers/slice_layer.hpp | 51 ++ include/caffe/layers/softmax_layer.hpp | 50 ++ include/caffe/layers/softmax_loss_layer.hpp | 130 +++ include/caffe/layers/split_layer.hpp | 45 + include/caffe/layers/spp_layer.hpp | 76 ++ include/caffe/layers/tanh_layer.hpp | 73 ++ include/caffe/layers/threshold_layer.hpp | 64 ++ include/caffe/layers/tile_layer.hpp | 43 + include/caffe/layers/window_data_layer.hpp | 55 ++ include/caffe/loss_layers.hpp | 777 ----------------- include/caffe/neuron_layers.hpp | 806 ----------------- include/caffe/vision_layers.hpp | 659 -------------- python/caffe/_caffe.cpp | 3 +- src/caffe/data_reader.cpp | 2 +- src/caffe/layer_factory.cpp | 21 +- src/caffe/layers/absval_layer.cpp | 2 +- src/caffe/layers/absval_layer.cu | 2 +- src/caffe/layers/accuracy_layer.cpp | 2 +- src/caffe/layers/argmax_layer.cpp | 2 +- src/caffe/layers/base_conv_layer.cpp | 2 +- src/caffe/layers/base_data_layer.cpp | 8 +- src/caffe/layers/base_data_layer.cu | 2 +- src/caffe/layers/batch_norm_layer.cpp | 2 +- src/caffe/layers/batch_norm_layer.cu | 2 +- src/caffe/layers/batch_reindex_layer.cpp | 2 +- src/caffe/layers/batch_reindex_layer.cu | 2 +- src/caffe/layers/bnll_layer.cpp | 2 +- src/caffe/layers/bnll_layer.cu | 2 +- src/caffe/layers/concat_layer.cpp | 2 +- src/caffe/layers/concat_layer.cu | 2 +- src/caffe/layers/contrastive_loss_layer.cpp | 2 +- src/caffe/layers/contrastive_loss_layer.cu | 2 +- src/caffe/layers/conv_layer.cpp | 2 +- src/caffe/layers/conv_layer.cu | 2 +- src/caffe/layers/cudnn_conv_layer.cpp | 2 +- src/caffe/layers/cudnn_conv_layer.cu | 2 +- src/caffe/layers/cudnn_lcn_layer.cpp | 2 +- src/caffe/layers/cudnn_lcn_layer.cu | 2 +- src/caffe/layers/cudnn_lrn_layer.cpp | 2 +- src/caffe/layers/cudnn_lrn_layer.cu | 2 +- src/caffe/layers/cudnn_pooling_layer.cpp | 2 +- src/caffe/layers/cudnn_pooling_layer.cu | 2 +- src/caffe/layers/cudnn_relu_layer.cpp | 2 +- src/caffe/layers/cudnn_relu_layer.cu | 2 +- src/caffe/layers/cudnn_sigmoid_layer.cpp | 2 +- src/caffe/layers/cudnn_sigmoid_layer.cu | 2 +- src/caffe/layers/cudnn_softmax_layer.cpp | 2 +- src/caffe/layers/cudnn_softmax_layer.cu | 2 +- src/caffe/layers/cudnn_tanh_layer.cpp | 2 +- src/caffe/layers/cudnn_tanh_layer.cu | 2 +- src/caffe/layers/data_layer.cpp | 4 +- src/caffe/layers/deconv_layer.cpp | 2 +- src/caffe/layers/deconv_layer.cu | 2 +- src/caffe/layers/dropout_layer.cpp | 2 +- src/caffe/layers/dropout_layer.cu | 4 +- src/caffe/layers/dummy_data_layer.cpp | 2 +- src/caffe/layers/eltwise_layer.cpp | 2 +- src/caffe/layers/eltwise_layer.cu | 2 +- src/caffe/layers/embed_layer.cpp | 2 +- src/caffe/layers/embed_layer.cu | 2 +- src/caffe/layers/euclidean_loss_layer.cpp | 2 +- src/caffe/layers/euclidean_loss_layer.cu | 2 +- src/caffe/layers/exp_layer.cpp | 2 +- src/caffe/layers/exp_layer.cu | 2 +- src/caffe/layers/filter_layer.cpp | 2 +- src/caffe/layers/filter_layer.cu | 2 +- src/caffe/layers/flatten_layer.cpp | 2 +- 
src/caffe/layers/hdf5_data_layer.cpp | 2 +- src/caffe/layers/hdf5_data_layer.cu | 2 +- src/caffe/layers/hdf5_output_layer.cpp | 2 +- src/caffe/layers/hdf5_output_layer.cu | 2 +- src/caffe/layers/hinge_loss_layer.cpp | 2 +- src/caffe/layers/im2col_layer.cpp | 2 +- src/caffe/layers/im2col_layer.cu | 2 +- src/caffe/layers/image_data_layer.cpp | 4 +- src/caffe/layers/infogain_loss_layer.cpp | 2 +- src/caffe/layers/inner_product_layer.cpp | 2 +- src/caffe/layers/inner_product_layer.cu | 2 +- src/caffe/layers/log_layer.cpp | 2 +- src/caffe/layers/log_layer.cu | 2 +- src/caffe/layers/loss_layer.cpp | 2 +- src/caffe/layers/lrn_layer.cpp | 2 +- src/caffe/layers/lrn_layer.cu | 2 +- src/caffe/layers/memory_data_layer.cpp | 2 +- .../multinomial_logistic_loss_layer.cpp | 2 +- src/caffe/layers/mvn_layer.cpp | 2 +- src/caffe/layers/mvn_layer.cu | 2 +- src/caffe/layers/neuron_layer.cpp | 2 +- src/caffe/layers/pooling_layer.cpp | 2 +- src/caffe/layers/pooling_layer.cu | 2 +- src/caffe/layers/power_layer.cpp | 2 +- src/caffe/layers/power_layer.cu | 2 +- src/caffe/layers/prelu_layer.cpp | 4 +- src/caffe/layers/prelu_layer.cu | 3 +- src/caffe/layers/reduction_layer.cpp | 2 +- src/caffe/layers/reduction_layer.cu | 2 +- src/caffe/layers/relu_layer.cpp | 2 +- src/caffe/layers/relu_layer.cu | 2 +- src/caffe/layers/reshape_layer.cpp | 2 +- .../sigmoid_cross_entropy_loss_layer.cpp | 2 +- .../sigmoid_cross_entropy_loss_layer.cu | 2 +- src/caffe/layers/sigmoid_layer.cpp | 2 +- src/caffe/layers/sigmoid_layer.cu | 2 +- src/caffe/layers/silence_layer.cpp | 2 +- src/caffe/layers/silence_layer.cu | 2 +- src/caffe/layers/slice_layer.cpp | 2 +- src/caffe/layers/slice_layer.cu | 2 +- src/caffe/layers/softmax_layer.cpp | 2 +- src/caffe/layers/softmax_layer.cu | 2 +- src/caffe/layers/softmax_loss_layer.cpp | 2 +- src/caffe/layers/softmax_loss_layer.cu | 2 +- src/caffe/layers/split_layer.cpp | 2 +- src/caffe/layers/split_layer.cu | 2 +- src/caffe/layers/spp_layer.cpp | 8 +- src/caffe/layers/tanh_layer.cpp | 2 +- src/caffe/layers/tanh_layer.cu | 2 +- src/caffe/layers/threshold_layer.cpp | 3 +- src/caffe/layers/threshold_layer.cu | 2 +- src/caffe/layers/tile_layer.cpp | 2 +- src/caffe/layers/tile_layer.cu | 2 +- src/caffe/layers/window_data_layer.cpp | 5 +- src/caffe/test/test_accuracy_layer.cpp | 2 +- src/caffe/test/test_argmax_layer.cpp | 2 +- src/caffe/test/test_batch_norm_layer.cpp | 2 +- src/caffe/test/test_batch_reindex_layer.cpp | 2 +- src/caffe/test/test_concat_layer.cpp | 2 +- .../test/test_contrastive_loss_layer.cpp | 2 +- src/caffe/test/test_convolution_layer.cpp | 6 +- src/caffe/test/test_data_layer.cpp | 2 +- src/caffe/test/test_deconvolution_layer.cpp | 2 +- src/caffe/test/test_dummy_data_layer.cpp | 2 +- src/caffe/test/test_eltwise_layer.cpp | 2 +- src/caffe/test/test_embed_layer.cpp | 2 +- src/caffe/test/test_euclidean_loss_layer.cpp | 2 +- src/caffe/test/test_filter_layer.cpp | 2 +- src/caffe/test/test_flatten_layer.cpp | 2 +- src/caffe/test/test_hdf5_output_layer.cpp | 2 +- src/caffe/test/test_hdf5data_layer.cpp | 4 +- src/caffe/test/test_hinge_loss_layer.cpp | 2 +- src/caffe/test/test_im2col_kernel.cu | 2 +- src/caffe/test/test_im2col_layer.cpp | 2 +- src/caffe/test/test_image_data_layer.cpp | 2 +- src/caffe/test/test_infogain_loss_layer.cpp | 2 +- src/caffe/test/test_inner_product_layer.cpp | 2 +- src/caffe/test/test_lrn_layer.cpp | 7 +- .../test/test_maxpool_dropout_layers.cpp | 3 +- src/caffe/test/test_memory_data_layer.cpp | 2 +- .../test_multinomial_logistic_loss_layer.cpp | 2 +- 
src/caffe/test/test_mvn_layer.cpp | 2 +- src/caffe/test/test_neuron_layer.cpp | 21 +- src/caffe/test/test_pooling_layer.cpp | 6 +- src/caffe/test/test_power_layer.cpp | 2 +- src/caffe/test/test_reduction_layer.cpp | 2 +- src/caffe/test/test_reshape_layer.cpp | 2 +- .../test_sigmoid_cross_entropy_loss_layer.cpp | 2 +- src/caffe/test/test_slice_layer.cpp | 2 +- src/caffe/test/test_softmax_layer.cpp | 6 +- .../test/test_softmax_with_loss_layer.cpp | 2 +- src/caffe/test/test_split_layer.cpp | 2 +- src/caffe/test/test_spp_layer.cpp | 7 +- src/caffe/test/test_stochastic_pooling.cpp | 2 +- src/caffe/test/test_tanh_layer.cpp | 2 +- src/caffe/test/test_threshold_layer.cpp | 2 +- src/caffe/test/test_tile_layer.cpp | 2 +- src/caffe/util/blocking_queue.cpp | 2 +- tools/extract_features.cpp | 1 - 224 files changed, 4497 insertions(+), 3568 deletions(-) delete mode 100644 include/caffe/common_layers.hpp delete mode 100644 include/caffe/data_layers.hpp create mode 100644 include/caffe/layers/absval_layer.hpp create mode 100644 include/caffe/layers/accuracy_layer.hpp create mode 100644 include/caffe/layers/argmax_layer.hpp create mode 100644 include/caffe/layers/base_conv_layer.hpp create mode 100644 include/caffe/layers/base_data_layer.hpp create mode 100644 include/caffe/layers/batch_norm_layer.hpp create mode 100644 include/caffe/layers/batch_reindex_layer.hpp create mode 100644 include/caffe/layers/bnll_layer.hpp create mode 100644 include/caffe/layers/concat_layer.hpp create mode 100644 include/caffe/layers/contrastive_loss_layer.hpp create mode 100644 include/caffe/layers/conv_layer.hpp create mode 100644 include/caffe/layers/cudnn_conv_layer.hpp create mode 100644 include/caffe/layers/cudnn_lcn_layer.hpp create mode 100644 include/caffe/layers/cudnn_lrn_layer.hpp create mode 100644 include/caffe/layers/cudnn_pooling_layer.hpp create mode 100644 include/caffe/layers/cudnn_relu_layer.hpp create mode 100644 include/caffe/layers/cudnn_sigmoid_layer.hpp create mode 100644 include/caffe/layers/cudnn_softmax_layer.hpp create mode 100644 include/caffe/layers/cudnn_tanh_layer.hpp create mode 100644 include/caffe/layers/data_layer.hpp create mode 100644 include/caffe/layers/deconv_layer.hpp create mode 100644 include/caffe/layers/dropout_layer.hpp create mode 100644 include/caffe/layers/dummy_data_layer.hpp create mode 100644 include/caffe/layers/eltwise_layer.hpp create mode 100644 include/caffe/layers/embed_layer.hpp create mode 100644 include/caffe/layers/euclidean_loss_layer.hpp create mode 100644 include/caffe/layers/exp_layer.hpp create mode 100644 include/caffe/layers/filter_layer.hpp create mode 100644 include/caffe/layers/flatten_layer.hpp create mode 100644 include/caffe/layers/hdf5_data_layer.hpp create mode 100644 include/caffe/layers/hdf5_output_layer.hpp create mode 100644 include/caffe/layers/hinge_loss_layer.hpp create mode 100644 include/caffe/layers/im2col_layer.hpp create mode 100644 include/caffe/layers/image_data_layer.hpp create mode 100644 include/caffe/layers/infogain_loss_layer.hpp create mode 100644 include/caffe/layers/inner_product_layer.hpp create mode 100644 include/caffe/layers/log_layer.hpp create mode 100644 include/caffe/layers/loss_layer.hpp create mode 100644 include/caffe/layers/lrn_layer.hpp create mode 100644 include/caffe/layers/memory_data_layer.hpp create mode 100644 include/caffe/layers/multinomial_logistic_loss_layer.hpp create mode 100644 include/caffe/layers/mvn_layer.hpp create mode 100644 include/caffe/layers/neuron_layer.hpp create mode 100644 
include/caffe/layers/pooling_layer.hpp create mode 100644 include/caffe/layers/power_layer.hpp create mode 100644 include/caffe/layers/prelu_layer.hpp rename include/caffe/{ => layers}/python_layer.hpp (100%) create mode 100644 include/caffe/layers/reduction_layer.hpp create mode 100644 include/caffe/layers/relu_layer.hpp create mode 100644 include/caffe/layers/reshape_layer.hpp create mode 100644 include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp create mode 100644 include/caffe/layers/sigmoid_layer.hpp create mode 100644 include/caffe/layers/silence_layer.hpp create mode 100644 include/caffe/layers/slice_layer.hpp create mode 100644 include/caffe/layers/softmax_layer.hpp create mode 100644 include/caffe/layers/softmax_loss_layer.hpp create mode 100644 include/caffe/layers/split_layer.hpp create mode 100644 include/caffe/layers/spp_layer.hpp create mode 100644 include/caffe/layers/tanh_layer.hpp create mode 100644 include/caffe/layers/threshold_layer.hpp create mode 100644 include/caffe/layers/tile_layer.hpp create mode 100644 include/caffe/layers/window_data_layer.hpp delete mode 100644 include/caffe/loss_layers.hpp delete mode 100644 include/caffe/neuron_layers.hpp delete mode 100644 include/caffe/vision_layers.hpp diff --git a/Makefile b/Makefile index 3dc76ae5..985fffd6 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ NONEMPTY_LINT_REPORT := $(BUILD_DIR)/$(LINT_EXT) # PY$(PROJECT)_SRC is the python wrapper for $(PROJECT) PY$(PROJECT)_SRC := python/$(PROJECT)/_$(PROJECT).cpp PY$(PROJECT)_SO := python/$(PROJECT)/_$(PROJECT).so -PY$(PROJECT)_HXX := include/$(PROJECT)/python_layer.hpp +PY$(PROJECT)_HXX := include/$(PROJECT)/layers/python_layer.hpp # MAT$(PROJECT)_SRC is the mex entrance point of matlab package for $(PROJECT) MAT$(PROJECT)_SRC := matlab/+$(PROJECT)/private/$(PROJECT)_.cpp ifneq ($(MATLAB_DIR),) diff --git a/include/caffe/caffe.hpp b/include/caffe/caffe.hpp index a339efba..06882096 100644 --- a/include/caffe/caffe.hpp +++ b/include/caffe/caffe.hpp @@ -17,6 +17,5 @@ #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/upgrade_proto.hpp" -#include "caffe/vision_layers.hpp" #endif // CAFFE_CAFFE_HPP_ diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp deleted file mode 100644 index d42d15c4..00000000 --- a/include/caffe/common_layers.hpp +++ /dev/null @@ -1,816 +0,0 @@ -#ifndef CAFFE_COMMON_LAYERS_HPP_ -#define CAFFE_COMMON_LAYERS_HPP_ - -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Compute the index of the @f$ K @f$ max values for each datum across - * all dimensions @f$ (C \times H \times W) @f$. - * - * Intended for use after a classification layer to produce a prediction. - * If parameter out_max_val is set to true, output is a vector of pairs - * (max_ind, max_val) for each image. The axis parameter specifies an axis - * along which to maximise. - * - * NOTE: does not implement Backwards operation. - */ -template -class ArgMaxLayer : public Layer { - public: - /** - * @param param provides ArgMaxParameter argmax_param, - * with ArgMaxLayer options: - * - top_k (\b optional uint, default 1). - * the number @f$ K @f$ of maximal items to output. - * - out_max_val (\b optional bool, default false). - * if set, output a vector of pairs (max_ind, max_val) unless axis is set then - * output max_val along the specified axis. - * - axis (\b optional int). 
- * if set, maximise along the specified axis else maximise the flattened - * trailing dimensions for each index of the first / num dimension. - */ - explicit ArgMaxLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "ArgMax"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times 1 \times K) @f$ or, if out_max_val - * @f$ (N \times 2 \times K) @f$ unless axis set than e.g. - * @f$ (N \times K \times H \times W) @f$ if axis == 1 - * the computed outputs @f$ - * y_n = \arg\max\limits_i x_{ni} - * @f$ (for @f$ K = 1 @f$). - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - /// @brief Not implemented (non-differentiable function) - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - NOT_IMPLEMENTED; - } - bool out_max_val_; - size_t top_k_; - bool has_axis_; - int axis_; -}; - -/** - * @brief Normalizes the input to have 0-mean and/or unit (1) variance across - * the batch. - * - * This layer computes Batch Normalization described in [1]. For - * each channel in the data (i.e. axis 1), it subtracts the mean and divides - * by the variance, where both statistics are computed across both spatial - * dimensions and across the different examples in the batch. - * - * By default, during training time, the network is computing global mean/ - * variance statistics via a running average, which is then used at test - * time to allow deterministic outputs for each input. You can manually - * toggle whether the network is accumulating or using the statistics via the - * use_global_stats option. IMPORTANT: for this feature to work, you MUST - * set the learning rate to zero for all three parameter blobs, i.e., - * param {lr_mult: 0} three times in the layer definition. - * - * Note that the original paper also included a per-channel learned bias and - * scaling factor. It is possible (though a bit cumbersome) to implement - * this in caffe using a single-channel DummyDataLayer filled with zeros, - * followed by a Convolution layer with output the same size as the current. - * This produces a channel-specific value that can be added or multiplied by - * the BatchNorm layer's output. - * - * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network - * Training by Reducing Internal Covariate Shift." arXiv preprint - * arXiv:1502.03167 (2015). - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
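For reference, the normalization described above amounts to, per channel c and in the notation of the cited paper,

 @f$ \hat{x}_c = \frac{x_c - \mathrm{E}[x_c]}{\sqrt{\mathrm{Var}[x_c] + \epsilon}} @f$

with the mean and variance taken over the batch and spatial dimensions and @f$ \epsilon @f$ the layer's eps_ parameter. The learned per-channel scale and shift from the paper are intentionally not part of this layer, as noted above.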
- */ -template -class BatchNormLayer : public Layer { - public: - explicit BatchNormLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "BatchNorm"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Blob mean_, variance_, temp_, x_norm_; - bool use_global_stats_; - Dtype moving_average_fraction_; - int channels_; - Dtype eps_; - - // extra temporarary variables is used to carry out sums/broadcasting - // using BLAS - Blob batch_sum_multiplier_; - Blob num_by_chans_; - Blob spatial_sum_multiplier_; -}; - -/** - * @brief Index into the input blob along its first axis. - * - * This layer can be used to select, reorder, and even replicate examples in a - * batch. The second blob is cast to int and treated as an index into the - * first axis of the first blob. - */ -template -class BatchReindexLayer : public Layer { - public: - explicit BatchReindexLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "BatchReindex"; } - virtual inline int ExactNumBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times ...) @f$ - * the inputs @f$ x_1 @f$ - * -# @f$ (M) @f$ - * the inputs @f$ x_2 @f$ - * @param top output Blob vector (length 1) - * -# @f$ (M \times ...) @f$: - * the reindexed array @f$ - * y = x_1[x_2] - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the reordered input. - * - * @param top output Blob vector (length 1), providing the error gradient - * with respect to the outputs - * -# @f$ (M \times ...) @f$: - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to concatenated outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2): - * - @f$ \frac{\partial E}{\partial y} @f$ is de-indexed (summing where - * required) back to the input x_1 - * - This layer cannot backprop to x_2, i.e. propagate_down[1] must be - * false. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - private: - struct pair_sort_first { - bool operator()(const std::pair &left, - const std::pair &right) { - return left.first < right.first; - } - }; - void check_batch_reindex(int initial_num, int final_num, - const Dtype* ridx_data); -}; - -/** - * @brief Takes at least two Blob%s and concatenates them along either the num - * or channel dimension, outputting the result. 
- */ -template -class ConcatLayer : public Layer { - public: - explicit ConcatLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Concat"; } - virtual inline int MinBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_1 @f$ - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_2 @f$ - * -# ... - * - K @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_K @f$ - * @param top output Blob vector (length 1) - * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or - * @f$ (N \times KC \times H \times W) @f$ if axis == 1: - * the concatenated output @f$ - * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the concatenate inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or - * @f$ (N \times KC \times H \times W) @f$ if axis == 1: - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to concatenated outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length K), into which the top gradient - * @f$ \frac{\partial E}{\partial y} @f$ is deconcatenated back to the - * inputs @f$ - * \left[ \begin{array}{cccc} - * \frac{\partial E}{\partial x_1} & - * \frac{\partial E}{\partial x_2} & - * ... & - * \frac{\partial E}{\partial x_K} - * \end{array} \right] = - * \frac{\partial E}{\partial y} - * @f$ - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; - int num_concats_; - int concat_input_size_; - int concat_axis_; -}; - -/** - * @brief Compute elementwise operations, such as product and sum, - * along multiple input Blobs. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
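The operations referred to above act elementwise across equally shaped bottoms. A rough sketch of the SUM mode (names are illustrative; coeffs corresponds to the coeffs_ member declared below, while the PROD mode multiplies instead of summing and the MAX mode uses the max_idx_ buffer to route the gradient):

    #include <vector>

    // Sketch only: elementwise weighted sum of K equally shaped inputs.
    template <typename Dtype>
    void eltwise_sum(const std::vector<const Dtype*>& bottoms,
                     const std::vector<Dtype>& coeffs,
                     int count, Dtype* top) {
      for (int i = 0; i < count; ++i) {
        Dtype y = 0;
        for (size_t k = 0; k < bottoms.size(); ++k) {
          y += coeffs[k] * bottoms[k][i];
        }
        top[i] = y;
      }
    }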
- */ -template -class EltwiseLayer : public Layer { - public: - explicit EltwiseLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Eltwise"; } - virtual inline int MinBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - EltwiseParameter_EltwiseOp op_; - vector coeffs_; - Blob max_idx_; - - bool stable_prod_grad_; -}; - -/** - * @brief A layer for learning "embeddings" of one-hot vector input. - * Equivalent to an InnerProductLayer with one-hot vectors as input, but - * for efficiency the input is the "hot" index of each column itself. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class EmbedLayer : public Layer { - public: - explicit EmbedLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Embed"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int M_; - int K_; - int N_; - bool bias_term_; - Blob bias_multiplier_; -}; - -/** - * @brief Takes two+ Blobs, interprets last Blob as a selector and - * filter remaining Blobs accordingly with selector data (0 means that - * the corresponding item has to be filtered, non-zero means that corresponding - * item needs to stay). - */ -template -class FilterLayer : public Layer { - public: - explicit FilterLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Filter"; } - virtual inline int MinBottomBlobs() const { return 2; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs to be filtered @f$ x_1 @f$ - * -# ... 
- * -# @f$ (N \times C \times H \times W) @f$ - * the inputs to be filtered @f$ x_K @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the selector blob - * @param top output Blob vector (length 1+) - * -# @f$ (S \times C \times H \times W) @f$ () - * the filtered output @f$ x_1 @f$ - * where S is the number of items - * that haven't been filtered - * @f$ (S \times C \times H \times W) @f$ - * the filtered output @f$ x_K @f$ - * where S is the number of items - * that haven't been filtered - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the forwarded inputs. - * - * @param top output Blob vector (length 1+), providing the error gradient with - * respect to the outputs - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2+), into which the top error - * gradient is copied - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool first_reshape_; - vector indices_to_forward_; -}; - -/** - * @brief Reshapes the input Blob into flat vectors. - * - * Note: because this layer does not change the input values -- merely the - * dimensions -- it can simply copy the input. The copy happens "virtually" - * (thus taking effectively 0 real time) by setting, in Forward, the data - * pointer of the top Blob to that of the bottom Blob (see Blob::ShareData), - * and in Backward, the diff pointer of the bottom Blob to that of the top Blob - * (see Blob::ShareDiff). - */ -template -class FlattenLayer : public Layer { - public: - explicit FlattenLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Flatten"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs - * @param top output Blob vector (length 1) - * -# @f$ (N \times CHW \times 1 \times 1) @f$ - * the outputs -- i.e., the (virtually) copied, flattened inputs - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the concatenate inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length K), into which the top error - * gradient is (virtually) copied - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief Also known as a "fully-connected" layer, computes an inner product - * with a set of learned weights, and (optionally) adds biases. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
- */ -template -class InnerProductLayer : public Layer { - public: - explicit InnerProductLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "InnerProduct"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int M_; - int K_; - int N_; - bool bias_term_; - Blob bias_multiplier_; -}; - -/** - * @brief Normalizes the input to have 0-mean and/or unit (1) variance. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class MVNLayer : public Layer { - public: - explicit MVNLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "MVN"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Blob mean_, variance_, temp_; - - /// sum_multiplier is used to carry out sum using BLAS - Blob sum_multiplier_; - Dtype eps_; -}; - -/* - * @brief Reshapes the input Blob into an arbitrary-sized output Blob. - * - * Note: similarly to FlattenLayer, this layer does not change the input values - * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). - */ -template -class ReshapeLayer : public Layer { - public: - explicit ReshapeLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Reshape"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top) {} - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top) {} - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - - /// @brief vector of axes indices whose dimensions we'll copy from the bottom - vector copy_axes_; - /// @brief the index of the axis whose dimension we infer, or -1 if none - int inferred_axis_; - /// @brief the product of the "constant" output dimensions - int constant_count_; -}; - -/** - * @brief Compute "reductions" -- operations that return a scalar output Blob - * for an input Blob of arbitrary size, such as the sum, absolute sum, - * and sum of squares. 
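Written out, the reductions named above act on each of the num_ slices of dim_ trailing elements (the members documented below), scaled by an optional coefficient; for a slice n, as reference formulas,

 @f$ y_n = c \sum_i x_{ni} @f$ for the sum,
 @f$ y_n = c \sum_i |x_{ni}| @f$ for the absolute sum,
 @f$ y_n = c \sum_i x_{ni}^2 @f$ for the sum of squares,

where @f$ c @f$ is the coeff_ scaling factor (1 by default).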
- * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class ReductionLayer : public Layer { - public: - explicit ReductionLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Reduction"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// @brief the reduction operation performed by the layer - ReductionParameter_ReductionOp op_; - /// @brief a scalar coefficient applied to all outputs - Dtype coeff_; - /// @brief the index of the first input axis to reduce - int axis_; - /// @brief the number of reductions performed - int num_; - /// @brief the input size of each reduction - int dim_; - /// @brief a helper Blob used for summation (op_ == SUM) - Blob sum_multiplier_; -}; - -/** - * @brief Ignores bottom blobs while producing no top blobs. (This is useful - * to suppress outputs during testing.) - */ -template -class SilenceLayer : public Layer { - public: - explicit SilenceLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual inline const char* type() const { return "Silence"; } - virtual inline int MinBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 0; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top) {} - // We can't define Forward_gpu here, since STUB_GPU will provide - // its own definition for CPU_ONLY mode. - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief Computes the softmax function. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class SoftmaxLayer : public Layer { - public: - explicit SoftmaxLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Softmax"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int outer_num_; - int inner_num_; - int softmax_axis_; - /// sum_multiplier is used to carry out sum using BLAS - Blob sum_multiplier_; - /// scale is an intermediate Blob to hold temporary results. 
- Blob scale_; -}; - -#ifdef USE_CUDNN -/** - * @brief cuDNN implementation of SoftmaxLayer. - * Fallback to SoftmaxLayer for CPU mode. - */ -template -class CuDNNSoftmaxLayer : public SoftmaxLayer { - public: - explicit CuDNNSoftmaxLayer(const LayerParameter& param) - : SoftmaxLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNSoftmaxLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnTensorDescriptor_t bottom_desc_; - cudnnTensorDescriptor_t top_desc_; -}; -#endif - -/** - * @brief Creates a "split" path in the network by copying the bottom Blob - * into multiple top Blob%s to be used by multiple consuming layers. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class SplitLayer : public Layer { - public: - explicit SplitLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Split"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; -}; - -/** - * @brief Takes a Blob and slices it along either the num or channel dimension, - * outputting multiple sliced Blob results. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class SliceLayer : public Layer { - public: - explicit SliceLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Slice"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; - int num_slices_; - int slice_size_; - int slice_axis_; - vector slice_point_; -}; - -/** - * @brief Copy a Blob along specified dimensions. 
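As a concrete illustration of the copying (a hypothetical configuration, not taken from this patch): tiling a @f$ (N \times C \times H \times W) @f$ blob along axis 1 with 3 tiles produces a @f$ (N \times 3C \times H \times W) @f$ output in which the original channel block appears three times; the axis_ and tiles_ members declared below hold these two settings.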
- */ -template -class TileLayer : public Layer { - public: - explicit TileLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Tile"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - unsigned int axis_, tiles_, outer_dim_, inner_dim_; -}; - -} // namespace caffe - -#endif // CAFFE_COMMON_LAYERS_HPP_ diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp deleted file mode 100644 index aa0ab7df..00000000 --- a/include/caffe/data_layers.hpp +++ /dev/null @@ -1,347 +0,0 @@ -#ifndef CAFFE_DATA_LAYERS_HPP_ -#define CAFFE_DATA_LAYERS_HPP_ - -#include -#include -#include -#include "hdf5.h" - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/data_reader.hpp" -#include "caffe/data_transformer.hpp" -#include "caffe/filler.hpp" -#include "caffe/internal_thread.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/blocking_queue.hpp" -#include "caffe/util/db.hpp" - -#define HDF5_DATA_DATASET_NAME "data" -#define HDF5_DATA_LABEL_NAME "label" - -namespace caffe { - -/** - * @brief Provides base for data layers that feed blobs to the Net. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class BaseDataLayer : public Layer { - public: - explicit BaseDataLayer(const LayerParameter& param); - // LayerSetUp: implements common data layer setup functionality, and calls - // DataLayerSetUp to do special data layer setup for individual layer types. - // This method may not be overridden except by the BasePrefetchingDataLayer. - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top) {} - // Data layers have no bottoms, so reshaping is trivial. - virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - - protected: - TransformationParameter transform_param_; - shared_ptr > data_transformer_; - bool output_labels_; -}; - -template -class Batch { - public: - Blob data_, label_; -}; - -template -class BasePrefetchingDataLayer : - public BaseDataLayer, public InternalThread { - public: - explicit BasePrefetchingDataLayer(const LayerParameter& param); - // LayerSetUp: implements common data layer setup functionality, and calls - // DataLayerSetUp to do special data layer setup for individual layer types. - // This method may not be overridden. 
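The prefetching machinery declared below is a producer/consumer pipeline: PREFETCH_COUNT Batch objects circulate between two blocking queues, the internal thread fills free batches from the data source, and Forward consumes full ones. An outline of the producer side (illustrative, not the verbatim implementation; must_stop() is the InternalThread termination check):

    while (!must_stop()) {
      Batch<Dtype>* batch = prefetch_free_.pop();
      load_batch(batch);             // fill batch->data_ / batch->label_
      prefetch_full_.push(batch);
    }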
- void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - // Prefetches batches (asynchronously if to GPU memory) - static const int PREFETCH_COUNT = 3; - - protected: - virtual void InternalThreadEntry(); - virtual void load_batch(Batch* batch) = 0; - - Batch prefetch_[PREFETCH_COUNT]; - BlockingQueue*> prefetch_free_; - BlockingQueue*> prefetch_full_; - - Blob transformed_data_; -}; - -template -class DataLayer : public BasePrefetchingDataLayer { - public: - explicit DataLayer(const LayerParameter& param); - virtual ~DataLayer(); - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top); - // DataLayer uses DataReader instead for sharing for parallelism - virtual inline bool ShareInParallel() const { return false; } - virtual inline const char* type() const { return "Data"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlobs() const { return 2; } - - protected: - virtual void load_batch(Batch* batch); - - DataReader reader_; -}; - -/** - * @brief Provides data to the Net generated by a Filler. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class DummyDataLayer : public Layer { - public: - explicit DummyDataLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - // Data layers have no bottoms, so reshaping is trivial. - virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual inline const char* type() const { return "DummyData"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - - vector > > fillers_; - vector refill_; -}; - -/** - * @brief Provides data to the Net from HDF5 files. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class HDF5DataLayer : public Layer { - public: - explicit HDF5DataLayer(const LayerParameter& param) - : Layer(param) {} - virtual ~HDF5DataLayer(); - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - // Data layers have no bottoms, so reshaping is trivial. 
- virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual inline const char* type() const { return "HDF5Data"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void LoadHDF5FileData(const char* filename); - - std::vector hdf_filenames_; - unsigned int num_files_; - unsigned int current_file_; - hsize_t current_row_; - std::vector > > hdf_blobs_; - std::vector data_permutation_; - std::vector file_permutation_; -}; - -/** - * @brief Write blobs to disk as HDF5 files. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class HDF5OutputLayer : public Layer { - public: - explicit HDF5OutputLayer(const LayerParameter& param) - : Layer(param), file_opened_(false) {} - virtual ~HDF5OutputLayer(); - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - // Data layers have no bottoms, so reshaping is trivial. - virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual inline const char* type() const { return "HDF5Output"; } - // TODO: no limit on the number of blobs - virtual inline int ExactNumBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 0; } - - inline std::string file_name() const { return file_name_; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void SaveBlobs(); - - bool file_opened_; - std::string file_name_; - hid_t file_id_; - Blob data_blob_; - Blob label_blob_; -}; - -/** - * @brief Provides data to the Net from image files. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class ImageDataLayer : public BasePrefetchingDataLayer { - public: - explicit ImageDataLayer(const LayerParameter& param) - : BasePrefetchingDataLayer(param) {} - virtual ~ImageDataLayer(); - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "ImageData"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int ExactNumTopBlobs() const { return 2; } - - protected: - shared_ptr prefetch_rng_; - virtual void ShuffleImages(); - virtual void load_batch(Batch* batch); - - vector > lines_; - int lines_id_; -}; - -/** - * @brief Provides data to the Net from memory. - * - * TODO(dox): thorough documentation for Forward and proto params. 
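The class below exposes Reset() and the Add*Vector() helpers for pushing caller-owned data into the net. A minimal usage sketch (function and variable names are illustrative; the buffers must outlive the forward pass because the layer keeps pointers to them, and n is expected to be a multiple of the configured batch size):

    #include "caffe/layers/memory_data_layer.hpp"

    // Sketch only: hand a caller-owned buffer of n items to a MemoryData input.
    void FeedMemoryData(caffe::MemoryDataLayer<float>* layer,
                        float* data, float* labels, int n) {
      layer->Reset(data, labels, n);
    }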
- */ -template -class MemoryDataLayer : public BaseDataLayer { - public: - explicit MemoryDataLayer(const LayerParameter& param) - : BaseDataLayer(param), has_new_data_(false) {} - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "MemoryData"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int ExactNumTopBlobs() const { return 2; } - - virtual void AddDatumVector(const vector& datum_vector); -#ifdef USE_OPENCV - virtual void AddMatVector(const vector& mat_vector, - const vector& labels); -#endif // USE_OPENCV - - // Reset should accept const pointers, but can't, because the memory - // will be given to Blob, which is mutable - void Reset(Dtype* data, Dtype* label, int n); - void set_batch_size(int new_size); - - int batch_size() { return batch_size_; } - int channels() { return channels_; } - int height() { return height_; } - int width() { return width_; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - int batch_size_, channels_, height_, width_, size_; - Dtype* data_; - Dtype* labels_; - int n_; - size_t pos_; - Blob added_data_; - Blob added_label_; - bool has_new_data_; -}; - -/** - * @brief Provides data to the Net from windows of images files, specified - * by a window data file. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class WindowDataLayer : public BasePrefetchingDataLayer { - public: - explicit WindowDataLayer(const LayerParameter& param) - : BasePrefetchingDataLayer(param) {} - virtual ~WindowDataLayer(); - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "WindowData"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int ExactNumTopBlobs() const { return 2; } - - protected: - virtual unsigned int PrefetchRand(); - virtual void load_batch(Batch* batch); - - shared_ptr prefetch_rng_; - vector > > image_database_; - enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM }; - vector > fg_windows_; - vector > bg_windows_; - Blob data_mean_; - vector mean_values_; - bool has_mean_file_; - bool has_mean_values_; - bool cache_images_; - vector > image_database_cache_; -}; - -} // namespace caffe - -#endif // CAFFE_DATA_LAYERS_HPP_ diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp index 2c2fde4d..f385afcc 100644 --- a/include/caffe/layer_factory.hpp +++ b/include/caffe/layer_factory.hpp @@ -44,6 +44,7 @@ #include #include "caffe/common.hpp" +#include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { diff --git a/include/caffe/layers/absval_layer.hpp b/include/caffe/layers/absval_layer.hpp new file mode 100644 index 00000000..9b5305dc --- /dev/null +++ b/include/caffe/layers/absval_layer.hpp @@ -0,0 +1,68 @@ +#ifndef CAFFE_ABSVAL_LAYER_HPP_ +#define CAFFE_ABSVAL_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Computes @f$ y = |x| @f$ + * + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ y = |x| @f$ + */ +template +class AbsValLayer : public NeuronLayer { + public: + explicit 
AbsValLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "AbsVal"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /// @copydoc AbsValLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the absolute value inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = + * \mathrm{sign}(x) \frac{\partial E}{\partial y} + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_ABSVAL_LAYER_HPP_ diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp new file mode 100644 index 00000000..fe2adb93 --- /dev/null +++ b/include/caffe/layers/accuracy_layer.hpp @@ -0,0 +1,95 @@ +#ifndef CAFFE_ACCURACY_LAYER_HPP_ +#define CAFFE_ACCURACY_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the classification accuracy for a one-of-many + * classification task. + */ +template +class AccuracyLayer : public Layer { + public: + /** + * @param param provides AccuracyParameter accuracy_param, + * with AccuracyLayer options: + * - top_k (\b optional, default 1). + * Sets the maximum rank @f$ k @f$ at which a prediction is considered + * correct. For example, if @f$ k = 5 @f$, a prediction is counted + * correct if the correct label is among the top 5 predicted labels. + */ + explicit AccuracyLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Accuracy"; } + virtual inline int ExactNumBottomBlobs() const { return 2; } + + // If there are two top blobs, then the second blob will contain + // accuracies per class. + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlos() const { return 2; } + + protected: + /** + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ x @f$, a Blob with values in + * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of + * the @f$ K = CHW @f$ classes. 
Each @f$ x_n @f$ is mapped to a predicted + * label @f$ \hat{l}_n @f$ given by its maximal index: + * @f$ \hat{l}_n = \arg\max\limits_k x_{nk} @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels @f$ l @f$, an integer-valued Blob with values + * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ + * indicating the correct class label among the @f$ K @f$ classes + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed accuracy: @f$ + * \frac{1}{N} \sum\limits_{n=1}^N \delta\{ \hat{l}_n = l_n \} + * @f$, where @f$ + * \delta\{\mathrm{condition}\} = \left\{ + * \begin{array}{lr} + * 1 & \mbox{if condition} \\ + * 0 & \mbox{otherwise} + * \end{array} \right. + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + + /// @brief Not implemented -- AccuracyLayer cannot be used as a loss. + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + for (int i = 0; i < propagate_down.size(); ++i) { + if (propagate_down[i]) { NOT_IMPLEMENTED; } + } + } + + int label_axis_, outer_num_, inner_num_; + + int top_k_; + + /// Whether to ignore instances with a certain label. + bool has_ignore_label_; + /// The label indicating that an instance should be ignored. + int ignore_label_; + /// Keeps counts of the number of samples per class. + Blob nums_buffer_; +}; + +} // namespace caffe + +#endif // CAFFE_ACCURACY_LAYER_HPP_ diff --git a/include/caffe/layers/argmax_layer.hpp b/include/caffe/layers/argmax_layer.hpp new file mode 100644 index 00000000..4fef363e --- /dev/null +++ b/include/caffe/layers/argmax_layer.hpp @@ -0,0 +1,77 @@ +#ifndef CAFFE_ARGMAX_LAYER_HPP_ +#define CAFFE_ARGMAX_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Compute the index of the @f$ K @f$ max values for each datum across + * all dimensions @f$ (C \times H \times W) @f$. + * + * Intended for use after a classification layer to produce a prediction. + * If parameter out_max_val is set to true, output is a vector of pairs + * (max_ind, max_val) for each image. The axis parameter specifies an axis + * along which to maximise. + * + * NOTE: does not implement Backwards operation. + */ +template +class ArgMaxLayer : public Layer { + public: + /** + * @param param provides ArgMaxParameter argmax_param, + * with ArgMaxLayer options: + * - top_k (\b optional uint, default 1). + * the number @f$ K @f$ of maximal items to output. + * - out_max_val (\b optional bool, default false). + * if set, output a vector of pairs (max_ind, max_val) unless axis is set then + * output max_val along the specified axis. + * - axis (\b optional int). + * if set, maximise along the specified axis else maximise the flattened + * trailing dimensions for each index of the first / num dimension. 
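As a small worked example of the shapes described above (hypothetical settings, not from this patch): with top_k = 3, out_max_val = false, and no axis on an @f$ (N \times C \times H \times W) @f$ bottom, the top is @f$ (N \times 1 \times 3) @f$ and holds, for each item, the indices of its three largest values across the flattened @f$ C \times H \times W @f$ trailing dimensions; with out_max_val = true it becomes @f$ (N \times 2 \times 3) @f$, carrying both the indices and the values.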
+ */ + explicit ArgMaxLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "ArgMax"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times 1 \times K) @f$ or, if out_max_val + * @f$ (N \times 2 \times K) @f$ unless axis set than e.g. + * @f$ (N \times K \times H \times W) @f$ if axis == 1 + * the computed outputs @f$ + * y_n = \arg\max\limits_i x_{ni} + * @f$ (for @f$ K = 1 @f$). + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + /// @brief Not implemented (non-differentiable function) + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + NOT_IMPLEMENTED; + } + bool out_max_val_; + size_t top_k_; + bool has_axis_; + int axis_; +}; + +} // namespace caffe + +#endif // CAFFE_ARGMAX_LAYER_HPP_ diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp new file mode 100644 index 00000000..f3def16c --- /dev/null +++ b/include/caffe/layers/base_conv_layer.hpp @@ -0,0 +1,168 @@ +#ifndef CAFFE_BASE_CONVOLUTION_LAYER_HPP_ +#define CAFFE_BASE_CONVOLUTION_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/im2col.hpp" + +namespace caffe { + +/** + * @brief Abstract base class that factors out the BLAS code common to + * ConvolutionLayer and DeconvolutionLayer. + */ +template +class BaseConvolutionLayer : public Layer { + public: + explicit BaseConvolutionLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline bool EqualNumBottomTopBlobs() const { return true; } + + protected: + // Helper functions that abstract away the column buffer and gemm arguments. + // The last argument in forward_cpu_gemm is so that we can skip the im2col if + // we just called weight_cpu_gemm with the same input. + void forward_cpu_gemm(const Dtype* input, const Dtype* weights, + Dtype* output, bool skip_im2col = false); + void forward_cpu_bias(Dtype* output, const Dtype* bias); + void backward_cpu_gemm(const Dtype* input, const Dtype* weights, + Dtype* output); + void weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype* + weights); + void backward_cpu_bias(Dtype* bias, const Dtype* input); + +#ifndef CPU_ONLY + void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights, + Dtype* output, bool skip_im2col = false); + void forward_gpu_bias(Dtype* output, const Dtype* bias); + void backward_gpu_gemm(const Dtype* input, const Dtype* weights, + Dtype* col_output); + void weight_gpu_gemm(const Dtype* col_input, const Dtype* output, Dtype* + weights); + void backward_gpu_bias(Dtype* bias, const Dtype* input); +#endif + + /// @brief The spatial dimensions of the input. 
+ inline int input_shape(int i) { + return (*bottom_shape_)[channel_axis_ + i]; + } + // reverse_dimensions should return true iff we are implementing deconv, so + // that conv helpers know which dimensions are which. + virtual bool reverse_dimensions() = 0; + // Compute height_out_ and width_out_ from other parameters. + virtual void compute_output_shape() = 0; + + /// @brief The spatial dimensions of a filter kernel. + Blob kernel_shape_; + /// @brief The spatial dimensions of the stride. + Blob stride_; + /// @brief The spatial dimensions of the padding. + Blob pad_; + /// @brief The spatial dimensions of the convolution input. + Blob conv_input_shape_; + /// @brief The spatial dimensions of the col_buffer. + vector col_buffer_shape_; + /// @brief The spatial dimensions of the output. + vector output_shape_; + const vector* bottom_shape_; + + int num_spatial_axes_; + int bottom_dim_; + int top_dim_; + + int channel_axis_; + int num_; + int channels_; + int group_; + int out_spatial_dim_; + int weight_offset_; + int num_output_; + bool bias_term_; + bool is_1x1_; + bool force_nd_im2col_; + + private: + // wrap im2col/col2im so we don't have to remember the (long) argument lists + inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_cpu(data, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + } else { + im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), + col_buffer_shape_.data(), kernel_shape_.cpu_data(), + pad_.cpu_data(), stride_.cpu_data(), col_buff); + } + } + inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_cpu(col_buff, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], data); + } else { + col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), + col_buffer_shape_.data(), kernel_shape_.cpu_data(), + pad_.cpu_data(), stride_.cpu_data(), data); + } + } +#ifndef CPU_ONLY + inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + im2col_gpu(data, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + } else { + im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, + conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), + kernel_shape_.gpu_data(), pad_.gpu_data(), + stride_.gpu_data(), col_buff); + } + } + inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { + if (!force_nd_im2col_ && num_spatial_axes_ == 2) { + col2im_gpu(col_buff, conv_in_channels_, + conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], + kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], + pad_.cpu_data()[0], pad_.cpu_data()[1], + stride_.cpu_data()[0], stride_.cpu_data()[1], data); + } else { + col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, + conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), + kernel_shape_.gpu_data(), pad_.gpu_data(), 
stride_.gpu_data(), + data); + } + } +#endif + + int num_kernels_im2col_; + int num_kernels_col2im_; + int conv_out_channels_; + int conv_in_channels_; + int conv_out_spatial_dim_; + int kernel_dim_; + int col_offset_; + int output_offset_; + + Blob col_buffer_; + Blob bias_multiplier_; +}; + +} // namespace caffe + +#endif // CAFFE_BASE_CONVOLUTION_LAYER_HPP_ diff --git a/include/caffe/layers/base_data_layer.hpp b/include/caffe/layers/base_data_layer.hpp new file mode 100644 index 00000000..2c49b731 --- /dev/null +++ b/include/caffe/layers/base_data_layer.hpp @@ -0,0 +1,86 @@ +#ifndef CAFFE_DATA_LAYERS_HPP_ +#define CAFFE_DATA_LAYERS_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/blocking_queue.hpp" + +namespace caffe { + +/** + * @brief Provides base for data layers that feed blobs to the Net. + * + * TODO(dox): thorough documentation for Forward and proto params. + */ +template +class BaseDataLayer : public Layer { + public: + explicit BaseDataLayer(const LayerParameter& param); + // LayerSetUp: implements common data layer setup functionality, and calls + // DataLayerSetUp to do special data layer setup for individual layer types. + // This method may not be overridden except by the BasePrefetchingDataLayer. + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } + virtual void DataLayerSetUp(const vector*>& bottom, + const vector*>& top) {} + // Data layers have no bottoms, so reshaping is trivial. + virtual void Reshape(const vector*>& bottom, + const vector*>& top) {} + + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + + protected: + TransformationParameter transform_param_; + shared_ptr > data_transformer_; + bool output_labels_; +}; + +template +class Batch { + public: + Blob data_, label_; +}; + +template +class BasePrefetchingDataLayer : + public BaseDataLayer, public InternalThread { + public: + explicit BasePrefetchingDataLayer(const LayerParameter& param); + // LayerSetUp: implements common data layer setup functionality, and calls + // DataLayerSetUp to do special data layer setup for individual layer types. + // This method may not be overridden. 
+ void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + // Prefetches batches (asynchronously if to GPU memory) + static const int PREFETCH_COUNT = 3; + + protected: + virtual void InternalThreadEntry(); + virtual void load_batch(Batch* batch) = 0; + + Batch prefetch_[PREFETCH_COUNT]; + BlockingQueue*> prefetch_free_; + BlockingQueue*> prefetch_full_; + + Blob transformed_data_; +}; + +} // namespace caffe + +#endif // CAFFE_DATA_LAYERS_HPP_ diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp new file mode 100644 index 00000000..9b2d5126 --- /dev/null +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -0,0 +1,81 @@ +#ifndef CAFFE_BATCHNORM_LAYER_HPP_ +#define CAFFE_BATCHNORM_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Normalizes the input to have 0-mean and/or unit (1) variance across + * the batch. + * + * This layer computes Batch Normalization described in [1]. For + * each channel in the data (i.e. axis 1), it subtracts the mean and divides + * by the variance, where both statistics are computed across both spatial + * dimensions and across the different examples in the batch. + * + * By default, during training time, the network is computing global mean/ + * variance statistics via a running average, which is then used at test + * time to allow deterministic outputs for each input. You can manually + * toggle whether the network is accumulating or using the statistics via the + * use_global_stats option. IMPORTANT: for this feature to work, you MUST + * set the learning rate to zero for all three parameter blobs, i.e., + * param {lr_mult: 0} three times in the layer definition. + * + * Note that the original paper also included a per-channel learned bias and + * scaling factor. It is possible (though a bit cumbersome) to implement + * this in caffe using a single-channel DummyDataLayer filled with zeros, + * followed by a Convolution layer with output the same size as the current. + * This produces a channel-specific value that can be added or multiplied by + * the BatchNorm layer's output. + * + * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network + * Training by Reducing Internal Covariate Shift." arXiv preprint + * arXiv:1502.03167 (2015). + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
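To make the statistics described above concrete, here is a per-channel sketch of the train-time normalization and a typical running-average update controlled by moving_average_fraction. It is an illustration only: BatchNormLayer works on Blob buffers and keeps its running statistics in its parameter blobs (hence the lr_mult: 0 note above), but the normalization formula is the one shown.

#include <cmath>
#include <vector>

// Train-time batch normalization of one channel, plus an exponential
// moving average of the batch statistics for later use at test time
// (use_global_stats). The (1 - lambda) form here is the usual
// moving-average convention; the layer's own bookkeeping differs in detail.
void NormalizeChannel(std::vector<float>* x,
                      float* running_mean, float* running_var,
                      float lambda, float eps) {
  const float n = static_cast<float>(x->size());
  float mean = 0.f;
  for (size_t i = 0; i < x->size(); ++i) mean += (*x)[i];
  mean /= n;
  float var = 0.f;
  for (size_t i = 0; i < x->size(); ++i) {
    const float d = (*x)[i] - mean;
    var += d * d;
  }
  var /= n;
  // y = (x - mean) / sqrt(var + eps)
  for (size_t i = 0; i < x->size(); ++i) {
    (*x)[i] = ((*x)[i] - mean) / std::sqrt(var + eps);
  }
  *running_mean = lambda * (*running_mean) + (1.f - lambda) * mean;
  *running_var  = lambda * (*running_var)  + (1.f - lambda) * var;
}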
+ */ +template +class BatchNormLayer : public Layer { + public: + explicit BatchNormLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "BatchNorm"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob mean_, variance_, temp_, x_norm_; + bool use_global_stats_; + Dtype moving_average_fraction_; + int channels_; + Dtype eps_; + + // extra temporarary variables is used to carry out sums/broadcasting + // using BLAS + Blob batch_sum_multiplier_; + Blob num_by_chans_; + Blob spatial_sum_multiplier_; +}; + +} // namespace caffe + +#endif // CAFFE_BATCHNORM_LAYER_HPP_ diff --git a/include/caffe/layers/batch_reindex_layer.hpp b/include/caffe/layers/batch_reindex_layer.hpp new file mode 100644 index 00000000..ebb3a567 --- /dev/null +++ b/include/caffe/layers/batch_reindex_layer.hpp @@ -0,0 +1,83 @@ +#ifndef CAFFE_BATCHREINDEX_LAYER_HPP_ +#define CAFFE_BATCHREINDEX_LAYER_HPP_ + +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Index into the input blob along its first axis. + * + * This layer can be used to select, reorder, and even replicate examples in a + * batch. The second blob is cast to int and treated as an index into the + * first axis of the first blob. + */ +template +class BatchReindexLayer : public Layer { + public: + explicit BatchReindexLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "BatchReindex"; } + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 2+) + * -# @f$ (N \times ...) @f$ + * the inputs @f$ x_1 @f$ + * -# @f$ (M) @f$ + * the inputs @f$ x_2 @f$ + * @param top output Blob vector (length 1) + * -# @f$ (M \times ...) @f$: + * the reindexed array @f$ + * y = x_1[x_2] + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the reordered input. + * + * @param top output Blob vector (length 1), providing the error gradient + * with respect to the outputs + * -# @f$ (M \times ...) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to concatenated outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 2): + * - @f$ \frac{\partial E}{\partial y} @f$ is de-indexed (summing where + * required) back to the input x_1 + * - This layer cannot backprop to x_2, i.e. propagate_down[1] must be + * false. 
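The forward rule y = x_1[x_2] above is a gather along the first axis. A rough sketch with flat row-major storage (inner_dim is the product of the trailing dimensions; names are illustrative, not the layer's code):

#include <cassert>
#include <vector>

// Gather rows: out[i] = in[index[i]], where each "row" is inner_dim
// consecutive values. This mirrors the forward pass described above;
// the backward pass is the matching scatter-add, summing gradients of
// rows that were selected more than once.
std::vector<float> BatchReindex(const std::vector<float>& in, int inner_dim,
                                const std::vector<int>& index) {
  const int in_rows = static_cast<int>(in.size()) / inner_dim;
  std::vector<float> out(index.size() * inner_dim);
  for (size_t i = 0; i < index.size(); ++i) {
    assert(index[i] >= 0 && index[i] < in_rows);
    for (int j = 0; j < inner_dim; ++j) {
      out[i * inner_dim + j] = in[index[i] * inner_dim + j];
    }
  }
  return out;
}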
+ */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + private: + struct pair_sort_first { + bool operator()(const std::pair &left, + const std::pair &right) { + return left.first < right.first; + } + }; + void check_batch_reindex(int initial_num, int final_num, + const Dtype* ridx_data); +}; + +} // namespace caffe + +#endif // CAFFE_BATCHREINDEX_LAYER_HPP_ diff --git a/include/caffe/layers/bnll_layer.hpp b/include/caffe/layers/bnll_layer.hpp new file mode 100644 index 00000000..be07c748 --- /dev/null +++ b/include/caffe/layers/bnll_layer.hpp @@ -0,0 +1,70 @@ +#ifndef CAFFE_BNLL_LAYER_HPP_ +#define CAFFE_BNLL_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Computes @f$ y = x + \log(1 + \exp(-x)) @f$ if @f$ x > 0 @f$; + * @f$ y = \log(1 + \exp(x)) @f$ otherwise. + * + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \left\{ + * \begin{array}{ll} + * x + \log(1 + \exp(-x)) & \mbox{if } x > 0 \\ + * \log(1 + \exp(x)) & \mbox{otherwise} + * \end{array} \right. + * @f$ + */ +template +class BNLLLayer : public NeuronLayer { + public: + explicit BNLLLayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual inline const char* type() const { return "BNLL"; } + + protected: + /// @copydoc BNLLLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the BNLL inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_BNLL_LAYER_HPP_ diff --git a/include/caffe/layers/concat_layer.hpp b/include/caffe/layers/concat_layer.hpp new file mode 100644 index 00000000..a1570249 --- /dev/null +++ b/include/caffe/layers/concat_layer.hpp @@ -0,0 +1,87 @@ +#ifndef CAFFE_CONCAT_LAYER_HPP_ +#define CAFFE_CONCAT_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Takes at least two Blob%s and concatenates them along either the num + * or channel dimension, outputting the result. 
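As a concrete picture of the channel-axis case, a standalone sketch of the copy/offset pattern (equally-sized spatial dimensions assumed; plain vectors and illustrative names rather than Blobs):

#include <algorithm>
#include <vector>

// Concatenate K inputs along the channel axis. Each input holds
// num * channels[k] * spatial values in row-major (N, C, H*W) order;
// the output interleaves the channel blocks per example. ConcatLayer
// tracks the analogous outer/inner sizes in its num_concats_ and
// concat_input_size_ members.
std::vector<float> ConcatChannels(const std::vector<std::vector<float> >& in,
                                  const std::vector<int>& channels,
                                  int num, int spatial) {
  int total_channels = 0;
  for (size_t k = 0; k < channels.size(); ++k) total_channels += channels[k];
  std::vector<float> out(static_cast<size_t>(num) * total_channels * spatial);
  for (int n = 0; n < num; ++n) {
    int channel_offset = 0;
    for (size_t k = 0; k < in.size(); ++k) {
      const float* src = &in[k][static_cast<size_t>(n) * channels[k] * spatial];
      float* dst = &out[(static_cast<size_t>(n) * total_channels
                         + channel_offset) * spatial];
      std::copy(src, src + channels[k] * spatial, dst);
      channel_offset += channels[k];
    }
  }
  return out;
}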
+ */ +template +class ConcatLayer : public Layer { + public: + explicit ConcatLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Concat"; } + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 2+) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x_1 @f$ + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x_2 @f$ + * -# ... + * - K @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x_K @f$ + * @param top output Blob vector (length 1) + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: + * the concatenated output @f$ + * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the concatenate inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to concatenated outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length K), into which the top gradient + * @f$ \frac{\partial E}{\partial y} @f$ is deconcatenated back to the + * inputs @f$ + * \left[ \begin{array}{cccc} + * \frac{\partial E}{\partial x_1} & + * \frac{\partial E}{\partial x_2} & + * ... & + * \frac{\partial E}{\partial x_K} + * \end{array} \right] = + * \frac{\partial E}{\partial y} + * @f$ + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int count_; + int num_concats_; + int concat_input_size_; + int concat_axis_; +}; + +} // namespace caffe + +#endif // CAFFE_CONCAT_LAYER_HPP_ diff --git a/include/caffe/layers/contrastive_loss_layer.hpp b/include/caffe/layers/contrastive_loss_layer.hpp new file mode 100644 index 00000000..e890afb8 --- /dev/null +++ b/include/caffe/layers/contrastive_loss_layer.hpp @@ -0,0 +1,101 @@ +#ifndef CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_ +#define CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the contrastive loss @f$ + * E = \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + + * \left(1-y\right) \max \left(margin-d, 0\right)^2 + * @f$ where @f$ + * d = \left| \left| a_n - b_n \right| \right|_2 @f$. This can be + * used to train siamese networks. 
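A direct transcription of the loss above into standalone C++ (the a/b/y/margin naming follows the comment; everything else is illustrative and not the layer implementation):

#include <algorithm>
#include <cmath>
#include <vector>

// E = 1/(2N) * sum_n [ y_n * d_n^2 + (1 - y_n) * max(margin - d_n, 0)^2 ],
// with d_n = || a_n - b_n ||_2, as in the ContrastiveLossLayer comment.
float ContrastiveLoss(const std::vector<std::vector<float> >& a,
                      const std::vector<std::vector<float> >& b,
                      const std::vector<int>& y, float margin) {
  float loss = 0.f;
  for (size_t n = 0; n < a.size(); ++n) {
    float dist_sq = 0.f;
    for (size_t c = 0; c < a[n].size(); ++c) {
      const float diff = a[n][c] - b[n][c];
      dist_sq += diff * diff;
    }
    if (y[n]) {        // similar pair: penalize distance
      loss += dist_sq;
    } else {           // dissimilar pair: penalize being closer than margin
      const float gap = std::max(margin - std::sqrt(dist_sq), 0.f);
      loss += gap * gap;
    }
  }
  return loss / (2.f * a.size());
}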
+ * + * @param bottom input Blob vector (length 3) + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$ a \in [-\infty, +\infty]@f$ + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$ b \in [-\infty, +\infty]@f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the binary similarity @f$ s \in [0, 1]@f$ + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed contrastive loss: @f$ E = + * \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + + * \left(1-y\right) \max \left(margin-d, 0\right)^2 + * @f$ where @f$ + * d = \left| \left| a_n - b_n \right| \right|_2 @f$. + * This can be used to train siamese networks. + */ +template +class ContrastiveLossLayer : public LossLayer { + public: + explicit ContrastiveLossLayer(const LayerParameter& param) + : LossLayer(param), diff_() {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline int ExactNumBottomBlobs() const { return 3; } + virtual inline const char* type() const { return "ContrastiveLoss"; } + /** + * Unlike most loss layers, in the ContrastiveLossLayer we can backpropagate + * to the first two inputs. + */ + virtual inline bool AllowForceBackward(const int bottom_index) const { + return bottom_index != 2; + } + + protected: + /// @copydoc ContrastiveLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the Contrastive error gradient w.r.t. the inputs. + * + * Computes the gradients with respect to the two input vectors (bottom[0] and + * bottom[1]), but not the similarity label (bottom[2]). + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. 
+ * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$a@f$; Backward fills their diff with + * gradients if propagate_down[0] + * -# @f$ (N \times C \times 1 \times 1) @f$ + * the features @f$b@f$; Backward fills their diff with gradients if + * propagate_down[1] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob diff_; // cached for backward pass + Blob dist_sq_; // cached for backward pass + Blob diff_sq_; // tmp storage for gpu forward pass + Blob summer_vec_; // tmp storage for gpu forward pass +}; + +} // namespace caffe + +#endif // CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/conv_layer.hpp b/include/caffe/layers/conv_layer.hpp new file mode 100644 index 00000000..15574766 --- /dev/null +++ b/include/caffe/layers/conv_layer.hpp @@ -0,0 +1,81 @@ +#ifndef CAFFE_CONV_LAYER_HPP_ +#define CAFFE_CONV_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/base_conv_layer.hpp" + +namespace caffe { + +/** + * @brief Convolves the input image with a bank of learned filters, + * and (optionally) adds biases. + * + * Caffe convolves by reduction to matrix multiplication. This achieves + * high-throughput and generality of input and filter dimensions but comes at + * the cost of memory for matrices. This makes use of efficiency in BLAS. + * + * The input is "im2col" transformed to a channel K' x H x W data matrix + * for multiplication with the N x K' x H x W filter matrix to yield a + * N' x H x W output matrix that is then "col2im" restored. K' is the + * input channel * kernel height * kernel width dimension of the unrolled + * inputs so that the im2col matrix has a column for each input region to + * be filtered. col2im restores the output spatial structure by rolling up + * the output channel N' columns of the output matrix. + */ +template +class ConvolutionLayer : public BaseConvolutionLayer { + public: + /** + * @param param provides ConvolutionParameter convolution_param, + * with ConvolutionLayer options: + * - num_output. The number of filters. + * - kernel_size / kernel_h / kernel_w. The filter dimensions, given by + * kernel_size for square filters or kernel_h and kernel_w for rectangular + * filters. + * - stride / stride_h / stride_w (\b optional, default 1). The filter + * stride, given by stride_size for equal dimensions or stride_h and stride_w + * for different strides. By default the convolution is dense with stride 1. + * - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for + * convolution, given by pad for equal dimensions or pad_h and pad_w for + * different padding. Input padding is computed implicitly instead of + * actually padding. + * - group (\b optional, default 1). The number of filter groups. Group + * convolution is a method for reducing parameterization by selectively + * connecting input and output channels. The input and output channel dimensions must be divisible + * by the number of groups. For group @f$ \geq 1 @f$, the + * convolutional filters' input and output channels are separated s.t. each + * group takes 1 / group of the input channels and makes 1 / group of the + * output channels. 
Concretely 4 input channels, 8 output channels, and + * 2 groups separate input channels 1-2 and output channels 1-4 into the + * first group and input channels 3-4 and output channels 5-8 into the second + * group. + * - bias_term (\b optional, default true). Whether to have a bias. + * - engine: convolution has CAFFE (matrix multiplication) and CUDNN (library + * kernels + stream parallelism) engines. + */ + explicit ConvolutionLayer(const LayerParameter& param) + : BaseConvolutionLayer(param) {} + + virtual inline const char* type() const { return "Convolution"; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual inline bool reverse_dimensions() { return false; } + virtual void compute_output_shape(); +}; + +} // namespace caffe + +#endif // CAFFE_CONV_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_conv_layer.hpp b/include/caffe/layers/cudnn_conv_layer.hpp new file mode 100644 index 00000000..31fe49a7 --- /dev/null +++ b/include/caffe/layers/cudnn_conv_layer.hpp @@ -0,0 +1,72 @@ +#ifndef CAFFE_CUDNN_CONV_LAYER_HPP_ +#define CAFFE_CUDNN_CONV_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/conv_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/* + * @brief cuDNN implementation of ConvolutionLayer. + * Fallback to ConvolutionLayer for CPU mode. + * + * cuDNN accelerates convolution through forward kernels for filtering and bias + * plus backward kernels for the gradient w.r.t. the filters, biases, and + * inputs. Caffe + cuDNN further speeds up the computation through forward + * parallelism across groups and backward parallelism across gradients. + * + * The CUDNN engine does not have memory overhead for matrix buffers. For many + * input and filter regimes the CUDNN engine is faster than the CAFFE engine, + * but for fully-convolutional models and large inputs the CAFFE engine can be + * faster as long as it fits in memory. 
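The matrix-multiplication reduction described for ConvolutionLayer above is easiest to see as shape arithmetic; the col buffer below is exactly the per-layer matrix memory that the cuDNN note above avoids. A sketch for a single example with square kernels, symmetric stride/padding, and no grouping (illustrative, not the layer code):

#include <cstdio>

// For one input of C x H x W convolved with num_output filters of size
// C x K x K (stride S, pad P), the im2col buffer and the resulting GEMM
// have the shapes below. Dilation and groups are omitted for brevity.
void ConvGemmShapes(int C, int H, int W, int num_output, int K, int S, int P) {
  const int H_out = (H + 2 * P - K) / S + 1;
  const int W_out = (W + 2 * P - K) / S + 1;
  const int kernel_dim = C * K * K;        // rows of one im2col column
  const int spatial_out = H_out * W_out;   // number of im2col columns
  // weights:      num_output x kernel_dim
  // col buffer:   kernel_dim x spatial_out
  // output:       num_output x spatial_out (reshaped to num_output x H_out x W_out)
  std::printf("weights %d x %d, col buffer %d x %d, output %d x %d\n",
              num_output, kernel_dim, kernel_dim, spatial_out,
              num_output, spatial_out);
}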
+*/ +template +class CuDNNConvolutionLayer : public ConvolutionLayer { + public: + explicit CuDNNConvolutionLayer(const LayerParameter& param) + : ConvolutionLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNConvolutionLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t* handle_; + cudaStream_t* stream_; + + // algorithms for forward and backwards convolutions + cudnnConvolutionFwdAlgo_t *fwd_algo_; + cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_; + cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_; + + vector bottom_descs_, top_descs_; + cudnnTensorDescriptor_t bias_desc_; + cudnnFilterDescriptor_t filter_desc_; + vector conv_descs_; + int bottom_offset_, top_offset_, bias_offset_; + + size_t *workspace_fwd_sizes_; + size_t *workspace_bwd_data_sizes_; + size_t *workspace_bwd_filter_sizes_; + size_t workspaceSizeInBytes; // size of underlying storage + void *workspaceData; // underlying storage + void **workspace; // aliases into workspaceData +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_CONV_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_lcn_layer.hpp b/include/caffe/layers/cudnn_lcn_layer.hpp new file mode 100644 index 00000000..74cf4775 --- /dev/null +++ b/include/caffe/layers/cudnn_lcn_layer.hpp @@ -0,0 +1,49 @@ +#ifndef CAFFE_CUDNN_LCN_LAYER_HPP_ +#define CAFFE_CUDNN_LCN_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/lrn_layer.hpp" +#include "caffe/layers/power_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +template +class CuDNNLCNLayer : public LRNLayer { + public: + explicit CuDNNLCNLayer(const LayerParameter& param) + : LRNLayer(param), handles_setup_(false), tempDataSize(0), + tempData1(NULL), tempData2(NULL) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNLCNLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnLRNDescriptor_t norm_desc_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; + + int size_, pre_pad_; + Dtype alpha_, beta_, k_; + + size_t tempDataSize; + void *tempData1, *tempData2; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_LCN_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_lrn_layer.hpp b/include/caffe/layers/cudnn_lrn_layer.hpp new file mode 100644 index 00000000..000ccc36 --- /dev/null +++ b/include/caffe/layers/cudnn_lrn_layer.hpp @@ -0,0 +1,44 @@ +#ifndef CAFFE_CUDNN_LRN_LAYER_HPP_ +#define CAFFE_CUDNN_LRN_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/lrn_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +template +class CuDNNLRNLayer : public LRNLayer { + public: + explicit CuDNNLRNLayer(const LayerParameter& param) + : LRNLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& 
bottom, + const vector*>& top); + virtual ~CuDNNLRNLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnLRNDescriptor_t norm_desc_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; + + int size_; + Dtype alpha_, beta_, k_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_LRN_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_pooling_layer.hpp b/include/caffe/layers/cudnn_pooling_layer.hpp new file mode 100644 index 00000000..6d0db47d --- /dev/null +++ b/include/caffe/layers/cudnn_pooling_layer.hpp @@ -0,0 +1,49 @@ +#ifndef CAFFE_CUDNN_POOLING_LAYER_HPP_ +#define CAFFE_CUDNN_POOLING_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/pooling_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/* + * @brief cuDNN implementation of PoolingLayer. + * Fallback to PoolingLayer for CPU mode. +*/ +template +class CuDNNPoolingLayer : public PoolingLayer { + public: + explicit CuDNNPoolingLayer(const LayerParameter& param) + : PoolingLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNPoolingLayer(); + // Currently, cuDNN does not support the extra top blob. + virtual inline int MinTopBlobs() const { return -1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; + cudnnPoolingDescriptor_t pooling_desc_; + cudnnPoolingMode_t mode_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_POOLING_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_relu_layer.hpp b/include/caffe/layers/cudnn_relu_layer.hpp new file mode 100644 index 00000000..e01f568a --- /dev/null +++ b/include/caffe/layers/cudnn_relu_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_CUDNN_RELU_LAYER_HPP_ +#define CAFFE_CUDNN_RELU_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/layers/relu_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/** + * @brief CuDNN acceleration of ReLULayer. 
+ */ +template +class CuDNNReLULayer : public ReLULayer { + public: + explicit CuDNNReLULayer(const LayerParameter& param) + : ReLULayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNReLULayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_RELU_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_sigmoid_layer.hpp b/include/caffe/layers/cudnn_sigmoid_layer.hpp new file mode 100644 index 00000000..9c597958 --- /dev/null +++ b/include/caffe/layers/cudnn_sigmoid_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_CUDNN_SIGMOID_LAYER_HPP_ +#define CAFFE_CUDNN_SIGMOID_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/layers/sigmoid_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/** + * @brief CuDNN acceleration of SigmoidLayer. + */ +template +class CuDNNSigmoidLayer : public SigmoidLayer { + public: + explicit CuDNNSigmoidLayer(const LayerParameter& param) + : SigmoidLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNSigmoidLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_SIGMOID_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_softmax_layer.hpp b/include/caffe/layers/cudnn_softmax_layer.hpp new file mode 100644 index 00000000..174368e4 --- /dev/null +++ b/include/caffe/layers/cudnn_softmax_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_CUDNN_SOFTMAX_LAYER_HPP_ +#define CAFFE_CUDNN_SOFTMAX_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/softmax_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/** + * @brief cuDNN implementation of SoftmaxLayer. + * Fallback to SoftmaxLayer for CPU mode. 
+ */ +template +class CuDNNSoftmaxLayer : public SoftmaxLayer { + public: + explicit CuDNNSoftmaxLayer(const LayerParameter& param) + : SoftmaxLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNSoftmaxLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_SOFTMAX_LAYER_HPP_ diff --git a/include/caffe/layers/cudnn_tanh_layer.hpp b/include/caffe/layers/cudnn_tanh_layer.hpp new file mode 100644 index 00000000..c0f0053f --- /dev/null +++ b/include/caffe/layers/cudnn_tanh_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_CUDNN_TANH_LAYER_HPP_ +#define CAFFE_CUDNN_TANH_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/layers/tanh_layer.hpp" + +namespace caffe { + +#ifdef USE_CUDNN +/** + * @brief CuDNN acceleration of TanHLayer. + */ +template +class CuDNNTanHLayer : public TanHLayer { + public: + explicit CuDNNTanHLayer(const LayerParameter& param) + : TanHLayer(param), handles_setup_(false) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual ~CuDNNTanHLayer(); + + protected: + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool handles_setup_; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; +}; +#endif + +} // namespace caffe + +#endif // CAFFE_CUDNN_TANH_LAYER_HPP_ diff --git a/include/caffe/layers/data_layer.hpp b/include/caffe/layers/data_layer.hpp new file mode 100644 index 00000000..6c361791 --- /dev/null +++ b/include/caffe/layers/data_layer.hpp @@ -0,0 +1,39 @@ +#ifndef CAFFE_DATA_LAYER_HPP_ +#define CAFFE_DATA_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/data_reader.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/db.hpp" + +namespace caffe { + +template +class DataLayer : public BasePrefetchingDataLayer { + public: + explicit DataLayer(const LayerParameter& param); + virtual ~DataLayer(); + virtual void DataLayerSetUp(const vector*>& bottom, + const vector*>& top); + // DataLayer uses DataReader instead for sharing for parallelism + virtual inline bool ShareInParallel() const { return false; } + virtual inline const char* type() const { return "Data"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlobs() const { return 2; } + + protected: + virtual void load_batch(Batch* batch); + + DataReader reader_; +}; + +} // namespace caffe + +#endif // CAFFE_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/deconv_layer.hpp b/include/caffe/layers/deconv_layer.hpp new file mode 100644 index 00000000..23ae887e --- /dev/null +++ 
b/include/caffe/layers/deconv_layer.hpp @@ -0,0 +1,51 @@ +#ifndef CAFFE_DECONV_LAYER_HPP_ +#define CAFFE_DECONV_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/base_conv_layer.hpp" + +namespace caffe { + +/** + * @brief Convolve the input with a bank of learned filters, and (optionally) + * add biases, treating filters and convolution parameters in the + * opposite sense as ConvolutionLayer. + * + * ConvolutionLayer computes each output value by dotting an input window with + * a filter; DeconvolutionLayer multiplies each input value by a filter + * elementwise, and sums over the resulting output windows. In other words, + * DeconvolutionLayer is ConvolutionLayer with the forward and backward passes + * reversed. DeconvolutionLayer reuses ConvolutionParameter for its + * parameters, but they take the opposite sense as in ConvolutionLayer (so + * padding is removed from the output rather than added to the input, and + * stride results in upsampling rather than downsampling). + */ +template +class DeconvolutionLayer : public BaseConvolutionLayer { + public: + explicit DeconvolutionLayer(const LayerParameter& param) + : BaseConvolutionLayer(param) {} + + virtual inline const char* type() const { return "Deconvolution"; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual inline bool reverse_dimensions() { return true; } + virtual void compute_output_shape(); +}; + +} // namespace caffe + +#endif // CAFFE_DECONV_LAYER_HPP_ diff --git a/include/caffe/layers/dropout_layer.hpp b/include/caffe/layers/dropout_layer.hpp new file mode 100644 index 00000000..e83143bc --- /dev/null +++ b/include/caffe/layers/dropout_layer.hpp @@ -0,0 +1,80 @@ +#ifndef CAFFE_DROPOUT_LAYER_HPP_ +#define CAFFE_DROPOUT_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief During training only, sets a random portion of @f$x@f$ to 0, adjusting + * the rest of the vector magnitude accordingly. + * + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ y = |x| @f$ + */ +template +class DropoutLayer : public NeuronLayer { + public: + /** + * @param param provides DropoutParameter dropout_param, + * with DropoutLayer options: + * - dropout_ratio (\b optional, default 0.5). + * Sets the probability @f$ p @f$ that any given unit is dropped. + */ + explicit DropoutLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Dropout"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs. 
At training time, we have @f$ + * y_{\mbox{train}} = \left\{ + * \begin{array}{ll} + * \frac{x}{1 - p} & \mbox{if } u > p \\ + * 0 & \mbox{otherwise} + * \end{array} \right. + * @f$, where @f$ u \sim U(0, 1)@f$ is generated independently for each + * input at each iteration. At test time, we simply have + * @f$ y_{\mbox{test}} = \mathbb{E}[y_{\mbox{train}}] = x @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$ + Blob rand_vec_; + /// the probability @f$ p @f$ of dropping any input + Dtype threshold_; + /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$ + Dtype scale_; + unsigned int uint_thres_; +}; + +} // namespace caffe + +#endif // CAFFE_DROPOUT_LAYER_HPP_ diff --git a/include/caffe/layers/dummy_data_layer.hpp b/include/caffe/layers/dummy_data_layer.hpp new file mode 100644 index 00000000..4180f1d0 --- /dev/null +++ b/include/caffe/layers/dummy_data_layer.hpp @@ -0,0 +1,49 @@ +#ifndef CAFFE_DUMMY_DATA_LAYER_HPP_ +#define CAFFE_DUMMY_DATA_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Provides data to the Net generated by a Filler. + * + * TODO(dox): thorough documentation for Forward and proto params. + */ +template +class DummyDataLayer : public Layer { + public: + explicit DummyDataLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } + // Data layers have no bottoms, so reshaping is trivial. + virtual void Reshape(const vector*>& bottom, + const vector*>& top) {} + + virtual inline const char* type() const { return "DummyData"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int MinTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + + vector > > fillers_; + vector refill_; +}; + +} // namespace caffe + +#endif // CAFFE_DUMMY_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/eltwise_layer.hpp b/include/caffe/layers/eltwise_layer.hpp new file mode 100644 index 00000000..091de834 --- /dev/null +++ b/include/caffe/layers/eltwise_layer.hpp @@ -0,0 +1,51 @@ +#ifndef CAFFE_ELTWISE_LAYER_HPP_ +#define CAFFE_ELTWISE_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Compute elementwise operations, such as product and sum, + * along multiple input Blobs. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
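For the elementwise operations mentioned above, the SUM case with per-bottom coefficients is a plain linear combination; a minimal sketch (equal-sized inputs assumed, illustrative names):

#include <vector>

// y[i] = sum_k coeff[k] * x_k[i] -- the SUM case of EltwiseLayer.
// (PROD multiplies the inputs elementwise instead, and MAX takes the
// elementwise maximum while recording which input won for backprop,
// which is what the max_idx_ member below is for.)
std::vector<float> EltwiseSum(const std::vector<std::vector<float> >& x,
                              const std::vector<float>& coeff) {
  std::vector<float> y(x[0].size(), 0.f);
  for (size_t k = 0; k < x.size(); ++k) {
    for (size_t i = 0; i < y.size(); ++i) {
      y[i] += coeff[k] * x[k][i];
    }
  }
  return y;
}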
+ */ +template +class EltwiseLayer : public Layer { + public: + explicit EltwiseLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Eltwise"; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + EltwiseParameter_EltwiseOp op_; + vector coeffs_; + Blob max_idx_; + + bool stable_prod_grad_; +}; + +} // namespace caffe + +#endif // CAFFE_ELTWISE_LAYER_HPP_ diff --git a/include/caffe/layers/embed_layer.hpp b/include/caffe/layers/embed_layer.hpp new file mode 100644 index 00000000..36137a62 --- /dev/null +++ b/include/caffe/layers/embed_layer.hpp @@ -0,0 +1,52 @@ +#ifndef CAFFE_EMBED_LAYER_HPP_ +#define CAFFE_EMBED_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief A layer for learning "embeddings" of one-hot vector input. + * Equivalent to an InnerProductLayer with one-hot vectors as input, but + * for efficiency the input is the "hot" index of each column itself. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class EmbedLayer : public Layer { + public: + explicit EmbedLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Embed"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int M_; + int K_; + int N_; + bool bias_term_; + Blob bias_multiplier_; +}; + +} // namespace caffe + +#endif // CAFFE_EMBED_LAYER_HPP_ diff --git a/include/caffe/layers/euclidean_loss_layer.hpp b/include/caffe/layers/euclidean_loss_layer.hpp new file mode 100644 index 00000000..f564569e --- /dev/null +++ b/include/caffe/layers/euclidean_loss_layer.hpp @@ -0,0 +1,107 @@ +#ifndef CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_ +#define CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the Euclidean (L2) loss @f$ + * E = \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n + * \right| \right|_2^2 @f$ for real-valued regression tasks. 
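The same loss written out as standalone code, with a worked value in the trailing comment (illustrative names, not the layer implementation):

#include <vector>

// E = 1/(2N) * sum_n || yhat_n - y_n ||_2^2, matching the
// EuclideanLossLayer comment above (N = number of examples).
float EuclideanLoss(const std::vector<std::vector<float> >& yhat,
                    const std::vector<std::vector<float> >& y) {
  float sum_sq = 0.f;
  for (size_t n = 0; n < yhat.size(); ++n) {
    for (size_t c = 0; c < yhat[n].size(); ++c) {
      const float diff = yhat[n][c] - y[n][c];
      sum_sq += diff * diff;
    }
  }
  return sum_sq / (2.f * yhat.size());
}
// Example: yhat = {{1, 2}}, y = {{0, 0}} gives E = (1 + 4) / 2 = 2.5.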
+ * + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$ + * -# @f$ (N \times C \times H \times W) @f$ + * the targets @f$ y \in [-\infty, +\infty]@f$ + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed Euclidean loss: @f$ E = + * \frac{1}{2n} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n + * \right| \right|_2^2 @f$ + * + * This can be used for least-squares regression tasks. An InnerProductLayer + * input to a EuclideanLossLayer exactly formulates a linear least squares + * regression problem. With non-zero weight decay the problem becomes one of + * ridge regression -- see src/caffe/test/test_sgd_solver.cpp for a concrete + * example wherein we check that the gradients computed for a Net with exactly + * this structure match hand-computed gradient formulas for ridge regression. + * + * (Note: Caffe, and SGD in general, is certainly \b not the best way to solve + * linear least squares problems! We use it only as an instructive example.) + */ +template +class EuclideanLossLayer : public LossLayer { + public: + explicit EuclideanLossLayer(const LayerParameter& param) + : LossLayer(param), diff_() {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "EuclideanLoss"; } + /** + * Unlike most loss layers, in the EuclideanLossLayer we can backpropagate + * to both inputs -- override to return true and always allow force_backward. + */ + virtual inline bool AllowForceBackward(const int bottom_index) const { + return true; + } + + protected: + /// @copydoc EuclideanLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the Euclidean error gradient w.r.t. the inputs. + * + * Unlike other children of LossLayer, EuclideanLossLayer \b can compute + * gradients with respect to the label inputs bottom[1] (but still only will + * if propagate_down[1] is set, due to being produced by learnable parameters + * or if force_backward is set). In fact, this layer is "commutative" -- the + * result is the same regardless of the order of the two bottoms. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. 
+ * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$\hat{y}@f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial \hat{y}} = + * \frac{1}{n} \sum\limits_{n=1}^N (\hat{y}_n - y_n) + * @f$ if propagate_down[0] + * -# @f$ (N \times C \times H \times W) @f$ + * the targets @f$y@f$; Backward fills their diff with gradients + * @f$ \frac{\partial E}{\partial y} = + * \frac{1}{n} \sum\limits_{n=1}^N (y_n - \hat{y}_n) + * @f$ if propagate_down[1] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob diff_; +}; + +} // namespace caffe + +#endif // CAFFE_EUCLIDEAN_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/exp_layer.hpp b/include/caffe/layers/exp_layer.hpp new file mode 100644 index 00000000..9fc8c396 --- /dev/null +++ b/include/caffe/layers/exp_layer.hpp @@ -0,0 +1,80 @@ +#ifndef CAFFE_EXP_LAYER_HPP_ +#define CAFFE_EXP_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Computes @f$ y = \gamma ^ {\alpha x + \beta} @f$, + * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, + * and base @f$ \gamma @f$. + */ +template +class ExpLayer : public NeuronLayer { + public: + /** + * @param param provides ExpParameter exp_param, + * with ExpLayer options: + * - scale (\b optional, default 1) the scale @f$ \alpha @f$ + * - shift (\b optional, default 0) the shift @f$ \beta @f$ + * - base (\b optional, default -1 for a value of @f$ e \approx 2.718 @f$) + * the base @f$ \gamma @f$ + */ + explicit ExpLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Exp"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \gamma ^ {\alpha x + \beta} + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the exp inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. 
+ * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = + * \frac{\partial E}{\partial y} y \alpha \log_e(gamma) + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Dtype inner_scale_, outer_scale_; +}; + +} // namespace caffe + +#endif // CAFFE_EXP_LAYER_HPP_ diff --git a/include/caffe/layers/filter_layer.hpp b/include/caffe/layers/filter_layer.hpp new file mode 100644 index 00000000..e040e666 --- /dev/null +++ b/include/caffe/layers/filter_layer.hpp @@ -0,0 +1,77 @@ +#ifndef CAFFE_FILTER_LAYER_HPP_ +#define CAFFE_FILTER_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Takes two+ Blobs, interprets last Blob as a selector and + * filter remaining Blobs accordingly with selector data (0 means that + * the corresponding item has to be filtered, non-zero means that corresponding + * item needs to stay). + */ +template +class FilterLayer : public Layer { + public: + explicit FilterLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Filter"; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MinTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 2+) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs to be filtered @f$ x_1 @f$ + * -# ... + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs to be filtered @f$ x_K @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the selector blob + * @param top output Blob vector (length 1+) + * -# @f$ (S \times C \times H \times W) @f$ () + * the filtered output @f$ x_1 @f$ + * where S is the number of items + * that haven't been filtered + * @f$ (S \times C \times H \times W) @f$ + * the filtered output @f$ x_K @f$ + * where S is the number of items + * that haven't been filtered + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the forwarded inputs. + * + * @param top output Blob vector (length 1+), providing the error gradient with + * respect to the outputs + * @param propagate_down see Layer::Backward. 
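+ *      (For reference -- an illustrative sketch, not part of this patch -- the
+ *      forward selection that these copied gradients correspond to is simply:
+ *      @code
+ *      int kept = 0;
+ *      for (int n = 0; n < num; ++n) {
+ *        if (selector[n] != 0) {
+ *          caffe_copy(item_size, bottom_data + n * item_size,
+ *                     top_data + kept * item_size);
+ *          ++kept;
+ *        }
+ *      }
+ *      @endcode
+ *      where item_size is the count of a single item, i.e. C*H*W, and
+ *      selector points at the last bottom Blob's data.)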
+ * @param bottom input Blob vector (length 2+), into which the top error + * gradient is copied + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool first_reshape_; + vector indices_to_forward_; +}; + +} // namespace caffe + +#endif // CAFFE_FILTER_LAYER_HPP_ diff --git a/include/caffe/layers/flatten_layer.hpp b/include/caffe/layers/flatten_layer.hpp new file mode 100644 index 00000000..e494bbb5 --- /dev/null +++ b/include/caffe/layers/flatten_layer.hpp @@ -0,0 +1,61 @@ +#ifndef CAFFE_FLATTEN_LAYER_HPP_ +#define CAFFE_FLATTEN_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Reshapes the input Blob into flat vectors. + * + * Note: because this layer does not change the input values -- merely the + * dimensions -- it can simply copy the input. The copy happens "virtually" + * (thus taking effectively 0 real time) by setting, in Forward, the data + * pointer of the top Blob to that of the bottom Blob (see Blob::ShareData), + * and in Backward, the diff pointer of the bottom Blob to that of the top Blob + * (see Blob::ShareDiff). + */ +template +class FlattenLayer : public Layer { + public: + explicit FlattenLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Flatten"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * @param bottom input Blob vector (length 2+) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs + * @param top output Blob vector (length 1) + * -# @f$ (N \times CHW \times 1 \times 1) @f$ + * the outputs -- i.e., the (virtually) copied, flattened inputs + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the concatenate inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length K), into which the top error + * gradient is (virtually) copied + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_FLATTEN_LAYER_HPP_ diff --git a/include/caffe/layers/hdf5_data_layer.hpp b/include/caffe/layers/hdf5_data_layer.hpp new file mode 100644 index 00000000..b04cf8e1 --- /dev/null +++ b/include/caffe/layers/hdf5_data_layer.hpp @@ -0,0 +1,62 @@ +#ifndef CAFFE_HDF5_DATA_LAYER_HPP_ +#define CAFFE_HDF5_DATA_LAYER_HPP_ + +#include "hdf5.h" + +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/base_data_layer.hpp" + +namespace caffe { + +/** + * @brief Provides data to the Net from HDF5 files. + * + * TODO(dox): thorough documentation for Forward and proto params. 
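+ *
+ * (Illustrative sketch, not part of this patch: the layer reads a source text
+ * file listing HDF5 files and, for each file, loads one dataset per top Blob,
+ * keyed by the top names -- roughly what LoadHDF5FileData() below does:
+ * @code
+ * hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+ * for (int i = 0; i < this->layer_param_.top_size(); ++i) {
+ *   hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
+ *   hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(),
+ *       1, INT_MAX, hdf_blobs_[i].get());  // min/max allowed dataset dims
+ * }
+ * H5Fclose(file_id);
+ * @endcode)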
+ */ +template +class HDF5DataLayer : public Layer { + public: + explicit HDF5DataLayer(const LayerParameter& param) + : Layer(param) {} + virtual ~HDF5DataLayer(); + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } + // Data layers have no bottoms, so reshaping is trivial. + virtual void Reshape(const vector*>& bottom, + const vector*>& top) {} + + virtual inline const char* type() const { return "HDF5Data"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int MinTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void LoadHDF5FileData(const char* filename); + + std::vector hdf_filenames_; + unsigned int num_files_; + unsigned int current_file_; + hsize_t current_row_; + std::vector > > hdf_blobs_; + std::vector data_permutation_; + std::vector file_permutation_; +}; + +} // namespace caffe + +#endif // CAFFE_HDF5_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/hdf5_output_layer.hpp b/include/caffe/layers/hdf5_output_layer.hpp new file mode 100644 index 00000000..487d08fc --- /dev/null +++ b/include/caffe/layers/hdf5_output_layer.hpp @@ -0,0 +1,64 @@ +#ifndef CAFFE_HDF5_OUTPUT_LAYER_HPP_ +#define CAFFE_HDF5_OUTPUT_LAYER_HPP_ + +#include "hdf5.h" + +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +#define HDF5_DATA_DATASET_NAME "data" +#define HDF5_DATA_LABEL_NAME "label" + +/** + * @brief Write blobs to disk as HDF5 files. + * + * TODO(dox): thorough documentation for Forward and proto params. + */ +template +class HDF5OutputLayer : public Layer { + public: + explicit HDF5OutputLayer(const LayerParameter& param) + : Layer(param), file_opened_(false) {} + virtual ~HDF5OutputLayer(); + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } + // Data layers have no bottoms, so reshaping is trivial. 
+ virtual void Reshape(const vector*>& bottom, + const vector*>& top) {} + + virtual inline const char* type() const { return "HDF5Output"; } + // TODO: no limit on the number of blobs + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 0; } + + inline std::string file_name() const { return file_name_; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void SaveBlobs(); + + bool file_opened_; + std::string file_name_; + hid_t file_id_; + Blob data_blob_; + Blob label_blob_; +}; + +} // namespace caffe + +#endif // CAFFE_HDF5_OUTPUT_LAYER_HPP_ diff --git a/include/caffe/layers/hinge_loss_layer.hpp b/include/caffe/layers/hinge_loss_layer.hpp new file mode 100644 index 00000000..54e42bd4 --- /dev/null +++ b/include/caffe/layers/hinge_loss_layer.hpp @@ -0,0 +1,104 @@ +#ifndef CAFFE_HINGE_LOSS_LAYER_HPP_ +#define CAFFE_HINGE_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the hinge loss for a one-of-many classification task. + * + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ t @f$, a Blob with values in + * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of + * the @f$ K = CHW @f$ classes. In an SVM, @f$ t @f$ is the result of + * taking the inner product @f$ X^T W @f$ of the D-dimensional features + * @f$ X \in \mathcal{R}^{D \times N} @f$ and the learned hyperplane + * parameters @f$ W \in \mathcal{R}^{D \times K} @f$, so a Net with just + * an InnerProductLayer (with num_output = D) providing predictions to a + * HingeLossLayer and no other learnable parameters or losses is + * equivalent to an SVM. + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels @f$ l @f$, an integer-valued Blob with values + * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ + * indicating the correct class label among the @f$ K @f$ classes + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed hinge loss: @f$ E = + * \frac{1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^K + * [\max(0, 1 - \delta\{l_n = k\} t_{nk})] ^ p + * @f$, for the @f$ L^p @f$ norm + * (defaults to @f$ p = 1 @f$, the L1 norm; L2 norm, as in L2-SVM, + * is also available), and @f$ + * \delta\{\mathrm{condition}\} = \left\{ + * \begin{array}{lr} + * 1 & \mbox{if condition} \\ + * -1 & \mbox{otherwise} + * \end{array} \right. + * @f$ + * + * In an SVM, @f$ t \in \mathcal{R}^{N \times K} @f$ is the result of taking + * the inner product @f$ X^T W @f$ of the features + * @f$ X \in \mathcal{R}^{D \times N} @f$ + * and the learned hyperplane parameters + * @f$ W \in \mathcal{R}^{D \times K} @f$. So, a Net with just an + * InnerProductLayer (with num_output = @f$k@f$) providing predictions to a + * HingeLossLayer is equivalent to an SVM (assuming it has no other learned + * outside the InnerProductLayer and no other losses outside the + * HingeLossLayer). 
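+ *
+ * (For reference, a rough sketch of the L1 forward pass described above --
+ * purely illustrative, reusing bottom[0]'s diff as scratch space, with the
+ * usual local shorthand for pointers and sizes:
+ * @code
+ * caffe_copy(count, bottom_data, bottom_diff);
+ * for (int i = 0; i < num; ++i) {
+ *   bottom_diff[i * dim + static_cast<int>(label[i])] *= -1;  // delta trick
+ *   for (int j = 0; j < dim; ++j) {
+ *     bottom_diff[i * dim + j] =
+ *         std::max(Dtype(0), 1 + bottom_diff[i * dim + j]);
+ *   }
+ * }
+ * top[0]->mutable_cpu_data()[0] = caffe_cpu_asum(count, bottom_diff) / num;
+ * @endcode)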
+ */ +template +class HingeLossLayer : public LossLayer { + public: + explicit HingeLossLayer(const LayerParameter& param) + : LossLayer(param) {} + + virtual inline const char* type() const { return "HingeLoss"; } + + protected: + /// @copydoc HingeLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the hinge loss error gradient w.r.t. the predictions. + * + * Gradients cannot be computed with respect to the label inputs (bottom[1]), + * so this method ignores bottom[1] and requires !propagate_down[1], crashing + * if propagate_down[1] is set. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * propagate_down[1] must be false as we can't compute gradients with + * respect to the labels. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$t@f$; Backward computes diff + * @f$ \frac{\partial E}{\partial t} @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels -- ignored as we can't compute their error gradients + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + + +} // namespace caffe + +#endif // CAFFE_HINGE_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/im2col_layer.hpp b/include/caffe/layers/im2col_layer.hpp new file mode 100644 index 00000000..1d3b2eb6 --- /dev/null +++ b/include/caffe/layers/im2col_layer.hpp @@ -0,0 +1,63 @@ +#ifndef CAFFE_IM2COL_LAYER_HPP_ +#define CAFFE_IM2COL_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief A helper for image operations that rearranges image regions into + * column vectors. Used by ConvolutionLayer to perform convolution + * by matrix multiplication. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class Im2colLayer : public Layer { + public: + explicit Im2colLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Im2col"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief The spatial dimensions of a filter kernel. + Blob kernel_shape_; + /// @brief The spatial dimensions of the stride. + Blob stride_; + /// @brief The spatial dimensions of the padding. 
+ Blob pad_; + + int num_spatial_axes_; + int bottom_dim_; + int top_dim_; + + int channel_axis_; + int num_; + int channels_; + + bool force_nd_im2col_; +}; + +} // namespace caffe + +#endif // CAFFE_IM2COL_LAYER_HPP_ diff --git a/include/caffe/layers/image_data_layer.hpp b/include/caffe/layers/image_data_layer.hpp new file mode 100644 index 00000000..a0d3384e --- /dev/null +++ b/include/caffe/layers/image_data_layer.hpp @@ -0,0 +1,47 @@ +#ifndef CAFFE_IMAGE_DATA_LAYER_HPP_ +#define CAFFE_IMAGE_DATA_LAYER_HPP_ + +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Provides data to the Net from image files. + * + * TODO(dox): thorough documentation for Forward and proto params. + */ +template +class ImageDataLayer : public BasePrefetchingDataLayer { + public: + explicit ImageDataLayer(const LayerParameter& param) + : BasePrefetchingDataLayer(param) {} + virtual ~ImageDataLayer(); + virtual void DataLayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "ImageData"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int ExactNumTopBlobs() const { return 2; } + + protected: + shared_ptr prefetch_rng_; + virtual void ShuffleImages(); + virtual void load_batch(Batch* batch); + + vector > lines_; + int lines_id_; +}; + + +} // namespace caffe + +#endif // CAFFE_IMAGE_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/infogain_loss_layer.hpp b/include/caffe/layers/infogain_loss_layer.hpp new file mode 100644 index 00000000..633f339a --- /dev/null +++ b/include/caffe/layers/infogain_loss_layer.hpp @@ -0,0 +1,110 @@ +#ifndef CAFFE_INFOGAIN_LOSS_LAYER_HPP_ +#define CAFFE_INFOGAIN_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief A generalization of MultinomialLogisticLossLayer that takes an + * "information gain" (infogain) matrix specifying the "value" of all label + * pairs. + * + * Equivalent to the MultinomialLogisticLossLayer if the infogain matrix is the + * identity. + * + * @param bottom input Blob vector (length 2-3) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ \hat{p} @f$, a Blob with values in + * @f$ [0, 1] @f$ indicating the predicted probability of each of the + * @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$ + * should sum to 1 as in a probability distribution: @f$ + * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$. + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels @f$ l @f$, an integer-valued Blob with values + * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ + * indicating the correct class label among the @f$ K @f$ classes + * -# @f$ (1 \times 1 \times K \times K) @f$ + * (\b optional) the infogain matrix @f$ H @f$. This must be provided as + * the third bottom blob input if not provided as the infogain_mat in the + * InfogainLossParameter. If @f$ H = I @f$, this layer is equivalent to the + * MultinomialLogisticLossLayer. 
+ * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed infogain multinomial logistic loss: @f$ E = + * \frac{-1}{N} \sum\limits_{n=1}^N H_{l_n} \log(\hat{p}_n) = + * \frac{-1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^{K} H_{l_n,k} + * \log(\hat{p}_{n,k}) + * @f$, where @f$ H_{l_n} @f$ denotes row @f$l_n@f$ of @f$H@f$. + */ +template +class InfogainLossLayer : public LossLayer { + public: + explicit InfogainLossLayer(const LayerParameter& param) + : LossLayer(param), infogain_() {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + // InfogainLossLayer takes 2-3 bottom Blobs; if there are 3 the third should + // be the infogain matrix. (Otherwise the infogain matrix is loaded from a + // file specified by LayerParameter.) + virtual inline int ExactNumBottomBlobs() const { return -1; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MaxBottomBlobs() const { return 3; } + + virtual inline const char* type() const { return "InfogainLoss"; } + + protected: + /// @copydoc InfogainLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the infogain loss error gradient w.r.t. the predictions. + * + * Gradients cannot be computed with respect to the label inputs (bottom[1]), + * so this method ignores bottom[1] and requires !propagate_down[1], crashing + * if propagate_down[1] is set. (The same applies to the infogain matrix, if + * provided as bottom[2] rather than in the layer_param.) + * + * @param top output Blob vector (length 1), providing the error gradient + * with respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * propagate_down[1] must be false as we can't compute gradients with + * respect to the labels (similarly for propagate_down[2] and the + * infogain matrix, if provided as bottom[2]) + * @param bottom input Blob vector (length 2-3) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ \hat{p} @f$; Backward computes diff + * @f$ \frac{\partial E}{\partial \hat{p}} @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels -- ignored as we can't compute their error gradients + * -# @f$ (1 \times 1 \times K \times K) @f$ + * (\b optional) the information gain matrix -- ignored as its error + * gradient computation is not implemented. 
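+ *
+ * (Illustrative sketch of the prediction gradient described above -- not part
+ * of this patch; probabilities are clipped at kLOG_THRESHOLD as elsewhere in
+ * the loss layers, and prob_data / infogain_mat / label stand for the usual
+ * local pointers:
+ * @code
+ * const Dtype scale = -top[0]->cpu_diff()[0] / num;
+ * for (int i = 0; i < num; ++i) {
+ *   const int l = static_cast<int>(label[i]);
+ *   for (int j = 0; j < dim; ++j) {
+ *     const Dtype p = std::max(prob_data[i * dim + j], Dtype(kLOG_THRESHOLD));
+ *     bottom_diff[i * dim + j] = scale * infogain_mat[l * dim + j] / p;
+ *   }
+ * }
+ * @endcode)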
+ */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob infogain_; +}; + +} // namespace caffe + +#endif // CAFFE_INFOGAIN_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/inner_product_layer.hpp b/include/caffe/layers/inner_product_layer.hpp new file mode 100644 index 00000000..250576a4 --- /dev/null +++ b/include/caffe/layers/inner_product_layer.hpp @@ -0,0 +1,51 @@ +#ifndef CAFFE_INNER_PRODUCT_LAYER_HPP_ +#define CAFFE_INNER_PRODUCT_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Also known as a "fully-connected" layer, computes an inner product + * with a set of learned weights, and (optionally) adds biases. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class InnerProductLayer : public Layer { + public: + explicit InnerProductLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "InnerProduct"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int M_; + int K_; + int N_; + bool bias_term_; + Blob bias_multiplier_; +}; + +} // namespace caffe + +#endif // CAFFE_INNER_PRODUCT_LAYER_HPP_ diff --git a/include/caffe/layers/log_layer.hpp b/include/caffe/layers/log_layer.hpp new file mode 100644 index 00000000..7d037d2b --- /dev/null +++ b/include/caffe/layers/log_layer.hpp @@ -0,0 +1,82 @@ +#ifndef CAFFE_LOG_LAYER_HPP_ +#define CAFFE_LOG_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Computes @f$ y = log_{\gamma}(\alpha x + \beta) @f$, + * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, + * and base @f$ \gamma @f$. 
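+ *
+ * (For reference -- an illustrative sketch, not part of this patch -- the
+ * general-case forward pass follows log_gamma(z) = ln(z) / ln(gamma), using
+ * Caffe's math helpers and the usual local shorthand:
+ * @code
+ * caffe_cpu_scale(count, input_scale_, bottom_data, top_data);  // alpha * x
+ * caffe_add_scalar(count, input_shift_, top_data);              // + beta
+ * caffe_log(count, top_data, top_data);                         // ln(.)
+ * caffe_scal(count, base_scale_, top_data);                     // * 1/ln(gamma)
+ * @endcode
+ * where input_scale_, input_shift_, and base_scale_ are the members declared
+ * below.)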
+ */ +template +class LogLayer : public NeuronLayer { + public: + /** + * @param param provides LogParameter log_param, + * with LogLayer options: + * - scale (\b optional, default 1) the scale @f$ \alpha @f$ + * - shift (\b optional, default 0) the shift @f$ \beta @f$ + * - base (\b optional, default -1 for a value of @f$ e \approx 2.718 @f$) + * the base @f$ \gamma @f$ + */ + explicit LogLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Log"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = log_{\gamma}(\alpha x + \beta) + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the exp inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = + * \frac{\partial E}{\partial y} y \alpha \log_e(gamma) + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Dtype base_scale_; + Dtype input_scale_, input_shift_; + Dtype backward_num_scale_; +}; + +} // namespace caffe + +#endif // CAFFE_LOG_LAYER_HPP_ diff --git a/include/caffe/layers/loss_layer.hpp b/include/caffe/layers/loss_layer.hpp new file mode 100644 index 00000000..dbdf612c --- /dev/null +++ b/include/caffe/layers/loss_layer.hpp @@ -0,0 +1,53 @@ +#ifndef CAFFE_LOSS_LAYER_HPP_ +#define CAFFE_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +const float kLOG_THRESHOLD = 1e-20; + +/** + * @brief An interface for Layer%s that take two Blob%s as input -- usually + * (1) predictions and (2) ground-truth labels -- and output a + * singleton Blob representing the loss. + * + * LossLayers are typically only capable of backpropagating to their first input + * -- the predictions. + */ +template +class LossLayer : public Layer { + public: + explicit LossLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp( + const vector*>& bottom, const vector*>& top); + virtual void Reshape( + const vector*>& bottom, const vector*>& top); + + virtual inline int ExactNumBottomBlobs() const { return 2; } + + /** + * @brief For convenience and backwards compatibility, instruct the Net to + * automatically allocate a single top Blob for LossLayers, into which + * they output their singleton loss, (even if the user didn't specify + * one in the prototxt, etc.). 
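+ *
+ * (For illustration -- not part of this patch -- LayerSetUp pairs with this
+ * by giving the singleton output a default loss weight of one when the user
+ * did not specify any, roughly:
+ * @code
+ * if (this->layer_param_.loss_weight_size() == 0) {
+ *   this->layer_param_.add_loss_weight(Dtype(1));
+ * }
+ * @endcode)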
+ */ + virtual inline bool AutoTopBlobs() const { return true; } + virtual inline int ExactNumTopBlobs() const { return 1; } + /** + * We usually cannot backpropagate to the labels; ignore force_backward for + * these inputs. + */ + virtual inline bool AllowForceBackward(const int bottom_index) const { + return bottom_index != 1; + } +}; + +} // namespace caffe + +#endif // CAFFE_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/lrn_layer.hpp b/include/caffe/layers/lrn_layer.hpp new file mode 100644 index 00000000..06cf71a9 --- /dev/null +++ b/include/caffe/layers/lrn_layer.hpp @@ -0,0 +1,94 @@ +#ifndef CAFFE_LRN_LAYER_HPP_ +#define CAFFE_LRN_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/eltwise_layer.hpp" +#include "caffe/layers/pooling_layer.hpp" +#include "caffe/layers/power_layer.hpp" +#include "caffe/layers/split_layer.hpp" + +namespace caffe { + +/** + * @brief Normalize the input in a local region across or within feature maps. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class LRNLayer : public Layer { + public: + explicit LRNLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "LRN"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + virtual void CrossChannelForward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void CrossChannelForward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void WithinChannelForward(const vector*>& bottom, + const vector*>& top); + virtual void CrossChannelBackward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void CrossChannelBackward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void WithinChannelBackward(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int size_; + int pre_pad_; + Dtype alpha_; + Dtype beta_; + Dtype k_; + int num_; + int channels_; + int height_; + int width_; + + // Fields used for normalization ACROSS_CHANNELS + // scale_ stores the intermediate summing results + Blob scale_; + + // Fields used for normalization WITHIN_CHANNEL + shared_ptr > split_layer_; + vector*> split_top_vec_; + shared_ptr > square_layer_; + Blob square_input_; + Blob square_output_; + vector*> square_bottom_vec_; + vector*> square_top_vec_; + shared_ptr > pool_layer_; + Blob pool_output_; + vector*> pool_top_vec_; + shared_ptr > power_layer_; + Blob power_output_; + vector*> power_top_vec_; + shared_ptr > product_layer_; + Blob product_input_; + vector*> product_bottom_vec_; +}; + +} // namespace caffe + +#endif // CAFFE_LRN_LAYER_HPP_ diff --git a/include/caffe/layers/memory_data_layer.hpp b/include/caffe/layers/memory_data_layer.hpp new file mode 100644 index 00000000..8abcc8c1 --- /dev/null +++ 
b/include/caffe/layers/memory_data_layer.hpp @@ -0,0 +1,63 @@ +#ifndef CAFFE_MEMORY_DATA_LAYER_HPP_ +#define CAFFE_MEMORY_DATA_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/base_data_layer.hpp" + +namespace caffe { + +/** + * @brief Provides data to the Net from memory. + * + * TODO(dox): thorough documentation for Forward and proto params. + */ +template +class MemoryDataLayer : public BaseDataLayer { + public: + explicit MemoryDataLayer(const LayerParameter& param) + : BaseDataLayer(param), has_new_data_(false) {} + virtual void DataLayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "MemoryData"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int ExactNumTopBlobs() const { return 2; } + + virtual void AddDatumVector(const vector& datum_vector); +#ifdef USE_OPENCV + virtual void AddMatVector(const vector& mat_vector, + const vector& labels); +#endif // USE_OPENCV + + // Reset should accept const pointers, but can't, because the memory + // will be given to Blob, which is mutable + void Reset(Dtype* data, Dtype* label, int n); + void set_batch_size(int new_size); + + int batch_size() { return batch_size_; } + int channels() { return channels_; } + int height() { return height_; } + int width() { return width_; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + int batch_size_, channels_, height_, width_, size_; + Dtype* data_; + Dtype* labels_; + int n_; + size_t pos_; + Blob added_data_; + Blob added_label_; + bool has_new_data_; +}; + +} // namespace caffe + +#endif // CAFFE_MEMORY_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/multinomial_logistic_loss_layer.hpp b/include/caffe/layers/multinomial_logistic_loss_layer.hpp new file mode 100644 index 00000000..3977cf9e --- /dev/null +++ b/include/caffe/layers/multinomial_logistic_loss_layer.hpp @@ -0,0 +1,92 @@ +#ifndef CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_ +#define CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the multinomial logistic loss for a one-of-many + * classification task, directly taking a predicted probability + * distribution as input. + * + * When predictions are not already a probability distribution, you should + * instead use the SoftmaxWithLossLayer, which maps predictions to a + * distribution using the SoftmaxLayer, before computing the multinomial + * logistic loss. The SoftmaxWithLossLayer should be preferred over separate + * SoftmaxLayer + MultinomialLogisticLossLayer + * as its gradient computation is more numerically stable. + * + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ \hat{p} @f$, a Blob with values in + * @f$ [0, 1] @f$ indicating the predicted probability of each of the + * @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$ + * should sum to 1 as in a probability distribution: @f$ + * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$. 
+ * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels @f$ l @f$, an integer-valued Blob with values + * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ + * indicating the correct class label among the @f$ K @f$ classes + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed multinomial logistic loss: @f$ E = + * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) + * @f$ + */ +template +class MultinomialLogisticLossLayer : public LossLayer { + public: + explicit MultinomialLogisticLossLayer(const LayerParameter& param) + : LossLayer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "MultinomialLogisticLoss"; } + + protected: + /// @copydoc MultinomialLogisticLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the multinomial logistic loss error gradient w.r.t. the + * predictions. + * + * Gradients cannot be computed with respect to the label inputs (bottom[1]), + * so this method ignores bottom[1] and requires !propagate_down[1], crashing + * if propagate_down[1] is set. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * propagate_down[1] must be false as we can't compute gradients with + * respect to the labels. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ \hat{p} @f$; Backward computes diff + * @f$ \frac{\partial E}{\partial \hat{p}} @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels -- ignored as we can't compute their error gradients + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_MULTINOMIAL_LOGISTIC_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/mvn_layer.hpp b/include/caffe/layers/mvn_layer.hpp new file mode 100644 index 00000000..3a235cec --- /dev/null +++ b/include/caffe/layers/mvn_layer.hpp @@ -0,0 +1,48 @@ +#ifndef CAFFE_MVN_LAYER_HPP_ +#define CAFFE_MVN_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Normalizes the input to have 0-mean and/or unit (1) variance. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
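+ *
+ * (Illustrative sketch, not part of this patch: with the default settings --
+ * per-sample, per-channel statistics and variance normalization -- each slice
+ * of D elements x is mapped to an output slice y as follows:
+ * @code
+ * Dtype mean = 0, var = 0;
+ * for (int i = 0; i < D; ++i) { mean += x[i]; }
+ * mean /= D;
+ * for (int i = 0; i < D; ++i) { var += (x[i] - mean) * (x[i] - mean); }
+ * var /= D;
+ * for (int i = 0; i < D; ++i) {
+ *   y[i] = (x[i] - mean) / (std::sqrt(var) + eps_);
+ * }
+ * @endcode
+ * The actual implementation batches these reductions with BLAS calls via
+ * sum_multiplier_, as noted below.)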
+ */ +template +class MVNLayer : public Layer { + public: + explicit MVNLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "MVN"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob mean_, variance_, temp_; + + /// sum_multiplier is used to carry out sum using BLAS + Blob sum_multiplier_; + Dtype eps_; +}; + +} // namespace caffe + +#endif // CAFFE_MVN_LAYER_HPP_ diff --git a/include/caffe/layers/neuron_layer.hpp b/include/caffe/layers/neuron_layer.hpp new file mode 100644 index 00000000..10c108ce --- /dev/null +++ b/include/caffe/layers/neuron_layer.hpp @@ -0,0 +1,32 @@ +#ifndef CAFFE_NEURON_LAYER_HPP_ +#define CAFFE_NEURON_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief An interface for layers that take one blob as input (@f$ x @f$) + * and produce one equally-sized blob as output (@f$ y @f$), where + * each element of the output depends only on the corresponding input + * element. + */ +template +class NeuronLayer : public Layer { + public: + explicit NeuronLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } +}; + +} // namespace caffe + +#endif // CAFFE_NEURON_LAYER_HPP_ diff --git a/include/caffe/layers/pooling_layer.hpp b/include/caffe/layers/pooling_layer.hpp new file mode 100644 index 00000000..f4d6803b --- /dev/null +++ b/include/caffe/layers/pooling_layer.hpp @@ -0,0 +1,60 @@ +#ifndef CAFFE_POOLING_LAYER_HPP_ +#define CAFFE_POOLING_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Pools the input image by taking the max, average, etc. within regions. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class PoolingLayer : public Layer { + public: + explicit PoolingLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Pooling"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + // MAX POOL layers can output an extra top blob for the mask; + // others can only output the pooled inputs. + virtual inline int MaxTopBlobs() const { + return (this->layer_param_.pooling_param().pool() == + PoolingParameter_PoolMethod_MAX) ? 
2 : 1; + } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int kernel_h_, kernel_w_; + int stride_h_, stride_w_; + int pad_h_, pad_w_; + int channels_; + int height_, width_; + int pooled_height_, pooled_width_; + bool global_pooling_; + Blob rand_idx_; + Blob max_idx_; +}; + +} // namespace caffe + +#endif // CAFFE_POOLING_LAYER_HPP_ diff --git a/include/caffe/layers/power_layer.hpp b/include/caffe/layers/power_layer.hpp new file mode 100644 index 00000000..6ecbafca --- /dev/null +++ b/include/caffe/layers/power_layer.hpp @@ -0,0 +1,89 @@ +#ifndef CAFFE_POWER_LAYER_HPP_ +#define CAFFE_POWER_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Computes @f$ y = (\alpha x + \beta) ^ \gamma @f$, + * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, + * and power @f$ \gamma @f$. + */ +template +class PowerLayer : public NeuronLayer { + public: + /** + * @param param provides PowerParameter power_param, + * with PowerLayer options: + * - scale (\b optional, default 1) the scale @f$ \alpha @f$ + * - shift (\b optional, default 0) the shift @f$ \beta @f$ + * - power (\b optional, default 1) the power @f$ \gamma @f$ + */ + explicit PowerLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Power"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = (\alpha x + \beta) ^ \gamma + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the power inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. 
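+ *      (For reference -- an illustrative sketch, not part of this patch -- the
+ *      forward pass behind these gradients is simply, with the usual local
+ *      shorthand for pointers and counts:
+ *      @code
+ *      caffe_copy(count, bottom_data, top_data);
+ *      if (scale_ != Dtype(1)) { caffe_scal(count, scale_, top_data); }
+ *      if (shift_ != Dtype(0)) { caffe_add_scalar(count, shift_, top_data); }
+ *      if (power_ != Dtype(1)) { caffe_powx(count, top_data, power_, top_data); }
+ *      @endcode
+ *      using the members declared below.)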
+ * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = + * \frac{\partial E}{\partial y} + * \alpha \gamma (\alpha x + \beta) ^ {\gamma - 1} = + * \frac{\partial E}{\partial y} + * \frac{\alpha \gamma y}{\alpha x + \beta} + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief @f$ \gamma @f$ from layer_param_.power_param() + Dtype power_; + /// @brief @f$ \alpha @f$ from layer_param_.power_param() + Dtype scale_; + /// @brief @f$ \beta @f$ from layer_param_.power_param() + Dtype shift_; + /// @brief Result of @f$ \alpha \gamma @f$ + Dtype diff_scale_; +}; + +} // namespace caffe + +#endif // CAFFE_POWER_LAYER_HPP_ diff --git a/include/caffe/layers/prelu_layer.hpp b/include/caffe/layers/prelu_layer.hpp new file mode 100644 index 00000000..3ddfb484 --- /dev/null +++ b/include/caffe/layers/prelu_layer.hpp @@ -0,0 +1,101 @@ +#ifndef CAFFE_PRELU_LAYER_HPP_ +#define CAFFE_PRELU_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Parameterized Rectified Linear Unit non-linearity @f$ + * y_i = \max(0, x_i) + a_i \min(0, x_i) + * @f$. The differences from ReLULayer are 1) negative slopes are + * learnable though backprop and 2) negative slopes can vary across + * channels. The number of axes of input blob should be greater than or + * equal to 2. The 1st axis (0-based) is seen as channels. + */ +template +class PReLULayer : public NeuronLayer { + public: + /** + * @param param provides PReLUParameter prelu_param, + * with PReLULayer options: + * - filler (\b optional, FillerParameter, + * default {'type': constant 'value':0.25}). + * - channel_shared (\b optional, default false). + * negative slopes are shared across channels. + */ + explicit PReLULayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "PReLU"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the computed outputs for each channel @f$i@f$ @f$ + * y_i = \max(0, x_i) + a_i \min(0, x_i) + * @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the PReLU inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times ...) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) 
@f$ + * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their + * diff with gradients @f$ + * \frac{\partial E}{\partial x_i} = \left\{ + * \begin{array}{lr} + * a_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ + * \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i > 0 + * \end{array} \right. + * @f$. + * If param_propagate_down_[0] is true, it fills the diff with gradients + * @f$ + * \frac{\partial E}{\partial a_i} = \left\{ + * \begin{array}{lr} + * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ + * 0 & \mathrm{if} \; x_i > 0 + * \end{array} \right. + * @f$. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + bool channel_shared_; + Blob multiplier_; // dot multiplier for backward computation of params + Blob backward_buff_; // temporary buffer for backward computation + Blob bottom_memory_; // memory for in-place computation +}; + +} // namespace caffe + +#endif // CAFFE_PRELU_LAYER_HPP_ diff --git a/include/caffe/python_layer.hpp b/include/caffe/layers/python_layer.hpp similarity index 100% rename from include/caffe/python_layer.hpp rename to include/caffe/layers/python_layer.hpp diff --git a/include/caffe/layers/reduction_layer.hpp b/include/caffe/layers/reduction_layer.hpp new file mode 100644 index 00000000..804a495b --- /dev/null +++ b/include/caffe/layers/reduction_layer.hpp @@ -0,0 +1,59 @@ +#ifndef CAFFE_REDUCTION_LAYER_HPP_ +#define CAFFE_REDUCTION_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Compute "reductions" -- operations that return a scalar output Blob + * for an input Blob of arbitrary size, such as the sum, absolute sum, + * and sum of squares. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
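+ *
+ * (Illustrative sketch, not part of this patch: each of the num_ output
+ * values reduces dim_ consecutive inputs, with sum_multiplier_ -- a vector of
+ * ones, here called ones -- used for plain sums; roughly:
+ * @code
+ * for (int i = 0; i < num_; ++i) {
+ *   Dtype v = 0;
+ *   switch (op_) {
+ *     case ReductionParameter_ReductionOp_SUM:
+ *       v = caffe_cpu_dot(dim_, ones, bottom_data); break;
+ *     case ReductionParameter_ReductionOp_ASUM:
+ *       v = caffe_cpu_asum(dim_, bottom_data); break;
+ *     case ReductionParameter_ReductionOp_SUMSQ:
+ *       v = caffe_cpu_dot(dim_, bottom_data, bottom_data); break;
+ *     case ReductionParameter_ReductionOp_MEAN:
+ *       v = caffe_cpu_dot(dim_, ones, bottom_data) / dim_; break;
+ *   }
+ *   top_data[i] = coeff_ * v;
+ *   bottom_data += dim_;
+ * }
+ * @endcode)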
+ */ +template +class ReductionLayer : public Layer { + public: + explicit ReductionLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Reduction"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief the reduction operation performed by the layer + ReductionParameter_ReductionOp op_; + /// @brief a scalar coefficient applied to all outputs + Dtype coeff_; + /// @brief the index of the first input axis to reduce + int axis_; + /// @brief the number of reductions performed + int num_; + /// @brief the input size of each reduction + int dim_; + /// @brief a helper Blob used for summation (op_ == SUM) + Blob sum_multiplier_; +}; + +} // namespace caffe + +#endif // CAFFE_REDUCTION_LAYER_HPP_ diff --git a/include/caffe/layers/relu_layer.hpp b/include/caffe/layers/relu_layer.hpp new file mode 100644 index 00000000..d7a73f7a --- /dev/null +++ b/include/caffe/layers/relu_layer.hpp @@ -0,0 +1,85 @@ +#ifndef CAFFE_RELU_LAYER_HPP_ +#define CAFFE_RELU_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Rectified Linear Unit non-linearity @f$ y = \max(0, x) @f$. + * The simple max is fast to compute, and the function does not saturate. + */ +template +class ReLULayer : public NeuronLayer { + public: + /** + * @param param provides ReLUParameter relu_param, + * with ReLULayer options: + * - negative_slope (\b optional, default 0). + * the value @f$ \nu @f$ by which negative values are multiplied. + */ + explicit ReLULayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual inline const char* type() const { return "ReLU"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \max(0, x) + * @f$ by default. If a non-zero negative_slope @f$ \nu @f$ is provided, + * the computed outputs are @f$ y = \max(0, x) + \nu \min(0, x) @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the ReLU inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. 
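+ *      (For reference -- an illustrative sketch, not part of this patch -- the
+ *      forward pass behind these gradients is just, with negative_slope read
+ *      from relu_param and the usual local shorthand:
+ *      @code
+ *      for (int i = 0; i < count; ++i) {
+ *        top_data[i] = std::max(bottom_data[i], Dtype(0))
+ *            + negative_slope * std::min(bottom_data[i], Dtype(0));
+ *      }
+ *      @endcode)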
+ * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = \left\{ + * \begin{array}{lr} + * 0 & \mathrm{if} \; x \le 0 \\ + * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 + * \end{array} \right. + * @f$ if propagate_down[0], by default. + * If a non-zero negative_slope @f$ \nu @f$ is provided, + * the computed gradients are @f$ + * \frac{\partial E}{\partial x} = \left\{ + * \begin{array}{lr} + * \nu \frac{\partial E}{\partial y} & \mathrm{if} \; x \le 0 \\ + * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 + * \end{array} \right. + * @f$. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_RELU_LAYER_HPP_ diff --git a/include/caffe/layers/reshape_layer.hpp b/include/caffe/layers/reshape_layer.hpp new file mode 100644 index 00000000..d11e0638 --- /dev/null +++ b/include/caffe/layers/reshape_layer.hpp @@ -0,0 +1,52 @@ +#ifndef CAFFE_XXX_LAYER_HPP_ +#define CAFFE_XXX_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/* + * @brief Reshapes the input Blob into an arbitrary-sized output Blob. + * + * Note: similarly to FlattenLayer, this layer does not change the input values + * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). + */ +template +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Reshape"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top) {} + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top) {} + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} + + /// @brief vector of axes indices whose dimensions we'll copy from the bottom + vector copy_axes_; + /// @brief the index of the axis whose dimension we infer, or -1 if none + int inferred_axis_; + /// @brief the product of the "constant" output dimensions + int constant_count_; +}; + +} // namespace caffe + +#endif // CAFFE_XXX_LAYER_HPP_ diff --git a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp new file mode 100644 index 00000000..598dca5f --- /dev/null +++ b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp @@ -0,0 +1,110 @@ +#ifndef CAFFE_SIGMOID_CROSS_ENTROPY_LOSS_LAYER_HPP_ +#define CAFFE_SIGMOID_CROSS_ENTROPY_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" +#include "caffe/layers/sigmoid_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the cross-entropy (logistic) loss @f$ + * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ + * p_n \log \hat{p}_n + + * (1 - p_n) 
\log(1 - \hat{p}_n) + * \right] + * @f$, often used for predicting targets interpreted as probabilities. + * + * This layer is implemented rather than separate + * SigmoidLayer + CrossEntropyLayer + * as its gradient computation is more numerically stable. + * At test time, this layer can be replaced simply by a SigmoidLayer. + * + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the scores @f$ x \in [-\infty, +\infty]@f$, + * which this layer maps to probability predictions + * @f$ \hat{p}_n = \sigma(x_n) \in [0, 1] @f$ + * using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer). + * -# @f$ (N \times C \times H \times W) @f$ + * the targets @f$ y \in [0, 1] @f$ + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed cross-entropy loss: @f$ + * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ + * p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) + * \right] + * @f$ + */ +template +class SigmoidCrossEntropyLossLayer : public LossLayer { + public: + explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param) + : LossLayer(param), + sigmoid_layer_(new SigmoidLayer(param)), + sigmoid_output_(new Blob()) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "SigmoidCrossEntropyLoss"; } + + protected: + /// @copydoc SigmoidCrossEntropyLossLayer + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the + * predictions. + * + * Gradients cannot be computed with respect to the target inputs (bottom[1]), + * so this method ignores bottom[1] and requires !propagate_down[1], crashing + * if propagate_down[1] is set. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * propagate_down[1] must be false as gradient computation with respect + * to the targets is not implemented. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$x@f$; Backward computes diff + * @f$ \frac{\partial E}{\partial x} = + * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) + * @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels -- ignored as we can't compute their error gradients + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// The internal SigmoidLayer used to map predictions to probabilities. + shared_ptr > sigmoid_layer_; + /// sigmoid_output stores the output of the SigmoidLayer. 
+ shared_ptr > sigmoid_output_; + /// bottom vector holder to call the underlying SigmoidLayer::Forward + vector*> sigmoid_bottom_vec_; + /// top vector holder to call the underlying SigmoidLayer::Forward + vector*> sigmoid_top_vec_; +}; + +} // namespace caffe + +#endif // CAFFE_SIGMOID_CROSS_ENTROPY_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/sigmoid_layer.hpp b/include/caffe/layers/sigmoid_layer.hpp new file mode 100644 index 00000000..ac0f6927 --- /dev/null +++ b/include/caffe/layers/sigmoid_layer.hpp @@ -0,0 +1,71 @@ +#ifndef CAFFE_SIGMOID_LAYER_HPP_ +#define CAFFE_SIGMOID_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Sigmoid function non-linearity @f$ + * y = (1 + \exp(-x))^{-1} + * @f$, a classic choice in neural networks. + * + * Note that the gradient vanishes as the values move away from 0. + * The ReLULayer is often a better choice for this reason. + */ +template +class SigmoidLayer : public NeuronLayer { + public: + explicit SigmoidLayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual inline const char* type() const { return "Sigmoid"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = (1 + \exp(-x))^{-1} + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the sigmoid inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} + * = \frac{\partial E}{\partial y} y (1 - y) + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_SIGMOID_LAYER_HPP_ diff --git a/include/caffe/layers/silence_layer.hpp b/include/caffe/layers/silence_layer.hpp new file mode 100644 index 00000000..fba087fc --- /dev/null +++ b/include/caffe/layers/silence_layer.hpp @@ -0,0 +1,43 @@ +#ifndef CAFFE_SILENCE_LAYER_HPP_ +#define CAFFE_SILENCE_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Ignores bottom blobs while producing no top blobs. (This is useful + * to suppress outputs during testing.) 
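// Referring back to the SigmoidCrossEntropyLossLayer formulas documented above,
// a minimal per-element sketch of the numerically stable loss and its gradient,
// assuming a plain float logit x and target p in [0, 1] (hypothetical helpers;
// the layer itself operates on whole Blobs).
#include <algorithm>
#include <cmath>

// loss(x, p) = -[p*log(sigmoid(x)) + (1-p)*log(1-sigmoid(x))]
//            = max(x, 0) - x*p + log(1 + exp(-|x|)), so exp() never overflows.
inline float sigmoid_xent_loss(float x, float p) {
  return std::max(x, 0.0f) - x * p + std::log1p(std::exp(-std::fabs(x)));
}

// d loss / d x = sigmoid(x) - p, matching the documented per-element diff (p_hat - p).
inline float sigmoid_xent_grad(float x, float p) {
  return 1.0f / (1.0f + std::exp(-x)) - p;
}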
+ */ +template +class SilenceLayer : public Layer { + public: + explicit SilenceLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top) {} + + virtual inline const char* type() const { return "Silence"; } + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 0; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top) {} + // We can't define Forward_gpu here, since STUB_GPU will provide + // its own definition for CPU_ONLY mode. + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_SILENCE_LAYER_HPP_ diff --git a/include/caffe/layers/slice_layer.hpp b/include/caffe/layers/slice_layer.hpp new file mode 100644 index 00000000..10a0abb6 --- /dev/null +++ b/include/caffe/layers/slice_layer.hpp @@ -0,0 +1,51 @@ +#ifndef CAFFE_SLICE_LAYER_HPP_ +#define CAFFE_SLICE_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Takes a Blob and slices it along either the num or channel dimension, + * outputting multiple sliced Blob results. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class SliceLayer : public Layer { + public: + explicit SliceLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Slice"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int count_; + int num_slices_; + int slice_size_; + int slice_axis_; + vector slice_point_; +}; + +} // namespace caffe + +#endif // CAFFE_SLICE_LAYER_HPP_ diff --git a/include/caffe/layers/softmax_layer.hpp b/include/caffe/layers/softmax_layer.hpp new file mode 100644 index 00000000..c65b8703 --- /dev/null +++ b/include/caffe/layers/softmax_layer.hpp @@ -0,0 +1,50 @@ +#ifndef CAFFE_SOFTMAX_LAYER_HPP_ +#define CAFFE_SOFTMAX_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Computes the softmax function. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
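// A minimal sketch of the softmax this layer computes for one row of scores,
// using the usual max-subtraction for numerical stability (hypothetical helper,
// not the layer's BLAS-based implementation; assumes x is non-empty).
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> softmax_row(const std::vector<float>& x) {
  const float m = *std::max_element(x.begin(), x.end());
  std::vector<float> y(x.size());
  float sum = 0.0f;
  for (size_t k = 0; k < x.size(); ++k) {
    y[k] = std::exp(x[k] - m);  // shifting by the max keeps exp() from overflowing
    sum += y[k];
  }
  for (size_t k = 0; k < x.size(); ++k) {
    y[k] /= sum;                // the row now sums to 1
  }
  return y;
}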
+ */ +template +class SoftmaxLayer : public Layer { + public: + explicit SoftmaxLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Softmax"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int outer_num_; + int inner_num_; + int softmax_axis_; + /// sum_multiplier is used to carry out sum using BLAS + Blob sum_multiplier_; + /// scale is an intermediate Blob to hold temporary results. + Blob scale_; +}; + +} // namespace caffe + +#endif // CAFFE_SOFTMAX_LAYER_HPP_ diff --git a/include/caffe/layers/softmax_loss_layer.hpp b/include/caffe/layers/softmax_loss_layer.hpp new file mode 100644 index 00000000..f07e8a02 --- /dev/null +++ b/include/caffe/layers/softmax_loss_layer.hpp @@ -0,0 +1,130 @@ +#ifndef CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ +#define CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" +#include "caffe/layers/softmax_layer.hpp" + +namespace caffe { + +/** + * @brief Computes the multinomial logistic loss for a one-of-many + * classification task, passing real-valued predictions through a + * softmax to get a probability distribution over classes. + * + * This layer should be preferred over separate + * SoftmaxLayer + MultinomialLogisticLossLayer + * as its gradient computation is more numerically stable. + * At test time, this layer can be replaced simply by a SoftmaxLayer. + * + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ x @f$, a Blob with values in + * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of + * the @f$ K = CHW @f$ classes. This layer maps these scores to a + * probability distribution over classes using the softmax function + * @f$ \hat{p}_{nk} = \exp(x_{nk}) / + * \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer). + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels @f$ l @f$, an integer-valued Blob with values + * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ + * indicating the correct class label among the @f$ K @f$ classes + * @param top output Blob vector (length 1) + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * the computed cross-entropy classification loss: @f$ E = + * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) + * @f$, for softmax output class probabilites @f$ \hat{p} @f$ + */ +template +class SoftmaxWithLossLayer : public LossLayer { + public: + /** + * @param param provides LossParameter loss_param, with options: + * - ignore_label (optional) + * Specify a label value that should be ignored when computing the loss. + * - normalize (optional, default true) + * If true, the loss is normalized by the number of (nonignored) labels + * present; otherwise the loss is simply summed over spatial locations. 
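// A minimal sketch of the loss described above, including the ignore_label and
// normalize options (hypothetical helper; `prob` is assumed to already hold the
// softmax outputs, one row of class probabilities per instance).
#include <algorithm>
#include <cmath>
#include <vector>

float softmax_loss(const std::vector<std::vector<float> >& prob,
                   const std::vector<int>& label,
                   int ignore_label, bool normalize) {
  float loss = 0.0f;
  int count = 0;
  for (size_t n = 0; n < label.size(); ++n) {
    if (label[n] == ignore_label) continue;                  // skip ignored instances
    loss -= std::log(std::max(prob[n][label[n]], 1e-20f));   // clamp to avoid log(0)
    ++count;
  }
  // normalize: average over the non-ignored labels; otherwise return the plain sum.
  return (normalize && count > 0) ? loss / count : loss;
}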
+ */ + explicit SoftmaxWithLossLayer(const LayerParameter& param) + : LossLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "SoftmaxWithLoss"; } + virtual inline int ExactNumTopBlobs() const { return -1; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlobs() const { return 2; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + /** + * @brief Computes the softmax loss error gradient w.r.t. the predictions. + * + * Gradients cannot be computed with respect to the label inputs (bottom[1]), + * so this method ignores bottom[1] and requires !propagate_down[1], crashing + * if propagate_down[1] is set. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times 1 \times 1 \times 1) @f$ + * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, + * as @f$ \lambda @f$ is the coefficient of this layer's output + * @f$\ell_i@f$ in the overall Net loss + * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence + * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. + * (*Assuming that this top Blob is not used as a bottom (input) by any + * other layer of the Net.) + * @param propagate_down see Layer::Backward. + * propagate_down[1] must be false as we can't compute gradients with + * respect to the labels. + * @param bottom input Blob vector (length 2) + * -# @f$ (N \times C \times H \times W) @f$ + * the predictions @f$ x @f$; Backward computes diff + * @f$ \frac{\partial E}{\partial x} @f$ + * -# @f$ (N \times 1 \times 1 \times 1) @f$ + * the labels -- ignored as we can't compute their error gradients + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// Read the normalization mode parameter and compute the normalizer based + /// on the blob size. If normalization_mode is VALID, the count of valid + /// outputs will be read from valid_count, unless it is -1 in which case + /// all outputs are assumed to be valid. + virtual Dtype get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count); + + /// The internal SoftmaxLayer used to map predictions to a distribution. + shared_ptr > softmax_layer_; + /// prob stores the output probability predictions from the SoftmaxLayer. + Blob prob_; + /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward + vector*> softmax_bottom_vec_; + /// top vector holder used in call to the underlying SoftmaxLayer::Forward + vector*> softmax_top_vec_; + /// Whether to ignore instances with a certain label. + bool has_ignore_label_; + /// The label indicating that an instance should be ignored. + int ignore_label_; + /// How to normalize the output loss. 
+ LossParameter_NormalizationMode normalization_; + + int softmax_axis_, outer_num_, inner_num_; +}; + +} // namespace caffe + +#endif // CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/split_layer.hpp b/include/caffe/layers/split_layer.hpp new file mode 100644 index 00000000..8140dfc7 --- /dev/null +++ b/include/caffe/layers/split_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_SPLIT_LAYER_HPP_ +#define CAFFE_SPLIT_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Creates a "split" path in the network by copying the bottom Blob + * into multiple top Blob%s to be used by multiple consuming layers. + * + * TODO(dox): thorough documentation for Forward, Backward, and proto params. + */ +template +class SplitLayer : public Layer { + public: + explicit SplitLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Split"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int MinTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int count_; +}; + +} // namespace caffe + +#endif // CAFFE_SPLIT_LAYER_HPP_ diff --git a/include/caffe/layers/spp_layer.hpp b/include/caffe/layers/spp_layer.hpp new file mode 100644 index 00000000..9f145cc7 --- /dev/null +++ b/include/caffe/layers/spp_layer.hpp @@ -0,0 +1,76 @@ +#ifndef CAFFE_SPP_LAYER_HPP_ +#define CAFFE_SPP_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Does spatial pyramid pooling on the input image + * by taking the max, average, etc. within regions + * so that the result vector of different sized + * images are of the same size. 
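// A rough sketch of one way to size the pooling window for a pyramid level so
// that any input size yields the same output length (an assumed formulation for
// illustration only; GetPoolingParam below is the layer's actual rule).
#include <cmath>

struct PoolWindow { int kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w; };

PoolWindow spp_level_window(int pyramid_level, int bottom_h, int bottom_w) {
  const int num_bins = 1 << pyramid_level;  // level l pools into 2^l x 2^l bins
  PoolWindow w;
  w.kernel_h = static_cast<int>(std::ceil(bottom_h / static_cast<double>(num_bins)));
  w.kernel_w = static_cast<int>(std::ceil(bottom_w / static_cast<double>(num_bins)));
  // Pad up so the padded input is covered by exactly num_bins non-overlapping windows.
  w.pad_h = (w.kernel_h * num_bins - bottom_h + 1) / 2;
  w.pad_w = (w.kernel_w * num_bins - bottom_w + 1) / 2;
  w.stride_h = w.kernel_h;
  w.stride_w = w.kernel_w;
  return w;  // each level then contributes num_bins * num_bins values per channel
}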
+ */ +template +class SPPLayer : public Layer { + public: + explicit SPPLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "SPP"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + // calculates the kernel and stride dimensions for the pooling layer, + // returns a correctly configured LayerParameter for a PoolingLayer + virtual LayerParameter GetPoolingParam(const int pyramid_level, + const int bottom_h, const int bottom_w, const SPPParameter spp_param); + + int pyramid_height_; + int bottom_h_, bottom_w_; + int num_; + int channels_; + int kernel_h_, kernel_w_; + int pad_h_, pad_w_; + bool reshaped_first_time_; + + /// the internal Split layer that feeds the pooling layers + shared_ptr > split_layer_; + /// top vector holder used in call to the underlying SplitLayer::Forward + vector*> split_top_vec_; + /// bottom vector holder used in call to the underlying PoolingLayer::Forward + vector*>*> pooling_bottom_vecs_; + /// the internal Pooling layers of different kernel sizes + vector > > pooling_layers_; + /// top vector holders used in call to the underlying PoolingLayer::Forward + vector*>*> pooling_top_vecs_; + /// pooling_outputs stores the outputs of the PoolingLayers + vector*> pooling_outputs_; + /// the internal Flatten layers that the Pooling layers feed into + vector*> flatten_layers_; + /// top vector holders used in call to the underlying FlattenLayer::Forward + vector*>*> flatten_top_vecs_; + /// flatten_outputs stores the outputs of the FlattenLayers + vector*> flatten_outputs_; + /// bottom vector holder used in call to the underlying ConcatLayer::Forward + vector*> concat_bottom_vec_; + /// the internal Concat layers that the Flatten layers feed into + shared_ptr > concat_layer_; +}; + +} // namespace caffe + +#endif // CAFFE_SPP_LAYER_HPP_ diff --git a/include/caffe/layers/tanh_layer.hpp b/include/caffe/layers/tanh_layer.hpp new file mode 100644 index 00000000..8f95e932 --- /dev/null +++ b/include/caffe/layers/tanh_layer.hpp @@ -0,0 +1,73 @@ +#ifndef CAFFE_TANH_LAYER_HPP_ +#define CAFFE_TANH_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief TanH hyperbolic tangent non-linearity @f$ + * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} + * @f$, popular in auto-encoders. + * + * Note that the gradient vanishes as the values move away from 0. + * The ReLULayer is often a better choice for this reason. 
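// A minimal element-wise sketch of the TanH forward and backward rules above,
// assuming float values (hypothetical helpers; the layer operates on whole Blobs).
#include <cmath>

inline float tanh_forward(float x) {
  return std::tanh(x);  // equivalent to (exp(2x) - 1) / (exp(2x) + 1)
}

// Given the top diff dE/dy and the already computed output y = tanh(x),
// the bottom diff is dE/dx = dE/dy * (1 - y*y).
inline float tanh_backward(float top_diff, float y) {
  return top_diff * (1.0f - y * y);
}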
+ */ +template +class TanHLayer : public NeuronLayer { + public: + explicit TanHLayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual inline const char* type() const { return "TanH"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} + * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the sigmoid inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} + * = \frac{\partial E}{\partial y} + * \left(1 - \left[\frac{\exp(2x) - 1}{exp(2x) + 1} \right]^2 \right) + * = \frac{\partial E}{\partial y} (1 - y^2) + * @f$ if propagate_down[0] + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_TANH_LAYER_HPP_ diff --git a/include/caffe/layers/threshold_layer.hpp b/include/caffe/layers/threshold_layer.hpp new file mode 100644 index 00000000..3bf4db63 --- /dev/null +++ b/include/caffe/layers/threshold_layer.hpp @@ -0,0 +1,64 @@ +#ifndef CAFFE_THRESHOLD_LAYER_HPP_ +#define CAFFE_THRESHOLD_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Tests whether the input exceeds a threshold: outputs 1 for inputs + * above threshold; 0 otherwise. + */ +template +class ThresholdLayer : public NeuronLayer { + public: + /** + * @param param provides ThresholdParameter threshold_param, + * with ThresholdLayer options: + * - threshold (\b optional, default 0). + * the threshold value @f$ t @f$ to which the input values are compared. + */ + explicit ThresholdLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Threshold"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \left\{ + * \begin{array}{lr} + * 0 & \mathrm{if} \; x \le t \\ + * 1 & \mathrm{if} \; x > t + * \end{array} \right. 
+ * @f$ + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + /// @brief Not implemented (non-differentiable function) + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + NOT_IMPLEMENTED; + } + + Dtype threshold_; +}; + +} // namespace caffe + +#endif // CAFFE_THRESHOLD_LAYER_HPP_ diff --git a/include/caffe/layers/tile_layer.hpp b/include/caffe/layers/tile_layer.hpp new file mode 100644 index 00000000..fbdbe2f0 --- /dev/null +++ b/include/caffe/layers/tile_layer.hpp @@ -0,0 +1,43 @@ +#ifndef CAFFE_TILE_LAYER_HPP_ +#define CAFFE_TILE_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Copy a Blob along specified dimensions. + */ +template +class TileLayer : public Layer { + public: + explicit TileLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Tile"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + unsigned int axis_, tiles_, outer_dim_, inner_dim_; +}; + +} // namespace caffe + +#endif // CAFFE_TILE_LAYER_HPP_ diff --git a/include/caffe/layers/window_data_layer.hpp b/include/caffe/layers/window_data_layer.hpp new file mode 100644 index 00000000..35f41b80 --- /dev/null +++ b/include/caffe/layers/window_data_layer.hpp @@ -0,0 +1,55 @@ +#ifndef CAFFE_WINDOW_DATA_LAYER_HPP_ +#define CAFFE_WINDOW_DATA_LAYER_HPP_ + +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Provides data to the Net from windows of images files, specified + * by a window data file. + * + * TODO(dox): thorough documentation for Forward and proto params. 
+ */ +template +class WindowDataLayer : public BasePrefetchingDataLayer { + public: + explicit WindowDataLayer(const LayerParameter& param) + : BasePrefetchingDataLayer(param) {} + virtual ~WindowDataLayer(); + virtual void DataLayerSetUp(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "WindowData"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int ExactNumTopBlobs() const { return 2; } + + protected: + virtual unsigned int PrefetchRand(); + virtual void load_batch(Batch* batch); + + shared_ptr prefetch_rng_; + vector > > image_database_; + enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM }; + vector > fg_windows_; + vector > bg_windows_; + Blob data_mean_; + vector mean_values_; + bool has_mean_file_; + bool has_mean_values_; + bool cache_images_; + vector > image_database_cache_; +}; + +} // namespace caffe + +#endif // CAFFE_WINDOW_DATA_LAYER_HPP_ diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp deleted file mode 100644 index 53d07025..00000000 --- a/include/caffe/loss_layers.hpp +++ /dev/null @@ -1,777 +0,0 @@ -#ifndef CAFFE_LOSS_LAYERS_HPP_ -#define CAFFE_LOSS_LAYERS_HPP_ - -#include -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/neuron_layers.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -const float kLOG_THRESHOLD = 1e-20; - -/** - * @brief Computes the classification accuracy for a one-of-many - * classification task. - */ -template -class AccuracyLayer : public Layer { - public: - /** - * @param param provides AccuracyParameter accuracy_param, - * with AccuracyLayer options: - * - top_k (\b optional, default 1). - * Sets the maximum rank @f$ k @f$ at which a prediction is considered - * correct. For example, if @f$ k = 5 @f$, a prediction is counted - * correct if the correct label is among the top 5 predicted labels. - */ - explicit AccuracyLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Accuracy"; } - virtual inline int ExactNumBottomBlobs() const { return 2; } - - // If there are two top blobs, then the second blob will contain - // accuracies per class. - virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlobs() const { return 2; } - - protected: - /** - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ x @f$, a Blob with values in - * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of - * the @f$ K = CHW @f$ classes. Each @f$ x_n @f$ is mapped to a predicted - * label @f$ \hat{l}_n @f$ given by its maximal index: - * @f$ \hat{l}_n = \arg\max\limits_k x_{nk} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed accuracy: @f$ - * \frac{1}{N} \sum\limits_{n=1}^N \delta\{ \hat{l}_n = l_n \} - * @f$, where @f$ - * \delta\{\mathrm{condition}\} = \left\{ - * \begin{array}{lr} - * 1 & \mbox{if condition} \\ - * 0 & \mbox{otherwise} - * \end{array} \right. 
- * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - - /// @brief Not implemented -- AccuracyLayer cannot be used as a loss. - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - for (int i = 0; i < propagate_down.size(); ++i) { - if (propagate_down[i]) { NOT_IMPLEMENTED; } - } - } - - int label_axis_, outer_num_, inner_num_; - - int top_k_; - - /// Whether to ignore instances with a certain label. - bool has_ignore_label_; - /// The label indicating that an instance should be ignored. - int ignore_label_; - /// Keeps counts of the number of samples per class. - Blob nums_buffer_; -}; - -/** - * @brief An interface for Layer%s that take two Blob%s as input -- usually - * (1) predictions and (2) ground-truth labels -- and output a - * singleton Blob representing the loss. - * - * LossLayers are typically only capable of backpropagating to their first input - * -- the predictions. - */ -template -class LossLayer : public Layer { - public: - explicit LossLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp( - const vector*>& bottom, const vector*>& top); - virtual void Reshape( - const vector*>& bottom, const vector*>& top); - - virtual inline int ExactNumBottomBlobs() const { return 2; } - - /** - * @brief For convenience and backwards compatibility, instruct the Net to - * automatically allocate a single top Blob for LossLayers, into which - * they output their singleton loss, (even if the user didn't specify - * one in the prototxt, etc.). - */ - virtual inline bool AutoTopBlobs() const { return true; } - virtual inline int ExactNumTopBlobs() const { return 1; } - /** - * We usually cannot backpropagate to the labels; ignore force_backward for - * these inputs. - */ - virtual inline bool AllowForceBackward(const int bottom_index) const { - return bottom_index != 1; - } -}; - -/** - * @brief Computes the contrastive loss @f$ - * E = \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + - * \left(1-y\right) \max \left(margin-d, 0\right)^2 - * @f$ where @f$ - * d = \left| \left| a_n - b_n \right| \right|_2 @f$. This can be - * used to train siamese networks. - * - * @param bottom input Blob vector (length 3) - * -# @f$ (N \times C \times 1 \times 1) @f$ - * the features @f$ a \in [-\infty, +\infty]@f$ - * -# @f$ (N \times C \times 1 \times 1) @f$ - * the features @f$ b \in [-\infty, +\infty]@f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the binary similarity @f$ s \in [0, 1]@f$ - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed contrastive loss: @f$ E = - * \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d^2 + - * \left(1-y\right) \max \left(margin-d, 0\right)^2 - * @f$ where @f$ - * d = \left| \left| a_n - b_n \right| \right|_2 @f$. - * This can be used to train siamese networks. - */ -template -class ContrastiveLossLayer : public LossLayer { - public: - explicit ContrastiveLossLayer(const LayerParameter& param) - : LossLayer(param), diff_() {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline int ExactNumBottomBlobs() const { return 3; } - virtual inline const char* type() const { return "ContrastiveLoss"; } - /** - * Unlike most loss layers, in the ContrastiveLossLayer we can backpropagate - * to the first two inputs. 
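// A rough per-pair sketch of the contrastive loss just documented, assuming two
// equally sized float feature vectors (hypothetical helper; the layer averages
// this over the N pairs in the batch and caches diff_ / dist_sq_ for Backward).
#include <algorithm>
#include <cmath>
#include <vector>

float contrastive_pair_loss(const std::vector<float>& a,
                            const std::vector<float>& b,
                            bool similar, float margin) {
  float dist_sq = 0.0f;
  for (size_t i = 0; i < a.size(); ++i) {
    const float d = a[i] - b[i];
    dist_sq += d * d;                                 // d^2 = ||a - b||_2^2
  }
  if (similar) {
    return 0.5f * dist_sq;                            // pull similar pairs together
  }
  const float md = std::max(margin - std::sqrt(dist_sq), 0.0f);
  return 0.5f * md * md;                              // push dissimilar pairs past the margin
}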
- */ - virtual inline bool AllowForceBackward(const int bottom_index) const { - return bottom_index != 2; - } - - protected: - /// @copydoc ContrastiveLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the Contrastive error gradient w.r.t. the inputs. - * - * Computes the gradients with respect to the two input vectors (bottom[0] and - * bottom[1]), but not the similarity label (bottom[2]). - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times 1 \times 1) @f$ - * the features @f$a@f$; Backward fills their diff with - * gradients if propagate_down[0] - * -# @f$ (N \times C \times 1 \times 1) @f$ - * the features @f$b@f$; Backward fills their diff with gradients if - * propagate_down[1] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Blob diff_; // cached for backward pass - Blob dist_sq_; // cached for backward pass - Blob diff_sq_; // tmp storage for gpu forward pass - Blob summer_vec_; // tmp storage for gpu forward pass -}; - -/** - * @brief Computes the Euclidean (L2) loss @f$ - * E = \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n - * \right| \right|_2^2 @f$ for real-valued regression tasks. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$ - * -# @f$ (N \times C \times H \times W) @f$ - * the targets @f$ y \in [-\infty, +\infty]@f$ - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed Euclidean loss: @f$ E = - * \frac{1}{2n} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n - * \right| \right|_2^2 @f$ - * - * This can be used for least-squares regression tasks. An InnerProductLayer - * input to a EuclideanLossLayer exactly formulates a linear least squares - * regression problem. With non-zero weight decay the problem becomes one of - * ridge regression -- see src/caffe/test/test_sgd_solver.cpp for a concrete - * example wherein we check that the gradients computed for a Net with exactly - * this structure match hand-computed gradient formulas for ridge regression. - * - * (Note: Caffe, and SGD in general, is certainly \b not the best way to solve - * linear least squares problems! We use it only as an instructive example.) 
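// A minimal sketch of the Euclidean loss and its gradient from the formulas
// above, assuming flat float vectors of equal length (hypothetical helper).
#include <vector>

float euclidean_loss(const std::vector<float>& pred,
                     const std::vector<float>& target,
                     int num, std::vector<float>* pred_diff) {
  float loss = 0.0f;
  pred_diff->resize(pred.size());
  for (size_t i = 0; i < pred.size(); ++i) {
    const float d = pred[i] - target[i];
    loss += d * d;
    (*pred_diff)[i] = d / num;    // dE/d(y_hat) = (y_hat - y) / N, as documented above
  }
  return loss / (2.0f * num);     // E = 1/(2N) * sum ||y_hat - y||_2^2
}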
- */ -template -class EuclideanLossLayer : public LossLayer { - public: - explicit EuclideanLossLayer(const LayerParameter& param) - : LossLayer(param), diff_() {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "EuclideanLoss"; } - /** - * Unlike most loss layers, in the EuclideanLossLayer we can backpropagate - * to both inputs -- override to return true and always allow force_backward. - */ - virtual inline bool AllowForceBackward(const int bottom_index) const { - return true; - } - - protected: - /// @copydoc EuclideanLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the Euclidean error gradient w.r.t. the inputs. - * - * Unlike other children of LossLayer, EuclideanLossLayer \b can compute - * gradients with respect to the label inputs bottom[1] (but still only will - * if propagate_down[1] is set, due to being produced by learnable parameters - * or if force_backward is set). In fact, this layer is "commutative" -- the - * result is the same regardless of the order of the two bottoms. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$\hat{y}@f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial \hat{y}} = - * \frac{1}{n} \sum\limits_{n=1}^N (\hat{y}_n - y_n) - * @f$ if propagate_down[0] - * -# @f$ (N \times C \times H \times W) @f$ - * the targets @f$y@f$; Backward fills their diff with gradients - * @f$ \frac{\partial E}{\partial y} = - * \frac{1}{n} \sum\limits_{n=1}^N (y_n - \hat{y}_n) - * @f$ if propagate_down[1] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Blob diff_; -}; - -/** - * @brief Computes the hinge loss for a one-of-many classification task. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ t @f$, a Blob with values in - * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of - * the @f$ K = CHW @f$ classes. In an SVM, @f$ t @f$ is the result of - * taking the inner product @f$ X^T W @f$ of the D-dimensional features - * @f$ X \in \mathcal{R}^{D \times N} @f$ and the learned hyperplane - * parameters @f$ W \in \mathcal{R}^{D \times K} @f$, so a Net with just - * an InnerProductLayer (with num_output = D) providing predictions to a - * HingeLossLayer and no other learnable parameters or losses is - * equivalent to an SVM. 
- * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed hinge loss: @f$ E = - * \frac{1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^K - * [\max(0, 1 - \delta\{l_n = k\} t_{nk})] ^ p - * @f$, for the @f$ L^p @f$ norm - * (defaults to @f$ p = 1 @f$, the L1 norm; L2 norm, as in L2-SVM, - * is also available), and @f$ - * \delta\{\mathrm{condition}\} = \left\{ - * \begin{array}{lr} - * 1 & \mbox{if condition} \\ - * -1 & \mbox{otherwise} - * \end{array} \right. - * @f$ - * - * In an SVM, @f$ t \in \mathcal{R}^{N \times K} @f$ is the result of taking - * the inner product @f$ X^T W @f$ of the features - * @f$ X \in \mathcal{R}^{D \times N} @f$ - * and the learned hyperplane parameters - * @f$ W \in \mathcal{R}^{D \times K} @f$. So, a Net with just an - * InnerProductLayer (with num_output = @f$k@f$) providing predictions to a - * HingeLossLayer is equivalent to an SVM (assuming it has no other learned - * outside the InnerProductLayer and no other losses outside the - * HingeLossLayer). - */ -template -class HingeLossLayer : public LossLayer { - public: - explicit HingeLossLayer(const LayerParameter& param) - : LossLayer(param) {} - - virtual inline const char* type() const { return "HingeLoss"; } - - protected: - /// @copydoc HingeLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the hinge loss error gradient w.r.t. the predictions. - * - * Gradients cannot be computed with respect to the label inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * propagate_down[1] must be false as we can't compute gradients with - * respect to the labels. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$t@f$; Backward computes diff - * @f$ \frac{\partial E}{\partial t} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief A generalization of MultinomialLogisticLossLayer that takes an - * "information gain" (infogain) matrix specifying the "value" of all label - * pairs. - * - * Equivalent to the MultinomialLogisticLossLayer if the infogain matrix is the - * identity. - * - * @param bottom input Blob vector (length 2-3) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ \hat{p} @f$, a Blob with values in - * @f$ [0, 1] @f$ indicating the predicted probability of each of the - * @f$ K = CHW @f$ classes. 
Each prediction vector @f$ \hat{p}_n @f$ - * should sum to 1 as in a probability distribution: @f$ - * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$. - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * -# @f$ (1 \times 1 \times K \times K) @f$ - * (\b optional) the infogain matrix @f$ H @f$. This must be provided as - * the third bottom blob input if not provided as the infogain_mat in the - * InfogainLossParameter. If @f$ H = I @f$, this layer is equivalent to the - * MultinomialLogisticLossLayer. - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed infogain multinomial logistic loss: @f$ E = - * \frac{-1}{N} \sum\limits_{n=1}^N H_{l_n} \log(\hat{p}_n) = - * \frac{-1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^{K} H_{l_n,k} - * \log(\hat{p}_{n,k}) - * @f$, where @f$ H_{l_n} @f$ denotes row @f$l_n@f$ of @f$H@f$. - */ -template -class InfogainLossLayer : public LossLayer { - public: - explicit InfogainLossLayer(const LayerParameter& param) - : LossLayer(param), infogain_() {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - // InfogainLossLayer takes 2-3 bottom Blobs; if there are 3 the third should - // be the infogain matrix. (Otherwise the infogain matrix is loaded from a - // file specified by LayerParameter.) - virtual inline int ExactNumBottomBlobs() const { return -1; } - virtual inline int MinBottomBlobs() const { return 2; } - virtual inline int MaxBottomBlobs() const { return 3; } - - virtual inline const char* type() const { return "InfogainLoss"; } - - protected: - /// @copydoc InfogainLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the infogain loss error gradient w.r.t. the predictions. - * - * Gradients cannot be computed with respect to the label inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. (The same applies to the infogain matrix, if - * provided as bottom[2] rather than in the layer_param.) - * - * @param top output Blob vector (length 1), providing the error gradient - * with respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. 
- * propagate_down[1] must be false as we can't compute gradients with - * respect to the labels (similarly for propagate_down[2] and the - * infogain matrix, if provided as bottom[2]) - * @param bottom input Blob vector (length 2-3) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ \hat{p} @f$; Backward computes diff - * @f$ \frac{\partial E}{\partial \hat{p}} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - * -# @f$ (1 \times 1 \times K \times K) @f$ - * (\b optional) the information gain matrix -- ignored as its error - * gradient computation is not implemented. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Blob infogain_; -}; - -/** - * @brief Computes the multinomial logistic loss for a one-of-many - * classification task, directly taking a predicted probability - * distribution as input. - * - * When predictions are not already a probability distribution, you should - * instead use the SoftmaxWithLossLayer, which maps predictions to a - * distribution using the SoftmaxLayer, before computing the multinomial - * logistic loss. The SoftmaxWithLossLayer should be preferred over separate - * SoftmaxLayer + MultinomialLogisticLossLayer - * as its gradient computation is more numerically stable. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ \hat{p} @f$, a Blob with values in - * @f$ [0, 1] @f$ indicating the predicted probability of each of the - * @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$ - * should sum to 1 as in a probability distribution: @f$ - * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$. - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed multinomial logistic loss: @f$ E = - * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) - * @f$ - */ -template -class MultinomialLogisticLossLayer : public LossLayer { - public: - explicit MultinomialLogisticLossLayer(const LayerParameter& param) - : LossLayer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "MultinomialLogisticLoss"; } - - protected: - /// @copydoc MultinomialLogisticLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the multinomial logistic loss error gradient w.r.t. the - * predictions. - * - * Gradients cannot be computed with respect to the label inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) 
- * @param propagate_down see Layer::Backward. - * propagate_down[1] must be false as we can't compute gradients with - * respect to the labels. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ \hat{p} @f$; Backward computes diff - * @f$ \frac{\partial E}{\partial \hat{p}} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief Computes the cross-entropy (logistic) loss @f$ - * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ - * p_n \log \hat{p}_n + - * (1 - p_n) \log(1 - \hat{p}_n) - * \right] - * @f$, often used for predicting targets interpreted as probabilities. - * - * This layer is implemented rather than separate - * SigmoidLayer + CrossEntropyLayer - * as its gradient computation is more numerically stable. - * At test time, this layer can be replaced simply by a SigmoidLayer. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the scores @f$ x \in [-\infty, +\infty]@f$, - * which this layer maps to probability predictions - * @f$ \hat{p}_n = \sigma(x_n) \in [0, 1] @f$ - * using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer). - * -# @f$ (N \times C \times H \times W) @f$ - * the targets @f$ y \in [0, 1] @f$ - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed cross-entropy loss: @f$ - * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ - * p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) - * \right] - * @f$ - */ -template -class SigmoidCrossEntropyLossLayer : public LossLayer { - public: - explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param) - : LossLayer(param), - sigmoid_layer_(new SigmoidLayer(param)), - sigmoid_output_(new Blob()) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "SigmoidCrossEntropyLoss"; } - - protected: - /// @copydoc SigmoidCrossEntropyLossLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the - * predictions. - * - * Gradients cannot be computed with respect to the target inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * propagate_down[1] must be false as gradient computation with respect - * to the targets is not implemented. 
- * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$x@f$; Backward computes diff - * @f$ \frac{\partial E}{\partial x} = - * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) - * @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// The internal SigmoidLayer used to map predictions to probabilities. - shared_ptr > sigmoid_layer_; - /// sigmoid_output stores the output of the SigmoidLayer. - shared_ptr > sigmoid_output_; - /// bottom vector holder to call the underlying SigmoidLayer::Forward - vector*> sigmoid_bottom_vec_; - /// top vector holder to call the underlying SigmoidLayer::Forward - vector*> sigmoid_top_vec_; -}; - -// Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer. -template class SoftmaxLayer; - -/** - * @brief Computes the multinomial logistic loss for a one-of-many - * classification task, passing real-valued predictions through a - * softmax to get a probability distribution over classes. - * - * This layer should be preferred over separate - * SoftmaxLayer + MultinomialLogisticLossLayer - * as its gradient computation is more numerically stable. - * At test time, this layer can be replaced simply by a SoftmaxLayer. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ x @f$, a Blob with values in - * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of - * the @f$ K = CHW @f$ classes. This layer maps these scores to a - * probability distribution over classes using the softmax function - * @f$ \hat{p}_{nk} = \exp(x_{nk}) / - * \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer). - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed cross-entropy classification loss: @f$ E = - * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) - * @f$, for softmax output class probabilites @f$ \hat{p} @f$ - */ -template -class SoftmaxWithLossLayer : public LossLayer { - public: - /** - * @param param provides LossParameter loss_param, with options: - * - ignore_label (optional) - * Specify a label value that should be ignored when computing the loss. - * - normalize (optional, default true) - * If true, the loss is normalized by the number of (nonignored) labels - * present; otherwise the loss is simply summed over spatial locations. 
- */ - explicit SoftmaxWithLossLayer(const LayerParameter& param) - : LossLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "SoftmaxWithLoss"; } - virtual inline int ExactNumTopBlobs() const { return -1; } - virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlobs() const { return 2; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - /** - * @brief Computes the softmax loss error gradient w.r.t. the predictions. - * - * Gradients cannot be computed with respect to the label inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * propagate_down[1] must be false as we can't compute gradients with - * respect to the labels. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ x @f$; Backward computes diff - * @f$ \frac{\partial E}{\partial x} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// Read the normalization mode parameter and compute the normalizer based - /// on the blob size. If normalization_mode is VALID, the count of valid - /// outputs will be read from valid_count, unless it is -1 in which case - /// all outputs are assumed to be valid. - virtual Dtype get_normalizer( - LossParameter_NormalizationMode normalization_mode, int valid_count); - - /// The internal SoftmaxLayer used to map predictions to a distribution. - shared_ptr > softmax_layer_; - /// prob stores the output probability predictions from the SoftmaxLayer. - Blob prob_; - /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward - vector*> softmax_bottom_vec_; - /// top vector holder used in call to the underlying SoftmaxLayer::Forward - vector*> softmax_top_vec_; - /// Whether to ignore instances with a certain label. - bool has_ignore_label_; - /// The label indicating that an instance should be ignored. - int ignore_label_; - /// How to normalize the output loss. 
- LossParameter_NormalizationMode normalization_; - - int softmax_axis_, outer_num_, inner_num_; -}; - -} // namespace caffe - -#endif // CAFFE_LOSS_LAYERS_HPP_ diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp deleted file mode 100644 index 4fa330ec..00000000 --- a/include/caffe/neuron_layers.hpp +++ /dev/null @@ -1,806 +0,0 @@ -#ifndef CAFFE_NEURON_LAYERS_HPP_ -#define CAFFE_NEURON_LAYERS_HPP_ - -#include -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief An interface for layers that take one blob as input (@f$ x @f$) - * and produce one equally-sized blob as output (@f$ y @f$), where - * each element of the output depends only on the corresponding input - * element. - */ -template -class NeuronLayer : public Layer { - public: - explicit NeuronLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } -}; - -/** - * @brief Computes @f$ y = |x| @f$ - * - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ y = |x| @f$ - */ -template -class AbsValLayer : public NeuronLayer { - public: - explicit AbsValLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "AbsVal"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /// @copydoc AbsValLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the absolute value inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = - * \mathrm{sign}(x) \frac{\partial E}{\partial y} - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief Computes @f$ y = x + \log(1 + \exp(-x)) @f$ if @f$ x > 0 @f$; - * @f$ y = \log(1 + \exp(x)) @f$ otherwise. - * - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \left\{ - * \begin{array}{ll} - * x + \log(1 + \exp(-x)) & \mbox{if } x > 0 \\ - * \log(1 + \exp(x)) & \mbox{otherwise} - * \end{array} \right. 
- * @f$ - */ -template -class BNLLLayer : public NeuronLayer { - public: - explicit BNLLLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "BNLL"; } - - protected: - /// @copydoc BNLLLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the BNLL inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -/** - * @brief During training only, sets a random portion of @f$x@f$ to 0, adjusting - * the rest of the vector magnitude accordingly. - * - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ y = |x| @f$ - */ -template -class DropoutLayer : public NeuronLayer { - public: - /** - * @param param provides DropoutParameter dropout_param, - * with DropoutLayer options: - * - dropout_ratio (\b optional, default 0.5). - * Sets the probability @f$ p @f$ that any given unit is dropped. - */ - explicit DropoutLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Dropout"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs. At training time, we have @f$ - * y_{\mbox{train}} = \left\{ - * \begin{array}{ll} - * \frac{x}{1 - p} & \mbox{if } u > p \\ - * 0 & \mbox{otherwise} - * \end{array} \right. - * @f$, where @f$ u \sim U(0, 1)@f$ is generated independently for each - * input at each iteration. At test time, we simply have - * @f$ y_{\mbox{test}} = \mathbb{E}[y_{\mbox{train}}] = x @f$. 
- */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$ - Blob rand_vec_; - /// the probability @f$ p @f$ of dropping any input - Dtype threshold_; - /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$ - Dtype scale_; - unsigned int uint_thres_; -}; - -/** - * @brief Computes @f$ y = \gamma ^ {\alpha x + \beta} @f$, - * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, - * and base @f$ \gamma @f$. - */ -template -class ExpLayer : public NeuronLayer { - public: - /** - * @param param provides ExpParameter exp_param, - * with ExpLayer options: - * - scale (\b optional, default 1) the scale @f$ \alpha @f$ - * - shift (\b optional, default 0) the shift @f$ \beta @f$ - * - base (\b optional, default -1 for a value of @f$ e \approx 2.718 @f$) - * the base @f$ \gamma @f$ - */ - explicit ExpLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Exp"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \gamma ^ {\alpha x + \beta} - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the exp inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = - * \frac{\partial E}{\partial y} y \alpha \log_e(gamma) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Dtype inner_scale_, outer_scale_; -}; - -/** - * @brief Computes @f$ y = log_{\gamma}(\alpha x + \beta) @f$, - * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, - * and base @f$ \gamma @f$. 
- */ -template -class LogLayer : public NeuronLayer { - public: - /** - * @param param provides LogParameter log_param, - * with LogLayer options: - * - scale (\b optional, default 1) the scale @f$ \alpha @f$ - * - shift (\b optional, default 0) the shift @f$ \beta @f$ - * - base (\b optional, default -1 for a value of @f$ e \approx 2.718 @f$) - * the base @f$ \gamma @f$ - */ - explicit LogLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Log"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = log_{\gamma}(\alpha x + \beta) - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the exp inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = - * \frac{\partial E}{\partial y} y \alpha \log_e(gamma) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - Dtype base_scale_; - Dtype input_scale_, input_shift_; - Dtype backward_num_scale_; -}; - -/** - * @brief Computes @f$ y = (\alpha x + \beta) ^ \gamma @f$, - * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, - * and power @f$ \gamma @f$. - */ -template -class PowerLayer : public NeuronLayer { - public: - /** - * @param param provides PowerParameter power_param, - * with PowerLayer options: - * - scale (\b optional, default 1) the scale @f$ \alpha @f$ - * - shift (\b optional, default 0) the shift @f$ \beta @f$ - * - power (\b optional, default 1) the power @f$ \gamma @f$ - */ - explicit PowerLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Power"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = (\alpha x + \beta) ^ \gamma - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the power inputs. 
- * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = - * \frac{\partial E}{\partial y} - * \alpha \gamma (\alpha x + \beta) ^ {\gamma - 1} = - * \frac{\partial E}{\partial y} - * \frac{\alpha \gamma y}{\alpha x + \beta} - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// @brief @f$ \gamma @f$ from layer_param_.power_param() - Dtype power_; - /// @brief @f$ \alpha @f$ from layer_param_.power_param() - Dtype scale_; - /// @brief @f$ \beta @f$ from layer_param_.power_param() - Dtype shift_; - /// @brief Result of @f$ \alpha \gamma @f$ - Dtype diff_scale_; -}; - -/** - * @brief Rectified Linear Unit non-linearity @f$ y = \max(0, x) @f$. - * The simple max is fast to compute, and the function does not saturate. - */ -template -class ReLULayer : public NeuronLayer { - public: - /** - * @param param provides ReLUParameter relu_param, - * with ReLULayer options: - * - negative_slope (\b optional, default 0). - * the value @f$ \nu @f$ by which negative values are multiplied. - */ - explicit ReLULayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "ReLU"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \max(0, x) - * @f$ by default. If a non-zero negative_slope @f$ \nu @f$ is provided, - * the computed outputs are @f$ y = \max(0, x) + \nu \min(0, x) @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the ReLU inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = \left\{ - * \begin{array}{lr} - * 0 & \mathrm{if} \; x \le 0 \\ - * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 - * \end{array} \right. - * @f$ if propagate_down[0], by default. - * If a non-zero negative_slope @f$ \nu @f$ is provided, - * the computed gradients are @f$ - * \frac{\partial E}{\partial x} = \left\{ - * \begin{array}{lr} - * \nu \frac{\partial E}{\partial y} & \mathrm{if} \; x \le 0 \\ - * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 - * \end{array} \right. - * @f$. 
- */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -#ifdef USE_CUDNN -/** - * @brief CuDNN acceleration of ReLULayer. - */ -template -class CuDNNReLULayer : public ReLULayer { - public: - explicit CuDNNReLULayer(const LayerParameter& param) - : ReLULayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNReLULayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnTensorDescriptor_t bottom_desc_; - cudnnTensorDescriptor_t top_desc_; -}; -#endif - -/** - * @brief Sigmoid function non-linearity @f$ - * y = (1 + \exp(-x))^{-1} - * @f$, a classic choice in neural networks. - * - * Note that the gradient vanishes as the values move away from 0. - * The ReLULayer is often a better choice for this reason. - */ -template -class SigmoidLayer : public NeuronLayer { - public: - explicit SigmoidLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "Sigmoid"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = (1 + \exp(-x))^{-1} - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the sigmoid inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * = \frac{\partial E}{\partial y} y (1 - y) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -#ifdef USE_CUDNN -/** - * @brief CuDNN acceleration of SigmoidLayer. 
- */ -template -class CuDNNSigmoidLayer : public SigmoidLayer { - public: - explicit CuDNNSigmoidLayer(const LayerParameter& param) - : SigmoidLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNSigmoidLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnTensorDescriptor_t bottom_desc_; - cudnnTensorDescriptor_t top_desc_; -}; -#endif - -/** - * @brief TanH hyperbolic tangent non-linearity @f$ - * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} - * @f$, popular in auto-encoders. - * - * Note that the gradient vanishes as the values move away from 0. - * The ReLULayer is often a better choice for this reason. - */ -template -class TanHLayer : public NeuronLayer { - public: - explicit TanHLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "TanH"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the sigmoid inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * = \frac{\partial E}{\partial y} - * \left(1 - \left[\frac{\exp(2x) - 1}{exp(2x) + 1} \right]^2 \right) - * = \frac{\partial E}{\partial y} (1 - y^2) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -#ifdef USE_CUDNN -/** - * @brief CuDNN acceleration of TanHLayer. - */ -template -class CuDNNTanHLayer : public TanHLayer { - public: - explicit CuDNNTanHLayer(const LayerParameter& param) - : TanHLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNTanHLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnTensorDescriptor_t bottom_desc_; - cudnnTensorDescriptor_t top_desc_; -}; -#endif - -/** - * @brief Tests whether the input exceeds a threshold: outputs 1 for inputs - * above threshold; 0 otherwise. 
- */ -template -class ThresholdLayer : public NeuronLayer { - public: - /** - * @param param provides ThresholdParameter threshold_param, - * with ThresholdLayer options: - * - threshold (\b optional, default 0). - * the threshold value @f$ t @f$ to which the input values are compared. - */ - explicit ThresholdLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Threshold"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \left\{ - * \begin{array}{lr} - * 0 & \mathrm{if} \; x \le t \\ - * 1 & \mathrm{if} \; x > t - * \end{array} \right. - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - /// @brief Not implemented (non-differentiable function) - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - NOT_IMPLEMENTED; - } - - Dtype threshold_; -}; - -/** - * @brief Parameterized Rectified Linear Unit non-linearity @f$ - * y_i = \max(0, x_i) + a_i \min(0, x_i) - * @f$. The differences from ReLULayer are 1) negative slopes are - * learnable though backprop and 2) negative slopes can vary across - * channels. The number of axes of input blob should be greater than or - * equal to 2. The 1st axis (0-based) is seen as channels. - */ -template -class PReLULayer : public NeuronLayer { - public: - /** - * @param param provides PReLUParameter prelu_param, - * with PReLULayer options: - * - filler (\b optional, FillerParameter, - * default {'type': constant 'value':0.25}). - * - channel_shared (\b optional, default false). - * negative slopes are shared across channels. - */ - explicit PReLULayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "PReLU"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the computed outputs for each channel @f$i@f$ @f$ - * y_i = \max(0, x_i) + a_i \min(0, x_i) - * @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the PReLU inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times ...) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) 
@f$ - * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their - * diff with gradients @f$ - * \frac{\partial E}{\partial x_i} = \left\{ - * \begin{array}{lr} - * a_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ - * \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i > 0 - * \end{array} \right. - * @f$. - * If param_propagate_down_[0] is true, it fills the diff with gradients - * @f$ - * \frac{\partial E}{\partial a_i} = \left\{ - * \begin{array}{lr} - * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ - * 0 & \mathrm{if} \; x_i > 0 - * \end{array} \right. - * @f$. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool channel_shared_; - Blob multiplier_; // dot multiplier for backward computation of params - Blob backward_buff_; // temporary buffer for backward computation - Blob bottom_memory_; // memory for in-place computation -}; - -} // namespace caffe - -#endif // CAFFE_NEURON_LAYERS_HPP_ diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp deleted file mode 100644 index 237b05d6..00000000 --- a/include/caffe/vision_layers.hpp +++ /dev/null @@ -1,659 +0,0 @@ -#ifndef CAFFE_VISION_LAYERS_HPP_ -#define CAFFE_VISION_LAYERS_HPP_ - -#include -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/common_layers.hpp" -#include "caffe/data_layers.hpp" -#include "caffe/layer.hpp" -#include "caffe/loss_layers.hpp" -#include "caffe/neuron_layers.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Abstract base class that factors out the BLAS code common to - * ConvolutionLayer and DeconvolutionLayer. - */ -template -class BaseConvolutionLayer : public Layer { - public: - explicit BaseConvolutionLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline int MinBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - virtual inline bool EqualNumBottomTopBlobs() const { return true; } - - protected: - // Helper functions that abstract away the column buffer and gemm arguments. - // The last argument in forward_cpu_gemm is so that we can skip the im2col if - // we just called weight_cpu_gemm with the same input. - void forward_cpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* output, bool skip_im2col = false); - void forward_cpu_bias(Dtype* output, const Dtype* bias); - void backward_cpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* output); - void weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype* - weights); - void backward_cpu_bias(Dtype* bias, const Dtype* input); - -#ifndef CPU_ONLY - void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights, - Dtype* output, bool skip_im2col = false); - void forward_gpu_bias(Dtype* output, const Dtype* bias); - void backward_gpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* col_output); - void weight_gpu_gemm(const Dtype* col_input, const Dtype* output, Dtype* - weights); - void backward_gpu_bias(Dtype* bias, const Dtype* input); -#endif - - /// @brief The spatial dimensions of the input. 
- inline int input_shape(int i) { - return (*bottom_shape_)[channel_axis_ + i]; - } - // reverse_dimensions should return true iff we are implementing deconv, so - // that conv helpers know which dimensions are which. - virtual bool reverse_dimensions() = 0; - // Compute height_out_ and width_out_ from other parameters. - virtual void compute_output_shape() = 0; - - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - /// @brief The spatial dimensions of the convolution input. - Blob conv_input_shape_; - /// @brief The spatial dimensions of the col_buffer. - vector col_buffer_shape_; - /// @brief The spatial dimensions of the output. - vector output_shape_; - const vector* bottom_shape_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; - int num_; - int channels_; - int group_; - int out_spatial_dim_; - int weight_offset_; - int num_output_; - bool bias_term_; - bool is_1x1_; - bool force_nd_im2col_; - - private: - // wrap im2col/col2im so we don't have to remember the (long) argument lists - inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_cpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); - } else { - im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), col_buff); - } - } - inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_cpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); - } else { - col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), data); - } - } -#ifndef CPU_ONLY - inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_gpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); - } else { - im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), - stride_.gpu_data(), col_buff); - } - } - inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_gpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); - } else { - col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), 
stride_.gpu_data(), - data); - } - } -#endif - - int num_kernels_im2col_; - int num_kernels_col2im_; - int conv_out_channels_; - int conv_in_channels_; - int conv_out_spatial_dim_; - int kernel_dim_; - int col_offset_; - int output_offset_; - - Blob col_buffer_; - Blob bias_multiplier_; -}; - -/** - * @brief Convolves the input image with a bank of learned filters, - * and (optionally) adds biases. - * - * Caffe convolves by reduction to matrix multiplication. This achieves - * high-throughput and generality of input and filter dimensions but comes at - * the cost of memory for matrices. This makes use of efficiency in BLAS. - * - * The input is "im2col" transformed to a channel K' x H x W data matrix - * for multiplication with the N x K' x H x W filter matrix to yield a - * N' x H x W output matrix that is then "col2im" restored. K' is the - * input channel * kernel height * kernel width dimension of the unrolled - * inputs so that the im2col matrix has a column for each input region to - * be filtered. col2im restores the output spatial structure by rolling up - * the output channel N' columns of the output matrix. - */ -template -class ConvolutionLayer : public BaseConvolutionLayer { - public: - /** - * @param param provides ConvolutionParameter convolution_param, - * with ConvolutionLayer options: - * - num_output. The number of filters. - * - kernel_size / kernel_h / kernel_w. The filter dimensions, given by - * kernel_size for square filters or kernel_h and kernel_w for rectangular - * filters. - * - stride / stride_h / stride_w (\b optional, default 1). The filter - * stride, given by stride_size for equal dimensions or stride_h and stride_w - * for different strides. By default the convolution is dense with stride 1. - * - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for - * convolution, given by pad for equal dimensions or pad_h and pad_w for - * different padding. Input padding is computed implicitly instead of - * actually padding. - * - group (\b optional, default 1). The number of filter groups. Group - * convolution is a method for reducing parameterization by selectively - * connecting input and output channels. The input and output channel dimensions must be divisible - * by the number of groups. For group @f$ \geq 1 @f$, the - * convolutional filters' input and output channels are separated s.t. each - * group takes 1 / group of the input channels and makes 1 / group of the - * output channels. Concretely 4 input channels, 8 output channels, and - * 2 groups separate input channels 1-2 and output channels 1-4 into the - * first group and input channels 3-4 and output channels 5-8 into the second - * group. - * - bias_term (\b optional, default true). Whether to have a bias. - * - engine: convolution has CAFFE (matrix multiplication) and CUDNN (library - * kernels + stream parallelism) engines. 
- */ - explicit ConvolutionLayer(const LayerParameter& param) - : BaseConvolutionLayer(param) {} - - virtual inline const char* type() const { return "Convolution"; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual inline bool reverse_dimensions() { return false; } - virtual void compute_output_shape(); -}; - -/** - * @brief Convolve the input with a bank of learned filters, and (optionally) - * add biases, treating filters and convolution parameters in the - * opposite sense as ConvolutionLayer. - * - * ConvolutionLayer computes each output value by dotting an input window with - * a filter; DeconvolutionLayer multiplies each input value by a filter - * elementwise, and sums over the resulting output windows. In other words, - * DeconvolutionLayer is ConvolutionLayer with the forward and backward passes - * reversed. DeconvolutionLayer reuses ConvolutionParameter for its - * parameters, but they take the opposite sense as in ConvolutionLayer (so - * padding is removed from the output rather than added to the input, and - * stride results in upsampling rather than downsampling). - */ -template -class DeconvolutionLayer : public BaseConvolutionLayer { - public: - explicit DeconvolutionLayer(const LayerParameter& param) - : BaseConvolutionLayer(param) {} - - virtual inline const char* type() const { return "Deconvolution"; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual inline bool reverse_dimensions() { return true; } - virtual void compute_output_shape(); -}; - -#ifdef USE_CUDNN -/* - * @brief cuDNN implementation of ConvolutionLayer. - * Fallback to ConvolutionLayer for CPU mode. - * - * cuDNN accelerates convolution through forward kernels for filtering and bias - * plus backward kernels for the gradient w.r.t. the filters, biases, and - * inputs. Caffe + cuDNN further speeds up the computation through forward - * parallelism across groups and backward parallelism across gradients. - * - * The CUDNN engine does not have memory overhead for matrix buffers. For many - * input and filter regimes the CUDNN engine is faster than the CAFFE engine, - * but for fully-convolutional models and large inputs the CAFFE engine can be - * faster as long as it fits in memory. 
-*/ -template -class CuDNNConvolutionLayer : public ConvolutionLayer { - public: - explicit CuDNNConvolutionLayer(const LayerParameter& param) - : ConvolutionLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNConvolutionLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t* handle_; - cudaStream_t* stream_; - - // algorithms for forward and backwards convolutions - cudnnConvolutionFwdAlgo_t *fwd_algo_; - cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_; - cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_; - - vector bottom_descs_, top_descs_; - cudnnTensorDescriptor_t bias_desc_; - cudnnFilterDescriptor_t filter_desc_; - vector conv_descs_; - int bottom_offset_, top_offset_, bias_offset_; - - size_t *workspace_fwd_sizes_; - size_t *workspace_bwd_data_sizes_; - size_t *workspace_bwd_filter_sizes_; - size_t workspaceSizeInBytes; // size of underlying storage - void *workspaceData; // underlying storage - void **workspace; // aliases into workspaceData -}; -#endif - -/** - * @brief A helper for image operations that rearranges image regions into - * column vectors. Used by ConvolutionLayer to perform convolution - * by matrix multiplication. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class Im2colLayer : public Layer { - public: - explicit Im2colLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Im2col"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; - int num_; - int channels_; - - bool force_nd_im2col_; -}; - -// Forward declare PoolingLayer and SplitLayer for use in LRNLayer. -template class PoolingLayer; -template class SplitLayer; - -/** - * @brief Normalize the input in a local region across or within feature maps. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
- */ -template -class LRNLayer : public Layer { - public: - explicit LRNLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "LRN"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - virtual void CrossChannelForward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void CrossChannelForward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void WithinChannelForward(const vector*>& bottom, - const vector*>& top); - virtual void CrossChannelBackward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void CrossChannelBackward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void WithinChannelBackward(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int size_; - int pre_pad_; - Dtype alpha_; - Dtype beta_; - Dtype k_; - int num_; - int channels_; - int height_; - int width_; - - // Fields used for normalization ACROSS_CHANNELS - // scale_ stores the intermediate summing results - Blob scale_; - - // Fields used for normalization WITHIN_CHANNEL - shared_ptr > split_layer_; - vector*> split_top_vec_; - shared_ptr > square_layer_; - Blob square_input_; - Blob square_output_; - vector*> square_bottom_vec_; - vector*> square_top_vec_; - shared_ptr > pool_layer_; - Blob pool_output_; - vector*> pool_top_vec_; - shared_ptr > power_layer_; - Blob power_output_; - vector*> power_top_vec_; - shared_ptr > product_layer_; - Blob product_input_; - vector*> product_bottom_vec_; -}; - -#ifdef USE_CUDNN - -template -class CuDNNLRNLayer : public LRNLayer { - public: - explicit CuDNNLRNLayer(const LayerParameter& param) - : LRNLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNLRNLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnLRNDescriptor_t norm_desc_; - cudnnTensorDescriptor_t bottom_desc_, top_desc_; - - int size_; - Dtype alpha_, beta_, k_; -}; - -template -class CuDNNLCNLayer : public LRNLayer { - public: - explicit CuDNNLCNLayer(const LayerParameter& param) - : LRNLayer(param), handles_setup_(false), tempDataSize(0), - tempData1(NULL), tempData2(NULL) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNLCNLayer(); - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - 
cudnnLRNDescriptor_t norm_desc_; - cudnnTensorDescriptor_t bottom_desc_, top_desc_; - - int size_, pre_pad_; - Dtype alpha_, beta_, k_; - - size_t tempDataSize; - void *tempData1, *tempData2; -}; - -#endif - -/** - * @brief Pools the input image by taking the max, average, etc. within regions. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class PoolingLayer : public Layer { - public: - explicit PoolingLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Pooling"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - // MAX POOL layers can output an extra top blob for the mask; - // others can only output the pooled inputs. - virtual inline int MaxTopBlobs() const { - return (this->layer_param_.pooling_param().pool() == - PoolingParameter_PoolMethod_MAX) ? 2 : 1; - } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int kernel_h_, kernel_w_; - int stride_h_, stride_w_; - int pad_h_, pad_w_; - int channels_; - int height_, width_; - int pooled_height_, pooled_width_; - bool global_pooling_; - Blob rand_idx_; - Blob max_idx_; -}; - -#ifdef USE_CUDNN -/* - * @brief cuDNN implementation of PoolingLayer. - * Fallback to PoolingLayer for CPU mode. -*/ -template -class CuDNNPoolingLayer : public PoolingLayer { - public: - explicit CuDNNPoolingLayer(const LayerParameter& param) - : PoolingLayer(param), handles_setup_(false) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual ~CuDNNPoolingLayer(); - // Currently, cuDNN does not support the extra top blob. - virtual inline int MinTopBlobs() const { return -1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - bool handles_setup_; - cudnnHandle_t handle_; - cudnnTensorDescriptor_t bottom_desc_, top_desc_; - cudnnPoolingDescriptor_t pooling_desc_; - cudnnPoolingMode_t mode_; -}; -#endif - -/** - * @brief Does spatial pyramid pooling on the input image - * by taking the max, average, etc. within regions - * so that the result vector of different sized - * images are of the same size. 
- */ -template -class SPPLayer : public Layer { - public: - explicit SPPLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "SPP"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - // calculates the kernel and stride dimensions for the pooling layer, - // returns a correctly configured LayerParameter for a PoolingLayer - virtual LayerParameter GetPoolingParam(const int pyramid_level, - const int bottom_h, const int bottom_w, const SPPParameter spp_param); - - int pyramid_height_; - int bottom_h_, bottom_w_; - int num_; - int channels_; - int kernel_h_, kernel_w_; - int pad_h_, pad_w_; - bool reshaped_first_time_; - - /// the internal Split layer that feeds the pooling layers - shared_ptr > split_layer_; - /// top vector holder used in call to the underlying SplitLayer::Forward - vector*> split_top_vec_; - /// bottom vector holder used in call to the underlying PoolingLayer::Forward - vector*>*> pooling_bottom_vecs_; - /// the internal Pooling layers of different kernel sizes - vector > > pooling_layers_; - /// top vector holders used in call to the underlying PoolingLayer::Forward - vector*>*> pooling_top_vecs_; - /// pooling_outputs stores the outputs of the PoolingLayers - vector*> pooling_outputs_; - /// the internal Flatten layers that the Pooling layers feed into - vector*> flatten_layers_; - /// top vector holders used in call to the underlying FlattenLayer::Forward - vector*>*> flatten_top_vecs_; - /// flatten_outputs stores the outputs of the FlattenLayers - vector*> flatten_outputs_; - /// bottom vector holder used in call to the underlying ConcatLayer::Forward - vector*> concat_bottom_vec_; - /// the internal Concat layers that the Flatten layers feed into - shared_ptr > concat_layer_; -}; - -} // namespace caffe - -#endif // CAFFE_VISION_LAYERS_HPP_ diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 1a318f8d..69d55332 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -15,7 +15,8 @@ #include // NOLINT #include "caffe/caffe.hpp" -#include "caffe/python_layer.hpp" +#include "caffe/layers/memory_data_layer.hpp" +#include "caffe/layers/python_layer.hpp" #include "caffe/sgd_solvers.hpp" // Temporary solution for numpy < 1.7 versions: old macro, no promises. 
diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index 16378203..9f019bbf 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -4,8 +4,8 @@ #include #include "caffe/common.hpp" -#include "caffe/data_layers.hpp" #include "caffe/data_reader.hpp" +#include "caffe/layers/data_layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 417ffe98..76d851af 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -7,11 +7,28 @@ #include "caffe/layer.hpp" #include "caffe/layer_factory.hpp" +#include "caffe/layers/conv_layer.hpp" +#include "caffe/layers/lrn_layer.hpp" +#include "caffe/layers/pooling_layer.hpp" +#include "caffe/layers/relu_layer.hpp" +#include "caffe/layers/sigmoid_layer.hpp" +#include "caffe/layers/softmax_layer.hpp" +#include "caffe/layers/tanh_layer.hpp" #include "caffe/proto/caffe.pb.h" -#include "caffe/vision_layers.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_conv_layer.hpp" +#include "caffe/layers/cudnn_lcn_layer.hpp" +#include "caffe/layers/cudnn_lrn_layer.hpp" +#include "caffe/layers/cudnn_pooling_layer.hpp" +#include "caffe/layers/cudnn_relu_layer.hpp" +#include "caffe/layers/cudnn_sigmoid_layer.hpp" +#include "caffe/layers/cudnn_softmax_layer.hpp" +#include "caffe/layers/cudnn_tanh_layer.hpp" +#endif #ifdef WITH_PYTHON_LAYER -#include "caffe/python_layer.hpp" +#include "caffe/layers/python_layer.hpp" #endif namespace caffe { diff --git a/src/caffe/layers/absval_layer.cpp b/src/caffe/layers/absval_layer.cpp index 7e552352..855bf0bf 100644 --- a/src/caffe/layers/absval_layer.cpp +++ b/src/caffe/layers/absval_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/absval_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/absval_layer.cu b/src/caffe/layers/absval_layer.cu index b5a6c25a..6c927e6f 100644 --- a/src/caffe/layers/absval_layer.cu +++ b/src/caffe/layers/absval_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/absval_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index ae2df1f1..4eddbb5c 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -2,7 +2,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/accuracy_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp index 354d83f7..2d3d6f2d 100644 --- a/src/caffe/layers/argmax_layer.cpp +++ b/src/caffe/layers/argmax_layer.cpp @@ -3,7 +3,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/argmax_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index 316cb0fd..f6f14cd0 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -2,9 +2,9 @@ #include #include "caffe/filler.hpp" +#include "caffe/layers/base_conv_layer.hpp" #include "caffe/util/im2col.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index d77f91c9..989319f1 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ 
b/src/caffe/layers/base_data_layer.cpp @@ -1,7 +1,13 @@ #include #include -#include "caffe/data_layers.hpp" +#include "caffe/blob.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/blocking_queue.hpp" namespace caffe { diff --git a/src/caffe/layers/base_data_layer.cu b/src/caffe/layers/base_data_layer.cu index ff6e412a..4056d36a 100644 --- a/src/caffe/layers/base_data_layer.cu +++ b/src/caffe/layers/base_data_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/data_layers.hpp" +#include "caffe/layers/base_data_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index b5c91b5e..a69d8f99 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/batch_norm_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu index 2a6cac54..c21713c8 100644 --- a/src/caffe/layers/batch_norm_layer.cu +++ b/src/caffe/layers/batch_norm_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/batch_norm_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_reindex_layer.cpp b/src/caffe/layers/batch_reindex_layer.cpp index 3d3ce32c..b14e56f7 100644 --- a/src/caffe/layers/batch_reindex_layer.cpp +++ b/src/caffe/layers/batch_reindex_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/batch_reindex_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/batch_reindex_layer.cu b/src/caffe/layers/batch_reindex_layer.cu index 0b5ccf09..83054d36 100644 --- a/src/caffe/layers/batch_reindex_layer.cu +++ b/src/caffe/layers/batch_reindex_layer.cu @@ -2,7 +2,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/batch_reindex_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp index 1e422a54..448d86d7 100644 --- a/src/caffe/layers/bnll_layer.cpp +++ b/src/caffe/layers/bnll_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/bnll_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/bnll_layer.cu b/src/caffe/layers/bnll_layer.cu index 3e328ef7..8df8ef09 100644 --- a/src/caffe/layers/bnll_layer.cu +++ b/src/caffe/layers/bnll_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/bnll_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 14cbfb11..580bd479 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/concat_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index e1e9449e..a3a0bf6f 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/concat_layer.hpp" #include 
"caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 45facd4a..599e178e 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/contrastive_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index ee278407..fd7d67cc 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/contrastive_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index efd69d45..cff09783 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/conv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index a534b356..d06e4b62 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/conv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index c82cb7ef..1987fb09 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -2,7 +2,7 @@ #include #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_conv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index f2df4aa5..1990e932 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_conv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lcn_layer.cpp b/src/caffe/layers/cudnn_lcn_layer.cpp index 4c700786..9c09bf26 100644 --- a/src/caffe/layers/cudnn_lcn_layer.cpp +++ b/src/caffe/layers/cudnn_lcn_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_lcn_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lcn_layer.cu b/src/caffe/layers/cudnn_lcn_layer.cu index e79c7458..b44ef473 100644 --- a/src/caffe/layers/cudnn_lcn_layer.cu +++ b/src/caffe/layers/cudnn_lcn_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_lcn_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lrn_layer.cpp b/src/caffe/layers/cudnn_lrn_layer.cpp index a03db3bd..0495b802 100644 --- a/src/caffe/layers/cudnn_lrn_layer.cpp +++ b/src/caffe/layers/cudnn_lrn_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_lrn_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_lrn_layer.cu b/src/caffe/layers/cudnn_lrn_layer.cu index 327e44b4..ca647f3c 100644 --- a/src/caffe/layers/cudnn_lrn_layer.cu +++ b/src/caffe/layers/cudnn_lrn_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_lrn_layer.hpp" namespace 
caffe { diff --git a/src/caffe/layers/cudnn_pooling_layer.cpp b/src/caffe/layers/cudnn_pooling_layer.cpp index 5f995d45..24f14780 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cpp +++ b/src/caffe/layers/cudnn_pooling_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_pooling_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu index 9aa39ed8..6f00195f 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cu +++ b/src/caffe/layers/cudnn_pooling_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_pooling_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index e6b6d5a9..c86c6907 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_relu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 2a53a49b..9f617183 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_relu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index 4b489fa5..ccb955cd 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_sigmoid_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu index 9de5c742..e2a4b460 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_sigmoid_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index f5cd0450..6440df98 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -3,7 +3,7 @@ #include "thrust/device_vector.h" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_softmax_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_softmax_layer.cu b/src/caffe/layers/cudnn_softmax_layer.cu index c270202f..7283eb71 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cu +++ b/src/caffe/layers/cudnn_softmax_layer.cu @@ -3,7 +3,7 @@ #include "thrust/device_vector.h" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/cudnn_softmax_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index 46296818..1a564182 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/cudnn_tanh_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index 84f784b3..89df28a3 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -1,7 +1,7 @@ #ifdef USE_CUDNN #include -#include 
"caffe/neuron_layers.hpp" +#include "caffe/layers/cudnn_tanh_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 49ac858e..66e6301f 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -5,8 +5,8 @@ #include -#include "caffe/data_layers.hpp" -#include "caffe/proto/caffe.pb.h" +#include "caffe/data_transformer.hpp" +#include "caffe/layers/data_layer.hpp" #include "caffe/util/benchmark.hpp" namespace caffe { diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index 5038b638..275c0562 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/deconv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu index 0e8e2ede..22676322 100644 --- a/src/caffe/layers/deconv_layer.cu +++ b/src/caffe/layers/deconv_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/vision_layers.hpp" +#include "caffe/layers/deconv_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index eb7a8a9a..9cb64d97 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -2,7 +2,7 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/dropout_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index 028fc026..186c10ca 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -1,11 +1,10 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/dropout_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { - template __global__ void DropoutForward(const int n, const Dtype* in, const unsigned int* mask, const unsigned int threshold, const float scale, @@ -68,5 +67,4 @@ void DropoutLayer::Backward_gpu(const vector*>& top, INSTANTIATE_LAYER_GPU_FUNCS(DropoutLayer); - } // namespace caffe diff --git a/src/caffe/layers/dummy_data_layer.cpp b/src/caffe/layers/dummy_data_layer.cpp index ab0478c8..e382bfea 100644 --- a/src/caffe/layers/dummy_data_layer.cpp +++ b/src/caffe/layers/dummy_data_layer.cpp @@ -1,7 +1,7 @@ #include -#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/dummy_data_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index 7924fbee..21256166 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/eltwise_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/eltwise_layer.cu b/src/caffe/layers/eltwise_layer.cu index 01404209..c142852e 100644 --- a/src/caffe/layers/eltwise_layer.cu +++ b/src/caffe/layers/eltwise_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/eltwise_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/embed_layer.cpp b/src/caffe/layers/embed_layer.cpp index 52704a06..36b40d70 100644 --- a/src/caffe/layers/embed_layer.cpp +++ b/src/caffe/layers/embed_layer.cpp @@ -1,7 +1,7 @@ #include -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include 
"caffe/layers/embed_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/embed_layer.cu b/src/caffe/layers/embed_layer.cu index cd4b40f5..6324a3a8 100644 --- a/src/caffe/layers/embed_layer.cu +++ b/src/caffe/layers/embed_layer.cu @@ -1,7 +1,7 @@ #include -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/embed_layer.hpp" #include "caffe/util/gpu_util.cuh" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp index 7338953d..300d991e 100644 --- a/src/caffe/layers/euclidean_loss_layer.cpp +++ b/src/caffe/layers/euclidean_loss_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/euclidean_loss_layer.cu b/src/caffe/layers/euclidean_loss_layer.cu index 1aa79bd5..4c221b64 100644 --- a/src/caffe/layers/euclidean_loss_layer.cu +++ b/src/caffe/layers/euclidean_loss_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/euclidean_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/exp_layer.cpp b/src/caffe/layers/exp_layer.cpp index f85692d6..1f4a309f 100644 --- a/src/caffe/layers/exp_layer.cpp +++ b/src/caffe/layers/exp_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/exp_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/exp_layer.cu b/src/caffe/layers/exp_layer.cu index 9e24bbee..61f7f11d 100644 --- a/src/caffe/layers/exp_layer.cu +++ b/src/caffe/layers/exp_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/exp_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/filter_layer.cpp b/src/caffe/layers/filter_layer.cpp index e8b62a5d..e226c0b6 100644 --- a/src/caffe/layers/filter_layer.cpp +++ b/src/caffe/layers/filter_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/filter_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/filter_layer.cu b/src/caffe/layers/filter_layer.cu index 746e91c9..b01b16f8 100644 --- a/src/caffe/layers/filter_layer.cu +++ b/src/caffe/layers/filter_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/filter_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index d831fb5c..651507e2 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/flatten_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index c765fa02..2f13dc64 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -14,7 +14,7 @@ #include "hdf5_hl.h" #include "stdint.h" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_data_layer.hpp" #include "caffe/util/hdf5.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 6ac499c6..595d2230 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ 
b/src/caffe/layers/hdf5_data_layer.cu @@ -9,7 +9,7 @@ TODO: #include "hdf5.h" #include "hdf5_hl.h" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_data_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_output_layer.cpp b/src/caffe/layers/hdf5_output_layer.cpp index dbde65da..f8f1edcd 100644 --- a/src/caffe/layers/hdf5_output_layer.cpp +++ b/src/caffe/layers/hdf5_output_layer.cpp @@ -3,7 +3,7 @@ #include "hdf5.h" #include "hdf5_hl.h" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_output_layer.hpp" #include "caffe/util/hdf5.hpp" namespace caffe { diff --git a/src/caffe/layers/hdf5_output_layer.cu b/src/caffe/layers/hdf5_output_layer.cu index ca8f2616..c1685cd3 100644 --- a/src/caffe/layers/hdf5_output_layer.cu +++ b/src/caffe/layers/hdf5_output_layer.cu @@ -3,7 +3,7 @@ #include "hdf5.h" #include "hdf5_hl.h" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_output_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/hinge_loss_layer.cpp b/src/caffe/layers/hinge_loss_layer.cpp index a88c8775..374aed3c 100644 --- a/src/caffe/layers/hinge_loss_layer.cpp +++ b/src/caffe/layers/hinge_loss_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/hinge_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index f3b0f710..c12e4f52 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -1,7 +1,7 @@ #include +#include "caffe/layers/im2col_layer.hpp" #include "caffe/util/im2col.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index 4633628b..517b4220 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -1,7 +1,7 @@ #include +#include "caffe/layers/im2col_layer.hpp" #include "caffe/util/im2col.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 9a7df5a7..62fda4ac 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -7,7 +7,9 @@ #include #include -#include "caffe/data_layers.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/layers/image_data_layer.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/layers/infogain_loss_layer.cpp b/src/caffe/layers/infogain_loss_layer.cpp index 88bd8aaf..624d3118 100644 --- a/src/caffe/layers/infogain_loss_layer.cpp +++ b/src/caffe/layers/infogain_loss_layer.cpp @@ -2,7 +2,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/infogain_loss_layer.hpp" #include "caffe/util/io.hpp" namespace caffe { diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 274744ea..d9088805 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -1,7 +1,7 @@ #include -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/inner_product_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index e91e94fc..dc25aa33 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ 
b/src/caffe/layers/inner_product_layer.cu @@ -1,7 +1,7 @@ #include -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/inner_product_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/log_layer.cpp b/src/caffe/layers/log_layer.cpp index a1876b9d..c70a795c 100644 --- a/src/caffe/layers/log_layer.cpp +++ b/src/caffe/layers/log_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/log_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/log_layer.cu b/src/caffe/layers/log_layer.cu index 055b713b..db466dba 100644 --- a/src/caffe/layers/log_layer.cu +++ b/src/caffe/layers/log_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/log_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index c10466db..c0b7a862 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/loss_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index cc561811..210525e2 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -1,7 +1,7 @@ #include +#include "caffe/layers/lrn_layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index 4523d410..26e619c7 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -1,7 +1,7 @@ #include +#include "caffe/layers/lrn_layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index 13a3d9f6..82909874 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -4,7 +4,7 @@ #include -#include "caffe/data_layers.hpp" +#include "caffe/layers/memory_data_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp index 59745923..65664998 100644 --- a/src/caffe/layers/multinomial_logistic_loss_layer.cpp +++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp @@ -2,7 +2,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/multinomial_logistic_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/mvn_layer.cpp b/src/caffe/layers/mvn_layer.cpp index 0e730144..8fe4ef8c 100644 --- a/src/caffe/layers/mvn_layer.cpp +++ b/src/caffe/layers/mvn_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/mvn_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/mvn_layer.cu b/src/caffe/layers/mvn_layer.cu index b7e3b3ce..739293be 100644 --- a/src/caffe/layers/mvn_layer.cu +++ b/src/caffe/layers/mvn_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/mvn_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/neuron_layer.cpp b/src/caffe/layers/neuron_layer.cpp index 1dcb2c06..d7b5f389 100644 --- a/src/caffe/layers/neuron_layer.cpp +++ 
b/src/caffe/layers/neuron_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/neuron_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 3a7de42c..90897db0 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -2,8 +2,8 @@ #include #include +#include "caffe/layers/pooling_layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index 5e94ce2b..1ea46cc8 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -2,8 +2,8 @@ #include #include +#include "caffe/layers/pooling_layer.hpp" #include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { diff --git a/src/caffe/layers/power_layer.cpp b/src/caffe/layers/power_layer.cpp index 6304fadd..d99b77ca 100644 --- a/src/caffe/layers/power_layer.cpp +++ b/src/caffe/layers/power_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/power_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/power_layer.cu b/src/caffe/layers/power_layer.cu index 680faad4..07711c42 100644 --- a/src/caffe/layers/power_layer.cu +++ b/src/caffe/layers/power_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/power_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/prelu_layer.cpp b/src/caffe/layers/prelu_layer.cpp index b5a294e1..853181bd 100644 --- a/src/caffe/layers/prelu_layer.cpp +++ b/src/caffe/layers/prelu_layer.cpp @@ -2,7 +2,9 @@ #include #include "caffe/filler.hpp" -#include "caffe/neuron_layers.hpp" + +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/layers/prelu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu index 992cd885..aeb80eac 100644 --- a/src/caffe/layers/prelu_layer.cu +++ b/src/caffe/layers/prelu_layer.cu @@ -1,7 +1,8 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/layers/prelu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/reduction_layer.cpp b/src/caffe/layers/reduction_layer.cpp index 6b7925e3..fa46487e 100644 --- a/src/caffe/layers/reduction_layer.cpp +++ b/src/caffe/layers/reduction_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/reduction_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/reduction_layer.cu b/src/caffe/layers/reduction_layer.cu index a9a8c8d9..4a6b2b73 100644 --- a/src/caffe/layers/reduction_layer.cu +++ b/src/caffe/layers/reduction_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/reduction_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index 93d09026..92a729c8 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/relu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index c18ab61f..4bf15b3a 100644 --- a/src/caffe/layers/relu_layer.cu +++ 
b/src/caffe/layers/relu_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/relu_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 8659049b..82339f76 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/reshape_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index 98588637..10ac9470 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/sigmoid_cross_entropy_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 48dbec41..046cb9d3 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/sigmoid_cross_entropy_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp index d4a3f877..85fd9676 100644 --- a/src/caffe/layers/sigmoid_layer.cpp +++ b/src/caffe/layers/sigmoid_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/sigmoid_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu index 5730636e..184c61ed 100644 --- a/src/caffe/layers/sigmoid_layer.cu +++ b/src/caffe/layers/sigmoid_layer.cu @@ -1,7 +1,7 @@ #include #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/sigmoid_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/silence_layer.cpp b/src/caffe/layers/silence_layer.cpp index 3974f5d4..b2f85c52 100644 --- a/src/caffe/layers/silence_layer.cpp +++ b/src/caffe/layers/silence_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/silence_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/silence_layer.cu b/src/caffe/layers/silence_layer.cu index c49ecb23..3494f6f6 100644 --- a/src/caffe/layers/silence_layer.cu +++ b/src/caffe/layers/silence_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/silence_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index f368a249..759beafe 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/slice_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index d555f7d0..1be3a797 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/slice_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_layer.cpp 
b/src/caffe/layers/softmax_layer.cpp index 8ae7d49c..f60e9b03 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -1,7 +1,7 @@ #include #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/softmax_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index a620fcc8..7a9e6833 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -4,7 +4,7 @@ #include "thrust/device_vector.h" -#include "caffe/common_layers.hpp" +#include "caffe/layers/softmax_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 3cdef82a..dddb7606 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -2,7 +2,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/softmax_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 4753a1ec..660e1b39 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -2,7 +2,7 @@ #include #include -#include "caffe/loss_layers.hpp" +#include "caffe/layers/softmax_loss_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index 5333e578..1a27a9af 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/split_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/split_layer.cu b/src/caffe/layers/split_layer.cu index 73d04c98..bec9987c 100644 --- a/src/caffe/layers/split_layer.cu +++ b/src/caffe/layers/split_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/split_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/spp_layer.cpp b/src/caffe/layers/spp_layer.cpp index 2ef4ac7a..b9af8e8a 100644 --- a/src/caffe/layers/spp_layer.cpp +++ b/src/caffe/layers/spp_layer.cpp @@ -1,7 +1,12 @@ #include #include -#include "caffe/vision_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/concat_layer.hpp" +#include "caffe/layers/flatten_layer.hpp" +#include "caffe/layers/pooling_layer.hpp" +#include "caffe/layers/split_layer.hpp" +#include "caffe/layers/spp_layer.hpp" namespace caffe { @@ -217,7 +222,6 @@ void SPPLayer::Backward_cpu(const vector*>& top, split_layer_->Backward(split_top_vec_, propagate_down, bottom); } - INSTANTIATE_CLASS(SPPLayer); REGISTER_LAYER_CLASS(SPP); diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp index 9d1cac76..184e926d 100644 --- a/src/caffe/layers/tanh_layer.cpp +++ b/src/caffe/layers/tanh_layer.cpp @@ -3,7 +3,7 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/tanh_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu index d87bccec..cbfc178e 100644 --- a/src/caffe/layers/tanh_layer.cu +++ b/src/caffe/layers/tanh_layer.cu @@ -3,7 +3,7 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/tanh_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/threshold_layer.cpp 
b/src/caffe/layers/threshold_layer.cpp index d6514736..63822ee5 100644 --- a/src/caffe/layers/threshold_layer.cpp +++ b/src/caffe/layers/threshold_layer.cpp @@ -1,7 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" - +#include "caffe/layers/threshold_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/threshold_layer.cu b/src/caffe/layers/threshold_layer.cu index 1cd62d99..b0b06655 100644 --- a/src/caffe/layers/threshold_layer.cu +++ b/src/caffe/layers/threshold_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/threshold_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/tile_layer.cpp b/src/caffe/layers/tile_layer.cpp index 581546c4..cf0c1870 100644 --- a/src/caffe/layers/tile_layer.cpp +++ b/src/caffe/layers/tile_layer.cpp @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/tile_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/tile_layer.cu b/src/caffe/layers/tile_layer.cu index fdf96090..282049eb 100644 --- a/src/caffe/layers/tile_layer.cu +++ b/src/caffe/layers/tile_layer.cu @@ -1,6 +1,6 @@ #include -#include "caffe/common_layers.hpp" +#include "caffe/layers/tile_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 3f937bc9..4ca8315d 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -12,7 +12,10 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" -#include "caffe/data_layers.hpp" +#include "caffe/data_transformer.hpp" +#include "caffe/internal_thread.hpp" +#include "caffe/layers/base_data_layer.hpp" +#include "caffe/layers/window_data_layer.hpp" #include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index 5960a666..6fe808bd 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/accuracy_layer.hpp" #include "caffe/util/rng.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp index f3f2094e..472e6652 100644 --- a/src/caffe/test/test_argmax_layer.cpp +++ b/src/caffe/test/test_argmax_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/argmax_layer.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_batch_norm_layer.cpp b/src/caffe/test/test_batch_norm_layer.cpp index 22b9667f..936b93a1 100644 --- a/src/caffe/test/test_batch_norm_layer.cpp +++ b/src/caffe/test/test_batch_norm_layer.cpp @@ -6,8 +6,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/batch_norm_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_batch_reindex_layer.cpp b/src/caffe/test/test_batch_reindex_layer.cpp index 17e47f05..9ea1a2f6 100644 --- a/src/caffe/test/test_batch_reindex_layer.cpp +++ b/src/caffe/test/test_batch_reindex_layer.cpp @@ -4,8 +4,8 
@@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/batch_reindex_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 8ba51f4f..23c1e8c1 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/concat_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_contrastive_loss_layer.cpp b/src/caffe/test/test_contrastive_loss_layer.cpp index 95901f14..2fa055ee 100644 --- a/src/caffe/test/test_contrastive_loss_layer.cpp +++ b/src/caffe/test/test_contrastive_loss_layer.cpp @@ -7,7 +7,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/contrastive_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index b4747357..e2d43f31 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -5,7 +5,11 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/conv_layer.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_conv_layer.hpp" +#endif #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 9e03954a..3e8d113d 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -7,8 +7,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/data_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" diff --git a/src/caffe/test/test_deconvolution_layer.cpp b/src/caffe/test/test_deconvolution_layer.cpp index b473dbb9..c4b09ad5 100644 --- a/src/caffe/test/test_deconvolution_layer.cpp +++ b/src/caffe/test/test_deconvolution_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/deconv_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_dummy_data_layer.cpp b/src/caffe/test/test_dummy_data_layer.cpp index c9ed38db..1a01ca85 100644 --- a/src/caffe/test/test_dummy_data_layer.cpp +++ b/src/caffe/test/test_dummy_data_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/layers/dummy_data_layer.hpp" #include "caffe/proto/caffe.pb.h" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp index 3b56c5ca..c06e3baa 100644 --- a/src/caffe/test/test_eltwise_layer.cpp +++ b/src/caffe/test/test_eltwise_layer.cpp @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include 
"caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/eltwise_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp index 0f4caf15..acd4b0f6 100644 --- a/src/caffe/test/test_embed_layer.cpp +++ b/src/caffe/test/test_embed_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/embed_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_euclidean_loss_layer.cpp b/src/caffe/test/test_euclidean_loss_layer.cpp index 9dc14de4..f253f9fd 100644 --- a/src/caffe/test/test_euclidean_loss_layer.cpp +++ b/src/caffe/test/test_euclidean_loss_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/euclidean_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_filter_layer.cpp b/src/caffe/test/test_filter_layer.cpp index a2d0c293..9ea2b8b2 100644 --- a/src/caffe/test/test_filter_layer.cpp +++ b/src/caffe/test/test_filter_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/filter_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_flatten_layer.cpp b/src/caffe/test/test_flatten_layer.cpp index 5d1caac2..d929ac7a 100644 --- a/src/caffe/test/test_flatten_layer.cpp +++ b/src/caffe/test/test_flatten_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/flatten_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index adc27df4..3833ebff 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_output_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/hdf5.hpp" #include "caffe/util/io.hpp" diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 7169e7bf..8884ce95 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -1,11 +1,13 @@ #include #include +#include "hdf5.h" + #include "gtest/gtest.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/data_layers.hpp" +#include "caffe/layers/hdf5_data_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_hinge_loss_layer.cpp b/src/caffe/test/test_hinge_loss_layer.cpp index dfdd01d0..8bf89fa6 100644 --- a/src/caffe/test/test_hinge_loss_layer.cpp +++ b/src/caffe/test/test_hinge_loss_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/hinge_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" 
#include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index bafcacf7..3f97cf6d 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -5,8 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/im2col_layer.hpp" #include "caffe/util/im2col.hpp" -#include "caffe/vision_layers.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index ec055b20..8274dd48 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/im2col_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index 77690245..a4080ccd 100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -7,8 +7,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/image_data_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/io.hpp" diff --git a/src/caffe/test/test_infogain_loss_layer.cpp b/src/caffe/test/test_infogain_loss_layer.cpp index b2a6754f..a24ac683 100644 --- a/src/caffe/test/test_infogain_loss_layer.cpp +++ b/src/caffe/test/test_infogain_loss_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/infogain_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_inner_product_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp index 1ad2c97e..b888b510 100644 --- a/src/caffe/test/test_inner_product_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/inner_product_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp index bd1c4fe8..4c97b1ae 100644 --- a/src/caffe/test/test_lrn_layer.cpp +++ b/src/caffe/test/test_lrn_layer.cpp @@ -6,7 +6,12 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/lrn_layer.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_lcn_layer.hpp" +#include "caffe/layers/cudnn_lrn_layer.hpp" +#endif #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_maxpool_dropout_layers.cpp b/src/caffe/test/test_maxpool_dropout_layers.cpp index 8fc944f3..4f0e20ac 100644 --- a/src/caffe/test/test_maxpool_dropout_layers.cpp +++ b/src/caffe/test/test_maxpool_dropout_layers.cpp @@ -5,7 +5,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/dropout_layer.hpp" +#include "caffe/layers/pooling_layer.hpp" #include 
"caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_memory_data_layer.cpp b/src/caffe/test/test_memory_data_layer.cpp index 7269a4d4..7998bc18 100644 --- a/src/caffe/test/test_memory_data_layer.cpp +++ b/src/caffe/test/test_memory_data_layer.cpp @@ -5,8 +5,8 @@ #include #include -#include "caffe/data_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/memory_data_layer.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp index 0404aa25..8cc21022 100644 --- a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp +++ b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/multinomial_logistic_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_mvn_layer.cpp b/src/caffe/test/test_mvn_layer.cpp index e9a7d54c..28a762d2 100644 --- a/src/caffe/test/test_mvn_layer.cpp +++ b/src/caffe/test/test_mvn_layer.cpp @@ -2,8 +2,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/mvn_layer.hpp" #include "google/protobuf/text_format.h" #include "gtest/gtest.h" diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index b333fdee..21441b41 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -6,9 +6,26 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" -#include "caffe/neuron_layers.hpp" + +#include "caffe/layers/absval_layer.hpp" +#include "caffe/layers/bnll_layer.hpp" +#include "caffe/layers/dropout_layer.hpp" +#include "caffe/layers/exp_layer.hpp" +#include "caffe/layers/inner_product_layer.hpp" +#include "caffe/layers/log_layer.hpp" +#include "caffe/layers/power_layer.hpp" +#include "caffe/layers/prelu_layer.hpp" +#include "caffe/layers/relu_layer.hpp" +#include "caffe/layers/sigmoid_layer.hpp" +#include "caffe/layers/tanh_layer.hpp" +#include "caffe/layers/threshold_layer.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_relu_layer.hpp" +#include "caffe/layers/cudnn_sigmoid_layer.hpp" +#include "caffe/layers/cudnn_tanh_layer.hpp" +#endif #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_pooling_layer.cpp b/src/caffe/test/test_pooling_layer.cpp index 9e986e66..bb95cae0 100644 --- a/src/caffe/test/test_pooling_layer.cpp +++ b/src/caffe/test/test_pooling_layer.cpp @@ -5,7 +5,11 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/pooling_layer.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_pooling_layer.hpp" +#endif #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_power_layer.cpp b/src/caffe/test/test_power_layer.cpp index 1041ddd4..1aa587ac 100644 --- a/src/caffe/test/test_power_layer.cpp +++ b/src/caffe/test/test_power_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/neuron_layers.hpp" +#include 
"caffe/layers/power_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_reduction_layer.cpp b/src/caffe/test/test_reduction_layer.cpp index a8d43727..6ed7cda6 100644 --- a/src/caffe/test/test_reduction_layer.cpp +++ b/src/caffe/test/test_reduction_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/reduction_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index e0f4ba42..4f261386 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/reshape_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp index b4f831c8..5dfd7656 100644 --- a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/sigmoid_cross_entropy_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 45fbcffd..c2b231e1 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/slice_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_softmax_layer.cpp b/src/caffe/test/test_softmax_layer.cpp index 4b01f5cf..94443576 100644 --- a/src/caffe/test/test_softmax_layer.cpp +++ b/src/caffe/test/test_softmax_layer.cpp @@ -5,8 +5,12 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/softmax_layer.hpp" + +#ifdef USE_CUDNN +#include "caffe/layers/cudnn_softmax_layer.hpp" +#endif #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_softmax_with_loss_layer.cpp b/src/caffe/test/test_softmax_with_loss_layer.cpp index 0ae4cd68..c67f3e0d 100644 --- a/src/caffe/test/test_softmax_with_loss_layer.cpp +++ b/src/caffe/test/test_softmax_with_loss_layer.cpp @@ -7,7 +7,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/loss_layers.hpp" +#include "caffe/layers/softmax_loss_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp index e27e355c..ba2ccbb2 100644 --- a/src/caffe/test/test_split_layer.cpp +++ b/src/caffe/test/test_split_layer.cpp @@ -6,8 +6,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include 
"caffe/filler.hpp" +#include "caffe/layers/split_layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/insert_splits.hpp" diff --git a/src/caffe/test/test_spp_layer.cpp b/src/caffe/test/test_spp_layer.cpp index 1b48a842..59a3af2a 100644 --- a/src/caffe/test/test_spp_layer.cpp +++ b/src/caffe/test/test_spp_layer.cpp @@ -5,7 +5,12 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/concat_layer.hpp" +#include "caffe/layers/flatten_layer.hpp" +#include "caffe/layers/pooling_layer.hpp" +#include "caffe/layers/split_layer.hpp" +#include "caffe/layers/spp_layer.hpp" + #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_stochastic_pooling.cpp b/src/caffe/test/test_stochastic_pooling.cpp index 5a412bd4..cd5db838 100644 --- a/src/caffe/test/test_stochastic_pooling.cpp +++ b/src/caffe/test/test_stochastic_pooling.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" +#include "caffe/layers/pooling_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_tanh_layer.cpp b/src/caffe/test/test_tanh_layer.cpp index f31579ca..bb8699a8 100644 --- a/src/caffe/test/test_tanh_layer.cpp +++ b/src/caffe/test/test_tanh_layer.cpp @@ -6,7 +6,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/tanh_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/test/test_threshold_layer.cpp b/src/caffe/test/test_threshold_layer.cpp index 903a9bc8..1e84cc5a 100644 --- a/src/caffe/test/test_threshold_layer.cpp +++ b/src/caffe/test/test_threshold_layer.cpp @@ -5,7 +5,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/filler.hpp" -#include "caffe/neuron_layers.hpp" +#include "caffe/layers/threshold_layer.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_tile_layer.cpp b/src/caffe/test/test_tile_layer.cpp index 5c459604..7ff75520 100644 --- a/src/caffe/test/test_tile_layer.cpp +++ b/src/caffe/test/test_tile_layer.cpp @@ -4,8 +4,8 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" -#include "caffe/common_layers.hpp" #include "caffe/filler.hpp" +#include "caffe/layers/tile_layer.hpp" #include "caffe/test/test_caffe_main.hpp" #include "caffe/test/test_gradient_check_util.hpp" diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp index d1d1fa86..058668fe 100644 --- a/src/caffe/util/blocking_queue.cpp +++ b/src/caffe/util/blocking_queue.cpp @@ -1,8 +1,8 @@ #include #include -#include "caffe/data_layers.hpp" #include "caffe/data_reader.hpp" +#include "caffe/layers/base_data_layer.hpp" #include "caffe/parallel.hpp" #include "caffe/util/blocking_queue.hpp" diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index b94dbb98..1ef13266 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -11,7 +11,6 @@ #include "caffe/util/db.hpp" #include "caffe/util/format.hpp" #include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; From 36bf811574a0787910d80d12fd9ea186481d7939 Mon Sep 17 00:00:00 2001 From: Tea Date: Wed, 2 Dec 2015 15:39:19 +0800 Subject: [PATCH 189/223] Remove 
hamming_distance and popcount --- include/caffe/util/math_functions.hpp | 7 ---- src/caffe/test/test_math_functions.cpp | 41 ----------------------- src/caffe/util/math_functions.cpp | 22 ------------- src/caffe/util/math_functions.cu | 45 -------------------------- 4 files changed, 115 deletions(-) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 2cacd8e7..6f6d3fee 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -101,9 +101,6 @@ template Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx, const Dtype* y, const int incy); -template -int caffe_cpu_hamming_distance(const int n, const Dtype* x, const Dtype* y); - // Returns the sum of the absolute values of the elements of vector x template Dtype caffe_cpu_asum(const int n, const Dtype* x); @@ -234,10 +231,6 @@ void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r); template void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); -template -uint32_t caffe_gpu_hamming_distance(const int n, const Dtype* x, - const Dtype* y); - template void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index fbee3f9c..efc5a278 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -39,27 +39,6 @@ class MathFunctionsTest : public MultiDeviceTest { delete blob_top_; } - // http://en.wikipedia.org/wiki/Hamming_distance - int ReferenceHammingDistance(const int n, const Dtype* x, const Dtype* y) { - int dist = 0; - uint64_t val; - for (int i = 0; i < n; ++i) { - if (sizeof(Dtype) == 8) { - val = static_cast(x[i]) ^ static_cast(y[i]); - } else if (sizeof(Dtype) == 4) { - val = static_cast(x[i]) ^ static_cast(y[i]); - } else { - LOG(FATAL) << "Unrecognized Dtype size: " << sizeof(Dtype); - } - // Count the number of set bits - while (val) { - ++dist; - val &= val - 1; - } - } - return dist; - } - Blob* const blob_bottom_; Blob* const blob_top_; }; @@ -76,14 +55,6 @@ TYPED_TEST(CPUMathFunctionsTest, TestNothing) { // due to the set up overhead. } -TYPED_TEST(CPUMathFunctionsTest, TestHammingDistance) { - int n = this->blob_bottom_->count(); - const TypeParam* x = this->blob_bottom_->cpu_data(); - const TypeParam* y = this->blob_top_->cpu_data(); - EXPECT_EQ(this->ReferenceHammingDistance(n, x, y), - caffe_cpu_hamming_distance(n, x, y)); -} - TYPED_TEST(CPUMathFunctionsTest, TestAsum) { int n = this->blob_bottom_->count(); const TypeParam* x = this->blob_bottom_->cpu_data(); @@ -156,18 +127,6 @@ class GPUMathFunctionsTest : public MathFunctionsTest > { TYPED_TEST_CASE(GPUMathFunctionsTest, TestDtypes); -// TODO: Fix caffe_gpu_hamming_distance and re-enable this test. 
-TYPED_TEST(GPUMathFunctionsTest, DISABLED_TestHammingDistance) { - int n = this->blob_bottom_->count(); - const TypeParam* x = this->blob_bottom_->cpu_data(); - const TypeParam* y = this->blob_top_->cpu_data(); - int reference_distance = this->ReferenceHammingDistance(n, x, y); - x = this->blob_bottom_->gpu_data(); - y = this->blob_top_->gpu_data(); - int computed_distance = caffe_gpu_hamming_distance(n, x, y); - EXPECT_EQ(reference_distance, computed_distance); -} - TYPED_TEST(GPUMathFunctionsTest, TestAsum) { int n = this->blob_bottom_->count(); const TypeParam* x = this->blob_bottom_->cpu_data(); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 0aab6b17..71c02274 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -348,28 +348,6 @@ float caffe_cpu_dot(const int n, const float* x, const float* y); template double caffe_cpu_dot(const int n, const double* x, const double* y); -template <> -int caffe_cpu_hamming_distance(const int n, const float* x, - const float* y) { - int dist = 0; - for (int i = 0; i < n; ++i) { - dist += __builtin_popcount(static_cast(x[i]) ^ - static_cast(y[i])); - } - return dist; -} - -template <> -int caffe_cpu_hamming_distance(const int n, const double* x, - const double* y) { - int dist = 0; - for (int i = 0; i < n; ++i) { - dist += __builtin_popcountl(static_cast(x[i]) ^ - static_cast(y[i])); - } - return dist; -} - template <> float caffe_cpu_asum(const int n, const float* x) { return cblas_sasum(n, x, 1); diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index e4d0c4b0..4c587537 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -371,51 +371,6 @@ DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0))); DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index])); -__global__ void popc_kernel(const int n, const float* a, - const float* b, uint8_t* y) { - CUDA_KERNEL_LOOP(index, n) { - y[index] = __popc(static_cast(a[index]) ^ - static_cast(b[index])); - } -} - -__global__ void popcll_kernel(const int n, const double* a, - const double* b, uint8_t* y) { - CUDA_KERNEL_LOOP(index, n) { - y[index] = __popcll(static_cast(a[index]) ^ - static_cast(b[index])); - } -} - -template <> -uint32_t caffe_gpu_hamming_distance(const int n, const float* x, - const float* y) { - // TODO: Fix caffe_gpu_hamming_distance (see failing unit test - // TestHammingDistanceGPU in test_math_functions.cpp). - NOT_IMPLEMENTED; - thrust::device_vector popcounts(n); - // NOLINT_NEXT_LINE(whitespace/operators) - popc_kernel<<>>( - n, x, y, thrust::raw_pointer_cast(popcounts.data())); - return thrust::reduce(popcounts.begin(), popcounts.end(), - (uint32_t) 0, thrust::plus()); -} - -template <> -uint32_t caffe_gpu_hamming_distance(const int n, const double* x, - const double* y) { - // TODO: Fix caffe_gpu_hamming_distance (see failing unit test - // TestHammingDistanceGPU in test_math_functions.cpp). 
- NOT_IMPLEMENTED; - thrust::device_vector popcounts(n); - // NOLINT_NEXT_LINE(whitespace/operators) - popcll_kernel<<>>( - n, x, y, thrust::raw_pointer_cast(popcounts.data())); - return thrust::reduce(popcounts.begin(), popcounts.end(), - /* NOLINT_NEXT_LINE(build/include_what_you_use) */ - (uint32_t) 0, thrust::plus()); -} - void caffe_gpu_rng_uniform(const int n, unsigned int* r) { CURAND_CHECK(curandGenerate(Caffe::curand_generator(), r, n)); } From 99571c471d493c650c53be1416bb26d5b984f178 Mon Sep 17 00:00:00 2001 From: "T.E.A de Souza" Date: Sun, 29 Nov 2015 14:24:09 +0800 Subject: [PATCH 190/223] Correct type of device_id; disambiguate shared_ptr --- tools/extract_features.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 1ef13266..d6562f98 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -16,7 +16,6 @@ using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; -using boost::shared_ptr; using std::string; namespace db = caffe::db; @@ -51,7 +50,7 @@ int feature_extraction_pipeline(int argc, char** argv) { arg_pos = num_required_args; if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { LOG(ERROR)<< "Using GPU"; - uint device_id = 0; + int device_id = 0; if (argc > arg_pos + 1) { device_id = atoi(argv[arg_pos + 1]); CHECK_GE(device_id, 0); @@ -95,7 +94,7 @@ int feature_extraction_pipeline(int argc, char** argv) { } */ std::string feature_extraction_proto(argv[++arg_pos]); - shared_ptr > feature_extraction_net( + boost::shared_ptr > feature_extraction_net( new Net(feature_extraction_proto, caffe::TEST)); feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); @@ -119,15 +118,15 @@ int feature_extraction_pipeline(int argc, char** argv) { int num_mini_batches = atoi(argv[++arg_pos]); - std::vector > feature_dbs; - std::vector > txns; + std::vector > feature_dbs; + std::vector > txns; const char* db_type = argv[++arg_pos]; for (size_t i = 0; i < num_features; ++i) { LOG(INFO)<< "Opening dataset " << dataset_names[i]; - shared_ptr db(db::GetDB(db_type)); + boost::shared_ptr db(db::GetDB(db_type)); db->Open(dataset_names.at(i), db::NEW); feature_dbs.push_back(db); - shared_ptr txn(db->NewTransaction()); + boost::shared_ptr txn(db->NewTransaction()); txns.push_back(txn); } @@ -139,8 +138,8 @@ int feature_extraction_pipeline(int argc, char** argv) { for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { feature_extraction_net->Forward(input_vec); for (int i = 0; i < num_features; ++i) { - const shared_ptr > feature_blob = feature_extraction_net - ->blob_by_name(blob_names[i]); + const boost::shared_ptr > feature_blob = + feature_extraction_net->blob_by_name(blob_names[i]); int batch_size = feature_blob->num(); int dim_features = feature_blob->count() / batch_size; const Dtype* feature_blob_data; From a6681945be4736a584adadfaf2bffe43ad31422e Mon Sep 17 00:00:00 2001 From: Mohamed Omran Date: Thu, 26 Nov 2015 01:46:42 +0100 Subject: [PATCH 191/223] ELU layer with basic tests --- include/caffe/layers/elu_layer.hpp | 86 ++++++++++++++++++++++++++++ src/caffe/layers/elu_layer.cpp | 47 +++++++++++++++ src/caffe/layers/elu_layer.cu | 62 ++++++++++++++++++++ src/caffe/proto/caffe.proto | 11 +++- src/caffe/test/test_neuron_layer.cpp | 59 +++++++++++++++++++ 5 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 include/caffe/layers/elu_layer.hpp create mode 100644 src/caffe/layers/elu_layer.cpp create 
mode 100644 src/caffe/layers/elu_layer.cu diff --git a/include/caffe/layers/elu_layer.hpp b/include/caffe/layers/elu_layer.hpp new file mode 100644 index 00000000..0796e898 --- /dev/null +++ b/include/caffe/layers/elu_layer.hpp @@ -0,0 +1,86 @@ +#ifndef CAFFE_ELU_LAYER_HPP_ +#define CAFFE_ELU_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Exponential Linear Unit non-linearity @f$ + * y = \left\{ + * \begin{array}{lr} + * x & \mathrm{if} \; x > 0 \\ + * \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0 + * \end{array} \right. + * @f$. + */ +template +class ELULayer : public NeuronLayer { + public: + /** + * @param param provides ELUParameter elu_param, + * with ELULayer options: + * - alpha (\b optional, default 1). + * the value @f$ \alpha @f$ by which controls saturation for negative inputs. + */ + explicit ELULayer(const LayerParameter& param) + : NeuronLayer(param) {} + + virtual inline const char* type() const { return "ELU"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \left\{ + * \begin{array}{lr} + * x & \mathrm{if} \; x > 0 \\ + * \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0 + * \end{array} \right. + * @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the ELU inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = \left\{ + * \begin{array}{lr} + * 1 & \mathrm{if} \; x > 0 \\ + * y + \alpha & \mathrm{if} \; x \le 0 + * \end{array} \right. + * @f$ if propagate_down[0]. 
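As a quick reference for the forward and backward formulas above, here is a minimal standalone NumPy sketch (illustrative only, not part of this patch; the function names are hypothetical):

    import numpy as np

    def elu_forward(x, alpha=1.0):
        # y = x where x > 0, alpha * (exp(x) - 1) otherwise
        return np.where(x > 0, x, alpha * (np.exp(x) - 1))

    def elu_backward(x, y, top_diff, alpha=1.0):
        # dE/dx = dE/dy where x > 0, dE/dy * (y + alpha) otherwise,
        # using d/dx [alpha * (exp(x) - 1)] = alpha * exp(x) = y + alpha for x <= 0
        return np.where(x > 0, top_diff, top_diff * (y + alpha))

    x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    y = elu_forward(x, alpha=0.5)
    dx = elu_backward(x, y, np.ones_like(x), alpha=0.5)

The backward expression reuses the forward output: for x <= 0 the gradient only needs y and alpha, so exp(x) never has to be recomputed in the backward pass, which is the same trick the CPU and GPU implementations below rely on.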
+ */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); +}; + + +} // namespace caffe + +#endif // CAFFE_ELU_LAYER_HPP_ diff --git a/src/caffe/layers/elu_layer.cpp b/src/caffe/layers/elu_layer.cpp new file mode 100644 index 00000000..a0f87635 --- /dev/null +++ b/src/caffe/layers/elu_layer.cpp @@ -0,0 +1,47 @@ +#include +#include + +#include "caffe/layers/elu_layer.hpp" + +namespace caffe { + +template +void ELULayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + Dtype alpha = this->layer_param_.elu_param().alpha(); + for (int i = 0; i < count; ++i) { + top_data[i] = std::max(bottom_data[i], Dtype(0)) + + alpha * (exp(std::min(bottom_data[i], Dtype(0))) - Dtype(1)); + } +} + +template +void ELULayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* top_data = top[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + Dtype alpha = this->layer_param_.elu_param().alpha(); + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0) + + (alpha + top_data[i]) * (bottom_data[i] <= 0)); + } + } +} + + +#ifdef CPU_ONLY +STUB_GPU(ELULayer); +#endif + +INSTANTIATE_CLASS(ELULayer); +REGISTER_LAYER_CLASS(ELU); + +} // namespace caffe diff --git a/src/caffe/layers/elu_layer.cu b/src/caffe/layers/elu_layer.cu new file mode 100644 index 00000000..12545aa8 --- /dev/null +++ b/src/caffe/layers/elu_layer.cu @@ -0,0 +1,62 @@ +#include +#include + +#include "caffe/layers/elu_layer.hpp" + +namespace caffe { + +template +__global__ void ELUForward(const int n, const Dtype* in, Dtype* out, + Dtype alpha) { + CUDA_KERNEL_LOOP(index, n) { + out[index] = in[index] > 0 ? in[index] : + alpha * (exp(in[index]) - 1); + } +} + +template +void ELULayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const int count = bottom[0]->count(); + Dtype alpha = this->layer_param_.elu_param().alpha(); + // NOLINT_NEXT_LINE(whitespace/operators) + ELUForward<<>>( + count, bottom_data, top_data, alpha); + CUDA_POST_KERNEL_CHECK; +} + +template +__global__ void ELUBackward(const int n, const Dtype* in_diff, + const Dtype* out_data, const Dtype* in_data, + Dtype* out_diff, Dtype alpha) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_data[index] > 0 ? 
in_diff[index] : + in_diff[index] * (out_data[index] + alpha); + } +} + +template +void ELULayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* top_data = top[0]->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + Dtype alpha = this->layer_param_.elu_param().alpha(); + // NOLINT_NEXT_LINE(whitespace/operators) + ELUBackward<<>>( + count, top_diff, top_data, bottom_data, bottom_diff, alpha); + CUDA_POST_KERNEL_CHECK; + } +} + + +INSTANTIATE_LAYER_GPU_FUNCS(ELULayer); + + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 787369f7..1daf148d 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 140 (last added: batch_norm_param) +// LayerParameter next available layer-specific ID: 141 (last added: elu_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -363,6 +363,7 @@ message LayerParameter { optional DropoutParameter dropout_param = 108; optional DummyDataParameter dummy_data_param = 109; optional EltwiseParameter eltwise_param = 110; + optional ELUParameter elu_param = 140; optional EmbedParameter embed_param = 137; optional ExpParameter exp_param = 111; optional FlattenParameter flatten_param = 135; @@ -629,6 +630,14 @@ message EltwiseParameter { optional bool stable_prod_grad = 3 [default = true]; } +// Message that stores parameters used by ELULayer +message ELUParameter { + // Described in: + // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate + // Deep Network Learning by Exponential Linear Units (ELUs). 
arXiv + optional float alpha = 1 [default = 1]; +} + // Message that stores parameters used by EmbedLayer message EmbedParameter { optional uint32 num_output = 1; // The number of outputs for the layer diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 21441b41..dd591f7d 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -11,6 +11,7 @@ #include "caffe/layers/absval_layer.hpp" #include "caffe/layers/bnll_layer.hpp" #include "caffe/layers/dropout_layer.hpp" +#include "caffe/layers/elu_layer.hpp" #include "caffe/layers/exp_layer.hpp" #include "caffe/layers/inner_product_layer.hpp" #include "caffe/layers/log_layer.hpp" @@ -259,6 +260,64 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientWithNegativeSlope) { this->blob_top_vec_); } +TYPED_TEST(NeuronLayerTest, TestELU) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + "elu_param { alpha: 0.5 }", &layer_param)); + ELULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype kDelta = 2e-4; + // Now, check values + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + const Dtype* top_data = this->blob_top_->cpu_data(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + if (bottom_data[i] > 0) { + EXPECT_FLOAT_EQ(top_data[i], bottom_data[i]); + } else { + EXPECT_NEAR(top_data[i], 0.5 * (exp(bottom_data[i]) - 1), kDelta); + } + } +} + +TYPED_TEST(NeuronLayerTest, TestELUasReLU) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + "elu_param { alpha: 0 }", &layer_param)); + ELULayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + const Dtype* top_data = this->blob_top_->cpu_data(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_GE(top_data[i], 0.); + EXPECT_TRUE(top_data[i] == 0 || top_data[i] == bottom_data[i]); + } +} + +TYPED_TEST(NeuronLayerTest, TestELUGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ELULayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(NeuronLayerTest, TestELUasReLUGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + "elu_param { alpha: 0 }", &layer_param)); + ELULayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + TYPED_TEST(NeuronLayerTest, TestSigmoid) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; From b13bda2fcc984ce916d4911c90f5466056f25092 Mon Sep 17 00:00:00 2001 From: Ian Hunter Date: Wed, 9 Dec 2015 13:29:01 +0000 Subject: [PATCH 192/223] Update interfaces.md typo --- docs/tutorial/interfaces.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/interfaces.md b/docs/tutorial/interfaces.md index 9006179d..d7ff3782 100644 --- a/docs/tutorial/interfaces.md +++ b/docs/tutorial/interfaces.md @@ -61,7 +61,7 @@ For a full example of 
fine-tuning, see examples/finetuning_on_flickr_style, but The Python interface -- pycaffe -- is the `caffe` module and its scripts in caffe/python. `import caffe` to load models, do forward and backward, handle IO, visualize networks, and even instrument model solving. All model data, derivatives, and parameters are exposed for reading and writing. -- `caffe.Net` is the central interface for loading, configuring, and running models. `caffe.Classsifier` and `caffe.Detector` provide convenience interfaces for common tasks. +- `caffe.Net` is the central interface for loading, configuring, and running models. `caffe.Classifier` and `caffe.Detector` provide convenience interfaces for common tasks. - `caffe.SGDSolver` exposes the solving interface. - `caffe.io` handles input / output with preprocessing and protocol buffers. - `caffe.draw` visualizes network architectures. From eb2b848df173f7a07eb0d76a432c5d4badca7ba6 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Thu, 10 Dec 2015 15:11:51 -0800 Subject: [PATCH 193/223] Fix CuDNNConvolutionLayer for cuDNN v4 Add a macro to check the current cuDNN version --- include/caffe/util/cudnn.hpp | 3 +++ src/caffe/layers/cudnn_conv_layer.cu | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index b531dd5f..8a7e17c6 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -7,6 +7,9 @@ #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +#define CUDNN_VERSION_MIN(major, minor, patch) \ + (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch)) + #define CUDNN_CHECK(condition) \ do { \ cudnnStatus_t status = condition; \ diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 1990e932..42c4fd02 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -30,11 +30,19 @@ void CuDNNConvolutionLayer::Forward_gpu( // Bias. if (this->bias_term_) { const Dtype* bias_data = this->blobs_[1]->gpu_data(); +#if CUDNN_VERSION_MIN(4, 0, 0) + CUDNN_CHECK(cudnnAddTensor(handle_[g], + cudnn::dataType::one, + bias_desc_, bias_data + bias_offset_ * g, + cudnn::dataType::one, + top_descs_[i], top_data + top_offset_ * g)); +#else CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, cudnn::dataType::one, bias_desc_, bias_data + bias_offset_ * g, cudnn::dataType::one, top_descs_[i], top_data + top_offset_ * g)); +#endif } } From f19896ccca23f091abb82d77a2f281a9c954a147 Mon Sep 17 00:00:00 2001 From: Muneyuki Noguchi Date: Sun, 20 Dec 2015 19:12:09 +0900 Subject: [PATCH 194/223] Replace blobs_lr with lr_mult in readme.md. models/finetune_flickr_style/deploy.prototxt uses lr_mult now. --- examples/finetune_flickr_style/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index ecb9d3d2..4e9d41f1 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -22,10 +22,10 @@ Because we are predicting 20 classes instead of a 1,000, we do need to change th Therefore, we change the name of the last layer from `fc8` to `fc8_flickr` in our prototxt. Since there is no layer named that in the `bvlc_reference_caffenet`, that layer will begin training with random weights. -We will also decrease the overall learning rate `base_lr` in the solver prototxt, but boost the `blobs_lr` on the newly introduced layer. 
+We will also decrease the overall learning rate `base_lr` in the solver prototxt, but boost the `lr_mult` on the newly introduced layer. The idea is to have the rest of the model change very slowly with new data, but let the new layer learn fast. Additionally, we set `stepsize` in the solver to a lower value than if we were training from scratch, since we're virtually far along in training and therefore want the learning rate to go down faster. -Note that we could also entirely prevent fine-tuning of all layers other than `fc8_flickr` by setting their `blobs_lr` to 0. +Note that we could also entirely prevent fine-tuning of all layers other than `fc8_flickr` by setting their `lr_mult` to 0. ## Procedure From 93bfcb53120416255d6d7261b638f0b38ff9e9bf Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sun, 6 Dec 2015 20:04:43 -0500 Subject: [PATCH 195/223] add support for 2D dilated convolution --- include/caffe/layers/base_conv_layer.hpp | 14 ++-- include/caffe/layers/conv_layer.hpp | 3 + include/caffe/layers/im2col_layer.hpp | 2 + include/caffe/util/im2col.hpp | 12 ++-- src/caffe/layer_factory.cpp | 17 ++++- src/caffe/layers/base_conv_layer.cpp | 20 +++++- src/caffe/layers/conv_layer.cpp | 4 +- src/caffe/layers/im2col_layer.cpp | 21 +++++- src/caffe/layers/im2col_layer.cu | 2 + src/caffe/proto/caffe.proto | 1 + src/caffe/test/test_convolution_layer.cpp | 14 ++-- src/caffe/test/test_im2col_kernel.cu | 17 ++++- src/caffe/test/test_im2col_layer.cpp | 3 +- src/caffe/util/im2col.cpp | 34 ++++++---- src/caffe/util/im2col.cu | 80 ++++++++++++----------- 15 files changed, 170 insertions(+), 74 deletions(-) diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp index f3def16c..db471b58 100644 --- a/include/caffe/layers/base_conv_layer.hpp +++ b/include/caffe/layers/base_conv_layer.hpp @@ -68,6 +68,8 @@ class BaseConvolutionLayer : public Layer { Blob stride_; /// @brief The spatial dimensions of the padding. Blob pad_; + /// @brief The spatial dimensions of the dilation. + Blob dilation_; /// @brief The spatial dimensions of the convolution input. Blob conv_input_shape_; /// @brief The spatial dimensions of the col_buffer. 
@@ -99,7 +101,8 @@ class BaseConvolutionLayer : public Layer { conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff); } else { im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), @@ -112,7 +115,8 @@ class BaseConvolutionLayer : public Layer { conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); + stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], data); } else { col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), @@ -126,7 +130,8 @@ class BaseConvolutionLayer : public Layer { conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); + stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff); } else { im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), @@ -140,7 +145,8 @@ class BaseConvolutionLayer : public Layer { conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], data); + stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], data); } else { col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), diff --git a/include/caffe/layers/conv_layer.hpp b/include/caffe/layers/conv_layer.hpp index 15574766..93a618dd 100644 --- a/include/caffe/layers/conv_layer.hpp +++ b/include/caffe/layers/conv_layer.hpp @@ -44,6 +44,9 @@ class ConvolutionLayer : public BaseConvolutionLayer { * convolution, given by pad for equal dimensions or pad_h and pad_w for * different padding. Input padding is computed implicitly instead of * actually padding. + * - dilation (\b optional, default 1). The filter + * dilation, given by dilation_size for equal dimensions for different + * dilation. By default the convolution has dilation 1. * - group (\b optional, default 1). The number of filter groups. Group * convolution is a method for reducing parameterization by selectively * connecting input and output channels. The input and output channel dimensions must be divisible diff --git a/include/caffe/layers/im2col_layer.hpp b/include/caffe/layers/im2col_layer.hpp index 1d3b2eb6..71e32f74 100644 --- a/include/caffe/layers/im2col_layer.hpp +++ b/include/caffe/layers/im2col_layer.hpp @@ -46,6 +46,8 @@ class Im2colLayer : public Layer { Blob stride_; /// @brief The spatial dimensions of the padding. Blob pad_; + /// @brief The spatial dimensions of the dilation. 
+ Blob dilation_; int num_spatial_axes_; int bottom_dim_; diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp index d3eb6ccd..748b65c4 100644 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp @@ -13,7 +13,8 @@ template void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, Dtype* data_col); + const int stride_w, const int dilation_h, const int dilation_w, + Dtype* data_col); template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, @@ -25,7 +26,8 @@ template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, Dtype* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + Dtype* data_im); template void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, @@ -37,7 +39,8 @@ template void im2col_gpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, Dtype* data_col); + const int stride_w, const int dilation_h, const int dilation_w, + Dtype* data_col); template void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, @@ -49,7 +52,8 @@ template void col2im_gpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, Dtype* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + Dtype* data_im); } // namespace caffe diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 76d851af..6b1d1c1a 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -37,17 +37,30 @@ namespace caffe { template shared_ptr > GetConvolutionLayer( const LayerParameter& param) { - ConvolutionParameter_Engine engine = param.convolution_param().engine(); + ConvolutionParameter conv_param = param.convolution_param(); + ConvolutionParameter_Engine engine = conv_param.engine(); + bool use_dilation = false; + for (int i = 0; i < conv_param.dilation_size(); ++i) { + if (conv_param.dilation(i) > 1) { + use_dilation = true; + } + } if (engine == ConvolutionParameter_Engine_DEFAULT) { engine = ConvolutionParameter_Engine_CAFFE; #ifdef USE_CUDNN - engine = ConvolutionParameter_Engine_CUDNN; + if (!use_dilation) { + engine = ConvolutionParameter_Engine_CUDNN; + } #endif } if (engine == ConvolutionParameter_Engine_CAFFE) { return shared_ptr >(new ConvolutionLayer(param)); #ifdef USE_CUDNN } else if (engine == ConvolutionParameter_Engine_CUDNN) { + if (use_dilation) { + LOG(FATAL) << "CuDNN doesn't support the dilated convolution at Layer " + << param.name(); + } return shared_ptr >(new CuDNNConvolutionLayer(param)); #endif } else { diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index f6f14cd0..4a4c68e0 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -36,7 +36,7 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) << "kernel_size must be specified once, or once per spatial dimension " << "(kernel_size specified " << 
num_kernel_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; + << num_spatial_axes_ << " spatial dims)."; for (int i = 0; i < num_spatial_axes_; ++i) { kernel_shape_data[i] = conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i); @@ -61,7 +61,7 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, num_stride_dims == num_spatial_axes_) << "stride must be specified once, or once per spatial dimension " << "(stride specified " << num_stride_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; + << num_spatial_axes_ << " spatial dims)."; const int kDefaultStride = 1; for (int i = 0; i < num_spatial_axes_; ++i) { stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : @@ -85,13 +85,27 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, num_pad_dims == num_spatial_axes_) << "pad must be specified once, or once per spatial dimension " << "(pad specified " << num_pad_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; + << num_spatial_axes_ << " spatial dims)."; const int kDefaultPad = 0; for (int i = 0; i < num_spatial_axes_; ++i) { pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : conv_param.pad((num_pad_dims == 1) ? 0 : i); } } + // Setup dilation dimensions (dilation_). + dilation_.Reshape(spatial_dim_blob_shape); + int* dilation_data = dilation_.mutable_cpu_data(); + const int num_dilation_dims = conv_param.dilation_size(); + CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 || + num_dilation_dims == num_spatial_axes_) + << "dilation must be specified once, or once per spatial dimension " + << "(dilation specified " << num_dilation_dims << " times; " + << num_spatial_axes_ << " spatial dims)."; + const int kDefaultDilation = 1; + for (int i = 0; i < num_spatial_axes_; ++i) { + dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : + conv_param.dilation((num_dilation_dims == 1) ? 0 : i); + } // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. is_1x1_ = true; diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index cff09783..5d522ab3 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -9,11 +9,13 @@ void ConvolutionLayer::compute_output_shape() { const int* kernel_shape_data = this->kernel_shape_.cpu_data(); const int* stride_data = this->stride_.cpu_data(); const int* pad_data = this->pad_.cpu_data(); + const int* dilation_data = this->dilation_.cpu_data(); this->output_shape_.clear(); for (int i = 0; i < this->num_spatial_axes_; ++i) { // i + 1 to skip channel axis const int input_dim = this->input_shape(i + 1); - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) + const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1; + const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent) / stride_data[i] + 1; this->output_shape_.push_back(output_dim); } diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index c12e4f52..19ae3019 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -87,6 +87,20 @@ void Im2colLayer::LayerSetUp(const vector*>& bottom, conv_param.pad((num_pad_dims == 1) ? 0 : i); } } + // Setup dilation dimensions (dilation_). 
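The output-shape arithmetic introduced in this patch — a dilated kernel covers an effective extent of dilation * (kernel_size - 1) + 1 input pixels — can be sanity-checked with a small standalone Python helper (not part of the patch; the helper name is made up):

    def dilated_output_dim(input_dim, kernel_size, stride=1, pad=0, dilation=1):
        # Effective spatial span of a dilated kernel, then the usual sliding-window count.
        kernel_extent = dilation * (kernel_size - 1) + 1
        return (input_dim + 2 * pad - kernel_extent) // stride + 1

    # Kernel 3, stride 2, dilation 3, no padding, as exercised by the updated
    # Im2colLayer tests later in this series:
    print(dilated_output_dim(10, 3, stride=2, dilation=3))  # -> 2
    print(dilated_output_dim(11, 3, stride=2, dilation=3))  # -> 3

With dilation = 1 the extent reduces to kernel_size and the expression falls back to the original, undilated output-size formula.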
+ dilation_.Reshape(dim_blob_shape); + int* dilation_data = dilation_.mutable_cpu_data(); + const int num_dilation_dims = conv_param.dilation_size(); + CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 || + num_dilation_dims == num_spatial_axes_) + << "dilation must be specified once, or once per spatial dimension " + << "(dilation specified " << num_dilation_dims << " times; " + << num_spatial_axes_ << " spatial dims)."; + const int kDefaultDilation = 1; + for (int i = 0; i < num_spatial_axes_; ++i) { + dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : + conv_param.dilation((num_dilation_dims == 1) ? 0 : i); + } } template @@ -96,10 +110,12 @@ void Im2colLayer::Reshape(const vector*>& bottom, const int* kernel_shape_data = kernel_shape_.cpu_data(); const int* stride_data = stride_.cpu_data(); const int* pad_data = pad_.cpu_data(); + const int* dilation_data = dilation_.cpu_data(); for (int i = 0; i < num_spatial_axes_; ++i) { top_shape[channel_axis_] *= kernel_shape_data[i]; const int input_dim = bottom[0]->shape(channel_axis_ + i + 1); - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i]) + const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1; + const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent) / stride_data[i] + 1; top_shape[channel_axis_ + i + 1] = output_dim; } @@ -122,6 +138,7 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_); DCHECK_EQ(pad_.count(), num_spatial_axes_); DCHECK_EQ(stride_.count(), num_spatial_axes_); + DCHECK_EQ(dilation_.count(), num_spatial_axes_); if (!force_nd_im2col_ && num_spatial_axes_ == 2) { im2col_cpu(bottom_data + n * bottom_dim_, channels_, bottom[0]->shape(channel_axis_ + 1), @@ -129,6 +146,7 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], top_data + n * top_dim_); } else { im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_, @@ -153,6 +171,7 @@ void Im2colLayer::Backward_cpu(const vector*>& top, kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], bottom_diff + n * bottom_dim_); } else { col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_, diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index 517b4220..d90075d4 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -19,6 +19,7 @@ void Im2colLayer::Forward_gpu(const vector*>& bottom, kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], top_data + n * top_dim_); } else { im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_, @@ -43,6 +44,7 @@ void Im2colLayer::Backward_gpu(const vector*>& top, kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], + dilation_.cpu_data()[0], dilation_.cpu_data()[1], bottom_diff + n * bottom_dim_); } else { col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_, diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 787369f7..87c46629 
100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -518,6 +518,7 @@ message ConvolutionParameter { repeated uint32 pad = 3; // The padding size; defaults to 0 repeated uint32 kernel_size = 4; // The kernel size repeated uint32 stride = 6; // The stride; defaults to 1 + repeated uint32 dilation = 18; // The dilation; defaults to 1 // For 2D convolution only, the *_h and *_w versions may also be used to // specify both spatial dimensions. diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index e2d43f31..95c3c80c 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -46,13 +46,17 @@ void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, } else { stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1; } - int kernel_d, pad_d, stride_d; + int dilation_h, dilation_w; + dilation_h = dilation_w = conv_param->dilation_size() ? + conv_param->dilation(0) : 1; + int kernel_d, pad_d, stride_d, dilation_d; if (has_depth) { kernel_d = kernel_h; stride_d = stride_h; pad_d = pad_h; + dilation_d = dilation_h; } else { - kernel_d = stride_d = 1; + kernel_d = stride_d = dilation_d = 1; pad_d = 0; } // Groups @@ -77,9 +81,9 @@ void caffe_conv(const Blob* in, ConvolutionParameter* conv_param, for (int r = 0; r < kernel_d; r++) { for (int p = 0; p < kernel_h; p++) { for (int q = 0; q < kernel_w; q++) { - int in_z = z * stride_d - pad_d + r; - int in_y = y * stride_h - pad_h + p; - int in_x = x * stride_w - pad_w + q; + int in_z = z * stride_d - pad_d + r * dilation_d; + int in_y = y * stride_h - pad_h + p * dilation_h; + int in_x = x * stride_w - pad_w + q * dilation_w; if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1) && in_y >= 0 && in_y < in->shape(2 + has_depth) && in_x >= 0 && in_x < in->shape(3 + has_depth)) { diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index 3f97cf6d..15e06aa8 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -18,6 +18,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int height_col, const int width_col, Dtype* data_col); @@ -38,6 +39,7 @@ class Im2colKernelTest : public GPUDeviceTest { blob_kernel_shape_(new Blob()), blob_stride_(new Blob()), blob_pad_(new Blob()), + blob_dilation_(new Blob()), blob_top_(new Blob()), blob_top_cpu_(new Blob()) { FillerParameter filler_param; @@ -47,20 +49,25 @@ class Im2colKernelTest : public GPUDeviceTest { blob_kernel_shape_->Reshape(dim_blob_shape); blob_stride_->Reshape(dim_blob_shape); blob_pad_->Reshape(dim_blob_shape); + blob_dilation_->Reshape(dim_blob_shape); height_ = blob_bottom_->height(); width_ = blob_bottom_->width(); channels_ = blob_bottom_->channels(); pad_ = 0; stride_ = 2; + dilation_ = 1; kernel_size_ = 3; - height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; - width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + height_col_ = (height_ + 2 * pad_ - + (dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1; + width_col_ = (width_ + 2 * pad_ - + (dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1; for (int i = 0; i < 2; ++i) { blob_kernel_shape_->mutable_cpu_data()[i] = kernel_size_; blob_stride_->mutable_cpu_data()[i] = stride_; 
blob_pad_->mutable_cpu_data()[i] = pad_; + blob_dilation_->mutable_cpu_data()[i] = dilation_; } } @@ -71,11 +78,13 @@ class Im2colKernelTest : public GPUDeviceTest { delete blob_kernel_shape_; delete blob_stride_; delete blob_pad_; + delete blob_dilation_; } Blob* const blob_kernel_shape_; Blob* const blob_stride_; Blob* const blob_pad_; + Blob* const blob_dilation_; Blob* const blob_bottom_; Blob* const blob_top_; Blob* const blob_top_cpu_; @@ -84,6 +93,7 @@ class Im2colKernelTest : public GPUDeviceTest { int channels_; int pad_; int stride_; + int dilation_; int kernel_size_; int height_col_; int width_col_; @@ -112,7 +122,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) { im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n), this->channels_, this->height_, this->width_, this->kernel_size_, this->kernel_size_, this->pad_, this->pad_, - this->stride_, this->stride_, + this->stride_, this->stride_, this->dilation_, this->dilation_, cpu_data + this->blob_top_cpu_->offset(n)); } @@ -129,6 +139,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) { num_kernels, bottom_data + this->blob_bottom_->offset(n), this->height_, this->width_, this->kernel_size_, this->kernel_size_, this->pad_, this->pad_, this->stride_, this->stride_, + this->dilation_, this->dilation_, this->height_col_, this->width_col_, top_data + this->blob_top_->offset(n)); CUDA_POST_KERNEL_CHECK; diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 8274dd48..932d3f21 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -17,7 +17,7 @@ class Im2colLayerTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; protected: Im2colLayerTest() - : blob_bottom_(new Blob(2, 3, 6, 5)), + : blob_bottom_(new Blob(2, 3, 10, 9)), blob_top_(new Blob()) { // fill the values Caffe::set_random_seed(1701); @@ -75,6 +75,7 @@ TYPED_TEST(Im2colLayerTest, TestGradient) { layer_param.mutable_convolution_param(); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); + convolution_param->add_dilation(3); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 27e5b7c0..1e578e7c 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -10,9 +10,12 @@ void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, Dtype* data_col) { - const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + const int height_col = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_col = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channels_col = channels * kernel_h * kernel_w; for (int c_col = 0; c_col < channels_col; ++c_col) { int w_offset = c_col % kernel_w; @@ -20,8 +23,8 @@ void im2col_cpu(const Dtype* data_im, const int channels, int c_im = c_col / kernel_h / kernel_w; for (int h_col = 0; h_col < height_col; ++h_col) { for (int w_col = 0; w_col < width_col; ++w_col) { - int h_im = h_col * stride_h - pad_h + h_offset; - int w_im = w_col * stride_w - pad_w + w_offset; + int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; + int 
w_im = w_col * stride_w - pad_w + w_offset * dilation_w; data_col[(c_col * height_col + h_col) * width_col + w_col] = (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? data_im[(c_im * height + h_im) * width + w_im] : 0; @@ -34,11 +37,13 @@ void im2col_cpu(const Dtype* data_im, const int channels, template void im2col_cpu(const float* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, float* data_col); + const int stride_w, const int dilation_h, const int dilation_w, + float* data_col); template void im2col_cpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, double* data_col); + const int stride_w, const int dilation_h, const int dilation_w, + double* data_col); template inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, @@ -137,10 +142,13 @@ void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, Dtype* data_im) { caffe_set(height * width * channels, Dtype(0), data_im); - const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + const int height_col = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_col = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channels_col = channels * kernel_h * kernel_w; for (int c_col = 0; c_col < channels_col; ++c_col) { int w_offset = c_col % kernel_w; @@ -148,8 +156,8 @@ void col2im_cpu(const Dtype* data_col, const int channels, int c_im = c_col / kernel_h / kernel_w; for (int h_col = 0; h_col < height_col; ++h_col) { for (int w_col = 0; w_col < width_col; ++w_col) { - int h_im = h_col * stride_h - pad_h + h_offset; - int w_im = w_col * stride_w - pad_w + w_offset; + int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; + int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) data_im[(c_im * height + h_im) * width + w_im] += data_col[(c_col * height_col + h_col) * width_col + w_col]; @@ -162,11 +170,13 @@ void col2im_cpu(const Dtype* data_col, const int channels, template void col2im_cpu(const float* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, float* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + float* data_im); template void col2im_cpu(const double* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, double* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + double* data_im); template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index 49354ab7..cdcaac5b 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -10,6 +10,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, const int 
height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int height_col, const int width_col, Dtype* data_col) { CUDA_KERNEL_LOOP(index, n) { @@ -26,11 +27,11 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, data_im_ptr += (c_im * height + h_offset) * width + w_offset; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { - int h_im = h_offset + i; - int w_im = w_offset + j; + int h_im = h_offset + i * dilation_h; + int w_im = w_offset + j * dilation_w; *data_col_ptr = (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? - data_im_ptr[i * width + j] : 0; + data_im_ptr[i * dilation_h * width + j * dilation_w] : 0; data_col_ptr += height_col * width_col; } } @@ -42,17 +43,20 @@ void im2col_gpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, Dtype* data_col) { // We are going to launch channels * height_col * width_col kernels, each // kernel responsible for copying a single-channel grid. - int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + int height_col = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * height_col * width_col; // NOLINT_NEXT_LINE(whitespace/operators) im2col_gpu_kernel<<>>( num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h, - pad_w, stride_h, stride_w, height_col, + pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col, width_col, data_col); CUDA_POST_KERNEL_CHECK; } @@ -61,11 +65,11 @@ void im2col_gpu(const Dtype* data_im, const int channels, template void im2col_gpu(const float* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, - float* data_col); + const int dilation_h, const int dilation_w, float* data_col); template void im2col_gpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, - double* data_col); + const int dilation_h, const int dilation_w, double* data_col); template __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, @@ -223,6 +227,7 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int height_col, const int width_col, Dtype* data_im) { CUDA_KERNEL_LOOP(index, n) { @@ -230,33 +235,27 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, const int w_im = index % width + pad_w; const int h_im = (index / width) % height + pad_h; const int c_im = index / (width * height); + int kernel_extent_w = (kernel_w - 1) * dilation_w + 1; + int kernel_extent_h = (kernel_h - 1) * dilation_h + 1; // compute the start and end of the output const int w_col_start = - (w_im < kernel_w) ? 
0 : (w_im - kernel_w) / stride_w + 1; - const int w_col_end = - min(w_im / stride_w + 1, width_col); + (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1; + const int w_col_end = min(w_im / stride_w + 1, width_col); const int h_col_start = - (h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1; - const int h_col_end = - min(h_im / stride_h + 1, height_col); - /* - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - // the col location: [c * width * height + h_out, w_out] - int c_col = c_im * kernel_h * kernel_w - + (h_im - h_col * stride_h) * kernel_w + (w_im - w_col * stride_w); - val += data_col[(c_col * height_col + h_col) * width_col + w_col]; - } - } - */ - // equivalent implementation - int offset = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im) - * height_col * width_col; - int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col; - int coeff_w_col = (1 - stride_w * height_col * width_col); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; + (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1; + const int h_col_end = min(h_im / stride_h + 1, height_col); + // TODO: use LCM of stride and dilation to avoid unnecessary loops + for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) { + for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) { + int h_k = (h_im - h_col * stride_h); + int w_k = (w_im - w_col * stride_w); + if (h_k % dilation_h == 0 && w_k % dilation_w == 0) { + h_k /= dilation_h; + w_k /= dilation_w; + int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * + height_col + h_col) * width_col + w_col; + val += data_col[data_col_index]; + } } } data_im[index] = val; @@ -267,9 +266,12 @@ template void col2im_gpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, Dtype* data_im) { - int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; - int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; + const int stride_w, const int dilation_h, const int dilation_w, + Dtype* data_im) { + int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / + stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / + stride_w + 1; int num_kernels = channels * height * width; // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. 
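The rewritten kernel above gathers into each image pixel, skipping column entries whose offset from the pixel is not a multiple of the dilation. The equivalent scatter form (mirroring the col2im_cpu loop earlier in this patch) may be easier to follow; a single-channel Python sketch with illustrative names only, assuming a NumPy column buffer rather than the Caffe blobs:

    import numpy as np

    def col2im_single_channel(data_col, height, width, kernel, stride, pad, dilation):
        # data_col has shape (kernel * kernel, height_col, width_col) for one channel.
        extent = dilation * (kernel - 1) + 1
        height_col = (height + 2 * pad - extent) // stride + 1
        width_col = (width + 2 * pad - extent) // stride + 1
        data_im = np.zeros((height, width))
        for i in range(kernel):            # kernel row offset
            for j in range(kernel):        # kernel column offset
                for h_col in range(height_col):
                    for w_col in range(width_col):
                        h_im = h_col * stride - pad + i * dilation
                        w_im = w_col * stride - pad + j * dilation
                        if 0 <= h_im < height and 0 <= w_im < width:
                            data_im[h_im, w_im] += data_col[i * kernel + j, h_col, w_col]
        return data_im

    # e.g. a 10 x 11 image with kernel 3, stride 2, dilation 3 (cf. the updated
    # Im2colLayer test) -> 2 x 3 output positions, hence 9 x 2 x 3 columns:
    cols = np.ones((9, 2, 3))
    img = col2im_single_channel(cols, 10, 11, kernel=3, stride=2, pad=0, dilation=3)

With an all-ones column buffer each image pixel ends up holding the number of dilated patches that touch it, which matches the per-pixel accumulation the GPU kernel performs via the divisibility check on h_k and w_k.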
@@ -277,7 +279,7 @@ void col2im_gpu(const Dtype* data_col, const int channels, col2im_gpu_kernel<<>>( num_kernels, data_col, height, width, channels, kernel_h, kernel_w, - pad_h, pad_w, stride_h, stride_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col, width_col, data_im); CUDA_POST_KERNEL_CHECK; } @@ -286,11 +288,13 @@ void col2im_gpu(const Dtype* data_col, const int channels, template void col2im_gpu(const float* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, float* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + float* data_im); template void col2im_gpu(const double* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, - const int stride_w, double* data_im); + const int stride_w, const int dilation_h, const int dilation_w, + double* data_im); template __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, From 18c795ebe8401cb82c9f8350664de665f1ec8733 Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sun, 27 Dec 2015 20:48:30 -0800 Subject: [PATCH 196/223] add support for N-D dilated convolution --- include/caffe/layers/base_conv_layer.hpp | 8 +- include/caffe/util/im2col.hpp | 8 +- src/caffe/layer_factory.cpp | 2 + src/caffe/layers/im2col_layer.cpp | 4 +- src/caffe/layers/im2col_layer.cu | 4 +- src/caffe/test/test_im2col_kernel.cu | 9 +- src/caffe/test/test_im2col_layer.cpp | 8 +- src/caffe/util/im2col.cpp | 21 +-- src/caffe/util/im2col.cu | 166 +++++++++++++++-------- 9 files changed, 148 insertions(+), 82 deletions(-) diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp index db471b58..0160a833 100644 --- a/include/caffe/layers/base_conv_layer.hpp +++ b/include/caffe/layers/base_conv_layer.hpp @@ -106,7 +106,7 @@ class BaseConvolutionLayer : public Layer { } else { im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), col_buff); + pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff); } } inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { @@ -120,7 +120,7 @@ class BaseConvolutionLayer : public Layer { } else { col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), data); + pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data); } } #ifndef CPU_ONLY @@ -136,7 +136,7 @@ class BaseConvolutionLayer : public Layer { im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), kernel_shape_.gpu_data(), pad_.gpu_data(), - stride_.gpu_data(), col_buff); + stride_.gpu_data(), dilation_.gpu_data(), col_buff); } } inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { @@ -151,7 +151,7 @@ class BaseConvolutionLayer : public Layer { col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - data); + dilation_.gpu_data(), data); } } #endif diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp index 748b65c4..a35bc6e0 100644 --- a/include/caffe/util/im2col.hpp +++ b/include/caffe/util/im2col.hpp 
@@ -7,7 +7,7 @@ template void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); + const int* dilation, Dtype* data_col); template void im2col_cpu(const Dtype* data_im, const int channels, @@ -20,7 +20,7 @@ template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im); + const int* dilation, Dtype* data_im); template void col2im_cpu(const Dtype* data_col, const int channels, @@ -33,7 +33,7 @@ template void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, const int col_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); + const int* dilation, Dtype* data_col); template void im2col_gpu(const Dtype* data_im, const int channels, @@ -46,7 +46,7 @@ template void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, const int im_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im); + const int* dilation, Dtype* data_im); template void col2im_gpu(const Dtype* data_col, const int channels, diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 6b1d1c1a..4d912d28 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -39,12 +39,14 @@ shared_ptr > GetConvolutionLayer( const LayerParameter& param) { ConvolutionParameter conv_param = param.convolution_param(); ConvolutionParameter_Engine engine = conv_param.engine(); +#ifdef USE_CUDNN bool use_dilation = false; for (int i = 0; i < conv_param.dilation_size(); ++i) { if (conv_param.dilation(i) > 1) { use_dilation = true; } } +#endif if (engine == ConvolutionParameter_Engine_DEFAULT) { engine = ConvolutionParameter_Engine_CAFFE; #ifdef USE_CUDNN diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 19ae3019..2fb9b3c1 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -153,7 +153,7 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, bottom[0]->shape().data() + channel_axis_, top[0]->shape().data() + channel_axis_, kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - top_data + n * top_dim_); + dilation_.cpu_data(), top_data + n * top_dim_); } } } @@ -178,7 +178,7 @@ void Im2colLayer::Backward_cpu(const vector*>& top, bottom[0]->shape().data() + channel_axis_, top[0]->shape().data() + channel_axis_, kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - bottom_diff + n * bottom_dim_); + dilation_.cpu_data(), bottom_diff + n * bottom_dim_); } } } diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index d90075d4..792c97f7 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -26,7 +26,7 @@ void Im2colLayer::Forward_gpu(const vector*>& bottom, num_kernels, bottom[0]->gpu_shape() + channel_axis_, top[0]->gpu_shape() + channel_axis_, kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - top_data + n * top_dim_); + dilation_.gpu_data(), top_data + n * top_dim_); } } } @@ -51,7 +51,7 @@ void Im2colLayer::Backward_gpu(const vector*>& top, bottom[0]->gpu_shape() + channel_axis_, top[0]->gpu_shape() + channel_axis_, kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - bottom_diff + n * 
bottom_dim_); + dilation_.gpu_data(), bottom_diff + n * bottom_dim_); } } } diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index 15e06aa8..5d8f01f1 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -26,7 +26,7 @@ template __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col); + const int* dilation, Dtype* data_col); extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; @@ -35,7 +35,7 @@ class Im2colKernelTest : public GPUDeviceTest { protected: Im2colKernelTest() // big so launches > 1024 threads - : blob_bottom_(new Blob(5, 500, 10, 10)), + : blob_bottom_(new Blob(5, 500, 15, 15)), blob_kernel_shape_(new Blob()), blob_stride_(new Blob()), blob_pad_(new Blob()), @@ -56,7 +56,7 @@ class Im2colKernelTest : public GPUDeviceTest { channels_ = blob_bottom_->channels(); pad_ = 0; stride_ = 2; - dilation_ = 1; + dilation_ = 3; kernel_size_ = 3; height_col_ = (height_ + 2 * pad_ - (dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1; @@ -176,6 +176,7 @@ TYPED_TEST(Im2colKernelTest, TestND) { this->blob_top_cpu_->shape().data() + 1, this->blob_kernel_shape_->cpu_data(), this->blob_pad_->cpu_data(), this->blob_stride_->cpu_data(), + this->blob_dilation_->cpu_data(), top_data_cpu + this->blob_top_cpu_->offset(n)); } @@ -194,7 +195,7 @@ TYPED_TEST(Im2colKernelTest, TestND) { num_kernels, bottom_data_gpu + this->blob_bottom_->offset(n), this->blob_bottom_->gpu_shape() + 1, this->blob_top_->gpu_shape() + 1, this->blob_kernel_shape_->gpu_data(), this->blob_pad_->gpu_data(), - this->blob_stride_->gpu_data(), + this->blob_stride_->gpu_data(), this->blob_dilation_->gpu_data(), top_data_gpu + this->blob_top_->offset(n)); CUDA_POST_KERNEL_CHECK; } diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 932d3f21..24885e6b 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -17,7 +17,7 @@ class Im2colLayerTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; protected: Im2colLayerTest() - : blob_bottom_(new Blob(2, 3, 10, 9)), + : blob_bottom_(new Blob(2, 3, 10, 11)), blob_top_(new Blob()) { // fill the values Caffe::set_random_seed(1701); @@ -43,12 +43,13 @@ TYPED_TEST(Im2colLayerTest, TestSetup) { layer_param.mutable_convolution_param(); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); + convolution_param->add_dilation(3); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), 2); EXPECT_EQ(this->blob_top_->channels(), 27); EXPECT_EQ(this->blob_top_->height(), 2); - EXPECT_EQ(this->blob_top_->width(), 2); + EXPECT_EQ(this->blob_top_->width(), 3); } TYPED_TEST(Im2colLayerTest, TestForward) { @@ -89,6 +90,7 @@ TYPED_TEST(Im2colLayerTest, TestGradientForceND) { layer_param.mutable_convolution_param(); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); + convolution_param->add_dilation(3); convolution_param->set_force_nd_im2col(true); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); @@ -123,6 +125,8 @@ TYPED_TEST(Im2colLayerTest, TestRectGradient) { convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); convolution_param->add_stride(2); + convolution_param->add_dilation(1); + convolution_param->add_dilation(3); Im2colLayer layer(layer_param); 
GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 1e578e7c..6e5ea875 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -49,7 +49,7 @@ template inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_output) { + const int* dilation, Dtype* data_output) { if (!im2col) { int im_size = im_shape[0]; for (int i = 0; i < num_spatial_axes; ++i) { @@ -81,7 +81,8 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, bool is_padding = false; for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { const int d = d_iter[d_i]; - const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i]; + const int d_im = d * stride[d_i] - pad[d_i] + + d_offset[d_i] * dilation[d_i]; is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1]; index_col *= col_shape[d_i + 1]; index_col += d; @@ -119,10 +120,10 @@ template void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) { + const int* dilation, Dtype* data_col) { const bool kIm2Col = true; im2col_nd_core_cpu(data_im, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); } // Explicit instantiation @@ -130,12 +131,12 @@ template void im2col_nd_cpu(const float* data_im, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - float* data_col); + const int* dilation, float* data_col); template void im2col_nd_cpu(const double* data_im, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - double* data_col); + const int* dilation, double* data_col); template void col2im_cpu(const Dtype* data_col, const int channels, @@ -182,10 +183,10 @@ template void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { + const int* dilation, Dtype* data_im) { const bool kIm2Col = false; im2col_nd_core_cpu(data_col, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); } // Explicit instantiation @@ -193,12 +194,12 @@ template void col2im_nd_cpu(const float* data_col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - float* data_im); + const int* dilation, float* data_im); template void col2im_nd_cpu(const double* data_col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - double* data_im); + const int* dilation, double* data_im); } // namespace caffe diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index cdcaac5b..a8f30a02 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -75,9 +75,29 @@ template __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) 
{ + const int* dilation, Dtype* data_col) { int d_temp[num_axes]; // NOLINT(runtime/arrays) int d_iter[num_axes]; // NOLINT(runtime/arrays) + + __shared__ int shared_dilation[num_axes]; + __shared__ int shared_kernel_shape[num_axes]; + __shared__ int shared_pad[num_axes]; + __shared__ int shared_stride[num_axes]; + __shared__ int shared_col_shape[num_axes + 1]; + __shared__ int shared_im_shape[num_axes + 1]; + + if (threadIdx.x < num_axes) { + shared_dilation[threadIdx.x] = dilation[threadIdx.x]; + shared_kernel_shape[threadIdx.x] = kernel_shape[threadIdx.x]; + shared_pad[threadIdx.x] = pad[threadIdx.x]; + shared_stride[threadIdx.x] = stride[threadIdx.x]; + } + if (threadIdx.x < num_axes + 1) { + shared_col_shape[threadIdx.x] = col_shape[threadIdx.x]; + shared_im_shape[threadIdx.x] = im_shape[threadIdx.x]; + } + __syncthreads(); + int i; CUDA_KERNEL_LOOP(index, n) { // Initialize channel_in, computed in the loop below, with intermediate @@ -85,19 +105,19 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, int channel_in = index; int channel_out = 1; for (i = num_axes - 1; i >= 0; --i) { - d_temp[i] = channel_in % col_shape[i + 1]; - channel_in /= col_shape[i + 1]; - channel_out *= kernel_shape[i]; + d_temp[i] = channel_in % shared_col_shape[i + 1]; + channel_in /= shared_col_shape[i + 1]; + channel_out *= shared_kernel_shape[i]; } channel_out *= channel_in; int data_col_inc = 1; for (i = 0; i < num_axes; ++i) { - channel_out *= col_shape[i + 1]; + channel_out *= shared_col_shape[i + 1]; channel_out += d_temp[i]; - d_temp[i] = d_temp[i] * stride[i] - pad[i]; - channel_in *= im_shape[i + 1]; + d_temp[i] = d_temp[i] * shared_stride[i] - shared_pad[i]; + channel_in *= shared_im_shape[i + 1]; channel_in += d_temp[i]; - data_col_inc *= col_shape[i + 1]; + data_col_inc *= shared_col_shape[i + 1]; d_iter[i] = 0; } Dtype* data_col_ptr = data_col + channel_out; @@ -106,15 +126,15 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, do { bool in_range = true; for (i = 0; i < num_axes; ++i) { - const int d_iter_im = d_iter[i] + d_temp[i]; - in_range &= d_iter_im >= 0 && d_iter_im < im_shape[i + 1]; + const int d_iter_im = d_iter[i] * shared_dilation[i] + d_temp[i]; + in_range &= d_iter_im >= 0 && d_iter_im < shared_im_shape[i + 1]; if (!in_range) { break; } } if (in_range) { - int data_im_offset = d_iter[0]; + int data_im_offset = d_iter[0] * shared_dilation[0]; for (i = 1; i < num_axes; ++i) { - data_im_offset *= im_shape[i + 1]; - data_im_offset += d_iter[i]; + data_im_offset *= shared_im_shape[i + 1]; + data_im_offset += d_iter[i] * shared_dilation[i]; } *data_col_ptr = data_im_ptr[data_im_offset]; } else { @@ -123,7 +143,7 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, data_col_ptr += data_col_inc; incremented = false; for (i = num_axes - 1; i >= 0; --i) { - const int d_max = kernel_shape[i]; + const int d_max = shared_kernel_shape[i]; if (d_iter[i] == d_max - 1) { d_iter[i] = 0; } else { // d_iter[i] < d_max - 1 @@ -140,67 +160,69 @@ template void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, const int num_kernels, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_col) { + const int* dilation, Dtype* data_col) { + // num_axes should be smaller than block size + DCHECK_LT(num_spatial_axes, CAFFE_CUDA_NUM_THREADS); switch (num_spatial_axes) { case 1: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, 
im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 2: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 3: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 4: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 5: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 6: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 7: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 8: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 9: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; case 10: im2col_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( num_kernels, data_im, im_shape, col_shape, - kernel_shape, pad, stride, data_col); + kernel_shape, pad, stride, dilation, data_col); break; default: LOG(FATAL) << "im2col_nd_gpu does not support computation with " @@ -214,12 +236,12 @@ template void im2col_nd_gpu(const float* data_im, const int num_spatial_axes, const int col_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - float* data_col); + const int* dilation, float* data_col); template void im2col_nd_gpu(const double* data_im, const int num_spatial_axes, const int col_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - double* data_col); + const int* dilation, double* data_col); template __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col, @@ -300,27 +322,50 @@ template __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { + const int* dilation, Dtype* data_im) { int d_im[num_axes]; // NOLINT(runtime/arrays) int d_col_iter[num_axes]; // NOLINT(runtime/arrays) int d_col_start[num_axes]; // NOLINT(runtime/arrays) int d_col_end[num_axes]; // NOLINT(runtime/arrays) + + __shared__ int shared_dilation[num_axes]; + __shared__ int shared_kernel_shape[num_axes]; + __shared__ int shared_pad[num_axes]; + __shared__ int shared_stride[num_axes]; + __shared__ int shared_col_shape[num_axes + 1]; + __shared__ int shared_im_shape[num_axes + 1]; + + if (threadIdx.x < num_axes) { + 
shared_dilation[threadIdx.x] = dilation[threadIdx.x]; + shared_kernel_shape[threadIdx.x] = kernel_shape[threadIdx.x]; + shared_pad[threadIdx.x] = pad[threadIdx.x]; + shared_stride[threadIdx.x] = stride[threadIdx.x]; + } + if (threadIdx.x < num_axes + 1) { + shared_col_shape[threadIdx.x] = col_shape[threadIdx.x]; + shared_im_shape[threadIdx.x] = im_shape[threadIdx.x]; + } + __syncthreads(); + CUDA_KERNEL_LOOP(index, n) { // Initialize channel_in, computed in the loop below, with intermediate // computations used to compute the spatial indices. int c_im = index; // Calculate d_im (image dimensions). for (int i = num_axes - 1; i >= 0; --i) { - d_im[i] = c_im % im_shape[i + 1] + pad[i]; - c_im /= im_shape[i + 1]; + d_im[i] = c_im % shared_im_shape[i + 1] + shared_pad[i]; + c_im /= shared_im_shape[i + 1]; } // Calculate col start/end indices. bool done = false; for (int i = 0; i < num_axes; ++i) { + const int kernel_extent = + shared_dilation[i] * (shared_kernel_shape[i] - 1) + 1; d_col_start[i] = d_col_iter[i] = - (d_im[i] < kernel_shape[i]) ? - 0 : (d_im[i] - kernel_shape[i]) / stride[i] + 1; - d_col_end[i] = min(d_im[i] / stride[i] + 1, col_shape[i + 1]); + (d_im[i] < kernel_extent) ? 0 : + (d_im[i] - kernel_extent) / shared_stride[i] + 1; + d_col_end[i] = + min(d_im[i] / shared_stride[i] + 1, shared_col_shape[i + 1]); if (d_col_start[i] >= d_col_end[i]) { // Skip computation if the dimension is 0 at any spatial axis -- // final val will be 0. @@ -335,21 +380,32 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col, // Loop over the col to compute the output val. Dtype val = 0; bool incremented = true; + bool skip = false; do { // Compute the final offset. int final_offset = 0; int kernel_shape_prod = 1; + int kernel_index; for (int i = num_axes - 1; i >= 0; --i) { - final_offset += - (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod; - kernel_shape_prod *= kernel_shape[i]; + kernel_index = d_im[i] - d_col_iter[i] * shared_stride[i]; + if (kernel_index % shared_dilation[i]) { + skip = true; + break; + } else { + kernel_index /= shared_dilation[i]; + final_offset += kernel_index * kernel_shape_prod; + kernel_shape_prod *= shared_kernel_shape[i]; + } } - final_offset += kernel_shape_prod * c_im; - for (int i = 0; i < num_axes; ++i) { - final_offset *= col_shape[i + 1]; - final_offset += d_col_iter[i]; + if (!skip) { + final_offset += kernel_shape_prod * c_im; + for (int i = 0; i < num_axes; ++i) { + final_offset *= shared_col_shape[i + 1]; + final_offset += d_col_iter[i]; + } + val += data_col[final_offset]; } - val += data_col[final_offset]; + skip = false; incremented = false; for (int i = num_axes - 1; i >= 0; --i) { const int d_max = d_col_end[i]; @@ -370,67 +426,69 @@ template void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, const int im_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - Dtype* data_im) { + const int* dilation, Dtype* data_im) { + // num_axes should be smaller than block size + DCHECK_LT(num_spatial_axes, CAFFE_CUDA_NUM_THREADS); switch (num_spatial_axes) { case 1: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 2: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, 
data_im); break; case 3: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 4: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 5: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 6: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 7: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 8: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 9: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; case 10: col2im_nd_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators) <<>>( im_size, data_col, im_shape, col_shape, - kernel_shape, pad, stride, data_im); + kernel_shape, pad, stride, dilation, data_im); break; default: LOG(FATAL) << "col2im_nd_gpu does not support computation with " @@ -444,11 +502,11 @@ template void col2im_nd_gpu(const float* data_col, const int num_spatial_axes, const int im_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - float* data_im); + const int* dilation, float* data_im); template void col2im_nd_gpu(const double* data_col, const int num_spatial_axes, const int im_size, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, - double* data_im); + const int* dilation, double* data_im); } // namespace caffe From 7674799475598fcb0494c83d93a46b41f8261a11 Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sat, 26 Dec 2015 13:04:25 -0800 Subject: [PATCH 197/223] add and improve tests for dilated convolution/im2col --- src/caffe/test/test_convolution_layer.cpp | 115 ++++++++++++++++++++++ src/caffe/test/test_im2col_layer.cpp | 54 ++++++++-- 2 files changed, 163 insertions(+), 6 deletions(-) diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index 95c3c80c..9bb19d13 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -264,6 +264,50 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { } } +TYPED_TEST(ConvolutionLayerTest, TestDilatedConvolution) { + typedef typename TypeParam::Dtype Dtype; + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(8); + bottom_shape.push_back(7); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + } + LayerParameter layer_param; + 
ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new ConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. + const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + TYPED_TEST(ConvolutionLayerTest, Test0DConvolution) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -349,6 +393,53 @@ TYPED_TEST(ConvolutionLayerTest, TestSimple3DConvolution) { } } +TYPED_TEST(ConvolutionLayerTest, TestDilated3DConvolution) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 6; + bottom_shape[3] = 7; + bottom_shape[4] = 8; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer( + new ConvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Check against reference convolution. 
+ const Dtype* top_data; + const Dtype* ref_top_data; + caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_)); + top_data = this->blob_top_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } + caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(), + this->MakeReferenceTop(this->blob_top_2_)); + top_data = this->blob_top_2_->cpu_data(); + ref_top_data = this->ref_blob_top_->cpu_data(); + for (int i = 0; i < this->blob_top_->count(); ++i) { + EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4); + } +} + TYPED_TEST(ConvolutionLayerTest, Test1x1Convolution) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -633,6 +724,30 @@ TYPED_TEST(ConvolutionLayerTest, TestGradient) { this->blob_top_vec_); } +TYPED_TEST(ConvolutionLayerTest, TestDilatedGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(5); + bottom_shape.push_back(6); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + } + convolution_param->add_kernel_size(3); + convolution_param->add_dilation(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + ConvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + TYPED_TEST(ConvolutionLayerTest, TestGradient3D) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 24885e6b..a7faf18f 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -17,7 +17,7 @@ class Im2colLayerTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; protected: Im2colLayerTest() - : blob_bottom_(new Blob(2, 3, 10, 11)), + : blob_bottom_(new Blob(2, 3, 6, 5)), blob_top_(new Blob()) { // fill the values Caffe::set_random_seed(1701); @@ -41,6 +41,12 @@ TYPED_TEST(Im2colLayerTest, TestSetup) { LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(10); + bottom_shape.push_back(11); + this->blob_bottom_->Reshape(bottom_shape); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); convolution_param->add_dilation(3); @@ -76,21 +82,39 @@ TYPED_TEST(Im2colLayerTest, TestGradient) { layer_param.mutable_convolution_param(); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); - convolution_param->add_dilation(3); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, this->blob_top_vec_); } -TYPED_TEST(Im2colLayerTest, TestGradientForceND) { +TYPED_TEST(Im2colLayerTest, TestDilatedGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConvolutionParameter* convolution_param = layer_param.mutable_convolution_param(); + vector bottom_shape; + 
bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(10); + bottom_shape.push_back(9); + this->blob_bottom_->Reshape(bottom_shape); convolution_param->add_kernel_size(3); convolution_param->add_stride(2); convolution_param->add_dilation(3); + Im2colLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(Im2colLayerTest, TestGradientForceND) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); convolution_param->set_force_nd_im2col(true); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); @@ -98,6 +122,27 @@ TYPED_TEST(Im2colLayerTest, TestGradientForceND) { this->blob_top_vec_); } +TYPED_TEST(Im2colLayerTest, TestDilatedGradientForceND) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + vector bottom_shape; + bottom_shape.push_back(2); + bottom_shape.push_back(3); + bottom_shape.push_back(10); + bottom_shape.push_back(9); + this->blob_bottom_->Reshape(bottom_shape); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->add_dilation(3); + convolution_param->set_force_nd_im2col(true); + Im2colLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + TYPED_TEST(Im2colLayerTest, TestRect) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -116,7 +161,6 @@ TYPED_TEST(Im2colLayerTest, TestRect) { } } - TYPED_TEST(Im2colLayerTest, TestRectGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -125,8 +169,6 @@ TYPED_TEST(Im2colLayerTest, TestRectGradient) { convolution_param->set_kernel_h(5); convolution_param->set_kernel_w(3); convolution_param->add_stride(2); - convolution_param->add_dilation(1); - convolution_param->add_dilation(3); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, From c25c5796385eda485a743cae6222845ca8eb52bb Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sat, 26 Dec 2015 13:10:02 -0800 Subject: [PATCH 198/223] disable dilated deconvolution --- src/caffe/layers/base_conv_layer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index 4a4c68e0..deb58a71 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -105,6 +105,9 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, for (int i = 0; i < num_spatial_axes_; ++i) { dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : conv_param.dilation((num_dilation_dims == 1) ? 0 : i); + if (reverse_dimensions()) { + CHECK_EQ(dilation_data[i], 1) << "Deconvolution doesn't support dilation"; + } } // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. 
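All of the dilation changes above reduce to one geometric fact: a kernel of size k with dilation d covers an effective extent of d * (k - 1) + 1 input pixels, and the convolution output size follows from that extent. A minimal standalone sketch of the arithmetic (hypothetical helper functions, not code from these patches), checked against the expectations in the Im2colLayerTest::TestSetup change above:

#include <iostream>

// Effective extent of a dilated kernel: dilation * (kernel - 1) + 1.
inline int dilated_kernel_extent(int kernel, int dilation) {
  return dilation * (kernel - 1) + 1;
}

// Spatial output size of a forward convolution for one axis.
inline int conv_output_dim(int input, int kernel, int pad, int stride,
                           int dilation) {
  return (input + 2 * pad - dilated_kernel_extent(kernel, dilation)) / stride + 1;
}

int main() {
  // 10x11 bottom, 3x3 kernel, stride 2, pad 0, dilation 3:
  // extent = 3 * (3 - 1) + 1 = 7, so height (10 - 7) / 2 + 1 = 2
  // and width (11 - 7) / 2 + 1 = 3, matching the EXPECT_EQ values above.
  std::cout << conv_output_dim(10, 3, 0, 2, 3) << " x "
            << conv_output_dim(11, 3, 0, 2, 3) << std::endl;
  return 0;
}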
From 3e3e9ce17636f813c80b5b22afc069d3c1c802cb Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Sat, 26 Dec 2015 13:10:11 -0800 Subject: [PATCH 199/223] add short description of dilation to caffe.proto --- src/caffe/proto/caffe.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 87c46629..019aa614 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -518,6 +518,9 @@ message ConvolutionParameter { repeated uint32 pad = 3; // The padding size; defaults to 0 repeated uint32 kernel_size = 4; // The kernel size repeated uint32 stride = 6; // The stride; defaults to 1 + // Factor used to dilate the kernel, (implicitly) zero-filling the resulting + // holes. (Kernel dilation is sometimes referred to by its use in the + // algorithme à trous from Holschneider et al. 1987.) repeated uint32 dilation = 18; // The dilation; defaults to 1 // For 2D convolution only, the *_h and *_w versions may also be used to From bbc4e578a54546bbc41ce9e959386dbba6e269c2 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Sun, 27 Dec 2015 20:56:24 -0800 Subject: [PATCH 200/223] enable dilated deconvolution Since the underlying routines are shared, we need only upgrade compute_output_shape. --- src/caffe/layers/base_conv_layer.cpp | 3 --- src/caffe/layers/deconv_layer.cpp | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index deb58a71..4a4c68e0 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -105,9 +105,6 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, for (int i = 0; i < num_spatial_axes_; ++i) { dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : conv_param.dilation((num_dilation_dims == 1) ? 0 : i); - if (reverse_dimensions()) { - CHECK_EQ(dilation_data[i], 1) << "Deconvolution doesn't support dilation"; - } } // Special case: im2col is the identity for 1x1 convolution with stride 1 // and no padding, so flag for skipping the buffer and transformation. 
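With the CHECK removed above, deconvolution shares the dilated im2col/col2im routines, and only the output-shape computation in the hunk below changes. As a rough sanity check of that formula (plain arithmetic, not taken from the patch): for input_dim = 4, stride = 2, kernel_shape = 3, dilation = 2 and pad = 0, the kernel extent is 2 * (3 - 1) + 1 = 5, so the deconvolution output dim is 2 * (4 - 1) + 5 - 2 * 0 = 11.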
diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index 275c0562..20a460fb 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -9,12 +9,14 @@ void DeconvolutionLayer::compute_output_shape() { const int* kernel_shape_data = this->kernel_shape_.cpu_data(); const int* stride_data = this->stride_.cpu_data(); const int* pad_data = this->pad_.cpu_data(); + const int* dilation_data = this->dilation_.cpu_data(); this->output_shape_.clear(); for (int i = 0; i < this->num_spatial_axes_; ++i) { // i + 1 to skip channel axis const int input_dim = this->input_shape(i + 1); + const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1; const int output_dim = stride_data[i] * (input_dim - 1) - + kernel_shape_data[i] - 2 * pad_data[i]; + + kernel_extent - 2 * pad_data[i]; this->output_shape_.push_back(output_dim); } } From 708c1a122c33bd35b0d53630fb74965488e1947a Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Mon, 28 Dec 2015 22:46:49 -0500 Subject: [PATCH 201/223] remove extra space before + --- src/caffe/solvers/adam_solver.cpp | 2 +- tools/caffe.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp index cb0fbfe2..c3378d38 100644 --- a/src/caffe/solvers/adam_solver.cpp +++ b/src/caffe/solvers/adam_solver.cpp @@ -30,7 +30,7 @@ void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { Blob* val_v = this->history_[param_id + update_history_offset].get(); Blob* val_t = this->temp_[param_id].get(); - const int t = this->iter_ + 1; + const int t = this->iter_ + 1; const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) / (Dtype(1.) - pow(beta1, t)); const int N = net_params[param_id]->count(); diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 305cfc36..6b342ace 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -164,7 +164,7 @@ int train() { if (FLAGS_gpu.size() == 0 && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) { if (solver_param.has_device_id()) { - FLAGS_gpu = "" + + FLAGS_gpu = "" + boost::lexical_cast(solver_param.device_id()); } else { // Set default GPU if unspecified FLAGS_gpu = "" + boost::lexical_cast(0); From 6320d8d2663aa80b54e74e374a34441124f88c24 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 29 Dec 2015 21:10:14 -0800 Subject: [PATCH 202/223] TestDataTransformer: fix some memory leaks caused by use of 'new' --- src/caffe/test/test_data_transformer.cpp | 136 +++++++++++------------ 1 file changed, 62 insertions(+), 74 deletions(-) diff --git a/src/caffe/test/test_data_transformer.cpp b/src/caffe/test/test_data_transformer.cpp index 8a101374..6103918f 100644 --- a/src/caffe/test/test_data_transformer.cpp +++ b/src/caffe/test/test_data_transformer.cpp @@ -40,23 +40,21 @@ class DataTransformTest : public ::testing::Test { int NumSequenceMatches(const TransformationParameter transform_param, const Datum& datum, Phase phase) { // Get crop sequence with Caffe seed 1701. 
- DataTransformer* transformer = - new DataTransformer(transform_param, phase); + DataTransformer transformer(transform_param, phase); const int crop_size = transform_param.crop_size(); Caffe::set_random_seed(seed_); - transformer->InitRand(); - Blob* blob = - new Blob(1, datum.channels(), datum.height(), datum.width()); + transformer.InitRand(); + Blob blob(1, datum.channels(), datum.height(), datum.width()); if (transform_param.crop_size() > 0) { - blob->Reshape(1, datum.channels(), crop_size, crop_size); + blob.Reshape(1, datum.channels(), crop_size, crop_size); } vector > crop_sequence; for (int iter = 0; iter < this->num_iter_; ++iter) { vector iter_crop_sequence; - transformer->Transform(datum, blob); - for (int j = 0; j < blob->count(); ++j) { - iter_crop_sequence.push_back(blob->cpu_data()[j]); + transformer.Transform(datum, &blob); + for (int j = 0; j < blob.count(); ++j) { + iter_crop_sequence.push_back(blob.cpu_data()[j]); } crop_sequence.push_back(iter_crop_sequence); } @@ -64,17 +62,14 @@ class DataTransformTest : public ::testing::Test { int num_sequence_matches = 0; for (int iter = 0; iter < this->num_iter_; ++iter) { vector iter_crop_sequence = crop_sequence[iter]; - transformer->Transform(datum, blob); - for (int j = 0; j < blob->count(); ++j) { - num_sequence_matches += - (crop_sequence[iter][j] == blob->cpu_data()[j]); + transformer.Transform(datum, &blob); + for (int j = 0; j < blob.count(); ++j) { + num_sequence_matches += (crop_sequence[iter][j] == blob.cpu_data()[j]); } } return num_sequence_matches; } - virtual ~DataTransformTest() { } - int seed_; int num_iter_; }; @@ -91,17 +86,16 @@ TYPED_TEST(DataTransformTest, TestEmptyTransform) { Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - Blob* blob = new Blob(1, channels, height, width); - DataTransformer* transformer = - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - transformer->Transform(datum, blob); - EXPECT_EQ(blob->num(), 1); - EXPECT_EQ(blob->channels(), datum.channels()); - EXPECT_EQ(blob->height(), datum.height()); - EXPECT_EQ(blob->width(), datum.width()); - for (int j = 0; j < blob->count(); ++j) { - EXPECT_EQ(blob->cpu_data()[j], label); + Blob blob(1, channels, height, width); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + transformer.Transform(datum, &blob); + EXPECT_EQ(blob.num(), 1); + EXPECT_EQ(blob.channels(), datum.channels()); + EXPECT_EQ(blob.height(), datum.height()); + EXPECT_EQ(blob.width(), datum.width()); + for (int j = 0; j < blob.count(); ++j) { + EXPECT_EQ(blob.cpu_data()[j], label); } } @@ -115,17 +109,16 @@ TYPED_TEST(DataTransformTest, TestEmptyTransformUniquePixels) { Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - Blob* blob = new Blob(1, 3, 4, 5); - DataTransformer* transformer = - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - transformer->Transform(datum, blob); - EXPECT_EQ(blob->num(), 1); - EXPECT_EQ(blob->channels(), datum.channels()); - EXPECT_EQ(blob->height(), datum.height()); - EXPECT_EQ(blob->width(), datum.width()); - for (int j = 0; j < blob->count(); ++j) { - EXPECT_EQ(blob->cpu_data()[j], j); + Blob blob(1, 3, 4, 5); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + transformer.Transform(datum, &blob); + EXPECT_EQ(blob.num(), 1); + EXPECT_EQ(blob.channels(), datum.channels()); + EXPECT_EQ(blob.height(), datum.height()); + EXPECT_EQ(blob.width(), datum.width()); + for (int j = 0; j < 
blob.count(); ++j) { + EXPECT_EQ(blob.cpu_data()[j], j); } } @@ -141,19 +134,17 @@ TYPED_TEST(DataTransformTest, TestCropSize) { transform_param.set_crop_size(crop_size); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - DataTransformer* transformer = - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - Blob* blob = - new Blob(1, channels, crop_size, crop_size); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + Blob blob(1, channels, crop_size, crop_size); for (int iter = 0; iter < this->num_iter_; ++iter) { - transformer->Transform(datum, blob); - EXPECT_EQ(blob->num(), 1); - EXPECT_EQ(blob->channels(), datum.channels()); - EXPECT_EQ(blob->height(), crop_size); - EXPECT_EQ(blob->width(), crop_size); - for (int j = 0; j < blob->count(); ++j) { - EXPECT_EQ(blob->cpu_data()[j], label); + transformer.Transform(datum, &blob); + EXPECT_EQ(blob.num(), 1); + EXPECT_EQ(blob.channels(), datum.channels()); + EXPECT_EQ(blob.height(), crop_size); + EXPECT_EQ(blob.width(), crop_size); + for (int j = 0; j < blob.count(); ++j) { + EXPECT_EQ(blob.cpu_data()[j], label); } } } @@ -280,13 +271,12 @@ TYPED_TEST(DataTransformTest, TestMeanValue) { transform_param.add_mean_value(mean_value); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - Blob* blob = new Blob(1, channels, height, width); - DataTransformer* transformer = - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - transformer->Transform(datum, blob); - for (int j = 0; j < blob->count(); ++j) { - EXPECT_EQ(blob->cpu_data()[j], label - mean_value); + Blob blob(1, channels, height, width); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + transformer.Transform(datum, &blob); + for (int j = 0; j < blob.count(); ++j) { + EXPECT_EQ(blob.cpu_data()[j], label - mean_value); } } @@ -303,14 +293,13 @@ TYPED_TEST(DataTransformTest, TestMeanValues) { transform_param.add_mean_value(2); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - Blob* blob = new Blob(1, channels, height, width); - DataTransformer* transformer = - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - transformer->Transform(datum, blob); + Blob blob(1, channels, height, width); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + transformer.Transform(datum, &blob); for (int c = 0; c < channels; ++c) { for (int j = 0; j < height * width; ++j) { - EXPECT_EQ(blob->cpu_data()[blob->offset(0, c) + j], label - c); + EXPECT_EQ(blob.cpu_data()[blob.offset(0, c) + j], label - c); } } } @@ -325,8 +314,8 @@ TYPED_TEST(DataTransformTest, TestMeanFile) { const int size = channels * height * width; // Create a mean file - string* mean_file = new string(); - MakeTempFilename(mean_file); + string mean_file; + MakeTempFilename(&mean_file); BlobProto blob_mean; blob_mean.set_num(1); blob_mean.set_channels(channels); @@ -337,19 +326,18 @@ TYPED_TEST(DataTransformTest, TestMeanFile) { blob_mean.add_data(j); } - LOG(INFO) << "Using temporary mean_file " << *mean_file; - WriteProtoToBinaryFile(blob_mean, *mean_file); + LOG(INFO) << "Using temporary mean_file " << mean_file; + WriteProtoToBinaryFile(blob_mean, mean_file); - transform_param.set_mean_file(*mean_file); + transform_param.set_mean_file(mean_file); Datum datum; FillDatum(label, channels, height, width, unique_pixels, &datum); - Blob* blob = new Blob(1, channels, height, width); - DataTransformer* transformer 
= - new DataTransformer(transform_param, TEST); - transformer->InitRand(); - transformer->Transform(datum, blob); - for (int j = 0; j < blob->count(); ++j) { - EXPECT_EQ(blob->cpu_data()[j], 0); + Blob blob(1, channels, height, width); + DataTransformer transformer(transform_param, TEST); + transformer.InitRand(); + transformer.Transform(datum, &blob); + for (int j = 0; j < blob.count(); ++j) { + EXPECT_EQ(blob.cpu_data()[j], 0); } } From 1137e89fef767c68e9368779a57dfc61c6d8d834 Mon Sep 17 00:00:00 2001 From: philkr Date: Wed, 5 Aug 2015 11:54:08 -0700 Subject: [PATCH 203/223] Exposing layer top and bottom names to python --- include/caffe/net.hpp | 12 ++++++++++++ python/caffe/_caffe.cpp | 4 ++++ python/caffe/pycaffe.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 1bf07d28..3b56f307 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -149,6 +149,18 @@ class Net { inline const vector*> >& top_vecs() const { return top_vecs_; } + /// @brief returns the ids of the top blobs of layer i + inline const vector & top_ids(int i) const { + CHECK_GE(i, 0) << "Invalid layer id"; + CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id"; + return top_id_vecs_[i]; + } + /// @brief returns the ids of the bottom blobs of layer i + inline const vector & bottom_ids(int i) const { + CHECK_GE(i, 0) << "Invalid layer id"; + CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id"; + return bottom_id_vecs_[i]; + } inline const vector >& bottom_need_backward() const { return bottom_need_backward_; } diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 69d55332..4ea2ec60 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -232,6 +232,10 @@ BOOST_PYTHON_MODULE(_caffe) { .def("share_with", &Net::ShareTrainedLayersWith) .add_property("_blob_loss_weights", bp::make_function( &Net::blob_loss_weights, bp::return_internal_reference<>())) + .def("_bottom_ids", bp::make_function(&Net::bottom_ids, + bp::return_value_policy())) + .def("_top_ids", bp::make_function(&Net::top_ids, + bp::return_value_policy())) .add_property("_blobs", bp::make_function(&Net::blobs, bp::return_internal_reference<>())) .add_property("layers", bp::make_function(&Net::layers, diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 31dc702f..30541107 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -276,6 +276,22 @@ def _Net_batch(self, blobs): padding]) yield padded_batch + +class _Net_IdNameWrapper: + """ + A simple wrapper that allows the ids propery to be accessed as a dict + indexed by names. Used for top and bottom names + """ + def __init__(self, net, func): + self.net, self.func = net, func + + def __getitem__(self, name): + # Map the layer name to id + ids = self.func(self.net, list(self.net._layer_names).index(name)) + # Map the blob id to name + id_to_name = list(self.net.blobs) + return [id_to_name[i] for i in ids] + # Attach methods to Net. 
Net.blobs = _Net_blobs Net.blob_loss_weights = _Net_blob_loss_weights @@ -288,3 +304,5 @@ def _Net_batch(self, blobs): Net._batch = _Net_batch Net.inputs = _Net_inputs Net.outputs = _Net_outputs +Net.top_names = property(lambda n: _Net_IdNameWrapper(n, Net._top_ids)) +Net.bottom_names = property(lambda n: _Net_IdNameWrapper(n, Net._bottom_ids)) From 6d09ca2829dad0c7ae4ba1474fd351f41125ab2a Mon Sep 17 00:00:00 2001 From: philkr Date: Tue, 5 Jan 2016 12:45:52 -0800 Subject: [PATCH 204/223] Speeding up the GPU solvers --- src/caffe/solvers/adadelta_solver.cpp | 66 +++++---------------------- src/caffe/solvers/adadelta_solver.cu | 30 ++++++++++++ src/caffe/solvers/adagrad_solver.cpp | 37 ++++----------- src/caffe/solvers/adagrad_solver.cu | 26 +++++++++++ src/caffe/solvers/adam_solver.cpp | 37 ++++----------- src/caffe/solvers/adam_solver.cu | 29 ++++++++++++ src/caffe/solvers/nesterov_solver.cpp | 29 ++++-------- src/caffe/solvers/nesterov_solver.cu | 27 +++++++++++ src/caffe/solvers/rmsprop_solver.cpp | 35 ++++---------- src/caffe/solvers/rmsprop_solver.cu | 28 ++++++++++++ src/caffe/solvers/sgd_solver.cpp | 16 ++++--- src/caffe/solvers/sgd_solver.cu | 24 ++++++++++ 12 files changed, 223 insertions(+), 161 deletions(-) create mode 100644 src/caffe/solvers/adadelta_solver.cu create mode 100644 src/caffe/solvers/adagrad_solver.cu create mode 100644 src/caffe/solvers/adam_solver.cu create mode 100644 src/caffe/solvers/nesterov_solver.cu create mode 100644 src/caffe/solvers/rmsprop_solver.cu create mode 100644 src/caffe/solvers/sgd_solver.cu diff --git a/src/caffe/solvers/adadelta_solver.cpp b/src/caffe/solvers/adadelta_solver.cpp index a37899eb..fd30f19a 100644 --- a/src/caffe/solvers/adadelta_solver.cpp +++ b/src/caffe/solvers/adadelta_solver.cpp @@ -16,6 +16,12 @@ void AdaDeltaSolver::AdaDeltaPreSolve() { } } +#ifndef CPU_ONLY +template +void adadelta_update_gpu(int N, Dtype* g, Dtype* h, Dtype* h2, Dtype momentum, + Dtype delta, Dtype local_rate); +#endif + template void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { const vector*>& net_params = this->net_->learnable_params(); @@ -85,61 +91,11 @@ void AdaDeltaSolver::ComputeUpdateValue(int param_id, Dtype rate) { } case Caffe::GPU: { #ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of gradients - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[param_id]->mutable_gpu_data()); - - // add delta to history to guard against dividing by zero later - caffe_gpu_set(net_params[param_id]->count(), delta, - this->temp_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[update_history_offset + param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add(net_params[param_id]->count(), - this->temp_[param_id]->gpu_data(), - this->history_[param_id]->gpu_data(), - this->temp_[param_id]->mutable_gpu_data()); - - // divide history of updates by history of gradients - caffe_gpu_div(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - this->temp_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // jointly compute the RMS of both for update and gradient history - caffe_gpu_powx(net_params[param_id]->count(), - 
this->update_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - // compute the update and copy to net_diff - caffe_gpu_mul(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), - this->update_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); - - // compute square of update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history of updates - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum, - this->update_[param_id]->gpu_data(), momentum, - this->history_[update_history_offset + param_id]->mutable_gpu_data()); - - // apply learning rate - caffe_gpu_scale(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), - net_params[param_id]->mutable_gpu_diff()); + adadelta_update_gpu(net_params[param_id]->count(), + net_params[param_id]->mutable_gpu_diff(), + this->history_[param_id]->mutable_gpu_data(), + this->history_[update_history_offset + param_id]->mutable_gpu_data(), + momentum, delta, local_rate); #else NO_GPU; #endif diff --git a/src/caffe/solvers/adadelta_solver.cu b/src/caffe/solvers/adadelta_solver.cu new file mode 100644 index 00000000..6c94585b --- /dev/null +++ b/src/caffe/solvers/adadelta_solver.cu @@ -0,0 +1,30 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void AdaDeltaUpdate(int N, Dtype* g, Dtype* h, Dtype* h2, + Dtype momentum, Dtype delta, Dtype local_rate) { + CUDA_KERNEL_LOOP(i, N) { + float gi = g[i]; + float hi = h[i] = momentum * h[i] + (1-momentum) * gi * gi; + gi = gi * sqrt((h2[i] + delta) / (hi + delta)); + h2[i] = momentum * h2[i] + (1-momentum) * gi * gi; + g[i] = local_rate * gi; + } +} +template +void adadelta_update_gpu(int N, Dtype* g, Dtype* h, Dtype* h2, Dtype momentum, + Dtype delta, Dtype local_rate) { + AdaDeltaUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, h, h2, momentum, delta, local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void adadelta_update_gpu(int , float*, float*, float*, + float, float, float); +template void adadelta_update_gpu(int, double*, double*, double*, + double, double, double); + +} // namespace caffe diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp index 5e406326..e78eadca 100644 --- a/src/caffe/solvers/adagrad_solver.cpp +++ b/src/caffe/solvers/adagrad_solver.cpp @@ -4,6 +4,12 @@ namespace caffe { +#ifndef CPU_ONLY +template +void adagrad_update_gpu(int N, Dtype* g, Dtype* h, Dtype delta, + Dtype local_rate); +#endif + template void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { CHECK(Caffe::root_solver()); @@ -45,34 +51,9 @@ void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { } case Caffe::GPU: { #ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history - caffe_gpu_add(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - this->history_[param_id]->gpu_data(), - this->history_[param_id]->mutable_gpu_data()); - - // prepare update - caffe_gpu_powx(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_gpu_data()); - - 
caffe_gpu_div(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), - this->update_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // scale and copy - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->gpu_data(), Dtype(0), - net_params[param_id]->mutable_gpu_diff()); + adagrad_update_gpu(net_params[param_id]->count(), + net_params[param_id]->mutable_gpu_diff(), + this->history_[param_id]->mutable_gpu_data(), delta, local_rate); #else NO_GPU; #endif diff --git a/src/caffe/solvers/adagrad_solver.cu b/src/caffe/solvers/adagrad_solver.cu new file mode 100644 index 00000000..adefd554 --- /dev/null +++ b/src/caffe/solvers/adagrad_solver.cu @@ -0,0 +1,26 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void AdaGradUpdate(int N, Dtype* g, Dtype* h, Dtype delta, + Dtype local_rate) { + CUDA_KERNEL_LOOP(i, N) { + float gi = g[i]; + float hi = h[i] = h[i] + gi*gi; + g[i] = local_rate * gi / (sqrt(hi) + delta); + } +} +template +void adagrad_update_gpu(int N, Dtype* g, Dtype* h, Dtype delta, + Dtype local_rate) { + AdaGradUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, h, delta, local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void adagrad_update_gpu(int, float*, float*, float, float); +template void adagrad_update_gpu(int, double*, double*, double, double); + +} // namespace caffe diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp index c3378d38..4a91f00b 100644 --- a/src/caffe/solvers/adam_solver.cpp +++ b/src/caffe/solvers/adam_solver.cpp @@ -16,6 +16,12 @@ void AdamSolver::AdamPreSolve() { } } +#ifndef CPU_ONLY +template +void adam_update_gpu(int N, Dtype* g, Dtype* m, Dtype* v, Dtype beta1, + Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate); +#endif + template void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { const vector*>& net_params = this->net_->learnable_params(); @@ -69,34 +75,9 @@ void AdamSolver::ComputeUpdateValue(int param_id, Dtype rate) { } case Caffe::GPU: { #ifndef CPU_ONLY - // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t - caffe_gpu_axpby(N, Dtype(1)-beta1, - net_params[param_id]->gpu_diff(), beta1, - val_m->mutable_gpu_data()); - - // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2 - caffe_gpu_mul(N, - net_params[param_id]->gpu_diff(), - net_params[param_id]->gpu_diff(), - val_t->mutable_gpu_data()); - caffe_gpu_axpby(N, Dtype(1)-beta2, - val_t->gpu_data(), beta2, - val_v->mutable_gpu_data()); - - // set update - caffe_gpu_powx(N, - val_v->gpu_data(), Dtype(0.5), - val_t->mutable_gpu_data()); - caffe_gpu_add_scalar(N, eps_hat, - val_t->mutable_gpu_data()); - caffe_gpu_div(N, - val_m->gpu_data(), - val_t->gpu_data(), - val_t->mutable_gpu_data()); - - caffe_gpu_scale(N, local_rate*correction, - val_t->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); + adam_update_gpu(N, net_params[param_id]->mutable_gpu_diff(), + val_m->mutable_gpu_data(), val_v->mutable_gpu_data(), beta1, beta2, + eps_hat, local_rate*correction); #else NO_GPU; #endif diff --git a/src/caffe/solvers/adam_solver.cu b/src/caffe/solvers/adam_solver.cu new file mode 100644 index 00000000..917ae100 --- /dev/null +++ b/src/caffe/solvers/adam_solver.cu @@ -0,0 +1,29 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void AdamUpdate(int N, Dtype* g, Dtype* m, Dtype* v, + Dtype beta1, Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate) { + CUDA_KERNEL_LOOP(i, N) { + float gi = g[i]; + 
float mi = m[i] = m[i]*beta1 + gi*(1-beta1); + float vi = v[i] = v[i]*beta2 + gi*gi*(1-beta2); + g[i] = corrected_local_rate * mi / (sqrt(vi) + eps_hat); + } +} +template +void adam_update_gpu(int N, Dtype* g, Dtype* m, Dtype* v, Dtype beta1, + Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate) { + AdamUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, m, v, beta1, beta2, eps_hat, corrected_local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void adam_update_gpu(int, float*, float*, float*, + float, float, float, float); +template void adam_update_gpu(int, double*, double*, double*, + double, double, double, double); + +} // namespace caffe diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp index 34bf01eb..23ab2d43 100644 --- a/src/caffe/solvers/nesterov_solver.cpp +++ b/src/caffe/solvers/nesterov_solver.cpp @@ -4,6 +4,12 @@ namespace caffe { +#ifndef CPU_ONLY +template +void nesterov_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum, + Dtype local_rate); +#endif + template void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { CHECK(Caffe::root_solver()); @@ -36,25 +42,10 @@ void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { } case Caffe::GPU: { #ifndef CPU_ONLY - // save history momentum for stepping back - caffe_copy(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - // update history - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), momentum, - this->history_[param_id]->mutable_gpu_data()); - - // compute update: step back then over step - caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, - this->history_[param_id]->gpu_data(), -momentum, - this->update_[param_id]->mutable_gpu_data()); - - // copy - caffe_copy(net_params[param_id]->count(), - this->update_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); + nesterov_update_gpu(net_params[param_id]->count(), + net_params[param_id]->mutable_gpu_diff(), + this->history_[param_id]->mutable_gpu_data(), + momentum, local_rate); #else NO_GPU; #endif diff --git a/src/caffe/solvers/nesterov_solver.cu b/src/caffe/solvers/nesterov_solver.cu new file mode 100644 index 00000000..57a456b8 --- /dev/null +++ b/src/caffe/solvers/nesterov_solver.cu @@ -0,0 +1,27 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void NesterovUpdate(int N, Dtype* g, Dtype* h, + Dtype momentum, Dtype local_rate) { + CUDA_KERNEL_LOOP(i, N) { + float hi = h[i]; + float hi_new = h[i] = momentum * hi + local_rate * g[i]; + g[i] = (1+momentum) * hi_new - momentum * hi; + } +} +template +void nesterov_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum, + Dtype local_rate) { + NesterovUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, h, momentum, local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void nesterov_update_gpu(int, float*, float*, float, float); +template void nesterov_update_gpu(int, double*, double*, double, + double); + +} // namespace caffe diff --git a/src/caffe/solvers/rmsprop_solver.cpp b/src/caffe/solvers/rmsprop_solver.cpp index c6247676..3251ee42 100644 --- a/src/caffe/solvers/rmsprop_solver.cpp +++ b/src/caffe/solvers/rmsprop_solver.cpp @@ -4,6 +4,12 @@ namespace caffe { +#ifndef CPU_ONLY +template +void rmsprop_update_gpu(int N, Dtype* g, Dtype* h, Dtype rms_decay, + Dtype delta, Dtype local_rate); +#endif + template void 
RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { const vector*>& net_params = this->net_->learnable_params(); @@ -45,31 +51,10 @@ void RMSPropSolver::ComputeUpdateValue(int param_id, Dtype rate) { break; case Caffe::GPU: #ifndef CPU_ONLY - // compute square of gradient in update - caffe_gpu_powx(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), Dtype(2), - this->update_[param_id]->mutable_gpu_data()); - - // update history - caffe_gpu_axpby(net_params[param_id] -> count(), - Dtype(1-rms_decay), this->update_[param_id]->gpu_data(), - rms_decay, this->history_[param_id]-> mutable_gpu_data()); - - // prepare update - caffe_gpu_powx(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), Dtype(0.5), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_add_scalar(net_params[param_id]->count(), - delta, this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_div(net_params[param_id]->count(), - net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(), - this->update_[param_id]->mutable_gpu_data()); - - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - this->update_[param_id]->gpu_data(), Dtype(0), - net_params[param_id]->mutable_gpu_diff()); + rmsprop_update_gpu(net_params[param_id]->count(), + net_params[param_id]->mutable_gpu_diff(), + this->history_[param_id]->mutable_gpu_data(), + rms_decay, delta, local_rate); #else NO_GPU; #endif diff --git a/src/caffe/solvers/rmsprop_solver.cu b/src/caffe/solvers/rmsprop_solver.cu new file mode 100644 index 00000000..c5ffd329 --- /dev/null +++ b/src/caffe/solvers/rmsprop_solver.cu @@ -0,0 +1,28 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void RMSPropUpdate(int N, Dtype* g, Dtype* h, + Dtype rms_decay, Dtype delta, Dtype local_rate) { + CUDA_KERNEL_LOOP(i, N) { + float gi = g[i]; + float hi = h[i] = rms_decay*h[i] + (1-rms_decay)*gi*gi; + g[i] = local_rate * g[i] / (sqrt(hi) + delta); + } +} +template +void rmsprop_update_gpu(int N, Dtype* g, Dtype* h, Dtype rms_decay, + Dtype delta, Dtype local_rate) { + RMSPropUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, h, rms_decay, delta, local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void rmsprop_update_gpu(int, float*, float*, float, float, + float); +template void rmsprop_update_gpu(int, double*, double*, double, double, + double); + +} // namespace caffe diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp index 32bf19b1..f30f316d 100644 --- a/src/caffe/solvers/sgd_solver.cpp +++ b/src/caffe/solvers/sgd_solver.cpp @@ -203,6 +203,12 @@ void SGDSolver::Regularize(int param_id) { } } +#ifndef CPU_ONLY +template +void sgd_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum, + Dtype local_rate); +#endif + template void SGDSolver::ComputeUpdateValue(int param_id, Dtype rate) { const vector*>& net_params = this->net_->learnable_params(); @@ -222,12 +228,10 @@ void SGDSolver::ComputeUpdateValue(int param_id, Dtype rate) { } case Caffe::GPU: { #ifndef CPU_ONLY - caffe_gpu_axpby(net_params[param_id]->count(), local_rate, - net_params[param_id]->gpu_diff(), momentum, - history_[param_id]->mutable_gpu_data()); - caffe_copy(net_params[param_id]->count(), - history_[param_id]->gpu_data(), - net_params[param_id]->mutable_gpu_diff()); + sgd_update_gpu(net_params[param_id]->count(), + net_params[param_id]->mutable_gpu_diff(), + history_[param_id]->mutable_gpu_data(), + momentum, local_rate); #else NO_GPU; #endif diff --git 
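
The RMSProp and SGD solvers follow the same pattern, one fused kernel per update. Restating them (\rho = rms_decay, \mu = momentum, \alpha = local_rate; the SGDUpdate kernel itself appears in sgd_solver.cu just below):

    RMSProp:  h_i \leftarrow \rho h_i + (1-\rho) g_i^2, \quad g_i \leftarrow \alpha \, g_i / (\sqrt{h_i} + \delta)
    SGD:      h_i \leftarrow \mu h_i + \alpha g_i, \quad g_i \leftarrow h_i
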
a/src/caffe/solvers/sgd_solver.cu b/src/caffe/solvers/sgd_solver.cu new file mode 100644 index 00000000..e5410352 --- /dev/null +++ b/src/caffe/solvers/sgd_solver.cu @@ -0,0 +1,24 @@ +#include "caffe/util/math_functions.hpp" + + +namespace caffe { + +template +__global__ void SGDUpdate(int N, Dtype* g, Dtype* h, + Dtype momentum, Dtype local_rate) { + CUDA_KERNEL_LOOP(i, N) { + g[i] = h[i] = momentum*h[i] + local_rate*g[i]; + } +} +template +void sgd_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum, + Dtype local_rate) { + SGDUpdate // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + N, g, h, momentum, local_rate); + CUDA_POST_KERNEL_CHECK; +} +template void sgd_update_gpu(int, float*, float*, float, float); +template void sgd_update_gpu(int, double*, double*, double, double); + +} // namespace caffe From 672f30ece38b41c0133d83501882551c53610885 Mon Sep 17 00:00:00 2001 From: philkr Date: Wed, 6 Jan 2016 07:23:35 -0800 Subject: [PATCH 205/223] CMake python version fix --- cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 51a803c1..c7b6a17a 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -114,14 +114,14 @@ if(BUILD_python) # Find the matching boost python implementation set(version ${PYTHONLIBS_VERSION_STRING}) - STRING( REPLACE "." "" boost_py_version ${version} ) + STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} ) find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND) STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} ) - STRING( REPLACE "." "" boost_py_version ${version} ) + STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} ) find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) From 581c1cc3fd6c04640c4b89e5ed003a40cd67e855 Mon Sep 17 00:00:00 2001 From: Mariusz Moczala Date: Wed, 20 Jan 2016 09:28:35 +0100 Subject: [PATCH 206/223] Performance related update of im2col() and col2im() functions --- src/caffe/util/im2col.cpp | 93 +++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 32 deletions(-) diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 6e5ea875..114a86cb 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -5,6 +5,16 @@ namespace caffe { +// Function uses casting from int to unsigned to compare if value of +// parameter a is greater or equal to zero and lower than value of +// parameter b. The b parameter is of type signed and is always positive, +// therefore its value is always lower than 0x800... where casting +// negative value of a parameter converts it to value higher than 0x800... +// The casting allows to use one condition instead of two. 
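
A minimal standalone check of the unsigned-comparison trick described in the comment above; it assumes a 32-bit two's-complement int, and the helper simply mirrors the is_a_ge_zero_and_a_lt_b() function that follows (renamed here for the example):

    #include <cassert>

    // Same trick as is_a_ge_zero_and_a_lt_b(): a negative a wraps to a huge
    // unsigned value, so one comparison covers both (a >= 0) and (a < b).
    static inline bool in_range(int a, int b) {
      return static_cast<unsigned>(a) < static_cast<unsigned>(b);
    }

    int main() {
      assert(in_range(3, 10));    //  3u < 10u           -> true
      assert(!in_range(-1, 10));  //  0xFFFFFFFFu >= 10u -> false, negative a rejected
      assert(!in_range(10, 10));  // 10u >= 10u          -> false, a must be strictly below b
      return 0;
    }
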
+inline bool is_a_ge_zero_and_a_lt_b(int a, int b) { + return static_cast(a) < static_cast(b); +} + template void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, @@ -12,22 +22,33 @@ void im2col_cpu(const Dtype* data_im, const int channels, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col) { - const int height_col = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_col = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channels_col = channels * kernel_h * kernel_w; - for (int c_col = 0; c_col < channels_col; ++c_col) { - int w_offset = c_col % kernel_w; - int h_offset = (c_col / kernel_w) % kernel_h; - int c_im = c_col / kernel_h / kernel_w; - for (int h_col = 0; h_col < height_col; ++h_col) { - for (int w_col = 0; w_col < width_col; ++w_col) { - int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; - int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; - data_col[(c_col * height_col + h_col) * width_col + w_col] = - (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? - data_im[(c_im * height + h_im) * width + w_im] : 0; + const int output_h = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int output_w = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + const int channel_size = height * width; + for (int channel = channels; channel--; data_im += channel_size) { + for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_row = -pad_h + kernel_row * dilation_h; + for (int output_rows = output_h; output_rows; output_rows--) { + if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { + for (int output_cols = output_w; output_cols; output_cols--) { + *(data_col++) = 0; + } + } else { + int input_col = -pad_w + kernel_col * dilation_w; + for (int output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, width)) { + *(data_col++) = data_im[input_row * width + input_col]; + } else { + *(data_col++) = 0; + } + input_col += stride_w; + } + } + input_row += stride_h; + } } } } @@ -146,22 +167,30 @@ void col2im_cpu(const Dtype* data_col, const int channels, const int dilation_h, const int dilation_w, Dtype* data_im) { caffe_set(height * width * channels, Dtype(0), data_im); - const int height_col = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_col = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channels_col = channels * kernel_h * kernel_w; - for (int c_col = 0; c_col < channels_col; ++c_col) { - int w_offset = c_col % kernel_w; - int h_offset = (c_col / kernel_w) % kernel_h; - int c_im = c_col / kernel_h / kernel_w; - for (int h_col = 0; h_col < height_col; ++h_col) { - for (int w_col = 0; w_col < width_col; ++w_col) { - int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; - int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; - if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) - data_im[(c_im * height + h_im) * width + w_im] += - data_col[(c_col * height_col + h_col) * width_col + w_col]; + const int output_h = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int output_w = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + const 
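
A small worked example of the column layout produced by the new im2col_cpu() above (one channel, 3x3 input holding the values 1..9, 2x2 kernel, stride 1, no padding, no dilation, so output_h = output_w = 2):

    input (3x3):      data_col (kernel_h*kernel_w = 4 rows x output_h*output_w = 4 cols):
      1 2 3             1 2 4 5    <- kernel element (0,0)
      4 5 6             2 3 5 6    <- kernel element (0,1)
      7 8 9             4 5 7 8    <- kernel element (1,0)
                        5 6 8 9    <- kernel element (1,1)

Each row is written by the two innermost loops as one contiguous sweep over the output positions, which is what removes the per-element index arithmetic of the previous implementation.
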
int channel_size = height * width; + for (int channel = channels; channel--; data_im += channel_size) { + for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_row = -pad_h + kernel_row * dilation_h; + for (int output_rows = output_h; output_rows; output_rows--) { + if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { + data_col += output_w; + } else { + int input_col = -pad_w + kernel_col * dilation_w; + for (int output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, width)) { + data_im[input_row * width + input_col] += *data_col; + } + data_col++; + input_col += stride_w; + } + } + input_row += stride_h; + } } } } From d0100ba632b767d2242c10fd1bd3e5782494c079 Mon Sep 17 00:00:00 2001 From: thatguymike Date: Tue, 19 Jan 2016 17:01:34 -0800 Subject: [PATCH 207/223] Workaround for inplace max pooling issue --- src/caffe/layer_factory.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 4d912d28..e967bd61 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -91,7 +91,16 @@ shared_ptr > GetPoolingLayer(const LayerParameter& param) { << "Using Caffe's own pooling layer."; return shared_ptr >(new PoolingLayer(param)); } - return shared_ptr >(new CuDNNPoolingLayer(param)); + // CuDNN assumes layers are not being modified in place, thus + // breaking our index tracking for updates in some cases in Caffe. + // Until there is a workaround in Caffe (index management) or + // cuDNN, use Caffe layer to max pooling, or don't use in place + // layers after max pooling layers + if (param.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) { + return shared_ptr >(new PoolingLayer(param)); + } else { + return shared_ptr >(new CuDNNPoolingLayer(param)); + } #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; From d95998129d4a306693a7228905688ddfcffa2f49 Mon Sep 17 00:00:00 2001 From: Robbie Cooper Date: Thu, 21 Jan 2016 14:11:00 -0500 Subject: [PATCH 208/223] Add makefile config option for linking Python 3 libraries --- Makefile | 2 +- Makefile.config.example | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 985fffd6..ac7d12e2 100644 --- a/Makefile +++ b/Makefile @@ -191,7 +191,7 @@ ifeq ($(USE_OPENCV), 1) endif endif -PYTHON_LIBRARIES := boost_python python2.7 +PYTHON_LIBRARIES ?= boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare ############################## diff --git a/Makefile.config.example b/Makefile.config.example index 1dd6a8f7..8fd49c9c 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -70,6 +70,11 @@ PYTHON_INCLUDE := /usr/include/python2.7 \ # $(ANACONDA_HOME)/include/python2.7 \ # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \ +# Uncomment to use Python 3 (default is Python 2) +# PYTHON_LIBRARIES := boost_python3 python3.5m +# PYTHON_INCLUDE := /usr/include/python3.5m \ +# /usr/lib/python3.5/dist-packages/numpy/core/include + # We need to be able to find libpythonX.X.so or .dylib. 
PYTHON_LIB := /usr/lib # PYTHON_LIB := $(ANACONDA_HOME)/lib From 1954f0f76eb9129c8bf9f34814750dbd5b5e46c9 Mon Sep 17 00:00:00 2001 From: Jun Shi Date: Fri, 22 Jan 2016 05:09:21 -0800 Subject: [PATCH 209/223] copy proto to distribute directory --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 985fffd6..5a6e74f6 100644 --- a/Makefile +++ b/Makefile @@ -651,6 +651,8 @@ superclean: clean supercleanfiles $(DIST_ALIASES): $(DISTRIBUTE_DIR) $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS) + # add proto + cp -r src/caffe/proto $(DISTRIBUTE_DIR)/ # add include cp -r include $(DISTRIBUTE_DIR)/ mkdir -p $(DISTRIBUTE_DIR)/include/caffe/proto From ec04197479d263d1c4801639f5635ceb3e7dcef1 Mon Sep 17 00:00:00 2001 From: Dmytro Mishkin Date: Thu, 14 Jan 2016 17:10:11 +0200 Subject: [PATCH 210/223] Add ChannelwiseAffine for batch norm --- .../caffe/layers/channelwise_affine_layer.hpp | 103 ++++++++++ src/caffe/layers/channelwise_affine_layer.cpp | 189 ++++++++++++++++++ src/caffe/layers/channelwise_affine_layer.cu | 144 +++++++++++++ src/caffe/proto/caffe.proto | 14 +- .../test/test_channelwise_affine_layer.cpp | 105 ++++++++++ 5 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 include/caffe/layers/channelwise_affine_layer.hpp create mode 100644 src/caffe/layers/channelwise_affine_layer.cpp create mode 100644 src/caffe/layers/channelwise_affine_layer.cu create mode 100644 src/caffe/test/test_channelwise_affine_layer.cpp diff --git a/include/caffe/layers/channelwise_affine_layer.hpp b/include/caffe/layers/channelwise_affine_layer.hpp new file mode 100644 index 00000000..6d8ac98b --- /dev/null +++ b/include/caffe/layers/channelwise_affine_layer.hpp @@ -0,0 +1,103 @@ +#ifndef CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ +#define CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ + +#include +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/neuron_layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + /** + * @brief Affine non-linearity function @f$ + * y = ax+b + * @f$, could be used after batch normalization layer + * + */ +template +class ChannelwiseAffineLayer : public NeuronLayer { + public: + /** + * @param param provides ChannelwiseAffineParameter ChannelwiseAffine_param, + * with ChannelwiseAffineLayer options: + * - slope_filler (\b optional, FillerParameter, + * default {'type': constant 'value':1.0001}). + * - bias_filler (\b optional, FillerParameter, + * default {'type': constant 'value':0.0001}). + * - channel_shared (\b optional, default false). + * slopes and biases are shared across channels. + */ + explicit ChannelwiseAffineLayer(const LayerParameter& param) + : NeuronLayer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual inline const char* type() const { return "ChannelwiseAffine"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the computed outputs for each channel @f$i@f$ @f$ + * y_i = a_i x_i + b_i + * @f$. + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + /** + * @brief Computes the error gradient w.r.t. the ChannelwiseAffine inputs. 
+ * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times ...) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times ...) @f$ + * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their + * diff with gradients @f$ + * \frac{\partial E}{\partial x_i} = \left\{ + * \begin{array}{lr} + * a_i \frac{\partial E}{\partial y_i} + * \end{array} \right. + * @f$. + * If param_propagate_down_[0] is true, it fills the diff with gradients + * @f$ + * \frac{\partial E}{\partial a_i} = \left\{ + * \begin{array}{lr} + * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} + * \end{array} \right. + * @f$. + * If param_propagate_down_[1] is true, it fills the diff with gradients + * @f$ + * \frac{\partial E}{\partial b_i} = \left\{ + * \begin{array}{lr} + * frac{\partial E}{\partial y_i} + * \end{array} \right. + * @f$. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + bool channel_shared_; + Blob multiplier_; + // dot multiplier for backward computation of params + Blob bias_multiplier_; + Blob backward_buff_; + // temporary buffer for backward computation + Blob bottom_memory_; + // memory for in-place computation +}; +} // namespace caffe + +#endif // CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ diff --git a/src/caffe/layers/channelwise_affine_layer.cpp b/src/caffe/layers/channelwise_affine_layer.cpp new file mode 100644 index 00000000..e9f31fb1 --- /dev/null +++ b/src/caffe/layers/channelwise_affine_layer.cpp @@ -0,0 +1,189 @@ +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/channelwise_affine_layer.hpp" + +namespace caffe { + +template +void ChannelwiseAffineLayer::LayerSetUp( + const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "Number of axes of bottom blob must be >=2."; + ChannelwiseAffineParameter channelwise_affine_param = + this->layer_param().channelwise_affine_param(); + int channels = bottom[0]->channels(); + channel_shared_ = channelwise_affine_param.channel_shared(); + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(2); + if (channel_shared_) { + this->blobs_[0].reset(new Blob(vector(0))); + this->blobs_[1].reset(new Blob(vector(0))); + + } else { + this->blobs_[0].reset(new Blob(vector(1, channels))); + this->blobs_[1].reset(new Blob(vector(1, channels))); + } + shared_ptr > filler; + if (channelwise_affine_param.has_slope_filler()) { + filler.reset(GetFiller(channelwise_affine_param.slope_filler())); + } else { + FillerParameter filler_param; + filler_param.set_type("constant"); + filler_param.set_value(1.0001); + filler.reset(GetFiller(filler_param)); + } + filler->Fill(this->blobs_[0].get()); + + if (channelwise_affine_param.has_bias_filler()) { + filler.reset(GetFiller(channelwise_affine_param.bias_filler())); + } else { + FillerParameter filler_param; + filler_param.set_type("constant"); + filler_param.set_value(0.0001); + filler.reset(GetFiller(filler_param)); + } + filler->Fill(this->blobs_[1].get()); + } + if (channel_shared_) { + CHECK_EQ(this->blobs_[0]->count(), 1) + << "Slope size is 
inconsistent with prototxt config"; + } else { + CHECK_EQ(this->blobs_[0]->count(), channels) + << "Slope size is inconsistent with prototxt config"; + } + + // Propagate gradients to the parameters (as directed by backward pass). + this->param_propagate_down_.resize(this->blobs_.size(), true); + multiplier_.Reshape(vector(1, bottom[0]->count(1))); + bias_multiplier_.Reshape(vector(1, bottom[0]->count(1))); + backward_buff_.Reshape(vector(1, bottom[0]->count(1))); + caffe_set(multiplier_.count(), Dtype(1.0), + multiplier_.mutable_cpu_data()); + caffe_set(bias_multiplier_.count(), Dtype(1.0), + bias_multiplier_.mutable_cpu_data()); +} + +template +void ChannelwiseAffineLayer::Reshape( + const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "Number of axes of bottom blob must be >=2."; + top[0]->ReshapeLike(*bottom[0]); + if (bottom[0] == top[0]) { + // For in-place computation + bottom_memory_.ReshapeLike(*bottom[0]); + } + int height = 1; + int width = 1; + if (bottom[0]->num_axes() > 2) { + height = bottom[0]->shape(2); + width = bottom[0]->shape(3); + } + vector bias_multiplier_shape(1, height * width); + bias_multiplier_.Reshape(bias_multiplier_shape); + caffe_set(bias_multiplier_.count(), Dtype(1), + bias_multiplier_.mutable_cpu_data()); +} + +template +void ChannelwiseAffineLayer::Forward_cpu( + const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + const Dtype* slope_data = this->blobs_[0]->cpu_data(); + const Dtype* bias_data = this->blobs_[1]->cpu_data(); + // For in-place computation + if (bottom[0] == top[0]) { + caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data()); + } + // if channel_shared, channel index in the following computation becomes + // always zero. + const int div_factor = channel_shared_ ? channels : 1; + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + top_data[i] = bottom_data[i] * slope_data[c] + bias_data[c]; + } +} + +template +void ChannelwiseAffineLayer::Backward_cpu( + const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* slope_data = this->blobs_[0]->cpu_data(); + + const Dtype* top_diff = top[0]->cpu_diff(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->shape(1); + const int num = bottom[0]->shape(0); + int height = 1; + int width = 1; + if (bottom[0]->num_axes() > 2) { + height = bottom[0]->shape(2); + width = bottom[0]->shape(3); + } + + // For in-place computation + if (top[0] == bottom[0]) { + bottom_data = bottom_memory_.cpu_data(); + } + + // if channel_shared, channel index in the following computation becomes + // always zero. + const int div_factor = channel_shared_ ? channels : 1; + + // Propagte to param + // Since to write bottom diff will affect top diff if top and bottom blobs + // are identical (in-place computaion), we first compute param backward to + // keep top_diff unchanged. 
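
Restating the gradients that the parameter and bottom passes below compute, matching the documentation in channelwise_affine_layer.hpp (with \delta_i = \partial E / \partial y_i, and the sums running over all elements of channel c, or over everything when channel_shared is set):

    \partial E / \partial b_c = \sum_{i \in c} \delta_i
    \partial E / \partial a_c = \sum_{i \in c} x_i \, \delta_i
    \partial E / \partial x_i = a_c \, \delta_i
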
+ + if (this->param_propagate_down_[1]) { + Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); + caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff); + for (int n = 0; n < num; ++n) { + caffe_cpu_gemv(CblasNoTrans, channels, height * width, 1., + top_diff + top[0]->offset(n), + bias_multiplier_.cpu_data(), 1., bias_diff); + } + } + if (this->param_propagate_down_[0]) { + Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff(); + caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff); + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + slope_diff[c] += top_diff[i] * bottom_data[i]; + } + } + + // Propagate to bottom + if (propagate_down[0]) { + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + for (int i = 0; i < count; ++i) { + int c = (i / dim) % channels / div_factor; + bottom_diff[i] = slope_data[c] * top_diff[i]; + } + } +} + + +#ifdef CPU_ONLY +STUB_GPU(ChannelwiseAffineLayer); +#endif + +INSTANTIATE_CLASS(ChannelwiseAffineLayer); +REGISTER_LAYER_CLASS(ChannelwiseAffine); + +} // namespace caffe diff --git a/src/caffe/layers/channelwise_affine_layer.cu b/src/caffe/layers/channelwise_affine_layer.cu new file mode 100644 index 00000000..2066b265 --- /dev/null +++ b/src/caffe/layers/channelwise_affine_layer.cu @@ -0,0 +1,144 @@ +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/layers/channelwise_affine_layer.hpp" + +namespace caffe { + +// CUDA kernel for forward +template +__global__ void ChannelwiseAffineForward(const int n, const int channels, + const int dim, const Dtype* in, Dtype* out, const Dtype* slope_data, + const Dtype* bias_data, const int div_factor) { + CUDA_KERNEL_LOOP(index, n) { + int c = (index / dim) % channels / div_factor; + out[index] = in[index] * slope_data[c] + bias_data[c]; + } +} + +// CUDA kernel for bottom backward +template +__global__ void ChannelwiseAffineBackward(const int n, + const int channels, const int dim, const Dtype* in_diff, + Dtype* out_diff, const Dtype* slope_data, const int div_factor) { + CUDA_KERNEL_LOOP(index, n) { + int c = (index / dim) % channels / div_factor; + out_diff[index] = slope_data[c] * in_diff[index]; + } +} + +// CUDA kernel for element-wise parameter backward +template +__global__ void ChannelwiseAffineParamSlopeBackward(const int n, + const int rows, const int rowPitch, const Dtype* in_diff, + const Dtype* in_data, Dtype* out_diff) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index] * in_data[index]; + for ( int k = 1; k < rows; k++ ) { + out_diff[index] += in_diff[index + k*rowPitch] + * in_data[index + k*rowPitch]; + } + } +} + +template +void ChannelwiseAffineLayer::Forward_gpu( + const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const int count = bottom[0]->count(); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->channels(); + const Dtype* slope_data = this->blobs_[0]->gpu_data(); + const Dtype* bias_data = this->blobs_[1]->gpu_data(); + const int div_factor = channel_shared_ ? 
channels : 1; + + // For in-place computation + if (top[0] == bottom[0]) { + caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data()); + } + // NOLINT_NEXT_LINE(whitespace/operators) + ChannelwiseAffineForward<<>>( + count, channels, dim, bottom_data, top_data, + slope_data, bias_data, div_factor); + CUDA_POST_KERNEL_CHECK; +} + +template +void ChannelwiseAffineLayer::Backward_gpu( + const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + const int count = bottom[0]->count(); + const int num = bottom[0]->shape(0); + const int dim = bottom[0]->count(2); + const int channels = bottom[0]->shape(1); + int height = 1; + int width = 1; + if (bottom[0]->num_axes() > 2) { + height = bottom[0]->shape(2); + width = bottom[0]->shape(3); + } + + // For in-place computation + if (top[0] == bottom[0]) { + bottom_data = bottom_memory_.gpu_data(); + } + // Propagate to param + // Since to write bottom diff will affect top diff if top and bottom blobs + // are identical (in-place computaion), we first compute param backward to + // keep top_diff unchanged. + if (this->param_propagate_down_[1]) { + Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0.0), bias_diff); + // Gradient with respect to bias + for (int n = 0; n < num; ++n) { + caffe_gpu_gemv( + CblasNoTrans, channels, height * width, (Dtype)1., + top_diff + top[0]->offset(n), bias_multiplier_.gpu_data(), + (Dtype)1., bias_diff); + } + } + if (this->param_propagate_down_[0]) { + Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff(); + int cdim = channels * dim; + // compute element-wise diff + // NOLINT_NEXT_LINE(whitespace/operators) + ChannelwiseAffineParamSlopeBackward<<>>( + cdim, num, top[0]->offset(1), top_diff , + bottom_data, + backward_buff_.mutable_gpu_diff()); + CUDA_POST_KERNEL_CHECK; + if (channel_shared_) { + Dtype d = 0; + caffe_gpu_dot(cdim, backward_buff_.gpu_diff(), + multiplier_.gpu_data(), &d); + caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(d), slope_diff); + } else { + caffe_gpu_gemv(CblasNoTrans, channels, dim, Dtype(1.), + backward_buff_.gpu_diff(), multiplier_.gpu_data(), Dtype(1.), + slope_diff); + } + } + // Propagate to bottom + if (propagate_down[0]) { + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const Dtype* slope_data = this->blobs_[0]->gpu_data(); + int div_factor = channel_shared_ ? channels : 1; + // NOLINT_NEXT_LINE(whitespace/operators) + ChannelwiseAffineBackward<<>>( + count, channels, dim, top_diff, bottom_diff, slope_data, div_factor); + CUDA_POST_KERNEL_CHECK; + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(ChannelwiseAffineLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f873deba..fe6209cf 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. 
// -// LayerParameter next available layer-specific ID: 141 (last added: elu_param) +// LayerParameter next available layer-specific ID: 142 (last added: channelwise_affine_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -356,6 +356,7 @@ message LayerParameter { optional AccuracyParameter accuracy_param = 102; optional ArgMaxParameter argmax_param = 103; optional BatchNormParameter batch_norm_param = 139; + optional ChannelwiseAffineParameter channelwise_affine_param = 141; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -498,6 +499,17 @@ message BatchNormParameter { optional float eps = 3 [default = 1e-5]; } +message ChannelwiseAffineParameter { + + // Initial value of a_i. Default is a_i=1.0 for all i. + optional FillerParameter slope_filler = 1; + + optional FillerParameter bias_filler = 2; + + // Whether or not slope paramters are shared across channels. + optional bool channel_shared = 3 [default = false]; +} + message ContrastiveLossParameter { // margin for dissimilar pair optional float margin = 1 [default = 1.0]; diff --git a/src/caffe/test/test_channelwise_affine_layer.cpp b/src/caffe/test/test_channelwise_affine_layer.cpp new file mode 100644 index 00000000..a3e2544f --- /dev/null +++ b/src/caffe/test/test_channelwise_affine_layer.cpp @@ -0,0 +1,105 @@ +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/channelwise_affine_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class ChannelwiseAffineLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + ChannelwiseAffineLayerTest() + : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_top_(new Blob()) { + Caffe::set_random_seed(1701); + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~ChannelwiseAffineLayerTest() { + delete blob_bottom_; delete blob_top_; } + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + + void TestChannelwiseAffine(ChannelwiseAffineLayer *layer) { + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + const Dtype* top_data = this->blob_top_->cpu_data(); + const Dtype* slope_data = layer->blobs()[0]->cpu_data(); + const Dtype* bias_data = layer->blobs()[1]->cpu_data(); + const Dtype kDelta = 2e-5; + int hw = this->blob_bottom_->height() * this->blob_bottom_->width(); + int channels = this->blob_bottom_->channels(); + bool channel_shared = + layer->layer_param().channelwise_affine_param().channel_shared(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + int c = channel_shared ? 
0 : (i / hw) % channels; + EXPECT_NEAR(top_data[i], + bottom_data[i]* slope_data[c] + bias_data[c], kDelta); + } + } +}; +TYPED_TEST_CASE(ChannelwiseAffineLayerTest, TestDtypesAndDevices); + + +TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ChannelwiseAffineLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(layer.blobs()[0].get()); + filler.Fill(layer.blobs()[1].get()); + this->TestChannelwiseAffine(&layer); +} + +TYPED_TEST(ChannelwiseAffineLayerTest, + TestChannelwiseAffineForwardChannelShared) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); + ChannelwiseAffineLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + this->TestChannelwiseAffine(&layer); +} + +TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_channelwise_affine_param()->set_channel_shared(false); + ChannelwiseAffineLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ChannelwiseAffineLayerTest, + TestChannelwiseAffineGradientChannelShared) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); + ChannelwiseAffineLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe From 67b497d4ec70018b168639df1e4342f78fb44bb0 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 22 Jan 2016 15:30:35 -0800 Subject: [PATCH 211/223] Version 1.0.0-rc3 --- CMakeLists.txt | 5 +++++ Makefile | 30 +++++++++++++++++++++--------- cmake/Summary.cmake | 2 +- include/caffe/common.hpp | 4 ++++ python/caffe/__init__.py | 1 + python/caffe/_caffe.cpp | 3 +++ src/caffe/CMakeLists.txt | 4 ++++ tools/caffe.cpp | 3 +++ 8 files changed, 42 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c446c608..32cc42ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,11 @@ endif() # ---[ Caffe project project(Caffe C CXX) +# ---[ Caffe version +set(CAFFE_TARGET_VERSION "1.0.0-rc3") +set(CAFFE_TARGET_SOVERSION "1.0.0-rc3") +add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) + # ---[ Using cmake scripts and modules list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) diff --git a/Makefile b/Makefile index 985fffd6..f3135d35 100644 --- a/Makefile +++ b/Makefile @@ -29,9 +29,17 @@ SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \ \( -name '*.cpp' -o -name '*.proto' \) | grep -q ." 
\; -print) # The target shared library name +LIBRARY_NAME := $(PROJECT) LIB_BUILD_DIR := $(BUILD_DIR)/lib -STATIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).a -DYNAMIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).so +STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a +DYNAMIC_VERSION_MAJOR := 1 +DYNAMIC_VERSION_MINOR := 0 +DYNAMIC_VERSION_REVISION := 0-rc3 +DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so +#DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR) +DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION) +DYNAMIC_NAME := $(LIB_BUILD_DIR)/$(DYNAMIC_VERSIONED_NAME_SHORT) +COMMON_FLAGS += -DCAFFE_VERSION=$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION) ############################## # Get all source files @@ -253,6 +261,7 @@ ifeq ($(LINUX), 1) # boost::thread is reasonably called boost_thread (compare OS X) # We will also explicitly add stdc++ to the link target. LIBRARIES += boost_thread stdc++ + VERSIONFLAGS += -Wl,-soname,$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../lib endif # OS X: @@ -276,6 +285,7 @@ ifeq ($(OSX), 1) # we need to explicitly ask for the rpath to be obeyed DYNAMIC_FLAGS := -install_name @rpath/libcaffe.so ORIGIN := @loader_path + VERSIONFLAGS += -Wl,-install_name,$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../../build/lib else ORIGIN := \$$ORIGIN endif @@ -478,7 +488,7 @@ py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY) $(PY$(PROJECT)_SO): $(PY$(PROJECT)_SRC) $(PY$(PROJECT)_HXX) | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ $< $(Q)$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \ - -o $@ $(LINKFLAGS) -l$(PROJECT) $(PYTHON_LDFLAGS) \ + -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(PYTHON_LDFLAGS) \ -Wl,-rpath,$(ORIGIN)/../../build/lib mat$(PROJECT): mat @@ -542,7 +552,8 @@ $(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK) $(DYNAMIC_NAME): $(OBJS) | $(LIB_BUILD_DIR) @ echo LD -o $@ - $(Q)$(CXX) -shared -o $@ $(OBJS) $(LINKFLAGS) $(LDFLAGS) $(DYNAMIC_FLAGS) + $(Q)$(CXX) -shared -o $@ $(OBJS) $(VERSIONFLAGS) $(LINKFLAGS) $(LDFLAGS) $(DYNAMIC_FLAGS) + @ cd $(BUILD_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT) $(STATIC_NAME): $(OBJS) | $(LIB_BUILD_DIR) @ echo AR -o $@ @@ -573,19 +584,19 @@ $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \ | $(DYNAMIC_NAME) $(TEST_BIN_DIR) @ echo CXX/LD -o $@ $< $(Q)$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \ - -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib + -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib $(TEST_CU_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CU_BUILD_DIR)/%.o \ $(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR) @ echo LD $< $(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \ - -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib + -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib $(TEST_CXX_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CXX_BUILD_DIR)/%.o \ $(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR) @ echo LD $< $(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \ - -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib + -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib # Target for extension-less symlinks to tool binaries with extension '*.bin'. 
$(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR) @@ -594,12 +605,12 @@ $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR) $(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ - $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ + $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \ -Wl,-rpath,$(ORIGIN)/../lib $(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ - $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ + $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \ -Wl,-rpath,$(ORIGIN)/../../lib proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER) @@ -661,6 +672,7 @@ $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS) # add libraries cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib + cd $(DISTRIBUTE_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT) # add python - it's not the standard way, indeed... cp -r python $(DISTRIBUTE_DIR)/python diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 557a6f04..ba025cf8 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -101,7 +101,7 @@ function(caffe_print_configuration_summary) caffe_status("") caffe_status("******************* Caffe Configuration Summary *******************") caffe_status("General:") - caffe_status(" Version : ${Caffe_VERSION}") + caffe_status(" Version : ${CAFFE_TARGET_VERSION}") caffe_status(" Git : ${Caffe_GIT_VERSION}") caffe_status(" System : ${CMAKE_SYSTEM_NAME}") caffe_status(" C++ compiler : ${CMAKE_CXX_COMPILER}") diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 1df6b9a1..6b902a42 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -18,6 +18,10 @@ #include "caffe/util/device_alternate.hpp" +// Convert macro to string +#define STRINGIFY(m) #m +#define AS_STRING(m) STRINGIFY(m) + // gflags 2.1 issue: namespace google was changed to gflags without warning. // Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version // 2.1. If yes, we will add a temporary solution to redirect the namespace. 
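
A short sketch of why the stringification needs two macro levels (AS_STRING on top of STRINGIFY): with a single level, the macro name itself would be stringified instead of its value. The CAFFE_VERSION define below stands in for the -DCAFFE_VERSION=1.0.0-rc3 that the Makefile and CMake changes above pass on the compiler command line:

    #include <cstdio>

    #define STRINGIFY(m) #m
    #define AS_STRING(m) STRINGIFY(m)   // extra level: the argument is macro-expanded first

    #define CAFFE_VERSION 1.0.0-rc3     // normally supplied via -DCAFFE_VERSION=...

    int main() {
      std::printf("%s\n", AS_STRING(CAFFE_VERSION));  // prints 1.0.0-rc3
      // STRINGIFY(CAFFE_VERSION) would instead yield the literal text "CAFFE_VERSION"
      return 0;
    }
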
diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index ccda1bca..e2881b89 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,6 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list +from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier from .detector import Detector diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 4ea2ec60..12a57455 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -212,6 +212,9 @@ BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { // below, we prepend an underscore to methods that will be replaced // in Python + + bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION); + // Caffe utility functions bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index 40e6c11f..8a80c940 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -20,6 +20,10 @@ endif() add_library(caffe ${srcs}) target_link_libraries(caffe proto ${Caffe_LINKER_LIBS}) caffe_default_properties(caffe) +set_target_properties(caffe PROPERTIES + VERSION ${CAFFE_TARGET_VERSION} + SOVERSION ${CAFFE_TARGET_SOVERSION} + ) # ---[ Tests add_subdirectory(test) diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 6b342ace..470165ad 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -3,6 +3,7 @@ namespace bp = boost::python; #endif +#include #include #include @@ -378,6 +379,8 @@ RegisterBrewFunction(time); int main(int argc, char** argv) { // Print output to stderr (while still logging). FLAGS_alsologtostderr = 1; + // Set version + gflags::SetVersionString(AS_STRING(CAFFE_VERSION)); // Usage message. gflags::SetUsageMessage("command line brew\n" "usage: caffe \n\n" From 081690709e4a199824f433cc196c55c47731073f Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 22 Jan 2016 15:57:47 -0800 Subject: [PATCH 212/223] Separation and generalization of ChannelwiseAffineLayer into BiasLayer and ScaleLayer. The behavior of ChannelwiseAffineLayer can be reproduced by a ScaleLayer with `scale_param { bias_term: true }`. BiasLayer and ScaleLayer each take 1 or 2 bottoms, with the output having the same shape as the first. The second input -- either another bottom or a learned parameter -- will have its axes (virtually) broadcast and tiled to have the same shape as the first, after which elementwise addition (Bias) or multiplication (Scale) is performed. 
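
A concrete illustration of the broadcasting described above (the shapes are examples only): with bottom[0] of shape 2 x 3 x 4 x 5 and the second input y of shape 3 x 4 applied starting at axis 1, the top blob keeps the shape 2 x 3 x 4 x 5 and each element is

    top[n, c, h, w] = bottom0[n, c, h, w] * y[c, h]    (Scale)
    top[n, c, h, w] = bottom0[n, c, h, w] + y[c, h]    (Bias)

i.e. y is (virtually) tiled along the leading n axis and the trailing w axis. The old ChannelwiseAffine behaviour corresponds to a per-channel y of shape (3) at axis 1, with the scale layer's bias term enabled.
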
--- include/caffe/layers/bias_layer.hpp | 54 ++ .../caffe/layers/channelwise_affine_layer.hpp | 103 ---- include/caffe/layers/scale_layer.hpp | 83 +++ src/caffe/layers/bias_layer.cpp | 121 +++++ src/caffe/layers/bias_layer.cu | 59 ++ src/caffe/layers/channelwise_affine_layer.cpp | 189 ------- src/caffe/layers/channelwise_affine_layer.cu | 144 ----- src/caffe/layers/scale_layer.cpp | 219 ++++++++ src/caffe/layers/scale_layer.cu | 135 +++++ src/caffe/proto/caffe.proto | 79 ++- src/caffe/test/test_bias_layer.cpp | 467 ++++++++++++++++ .../test/test_channelwise_affine_layer.cpp | 105 ---- src/caffe/test/test_scale_layer.cpp | 507 ++++++++++++++++++ 13 files changed, 1714 insertions(+), 551 deletions(-) create mode 100644 include/caffe/layers/bias_layer.hpp delete mode 100644 include/caffe/layers/channelwise_affine_layer.hpp create mode 100644 include/caffe/layers/scale_layer.hpp create mode 100644 src/caffe/layers/bias_layer.cpp create mode 100644 src/caffe/layers/bias_layer.cu delete mode 100644 src/caffe/layers/channelwise_affine_layer.cpp delete mode 100644 src/caffe/layers/channelwise_affine_layer.cu create mode 100644 src/caffe/layers/scale_layer.cpp create mode 100644 src/caffe/layers/scale_layer.cu create mode 100644 src/caffe/test/test_bias_layer.cpp delete mode 100644 src/caffe/test/test_channelwise_affine_layer.cpp create mode 100644 src/caffe/test/test_scale_layer.cpp diff --git a/include/caffe/layers/bias_layer.hpp b/include/caffe/layers/bias_layer.hpp new file mode 100644 index 00000000..eedc3aaa --- /dev/null +++ b/include/caffe/layers/bias_layer.hpp @@ -0,0 +1,54 @@ +#ifndef CAFFE_BIAS_LAYER_HPP_ +#define CAFFE_BIAS_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Computes a sum of two input Blobs, with the shape of the + * latter Blob "broadcast" to match the shape of the former. + * Equivalent to tiling the latter Blob, then computing the elementwise + * sum. + * + * The second input may be omitted, in which case it's learned as a parameter + * of the layer. 
+ */ +template +class BiasLayer : public Layer { + public: + explicit BiasLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Bias"; } + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MaxBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + private: + Blob bias_multiplier_; + int outer_dim_, bias_dim_, inner_dim_, dim_; +}; + + + +} // namespace caffe + +#endif // CAFFE_BIAS_LAYER_HPP_ diff --git a/include/caffe/layers/channelwise_affine_layer.hpp b/include/caffe/layers/channelwise_affine_layer.hpp deleted file mode 100644 index 6d8ac98b..00000000 --- a/include/caffe/layers/channelwise_affine_layer.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ -#define CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ - -#include -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/neuron_layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - /** - * @brief Affine non-linearity function @f$ - * y = ax+b - * @f$, could be used after batch normalization layer - * - */ -template -class ChannelwiseAffineLayer : public NeuronLayer { - public: - /** - * @param param provides ChannelwiseAffineParameter ChannelwiseAffine_param, - * with ChannelwiseAffineLayer options: - * - slope_filler (\b optional, FillerParameter, - * default {'type': constant 'value':1.0001}). - * - bias_filler (\b optional, FillerParameter, - * default {'type': constant 'value':0.0001}). - * - channel_shared (\b optional, default false). - * slopes and biases are shared across channels. - */ - explicit ChannelwiseAffineLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual inline const char* type() const { return "ChannelwiseAffine"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the computed outputs for each channel @f$i@f$ @f$ - * y_i = a_i x_i + b_i - * @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - /** - * @brief Computes the error gradient w.r.t. the ChannelwiseAffine inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times ...) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) 
@f$ - * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their - * diff with gradients @f$ - * \frac{\partial E}{\partial x_i} = \left\{ - * \begin{array}{lr} - * a_i \frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - * If param_propagate_down_[0] is true, it fills the diff with gradients - * @f$ - * \frac{\partial E}{\partial a_i} = \left\{ - * \begin{array}{lr} - * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - * If param_propagate_down_[1] is true, it fills the diff with gradients - * @f$ - * \frac{\partial E}{\partial b_i} = \left\{ - * \begin{array}{lr} - * frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - bool channel_shared_; - Blob multiplier_; - // dot multiplier for backward computation of params - Blob bias_multiplier_; - Blob backward_buff_; - // temporary buffer for backward computation - Blob bottom_memory_; - // memory for in-place computation -}; -} // namespace caffe - -#endif // CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ diff --git a/include/caffe/layers/scale_layer.hpp b/include/caffe/layers/scale_layer.hpp new file mode 100644 index 00000000..924df2e5 --- /dev/null +++ b/include/caffe/layers/scale_layer.hpp @@ -0,0 +1,83 @@ +#ifndef CAFFE_SCALE_LAYER_HPP_ +#define CAFFE_SCALE_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/bias_layer.hpp" + +namespace caffe { + +/** + * @brief Computes a product of two input Blobs, with the shape of the + * latter Blob "broadcast" to match the shape of the former. + * Equivalent to tiling the latter Blob, then computing the elementwise + * product. + * + * The second input may be omitted, in which case it's learned as a parameter + * of the layer. + */ +template +class ScaleLayer: public Layer { + public: + explicit ScaleLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "Scale"; } + // Scale + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int MaxBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + /** + * In the below shape specifications, @f$ i @f$ denotes the value of the + * `axis` field given by `this->layer_param_.scale_param().axis()`, after + * canonicalization (i.e., conversion from negative to positive index, + * if applicable). + * + * @param bottom input Blob vector (length 2) + * -# @f$ (d_0 \times ... \times + * d_i \times ... \times d_j \times ... \times d_n) @f$ + * the first factor @f$ x @f$ + * -# @f$ (d_i \times ... \times d_j) @f$ + * the second factor @f$ y @f$ + * @param top output Blob vector (length 1) + * -# @f$ (d_0 \times ... \times + * d_i \times ... \times d_j \times ... \times d_n) @f$ + * the product @f$ z = x y @f$ computed after "broadcasting" y. + * Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$, + * then computing the elementwise product. 
+ */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + shared_ptr > bias_layer_; + vector*> bias_bottom_vec_; + vector bias_propagate_down_; + int bias_param_id_; + + Blob sum_multiplier_; + Blob sum_result_; + Blob temp_; + int axis_; + int outer_dim_, scale_dim_, inner_dim_; +}; + + +} // namespace caffe + +#endif // CAFFE_SCALE_LAYER_HPP_ diff --git a/src/caffe/layers/bias_layer.cpp b/src/caffe/layers/bias_layer.cpp new file mode 100644 index 00000000..0a786b5d --- /dev/null +++ b/src/caffe/layers/bias_layer.cpp @@ -0,0 +1,121 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layers/bias_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void BiasLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + if (bottom.size() == 1 && this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else if (bottom.size() == 1) { + // bias is a learned parameter; initialize it + const BiasParameter& param = this->layer_param_.bias_param(); + const int axis = bottom[0]->CanonicalAxisIndex(param.axis()); + const int num_axes = param.num_axes(); + CHECK_GE(num_axes, -1) << "num_axes must be non-negative, " + << "or -1 to extend to the end of bottom[0]"; + if (num_axes >= 0) { + CHECK_GE(bottom[0]->num_axes(), axis + num_axes) + << "bias blob's shape extends past bottom[0]'s shape when applied " + << "starting with bottom[0] axis = " << axis; + } + this->blobs_.resize(1); + const vector::const_iterator& shape_start = + bottom[0]->shape().begin() + axis; + const vector::const_iterator& shape_end = + (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes); + vector bias_shape(shape_start, shape_end); + this->blobs_[0].reset(new Blob(bias_shape)); + shared_ptr > filler(GetFiller(param.filler())); + filler->Fill(this->blobs_[0].get()); + } + this->param_propagate_down_.resize(this->blobs_.size(), true); +} + +template +void BiasLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + const BiasParameter& param = this->layer_param_.bias_param(); + Blob* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get(); + // Always set axis == 0 in special case where bias is a scalar + // (num_axes == 0). Mathematically equivalent for any choice of axis, so the + // actual setting can be safely ignored; and computation is most efficient + // with axis == 0 and (therefore) outer_dim_ == 1. + const int axis = (bias->num_axes() == 0) ? 
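
Following the shape logic in BiasLayer::LayerSetUp above, a bottom[0] of shape 2 x 3 x 4 x 5 would give a learned bias parameter shaped as follows (axis and num_axes are the BiasParameter fields read by the code; the numbers are illustrative):

    axis = 1, num_axes = 1    ->  bias shape (3)           one value per channel
    axis = 1, num_axes = 2    ->  bias shape (3, 4)
    axis = 1, num_axes = -1   ->  bias shape (3, 4, 5)     extends to the end of bottom[0]
    axis = 0, num_axes = -1   ->  bias shape (2, 3, 4, 5)  a full elementwise bias
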
+ 0 : bottom[0]->CanonicalAxisIndex(param.axis()); + CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes()) + << "bias blob's shape extends past bottom[0]'s shape when applied " + << "starting with bottom[0] axis = " << axis; + for (int i = 0; i < bias->num_axes(); ++i) { + CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i)) + << "dimension mismatch between bottom[0]->shape(" << axis + i + << ") and bias->shape(" << i << ")"; + } + outer_dim_ = bottom[0]->count(0, axis); + bias_dim_ = bias->count(); + inner_dim_ = bottom[0]->count(axis + bias->num_axes()); + dim_ = bias_dim_ * inner_dim_; + if (bottom[0] != top[0]) { + top[0]->ReshapeLike(*bottom[0]); + } + bias_multiplier_.Reshape(vector(1, inner_dim_)); + if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) { + caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data()); + } +} + +template +void BiasLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bias_data = + ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + if (bottom[0] != top[0]) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + caffe_copy(bottom[0]->count(), bottom_data, top_data); + } + for (int n = 0; n < outer_dim_; ++n) { + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_, + inner_dim_, Dtype(1), Dtype(1), bias_data, + bias_multiplier_.cpu_data(), Dtype(1), top_data); + top_data += dim_; + } +} + +template +void BiasLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (propagate_down[0] && bottom[0] != top[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + caffe_copy(bottom[0]->count(), top_diff, bottom_diff); + } + // in-place, we don't need to do anything with the data diff + const bool bias_param = (bottom.size() == 1); + if ((!bias_param && propagate_down[1]) || + (bias_param && this->param_propagate_down_[0])) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1]) + ->mutable_cpu_diff(); + bool accum = bias_param; + for (int n = 0; n < outer_dim_; ++n) { + caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1), + top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff); + top_diff += dim_; + accum = true; + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(BiasLayer); +#endif + +INSTANTIATE_CLASS(BiasLayer); +REGISTER_LAYER_CLASS(Bias); + +} // namespace caffe diff --git a/src/caffe/layers/bias_layer.cu b/src/caffe/layers/bias_layer.cu new file mode 100644 index 00000000..8ac913a5 --- /dev/null +++ b/src/caffe/layers/bias_layer.cu @@ -0,0 +1,59 @@ +#include + +#include "caffe/filler.hpp" +#include "caffe/layers/bias_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +__global__ void BiasForward(const int n, const Dtype* in, + const Dtype* bias, const int bias_dim, const int inner_dim, + Dtype* out) { + CUDA_KERNEL_LOOP(index, n) { + const int bias_index = (index / inner_dim) % bias_dim; + out[index] = in[index] + bias[bias_index]; + } +} + +template +void BiasLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const int count = top[0]->count(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* bias_data = + ((bottom.size() > 1) ? 
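
Restating what the BLAS calls in bias_layer.cpp above do, with 1 denoting the all-ones vector of length inner_dim_ held in bias_multiplier_, and each outer slice viewed as a bias_dim_ x inner_dim_ matrix:

    Forward:   Top_n \leftarrow Top_n + b \, \mathbf{1}^\top                       (a rank-1 GEMM per outer slice)
    Backward:  \partial E/\partial b \mathrel{+}= (\partial E/\partial Top_n)\,\mathbf{1}   (a GEMV reduction over inner_dim_)

The GPU path that follows uses the same GEMV for the bias gradient and a simple elementwise kernel for the forward pass.
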
bottom[1] : this->blobs_[0].get())->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + BiasForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, bias_data, bias_dim_, inner_dim_, top_data); +} + +template +void BiasLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (propagate_down[0] && bottom[0] != top[0]) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + caffe_copy(bottom[0]->count(), top_diff, bottom_diff); + } + // in-place, we don't need to do anything with the data diff + const bool bias_param = (bottom.size() == 1); + if ((!bias_param && propagate_down[1]) || + (bias_param && this->param_propagate_down_[0])) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1]) + ->mutable_gpu_diff(); + bool accum = bias_param; + for (int n = 0; n < outer_dim_; ++n) { + caffe_gpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1), + top_diff, bias_multiplier_.gpu_data(), Dtype(accum), bias_diff); + top_diff += dim_; + accum = true; + } + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(BiasLayer); + +} // namespace caffe diff --git a/src/caffe/layers/channelwise_affine_layer.cpp b/src/caffe/layers/channelwise_affine_layer.cpp deleted file mode 100644 index e9f31fb1..00000000 --- a/src/caffe/layers/channelwise_affine_layer.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include -#include - -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -namespace caffe { - -template -void ChannelwiseAffineLayer::LayerSetUp( - const vector*>& bottom, - const vector*>& top) { - CHECK_GE(bottom[0]->num_axes(), 2) - << "Number of axes of bottom blob must be >=2."; - ChannelwiseAffineParameter channelwise_affine_param = - this->layer_param().channelwise_affine_param(); - int channels = bottom[0]->channels(); - channel_shared_ = channelwise_affine_param.channel_shared(); - if (this->blobs_.size() > 0) { - LOG(INFO) << "Skipping parameter initialization"; - } else { - this->blobs_.resize(2); - if (channel_shared_) { - this->blobs_[0].reset(new Blob(vector(0))); - this->blobs_[1].reset(new Blob(vector(0))); - - } else { - this->blobs_[0].reset(new Blob(vector(1, channels))); - this->blobs_[1].reset(new Blob(vector(1, channels))); - } - shared_ptr > filler; - if (channelwise_affine_param.has_slope_filler()) { - filler.reset(GetFiller(channelwise_affine_param.slope_filler())); - } else { - FillerParameter filler_param; - filler_param.set_type("constant"); - filler_param.set_value(1.0001); - filler.reset(GetFiller(filler_param)); - } - filler->Fill(this->blobs_[0].get()); - - if (channelwise_affine_param.has_bias_filler()) { - filler.reset(GetFiller(channelwise_affine_param.bias_filler())); - } else { - FillerParameter filler_param; - filler_param.set_type("constant"); - filler_param.set_value(0.0001); - filler.reset(GetFiller(filler_param)); - } - filler->Fill(this->blobs_[1].get()); - } - if (channel_shared_) { - CHECK_EQ(this->blobs_[0]->count(), 1) - << "Slope size is inconsistent with prototxt config"; - } else { - CHECK_EQ(this->blobs_[0]->count(), channels) - << "Slope size is inconsistent with prototxt config"; - } - - // Propagate gradients to the parameters (as directed by backward pass). 
- this->param_propagate_down_.resize(this->blobs_.size(), true); - multiplier_.Reshape(vector(1, bottom[0]->count(1))); - bias_multiplier_.Reshape(vector(1, bottom[0]->count(1))); - backward_buff_.Reshape(vector(1, bottom[0]->count(1))); - caffe_set(multiplier_.count(), Dtype(1.0), - multiplier_.mutable_cpu_data()); - caffe_set(bias_multiplier_.count(), Dtype(1.0), - bias_multiplier_.mutable_cpu_data()); -} - -template -void ChannelwiseAffineLayer::Reshape( - const vector*>& bottom, - const vector*>& top) { - CHECK_GE(bottom[0]->num_axes(), 2) - << "Number of axes of bottom blob must be >=2."; - top[0]->ReshapeLike(*bottom[0]); - if (bottom[0] == top[0]) { - // For in-place computation - bottom_memory_.ReshapeLike(*bottom[0]); - } - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - vector bias_multiplier_shape(1, height * width); - bias_multiplier_.Reshape(bias_multiplier_shape); - caffe_set(bias_multiplier_.count(), Dtype(1), - bias_multiplier_.mutable_cpu_data()); -} - -template -void ChannelwiseAffineLayer::Forward_cpu( - const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->channels(); - const Dtype* slope_data = this->blobs_[0]->cpu_data(); - const Dtype* bias_data = this->blobs_[1]->cpu_data(); - // For in-place computation - if (bottom[0] == top[0]) { - caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data()); - } - // if channel_shared, channel index in the following computation becomes - // always zero. - const int div_factor = channel_shared_ ? channels : 1; - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - top_data[i] = bottom_data[i] * slope_data[c] + bias_data[c]; - } -} - -template -void ChannelwiseAffineLayer::Backward_cpu( - const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - const Dtype* slope_data = this->blobs_[0]->cpu_data(); - - const Dtype* top_diff = top[0]->cpu_diff(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->shape(1); - const int num = bottom[0]->shape(0); - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - - // For in-place computation - if (top[0] == bottom[0]) { - bottom_data = bottom_memory_.cpu_data(); - } - - // if channel_shared, channel index in the following computation becomes - // always zero. - const int div_factor = channel_shared_ ? channels : 1; - - // Propagte to param - // Since to write bottom diff will affect top diff if top and bottom blobs - // are identical (in-place computaion), we first compute param backward to - // keep top_diff unchanged. 
- - if (this->param_propagate_down_[1]) { - Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); - caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff); - for (int n = 0; n < num; ++n) { - caffe_cpu_gemv(CblasNoTrans, channels, height * width, 1., - top_diff + top[0]->offset(n), - bias_multiplier_.cpu_data(), 1., bias_diff); - } - } - if (this->param_propagate_down_[0]) { - Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff(); - caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff); - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - slope_diff[c] += top_diff[i] * bottom_data[i]; - } - } - - // Propagate to bottom - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - bottom_diff[i] = slope_data[c] * top_diff[i]; - } - } -} - - -#ifdef CPU_ONLY -STUB_GPU(ChannelwiseAffineLayer); -#endif - -INSTANTIATE_CLASS(ChannelwiseAffineLayer); -REGISTER_LAYER_CLASS(ChannelwiseAffine); - -} // namespace caffe diff --git a/src/caffe/layers/channelwise_affine_layer.cu b/src/caffe/layers/channelwise_affine_layer.cu deleted file mode 100644 index 2066b265..00000000 --- a/src/caffe/layers/channelwise_affine_layer.cu +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include - -#include "caffe/layer.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -namespace caffe { - -// CUDA kernel for forward -template -__global__ void ChannelwiseAffineForward(const int n, const int channels, - const int dim, const Dtype* in, Dtype* out, const Dtype* slope_data, - const Dtype* bias_data, const int div_factor) { - CUDA_KERNEL_LOOP(index, n) { - int c = (index / dim) % channels / div_factor; - out[index] = in[index] * slope_data[c] + bias_data[c]; - } -} - -// CUDA kernel for bottom backward -template -__global__ void ChannelwiseAffineBackward(const int n, - const int channels, const int dim, const Dtype* in_diff, - Dtype* out_diff, const Dtype* slope_data, const int div_factor) { - CUDA_KERNEL_LOOP(index, n) { - int c = (index / dim) % channels / div_factor; - out_diff[index] = slope_data[c] * in_diff[index]; - } -} - -// CUDA kernel for element-wise parameter backward -template -__global__ void ChannelwiseAffineParamSlopeBackward(const int n, - const int rows, const int rowPitch, const Dtype* in_diff, - const Dtype* in_data, Dtype* out_diff) { - CUDA_KERNEL_LOOP(index, n) { - out_diff[index] = in_diff[index] * in_data[index]; - for ( int k = 1; k < rows; k++ ) { - out_diff[index] += in_diff[index + k*rowPitch] - * in_data[index + k*rowPitch]; - } - } -} - -template -void ChannelwiseAffineLayer::Forward_gpu( - const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = top[0]->mutable_gpu_data(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->channels(); - const Dtype* slope_data = this->blobs_[0]->gpu_data(); - const Dtype* bias_data = this->blobs_[1]->gpu_data(); - const int div_factor = channel_shared_ ? 
channels : 1; - - // For in-place computation - if (top[0] == bottom[0]) { - caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data()); - } - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineForward<<>>( - count, channels, dim, bottom_data, top_data, - slope_data, bias_data, div_factor); - CUDA_POST_KERNEL_CHECK; -} - -template -void ChannelwiseAffineLayer::Backward_gpu( - const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - const Dtype* top_diff = top[0]->gpu_diff(); - const int count = bottom[0]->count(); - const int num = bottom[0]->shape(0); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->shape(1); - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - - // For in-place computation - if (top[0] == bottom[0]) { - bottom_data = bottom_memory_.gpu_data(); - } - // Propagate to param - // Since to write bottom diff will affect top diff if top and bottom blobs - // are identical (in-place computaion), we first compute param backward to - // keep top_diff unchanged. - if (this->param_propagate_down_[1]) { - Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0.0), bias_diff); - // Gradient with respect to bias - for (int n = 0; n < num; ++n) { - caffe_gpu_gemv( - CblasNoTrans, channels, height * width, (Dtype)1., - top_diff + top[0]->offset(n), bias_multiplier_.gpu_data(), - (Dtype)1., bias_diff); - } - } - if (this->param_propagate_down_[0]) { - Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff(); - int cdim = channels * dim; - // compute element-wise diff - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineParamSlopeBackward<<>>( - cdim, num, top[0]->offset(1), top_diff , - bottom_data, - backward_buff_.mutable_gpu_diff()); - CUDA_POST_KERNEL_CHECK; - if (channel_shared_) { - Dtype d = 0; - caffe_gpu_dot(cdim, backward_buff_.gpu_diff(), - multiplier_.gpu_data(), &d); - caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(d), slope_diff); - } else { - caffe_gpu_gemv(CblasNoTrans, channels, dim, Dtype(1.), - backward_buff_.gpu_diff(), multiplier_.gpu_data(), Dtype(1.), - slope_diff); - } - } - // Propagate to bottom - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - const Dtype* slope_data = this->blobs_[0]->gpu_data(); - int div_factor = channel_shared_ ? 
channels : 1; - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineBackward<<>>( - count, channels, dim, top_diff, bottom_diff, slope_data, div_factor); - CUDA_POST_KERNEL_CHECK; - } -} - -INSTANTIATE_LAYER_GPU_FUNCS(ChannelwiseAffineLayer); - -} // namespace caffe diff --git a/src/caffe/layers/scale_layer.cpp b/src/caffe/layers/scale_layer.cpp new file mode 100644 index 00000000..ecdbb123 --- /dev/null +++ b/src/caffe/layers/scale_layer.cpp @@ -0,0 +1,219 @@ +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer_factory.hpp" +#include "caffe/layers/scale_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void ScaleLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + const ScaleParameter& param = this->layer_param_.scale_param(); + if (bottom.size() == 1 && this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else if (bottom.size() == 1) { + // scale is a learned parameter; initialize it + axis_ = bottom[0]->CanonicalAxisIndex(param.axis()); + const int num_axes = param.num_axes(); + CHECK_GE(num_axes, -1) << "num_axes must be non-negative, " + << "or -1 to extend to the end of bottom[0]"; + if (num_axes >= 0) { + CHECK_GE(bottom[0]->num_axes(), axis_ + num_axes) + << "scale blob's shape extends past bottom[0]'s shape when applied " + << "starting with bottom[0] axis = " << axis_; + } + this->blobs_.resize(1); + const vector::const_iterator& shape_start = + bottom[0]->shape().begin() + axis_; + const vector::const_iterator& shape_end = + (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes); + vector scale_shape(shape_start, shape_end); + this->blobs_[0].reset(new Blob(scale_shape)); + FillerParameter filler_param(param.filler()); + if (!param.has_filler()) { + // Default to unit (1) filler for identity operation. + filler_param.set_type("constant"); + filler_param.set_value(1); + } + shared_ptr > filler(GetFiller(filler_param)); + filler->Fill(this->blobs_[0].get()); + } + if (param.bias_term()) { + LayerParameter layer_param(this->layer_param_); + layer_param.set_type("Bias"); + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(param.axis()); + if (bottom.size() > 1) { + bias_param->set_num_axes(bottom[1]->num_axes()); + } else { + bias_param->set_num_axes(param.num_axes()); + } + bias_param->mutable_filler()->CopyFrom(param.bias_filler()); + bias_layer_ = LayerRegistry::CreateLayer(layer_param); + bias_bottom_vec_.resize(1); + bias_bottom_vec_[0] = bottom[0]; + bias_layer_->SetUp(bias_bottom_vec_, top); + bias_param_id_ = this->blobs_.size(); + this->blobs_.resize(bias_param_id_ + 1); + this->blobs_[bias_param_id_] = bias_layer_->blobs()[0]; + bias_propagate_down_.resize(1, false); + } + this->param_propagate_down_.resize(this->blobs_.size(), true); +} + +template +void ScaleLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + const ScaleParameter& param = this->layer_param_.scale_param(); + Blob* scale = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get(); + // Always set axis_ == 0 in special case where scale is a scalar + // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the + // actual setting can be safely ignored; and computation is most efficient + // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to + // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally + // performant.) + axis_ = (scale->num_axes() == 0) ? 
+ 0 : bottom[0]->CanonicalAxisIndex(param.axis()); + CHECK_GE(bottom[0]->num_axes(), axis_ + scale->num_axes()) + << "scale blob's shape extends past bottom[0]'s shape when applied " + << "starting with bottom[0] axis = " << axis_; + for (int i = 0; i < scale->num_axes(); ++i) { + CHECK_EQ(bottom[0]->shape(axis_ + i), scale->shape(i)) + << "dimension mismatch between bottom[0]->shape(" << axis_ + i + << ") and scale->shape(" << i << ")"; + } + outer_dim_ = bottom[0]->count(0, axis_); + scale_dim_ = scale->count(); + inner_dim_ = bottom[0]->count(axis_ + scale->num_axes()); + if (bottom[0] == top[0]) { // in-place computation + temp_.ReshapeLike(*bottom[0]); + } else { + top[0]->ReshapeLike(*bottom[0]); + } + sum_result_.Reshape(vector(1, outer_dim_ * scale_dim_)); + const int sum_mult_size = std::max(outer_dim_, inner_dim_); + sum_multiplier_.Reshape(vector(1, sum_mult_size)); + if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) { + caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data()); + } + if (bias_layer_) { + bias_bottom_vec_[0] = top[0]; + bias_layer_->Reshape(bias_bottom_vec_, top); + } +} + +template +void ScaleLayer::Forward_cpu( + const vector*>& bottom, const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + if (bottom[0] == top[0]) { + // In-place computation; need to store bottom data before overwriting it. + // Note that this is only necessary for Backward; we could skip this if not + // doing Backward, but Caffe currently provides no way of knowing whether + // we'll need to do Backward at the time of the Forward call. + caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(), + temp_.mutable_cpu_data()); + } + const Dtype* scale_data = + ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + for (int n = 0; n < outer_dim_; ++n) { + for (int d = 0; d < scale_dim_; ++d) { + const Dtype factor = scale_data[d]; + caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data); + bottom_data += inner_dim_; + top_data += inner_dim_; + } + } + if (bias_layer_) { + bias_layer_->Forward(bias_bottom_vec_, top); + } +} + +template +void ScaleLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (bias_layer_ && + this->param_propagate_down_[this->param_propagate_down_.size() - 1]) { + bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_); + } + const bool scale_param = (bottom.size() == 1); + Blob* scale = scale_param ? this->blobs_[0].get() : bottom[1]; + if ((!scale_param && propagate_down[1]) || + (scale_param && this->param_propagate_down_[0])) { + const Dtype* top_diff = top[0]->cpu_diff(); + const bool in_place = (bottom[0] == top[0]); + const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data(); + // Hack: store big eltwise product in bottom[0] diff, except in the special + // case where this layer itself does the eltwise product, in which case we + // can store it directly in the scale diff, and we're done. + // If we're computing in-place (and not doing eltwise computation), this + // hack doesn't work and we store the product in temp_. + const bool is_eltwise = (bottom[0]->count() == scale->count()); + Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() : + (in_place ? 
temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff())); + caffe_mul(top[0]->count(), top_diff, bottom_data, product); + if (!is_eltwise) { + Dtype* sum_result = NULL; + if (inner_dim_ == 1) { + sum_result = product; + } else if (sum_result_.count() == 1) { + const Dtype* sum_mult = sum_multiplier_.cpu_data(); + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_param) { + Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult); + *scale_diff += result; + } else { + *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult); + } + } else { + const Dtype* sum_mult = sum_multiplier_.cpu_data(); + sum_result = (outer_dim_ == 1) ? + scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data(); + caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_, + Dtype(1), product, sum_mult, Dtype(0), sum_result); + } + if (outer_dim_ != 1) { + const Dtype* sum_mult = sum_multiplier_.cpu_data(); + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_dim_ == 1) { + if (scale_param) { + Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result); + *scale_diff += result; + } else { + *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result); + } + } else { + caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_, + Dtype(1), sum_result, sum_mult, Dtype(scale_param), + scale_diff); + } + } + } + } + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* scale_data = scale->cpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + for (int n = 0; n < outer_dim_; ++n) { + for (int d = 0; d < scale_dim_; ++d) { + const Dtype factor = scale_data[d]; + caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff); + bottom_diff += inner_dim_; + top_diff += inner_dim_; + } + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(ScaleLayer); +#endif + +INSTANTIATE_CLASS(ScaleLayer); +REGISTER_LAYER_CLASS(Scale); + +} // namespace caffe diff --git a/src/caffe/layers/scale_layer.cu b/src/caffe/layers/scale_layer.cu new file mode 100644 index 00000000..fc9a8064 --- /dev/null +++ b/src/caffe/layers/scale_layer.cu @@ -0,0 +1,135 @@ +#include +#include + +#include "caffe/layers/scale_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +__global__ void ScaleForward(const int n, const Dtype* in, + const Dtype* scale, const int scale_dim, const int inner_dim, + Dtype* out) { + CUDA_KERNEL_LOOP(index, n) { + const int scale_index = (index / inner_dim) % scale_dim; + out[index] = in[index] * scale[scale_index]; + } +} + +template +__global__ void ScaleBiasForward(const int n, const Dtype* in, + const Dtype* scale, const Dtype* bias, + const int scale_dim, const int inner_dim, Dtype* out) { + CUDA_KERNEL_LOOP(index, n) { + const int scale_index = (index / inner_dim) % scale_dim; + out[index] = in[index] * scale[scale_index] + bias[scale_index]; + } +} + +template +void ScaleLayer::Forward_gpu( + const vector*>& bottom, const vector*>& top) { + const int count = top[0]->count(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + if (bottom[0] == top[0]) { + // in-place computation; need to store bottom data before overwriting it. + // Note that this is only necessary for Backward; we could skip this if not + // doing Backward, but Caffe currently provides no way of knowing whether + // we'll need to do Backward at the time of the Forward call. + caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(), + temp_.mutable_gpu_data()); + } + const Dtype* scale_data = + ((bottom.size() > 1) ? 
bottom[1] : this->blobs_[0].get())->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + if (bias_layer_) { + const Dtype* bias_data = this->blobs_[bias_param_id_]->gpu_data(); + ScaleBiasForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, scale_data, bias_data, scale_dim_, inner_dim_, + top_data); + } else { + ScaleForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, scale_data, scale_dim_, inner_dim_, top_data); + } +} + +template +void ScaleLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (bias_layer_ && + this->param_propagate_down_[this->param_propagate_down_.size() - 1]) { + bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_); + } + const bool scale_param = (bottom.size() == 1); + Blob* scale = scale_param ? this->blobs_[0].get() : bottom[1]; + if ((!scale_param && propagate_down[1]) || + (scale_param && this->param_propagate_down_[0])) { + const Dtype* top_diff = top[0]->gpu_diff(); + const bool in_place = (bottom[0] == top[0]); + const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data(); + // Hack: store big eltwise product in bottom[0] diff, except in the special + // case where this layer itself does the eltwise product, in which case we + // can store it directly in the scale diff, and we're done. + // If we're computing in-place (and not doing eltwise computation), this + // hack doesn't work and we store the product in temp_. + const bool is_eltwise = (bottom[0]->count() == scale->count()); + Dtype* product = (is_eltwise ? scale->mutable_gpu_diff() : + (in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff())); + caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product); + if (!is_eltwise) { + Dtype* sum_result = NULL; + if (inner_dim_ == 1) { + sum_result = product; + } else if (sum_result_.count() == 1) { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_param) { + Dtype result; + caffe_gpu_dot(inner_dim_, product, sum_mult, &result); + *scale_diff += result; + } else { + caffe_gpu_dot(inner_dim_, product, sum_mult, scale_diff); + } + } else { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + sum_result = (outer_dim_ == 1) ? 
+ scale->mutable_gpu_diff() : sum_result_.mutable_gpu_data(); + caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_, + Dtype(1), product, sum_mult, Dtype(0), sum_result); + } + if (outer_dim_ != 1) { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + if (scale_dim_ == 1) { + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_param) { + Dtype result; + caffe_gpu_dot(outer_dim_, sum_mult, sum_result, &result); + *scale_diff += result; + } else { + caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scale_diff); + } + } else { + Dtype* scale_diff = scale->mutable_gpu_diff(); + caffe_gpu_gemv(CblasTrans, outer_dim_, scale_dim_, + Dtype(1), sum_result, sum_mult, Dtype(scale_param), + scale_diff); + } + } + } + } + if (propagate_down[0]) { + const int count = top[0]->count(); + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* scale_data = scale->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + ScaleForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, top_diff, scale_data, scale_dim_, inner_dim_, bottom_diff); + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(ScaleLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index fe6209cf..6493a72d 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 142 (last added: channelwise_affine_param) +// LayerParameter next available layer-specific ID: 143 (last added: scale_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -356,7 +356,7 @@ message LayerParameter { optional AccuracyParameter accuracy_param = 102; optional ArgMaxParameter argmax_param = 103; optional BatchNormParameter batch_norm_param = 139; - optional ChannelwiseAffineParameter channelwise_affine_param = 141; + optional BiasParameter bias_param = 141; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -385,6 +385,7 @@ message LayerParameter { optional ReductionParameter reduction_param = 136; optional ReLUParameter relu_param = 123; optional ReshapeParameter reshape_param = 133; + optional ScaleParameter scale_param = 142; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; optional SPPParameter spp_param = 132; @@ -499,15 +500,36 @@ message BatchNormParameter { optional float eps = 3 [default = 1e-5]; } -message ChannelwiseAffineParameter { - - // Initial value of a_i. Default is a_i=1.0 for all i. - optional FillerParameter slope_filler = 1; - - optional FillerParameter bias_filler = 2; +message BiasParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). + // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar bias. 
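+ //
+ // As a worked illustration of the broadcast rule above (the shapes here are
+ // hypothetical, chosen only for this comment): with bottom[0] of shape
+ // 2x3x4x5, axis == 1, and bottom[1] of shape 3x4, the output is
+ //   top[0](n, c, h, w) = bottom[0](n, c, h, w) + bottom[1](c, h)
+ // for every n and w, i.e. the bias values are repeated along the leading
+ // axis 0 and the trailing axis 3.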
+ optional int32 axis = 1 [default = 1]; - // Whether or not slope paramters are shared across channels. - optional bool channel_shared = 3 [default = false]; + // (num_axes is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes by the second bottom.) + // The number of axes of the input (bottom[0]) covered by the bias + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to add a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer.) + // The initialization for the learned bias parameter. + // Default is the zero (0) initialization, resulting in the BiasLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; } message ContrastiveLossParameter { @@ -972,6 +994,43 @@ message ReshapeParameter { optional int32 num_axes = 3 [default = -1]; } +message ScaleParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). + // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar multiplier. + optional int32 axis = 1 [default = 1]; + + // (num_axes is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes by the second bottom.) + // The number of axes of the input (bottom[0]) covered by the scale + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer.) + // The initialization for the learned scale parameter. + // Default is the unit (1) initialization, resulting in the ScaleLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; + + // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but + // may be more efficient). Initialized with bias_filler (defaults to 0). 
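+ //
+ // A minimal usage sketch (a hypothetical prototxt fragment; the layer and
+ // blob names are made up for illustration): a per-channel learned scale
+ // plus learned bias, e.g. as a companion to BatchNorm, could be written as
+ //   layer {
+ //     name: "scale1"
+ //     type: "Scale"
+ //     bottom: "bn1"
+ //     top: "bn1"
+ //     scale_param { bias_term: true }
+ //   }
+ // which creates two learnable blobs: the scale (unit-filled by default) and
+ // the bias (zero-filled by default, per bias_filler).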
+ optional bool bias_term = 4 [default = false]; + optional FillerParameter bias_filler = 5; +} + message SigmoidParameter { enum Engine { DEFAULT = 0; diff --git a/src/caffe/test/test_bias_layer.cpp b/src/caffe/test/test_bias_layer.cpp new file mode 100644 index 00000000..3862e763 --- /dev/null +++ b/src/caffe/test/test_bias_layer.cpp @@ -0,0 +1,467 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/bias_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class BiasLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + BiasLayerTest() + : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_bottom_eltwise_(new Blob(2, 3, 4, 5)), + blob_bottom_broadcast_0_(new Blob()), + blob_bottom_broadcast_1_(new Blob()), + blob_bottom_broadcast_2_(new Blob()), + blob_bottom_bias_(new Blob(vector())), + blob_top_(new Blob()) { + Caffe::set_random_seed(1701); + vector broadcast_shape(2); + broadcast_shape[0] = 2; broadcast_shape[1] = 3; + this->blob_bottom_broadcast_0_->Reshape(broadcast_shape); + broadcast_shape[0] = 3; broadcast_shape[1] = 4; + this->blob_bottom_broadcast_1_->Reshape(broadcast_shape); + broadcast_shape[0] = 4; broadcast_shape[1] = 5; + this->blob_bottom_broadcast_2_->Reshape(broadcast_shape); + FillerParameter filler_param; + filler_param.set_min(1); + filler_param.set_max(10); + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_bottom_eltwise_); + filler.Fill(this->blob_bottom_broadcast_0_); + filler.Fill(this->blob_bottom_broadcast_1_); + filler.Fill(this->blob_bottom_broadcast_2_); + filler.Fill(this->blob_bottom_bias_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~BiasLayerTest() { + delete blob_bottom_; + delete blob_bottom_eltwise_; + delete blob_bottom_broadcast_0_; + delete blob_bottom_broadcast_1_; + delete blob_bottom_broadcast_2_; + delete blob_bottom_bias_; + delete blob_top_; + } + Blob* const blob_bottom_; + Blob* const blob_bottom_eltwise_; + Blob* const blob_bottom_broadcast_0_; + Blob* const blob_bottom_broadcast_1_; + Blob* const blob_bottom_broadcast_2_; + Blob* const blob_bottom_bias_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(BiasLayerTest, TestDtypesAndDevices); + +TYPED_TEST(BiasLayerTest, TestForwardEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + 
orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_bottom_->cpu_data(); + const int count = this->blob_bottom_->count(); + const Dtype* in_data_a = orig_bottom.cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestBackwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + shared_ptr > layer(new BiasLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_bias_diff; + orig_bias_diff.CopyFrom(*this->blob_bottom_eltwise_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
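+ // (BiasLayer's Backward depends only on top_diff -- the bottom diff is a
+ // copy of it and the bias diff is a reduction (sum) of it -- so running
+ // Forward in place, which overwrites the bottom data, should leave both
+ // diffs unchanged; the EXPECT_NEAR checks below verify exactly that.)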
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) { + EXPECT_NEAR(orig_bias_diff.cpu_diff()[i], + this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(0); + bias_param->set_num_axes(-1); + bias_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = layer->blobs()[0]->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + 
orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w), + orig_bottom.data_at(n, c, h, w) + + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestBackwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_bias_diff; + orig_bias_diff.CopyFrom(*this->blob_bottom_broadcast_1_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) { + EXPECT_NEAR(orig_bias_diff.cpu_diff()[i], + this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(1); + bias_param->set_num_axes(2); + bias_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype bias = *this->blob_bottom_bias_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] + bias, 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBiasAxis2) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, 
this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype bias = *this->blob_bottom_bias_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] + bias, 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestGradientEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(0); + bias_param->set_num_axes(-1); + bias_param->mutable_filler()->set_type("gaussian"); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(0); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(1); + bias_param->set_num_axes(2); + bias_param->mutable_filler()->set_type("gaussian"); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBiasAxis2) { + typedef typename 
TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe diff --git a/src/caffe/test/test_channelwise_affine_layer.cpp b/src/caffe/test/test_channelwise_affine_layer.cpp deleted file mode 100644 index a3e2544f..00000000 --- a/src/caffe/test/test_channelwise_affine_layer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include - -#include "gtest/gtest.h" - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/filler.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -#include "caffe/test/test_caffe_main.hpp" -#include "caffe/test/test_gradient_check_util.hpp" - -namespace caffe { - -template -class ChannelwiseAffineLayerTest : public MultiDeviceTest { - typedef typename TypeParam::Dtype Dtype; - - protected: - ChannelwiseAffineLayerTest() - : blob_bottom_(new Blob(2, 3, 4, 5)), - blob_top_(new Blob()) { - Caffe::set_random_seed(1701); - // fill the values - FillerParameter filler_param; - GaussianFiller filler(filler_param); - filler.Fill(this->blob_bottom_); - blob_bottom_vec_.push_back(blob_bottom_); - blob_top_vec_.push_back(blob_top_); - } - virtual ~ChannelwiseAffineLayerTest() { - delete blob_bottom_; delete blob_top_; } - Blob* const blob_bottom_; - Blob* const blob_top_; - vector*> blob_bottom_vec_; - vector*> blob_top_vec_; - - void TestChannelwiseAffine(ChannelwiseAffineLayer *layer) { - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Now, check values - const Dtype* bottom_data = this->blob_bottom_->cpu_data(); - const Dtype* top_data = this->blob_top_->cpu_data(); - const Dtype* slope_data = layer->blobs()[0]->cpu_data(); - const Dtype* bias_data = layer->blobs()[1]->cpu_data(); - const Dtype kDelta = 2e-5; - int hw = this->blob_bottom_->height() * this->blob_bottom_->width(); - int channels = this->blob_bottom_->channels(); - bool channel_shared = - layer->layer_param().channelwise_affine_param().channel_shared(); - for (int i = 0; i < this->blob_bottom_->count(); ++i) { - int c = channel_shared ? 
0 : (i / hw) % channels; - EXPECT_NEAR(top_data[i], - bottom_data[i]* slope_data[c] + bias_data[c], kDelta); - } - } -}; -TYPED_TEST_CASE(ChannelwiseAffineLayerTest, TestDtypesAndDevices); - - -TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineForward) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - filler.Fill(layer.blobs()[0].get()); - filler.Fill(layer.blobs()[1].get()); - this->TestChannelwiseAffine(&layer); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, - TestChannelwiseAffineForwardChannelShared) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - this->TestChannelwiseAffine(&layer); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineGradient) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(false); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, - TestChannelwiseAffineGradientChannelShared) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -} // namespace caffe diff --git a/src/caffe/test/test_scale_layer.cpp b/src/caffe/test/test_scale_layer.cpp new file mode 100644 index 00000000..ad116795 --- /dev/null +++ b/src/caffe/test/test_scale_layer.cpp @@ -0,0 +1,507 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/scale_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class ScaleLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + ScaleLayerTest() + : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_bottom_eltwise_(new Blob(2, 3, 4, 5)), + blob_bottom_broadcast_0_(new Blob()), + blob_bottom_broadcast_1_(new Blob()), + blob_bottom_broadcast_2_(new Blob()), + blob_bottom_scale_(new Blob(vector())), + blob_top_(new Blob()) { + Caffe::set_random_seed(1701); + vector broadcast_shape(2); + broadcast_shape[0] = 2; broadcast_shape[1] = 3; + this->blob_bottom_broadcast_0_->Reshape(broadcast_shape); + broadcast_shape[0] = 3; broadcast_shape[1] = 4; + this->blob_bottom_broadcast_1_->Reshape(broadcast_shape); + broadcast_shape[0] = 4; broadcast_shape[1] = 5; + this->blob_bottom_broadcast_2_->Reshape(broadcast_shape); + FillerParameter filler_param; + filler_param.set_min(1); + filler_param.set_max(10); + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_bottom_eltwise_); + 
filler.Fill(this->blob_bottom_broadcast_0_); + filler.Fill(this->blob_bottom_broadcast_1_); + filler.Fill(this->blob_bottom_broadcast_2_); + filler.Fill(this->blob_bottom_scale_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~ScaleLayerTest() { + delete blob_bottom_; + delete blob_bottom_eltwise_; + delete blob_bottom_broadcast_0_; + delete blob_bottom_broadcast_1_; + delete blob_bottom_broadcast_2_; + delete blob_bottom_scale_; + delete blob_top_; + } + Blob* const blob_bottom_; + Blob* const blob_bottom_eltwise_; + Blob* const blob_bottom_broadcast_0_; + Blob* const blob_bottom_broadcast_1_; + Blob* const blob_bottom_broadcast_2_; + Blob* const blob_bottom_scale_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(ScaleLayerTest, TestDtypesAndDevices); + +TYPED_TEST(ScaleLayerTest, TestForwardEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_bottom_->cpu_data(); + const int count = this->blob_bottom_->count(); + const Dtype* in_data_a = orig_bottom.cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestBackwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + shared_ptr > layer(new ScaleLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. 
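+ // (Unlike BiasLayer, ScaleLayer's Backward needs the original bottom data
+ // to form the scale gradient, so when computing in place Forward stashes a
+ // copy of bottom[0] in temp_. The non-in-place pass here produces reference
+ // diffs; the in-place rerun below must reproduce them, exercising that
+ // temp_ copy.)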
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_scale_diff; + orig_scale_diff.CopyFrom(*this->blob_bottom_eltwise_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) { + EXPECT_NEAR(orig_scale_diff.cpu_diff()[i], + this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ScaleParameter* scale_param = layer_param.mutable_scale_param(); + scale_param->set_axis(0); + scale_param->set_num_axes(-1); + scale_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = layer->blobs()[0]->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) * + this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(1); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, 
this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) * + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(1); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w), + orig_bottom.data_at(n, c, h, w) * + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestBackwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(1); + shared_ptr > layer(new ScaleLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_scale_diff; + orig_scale_diff.CopyFrom(*this->blob_bottom_broadcast_1_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) { + EXPECT_NEAR(orig_scale_diff.cpu_diff()[i], + this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ScaleParameter* scale_param = layer_param.mutable_scale_param(); + scale_param->set_axis(1); + scale_param->set_num_axes(2); + scale_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) * + layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParamAndBias) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ScaleParameter* scale_param = layer_param.mutable_scale_param(); + scale_param->set_axis(1); + scale_param->set_num_axes(2); + scale_param->mutable_filler()->set_type("gaussian"); + scale_param->set_bias_term(true); + scale_param->mutable_bias_filler()->set_type("gaussian"); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) * + layer->blobs()[0]->data_at(c, h, 0, 0) + + layer->blobs()[1]->data_at(c, h, 0, 0), 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(2); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) * + 
this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardScale) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_scale_); + LayerParameter layer_param; + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype scale = *this->blob_bottom_scale_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestForwardScaleAxis2) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_scale_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(2); + shared_ptr > layer(new ScaleLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype scale = *this->blob_bottom_scale_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5); + } +} + +TYPED_TEST(ScaleLayerTest, TestGradientEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ScaleParameter* scale_param = layer_param.mutable_scale_param(); + scale_param->set_axis(0); + scale_param->set_num_axes(-1); + scale_param->mutable_filler()->set_type("gaussian"); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(0); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(1); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + ScaleParameter* 
scale_param = layer_param.mutable_scale_param(); + scale_param->set_axis(1); + scale_param->set_num_axes(2); + scale_param->mutable_filler()->set_type("gaussian"); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(2); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientScale) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_scale_); + LayerParameter layer_param; + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientScaleAndBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_scale_); + LayerParameter layer_param; + ScaleParameter* scale_param = layer_param.mutable_scale_param(); + scale_param->set_bias_term(true); + scale_param->mutable_bias_filler()->set_type("gaussian"); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(ScaleLayerTest, TestGradientScaleAxis2) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_scale_); + LayerParameter layer_param; + layer_param.mutable_scale_param()->set_axis(2); + ScaleLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe From e94065efd516668e5168ed82669063b69315595d Mon Sep 17 00:00:00 2001 From: Ronghang Hu Date: Sat, 23 Jan 2016 01:22:04 -0800 Subject: [PATCH 213/223] show Caffe's version from MatCaffe --- matlab/+caffe/private/caffe_.cpp | 8 ++++++++ matlab/+caffe/version.m | 7 +++++++ 2 files changed, 15 insertions(+) create mode 100644 matlab/+caffe/version.m diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp index 1641e14b..1b1b2bff 100644 --- a/matlab/+caffe/private/caffe_.cpp +++ b/matlab/+caffe/private/caffe_.cpp @@ -504,6 +504,13 @@ static void write_mean(MEX_ARGS) { mxFree(mean_proto_file); } +// Usage: caffe_('version') +static void version(MEX_ARGS) { + mxCHECK(nrhs == 0, "Usage: caffe_('version')"); + // Return version string + plhs[0] = mxCreateString(AS_STRING(CAFFE_VERSION)); +} + /** ----------------------------------------------------------------- ** Available commands. **/ @@ -542,6 +549,7 @@ static handler_registry handlers[] = { { "reset", reset }, { "read_mean", read_mean }, { "write_mean", write_mean }, + { "version", version }, // The end. { "END", NULL }, }; diff --git a/matlab/+caffe/version.m b/matlab/+caffe/version.m new file mode 100644 index 00000000..61cae4f7 --- /dev/null +++ b/matlab/+caffe/version.m @@ -0,0 +1,7 @@ +function version_str = version() +% version() +% show Caffe's version. 
+ +version_str = caffe_('version'); + +end From 407050e02790b3738cc42fbbf1b51c35ee7c3021 Mon Sep 17 00:00:00 2001 From: Hugo Serrat Date: Thu, 21 Jan 2016 14:34:01 +0100 Subject: [PATCH 214/223] Updated import to make it work with pydotplus --- python/caffe/draw.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index f8bf5722..cfa3fc5b 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -10,7 +10,16 @@ """ from caffe.proto import caffe_pb2 -import pydot + +""" +pydot is not supported under python 3 and pydot2 doesn't work properly. +pydotplus works nicely (pip install pydotplus) +""" +try: + # Try to load pydotplus + import pydotplus as pydot +except ImportError: + import pydot # Internal layer and blob styles. LAYER_STYLE_DEFAULT = {'shape': 'record', From ca402f6d15b8f36c2e53f7de7f9817a6b73ac04d Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Fri, 27 Nov 2015 21:57:51 +0900 Subject: [PATCH 215/223] Prevent in-place computation in ReshapeLayer and FlattenLayer --- src/caffe/layers/flatten_layer.cpp | 2 ++ src/caffe/layers/reshape_layer.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index 651507e2..d4ab3935 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -7,6 +7,8 @@ namespace caffe { template void FlattenLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not " + "allow in-place computation."; const int start_axis = bottom[0]->CanonicalAxisIndex( this->layer_param_.flatten_param().axis()); const int end_axis = bottom[0]->CanonicalAxisIndex( diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 82339f76..45dd0902 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -7,6 +7,8 @@ namespace caffe { template void ReshapeLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { + CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not " + "allow in-place computation."; inferred_axis_ = -1; copy_axes_.clear(); const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); From 9a43dcf0c738fa799256318162d29a3969446efb Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 26 Jan 2016 13:58:58 -0800 Subject: [PATCH 216/223] Remove unnecessary CAFFE_TEST_CUDA_PROP declarations --- src/caffe/test/test_embed_layer.cpp | 4 ---- src/caffe/test/test_im2col_kernel.cu | 2 -- 2 files changed, 6 deletions(-) diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp index acd4b0f6..dc7f5c4a 100644 --- a/src/caffe/test/test_embed_layer.cpp +++ b/src/caffe/test/test_embed_layer.cpp @@ -12,10 +12,6 @@ namespace caffe { -#ifndef CPU_ONLY -extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; -#endif - template class EmbedLayerTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu index 5d8f01f1..e3a9791b 100644 --- a/src/caffe/test/test_im2col_kernel.cu +++ b/src/caffe/test/test_im2col_kernel.cu @@ -28,8 +28,6 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im, const int* kernel_shape, const int* pad, const int* stride, const int* dilation, Dtype* data_col); -extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; - template class Im2colKernelTest : public GPUDeviceTest { protected: From 
91c02f3b3bc48af1ab24a4687331492cf0171815 Mon Sep 17 00:00:00 2001 From: Madan Ram Date: Thu, 23 Jul 2015 16:42:15 +0530 Subject: [PATCH 217/223] Update mnist readme.md: scale moved to transform_param --- examples/mnist/readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index 413d4a1f..b87a0f53 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -41,11 +41,13 @@ Currently, we will read the MNIST data from the lmdb we created earlier in the d layer { name: "mnist" type: "Data" + transform_param { + scale: 0.00390625 + } data_param { source: "mnist_train_lmdb" backend: LMDB batch_size: 64 - scale: 0.00390625 } top: "data" top: "label" From ae31adcdca0bc12e33e691ee7cd9c4ad75c229bb Mon Sep 17 00:00:00 2001 From: Keir Mierle Date: Fri, 26 Jun 2015 00:10:21 -0700 Subject: [PATCH 218/223] Make the two separate build systems clearer in the documentation --- docs/installation.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index cce7ec35..ef781e8d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -87,15 +87,20 @@ There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https ## Compilation -Now that you have the prerequisites, edit your `Makefile.config` to change the paths for your setup The defaults should work, but uncomment the relevant lines if using Anaconda Python. +Caffe can be compiled with either Make or CMake. Make is officially supported while CMake is supported by the community. + +### Compilation with Make + +Configure the build by copying and modifying the example `Makefile.config` for your setup. The defaults should work, but uncomment the relevant lines if using Anaconda Python. cp Makefile.config.example Makefile.config - # Adjust Makefile.config (for example, if using Anaconda Python) + # Adjust Makefile.config (for example, if using Anaconda Python, or if cuDNN is desired) make all make test make runtest -- For cuDNN acceleration, you should uncomment the `USE_CUDNN := 1` switch in `Makefile.config`. +- For CPU & GPU accelerated Caffe, no changes are needed. +- For cuDNN acceleration using NVIDIA's proprietary cuDNN software, uncomment the `USE_CUDNN := 1` switch in `Makefile.config`. cuDNN is sometimes but not always faster than Caffe's GPU acceleration. - For CPU-only Caffe, uncomment `CPU_ONLY := 1` in `Makefile.config`. To compile the Python and MATLAB wrappers do `make pycaffe` and `make matcaffe` respectively. @@ -107,7 +112,7 @@ Be sure to set your MATLAB and Python paths in `Makefile.config` first! Now that you have installed Caffe, check out the [MNIST tutorial](gathered/examples/mnist.html) and the [reference ImageNet model tutorial](gathered/examples/imagenet.html). -### CMake Compilation +### Compilation with CMake In lieu of manually editing `Makefile.config` to configure the build, Caffe offers an unofficial CMake build thanks to @Nerei, @akosiorek, and other members of the community. It requires CMake version >= 2.8.7. The basic steps are as follows: @@ -116,6 +121,7 @@ The basic steps are as follows: cd build cmake .. make all + make install make runtest See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details. 
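As a concrete example of the CMake path described above (a sketch only: the option names are assumptions drawn from a typical Caffe checkout rather than part of this patch, and PR #1667 remains the authoritative reference), a CPU-only build without cuDNN could be configured as:

    mkdir build && cd build
    # CPU_ONLY skips all CUDA code paths; BLAS selects the CPU BLAS backend (Atlas, Open, or MKL).
    cmake -DCPU_ONLY=ON -DBLAS=Open ..
    make all -j4
    make runtest

A GPU build with cuDNN would instead pass `-DCPU_ONLY=OFF -DUSE_CUDNN=ON`, mirroring the `USE_CUDNN := 1` switch in `Makefile.config`.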
From afcaf253daa942821250db3f9a6afbe1d955bdf1 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 26 Jan 2016 23:09:27 -0800 Subject: [PATCH 219/223] Remove incorrect cast of gemm int arg to Dtype in BiasLayer --- src/caffe/layers/bias_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/bias_layer.cpp b/src/caffe/layers/bias_layer.cpp index 0a786b5d..4726a729 100644 --- a/src/caffe/layers/bias_layer.cpp +++ b/src/caffe/layers/bias_layer.cpp @@ -80,7 +80,7 @@ void BiasLayer::Forward_cpu(const vector*>& bottom, } for (int n = 0; n < outer_dim_; ++n) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_, - inner_dim_, Dtype(1), Dtype(1), bias_data, + inner_dim_, 1, Dtype(1), bias_data, bias_multiplier_.cpu_data(), Dtype(1), top_data); top_data += dim_; } From 14d0bb4767cba22b826eae03a6e5cfa4c1cd4287 Mon Sep 17 00:00:00 2001 From: gdh1995 Date: Wed, 13 Jan 2016 18:20:41 +0800 Subject: [PATCH 220/223] use relative paths on making build/tools/ links The old uses `abspath`, which I think is so harmful: * If I `cp -a` the whole project, `build/tools/caffe` still refer to the old file, until `make clean`, making debugging very hard * For `tar` and `scp`, the soft links can not work unless the target project folder has the same path --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 787b0e8d..76d51ad8 100644 --- a/Makefile +++ b/Makefile @@ -601,7 +601,7 @@ $(TEST_CXX_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CXX_BUILD_DIR)/%.o \ # Target for extension-less symlinks to tool binaries with extension '*.bin'. $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR) @ $(RM) $@ - @ ln -s $(abspath $<) $@ + @ ln -s $(notdir $<) $@ $(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ From dd2099786f11033ded6e9f46bc772ef9b2166399 Mon Sep 17 00:00:00 2001 From: Sergei Nikolaev Date: Tue, 2 Feb 2016 13:48:18 -0800 Subject: [PATCH 221/223] Nicely prints GPU names --- src/caffe/test/test_caffe_main.cpp | 1 + tools/caffe.cpp | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/caffe/test/test_caffe_main.cpp b/src/caffe/test/test_caffe_main.cpp index c8caf5ac..fccf6f16 100644 --- a/src/caffe/test/test_caffe_main.cpp +++ b/src/caffe/test/test_caffe_main.cpp @@ -34,6 +34,7 @@ int main(int argc, char** argv) { cudaGetDevice(&device); cout << "Current device id: " << device << endl; cudaGetDeviceProperties(&CAFFE_TEST_CUDA_PROP, device); + cout << "Current device name: " << CAFFE_TEST_CUDA_PROP.name << endl; #endif // invoke the test. return RUN_ALL_TESTS(); diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 470165ad..ebe95d61 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -183,7 +183,13 @@ int train() { s << (i ? 
", " : "") << gpus[i]; } LOG(INFO) << "Using GPUs " << s.str(); - +#ifndef CPU_ONLY + cudaDeviceProp device_prop; + for (int i = 0; i < gpus.size(); ++i) { + cudaGetDeviceProperties(&device_prop, gpus[i]); + LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name; + } +#endif solver_param.set_device_id(gpus[0]); Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); @@ -229,6 +235,11 @@ int test() { get_gpus(&gpus); if (gpus.size() != 0) { LOG(INFO) << "Use GPU with device ID " << gpus[0]; +#ifndef CPU_ONLY + cudaDeviceProp device_prop; + cudaGetDeviceProperties(&device_prop, gpus[0]); + LOG(INFO) << "GPU device name: " << device_prop.name; +#endif Caffe::SetDevice(gpus[0]); Caffe::set_mode(Caffe::GPU); } else { From 68c751c6f7a521994ccdc9330b89aef9c9024a0a Mon Sep 17 00:00:00 2001 From: Abhijit Kundu Date: Tue, 9 Feb 2016 02:45:46 -0500 Subject: [PATCH 222/223] bugfix for incorrect behaviour in caffe_parse_linker_libs function while extracting libflags from absolute library path with multiple (dots) --- cmake/Utils.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index a1bde1ae..653de5fd 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -346,10 +346,11 @@ function(caffe_parse_linker_libs Caffe_LINKER_LIBS_variable folders_var flags_va elseif(lib MATCHES "^-l.*") list(APPEND libflags ${lib}) elseif(IS_ABSOLUTE ${lib}) - get_filename_component(name_we ${lib} NAME_WE) get_filename_component(folder ${lib} PATH) + get_filename_component(filename ${lib} NAME) + string(REGEX REPLACE "\\.[^.]*$" "" filename_without_shortest_ext ${filename}) - string(REGEX MATCH "^lib(.*)" __match ${name_we}) + string(REGEX MATCH "^lib(.*)" __match ${filename_without_shortest_ext}) list(APPEND libflags -l${CMAKE_MATCH_1}) list(APPEND folders ${folder}) else() From 0dcd397b29507b8314e252e850518c5695efbb83 Mon Sep 17 00:00:00 2001 From: Ross Girshick Date: Sun, 9 Aug 2015 13:41:39 -0700 Subject: [PATCH 223/223] Fast and Faster R-CNN change set - smooth l1 loss - roi pooling - expose phase in pycaffe - dropout scaling at test time (needed for MSRA-trained ZF network) --- LICENSE | 93 +++++---- include/caffe/fast_rcnn_layers.hpp | 104 ++++++++++ include/caffe/layer.hpp | 1 + include/caffe/layers/dropout_layer.hpp | 1 + include/caffe/layers/python_layer.hpp | 2 +- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 8 + src/caffe/layers/dropout_layer.cpp | 27 ++- src/caffe/layers/dropout_layer.cu | 25 ++- src/caffe/layers/roi_pooling_layer.cpp | 141 ++++++++++++++ src/caffe/layers/roi_pooling_layer.cu | 188 +++++++++++++++++++ src/caffe/layers/smooth_L1_loss_layer.cpp | 70 +++++++ src/caffe/layers/smooth_L1_loss_layer.cu | 117 ++++++++++++ src/caffe/proto/caffe.proto | 21 +++ src/caffe/test/test_roi_pooling_layer.cpp | 103 ++++++++++ src/caffe/test/test_smooth_L1_loss_layer.cpp | 90 +++++++++ 16 files changed, 947 insertions(+), 46 deletions(-) create mode 100644 include/caffe/fast_rcnn_layers.hpp create mode 100644 src/caffe/layers/roi_pooling_layer.cpp create mode 100644 src/caffe/layers/roi_pooling_layer.cu create mode 100644 src/caffe/layers/smooth_L1_loss_layer.cpp create mode 100644 src/caffe/layers/smooth_L1_loss_layer.cu create mode 100644 src/caffe/test/test_roi_pooling_layer.cpp create mode 100644 src/caffe/test/test_smooth_L1_loss_layer.cpp diff --git a/LICENSE b/LICENSE index d69d16f5..52599ac3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,44 +1,67 @@ -COPYRIGHT +--------------------------START OF THIRD PARTY 
NOTICE-------------------------- -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. +Microsoft licenses this Third Party IP to you under the licensing +terms for the Microsoft product. Microsoft reserves all other rights +not expressly granted under this agreement, whether by implication, +estoppel or otherwise. -All other contributions: -Copyright (c) 2014, 2015, the respective contributors -All rights reserved. +Caffe + +Copyrights can be found here: https://github.com/BVLC/caffe/blob/master/LICENSE + +Provided for Informational Purposes Only -Caffe uses a shared copyright model: each contributor holds copyright over -their contributions to Caffe. The project versioning records all such -contribution and copyright details. If a contributor wants to further mark -their specific copyright on a particular contribution, they should indicate -their copyright solely in the commit message of the change when it is -committed. +BSD License -LICENSE +All rights reserved. Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +modification, are permitted provided that the following conditions +are met: + +Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-CONTRIBUTION AGREEMENT +---------------------------END OF THIRD PARTY NOTICE--------------------------- + +Fast R-CNN + +Copyright (c) Microsoft Corporation + +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. -By contributing to the BVLC/caffe repository through pull-request, comment, -or otherwise, the contributor releases their content to the -license and copyright terms herein. +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/include/caffe/fast_rcnn_layers.hpp b/include/caffe/fast_rcnn_layers.hpp new file mode 100644 index 00000000..81d6d5b8 --- /dev/null +++ b/include/caffe/fast_rcnn_layers.hpp @@ -0,0 +1,104 @@ +// ------------------------------------------------------------------ +// Fast R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Ross Girshick +// ------------------------------------------------------------------ + +#ifndef CAFFE_FAST_RCNN_LAYERS_HPP_ +#define CAFFE_FAST_RCNN_LAYERS_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +/* ROIPoolingLayer - Region of Interest Pooling Layer +*/ +template +class ROIPoolingLayer : public Layer { + public: + explicit ROIPoolingLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "ROIPooling"; } + + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MaxBottomBlobs() const { return 2; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + int channels_; + int height_; + int width_; + int pooled_height_; + int pooled_width_; + Dtype spatial_scale_; + Blob max_idx_; +}; + +template +class SmoothL1LossLayer : public LossLayer { + public: + explicit SmoothL1LossLayer(const LayerParameter& param) + : LossLayer(param), diff_() {} + virtual void LayerSetUp(const vector*>& bottom, + const 
vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "SmoothL1Loss"; } + + virtual inline int ExactNumBottomBlobs() const { return -1; } + virtual inline int MinBottomBlobs() const { return 2; } + virtual inline int MaxBottomBlobs() const { return 4; } + + /** + * Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate + * to both inputs -- override to return true and always allow force_backward. + */ + virtual inline bool AllowForceBackward(const int bottom_index) const { + return true; + } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + Blob diff_; + Blob errors_; + Blob ones_; + bool has_weights_; + Dtype sigma2_; +}; + +} // namespace caffe + +#endif // CAFFE_FAST_RCNN_LAYERS_HPP_ diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 10f353f9..8544137c 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -316,6 +316,7 @@ class Layer { param_propagate_down_[param_id] = value; } + inline Phase phase() { return phase_; } protected: /** The protobuf that stores the layer parameters */ diff --git a/include/caffe/layers/dropout_layer.hpp b/include/caffe/layers/dropout_layer.hpp index e83143bc..fc19a392 100644 --- a/include/caffe/layers/dropout_layer.hpp +++ b/include/caffe/layers/dropout_layer.hpp @@ -73,6 +73,7 @@ class DropoutLayer : public NeuronLayer { /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$ Dtype scale_; unsigned int uint_thres_; + bool scale_train_; }; } // namespace caffe diff --git a/include/caffe/layers/python_layer.hpp b/include/caffe/layers/python_layer.hpp index b839d526..3c61d1af 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -24,7 +24,7 @@ class PythonLayer : public Layer { && !ShareInParallel()) { LOG(FATAL) << "PythonLayer is not implemented in Multi-GPU training"; } - self_.attr("param_str") = bp::str( + self_.attr("param_str_") = bp::str( this->layer_param_.python_param().param_str()); self_.attr("setup")(bottom, top); } diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index e2881b89..35868a40 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list +from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 12a57455..234c6ab0 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include // these need to be included after boost on OS X @@ -219,9 +220,15 @@ BOOST_PYTHON_MODULE(_caffe) { bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_device", &Caffe::SetDevice); + bp::def("set_random_seed", &Caffe::set_random_seed); bp::def("layer_type_list", 
&LayerRegistry::LayerTypeList); + bp::enum_("Phase") + .value("TRAIN", caffe::TRAIN) + .value("TEST", caffe::TEST) + .export_values(); + bp::class_, shared_ptr >, boost::noncopyable >("Net", bp::no_init) .def("__init__", bp::make_constructor(&Net_Init)) @@ -281,6 +288,7 @@ BOOST_PYTHON_MODULE(_caffe) { bp::return_internal_reference<>())) .def("setup", &Layer::LayerSetUp) .def("reshape", &Layer::Reshape) + .add_property("phase", bp::make_function(&Layer::phase)) .add_property("type", bp::make_function(&Layer::type)); bp::register_ptr_to_python > >(); diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index 9cb64d97..5a20da17 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -16,6 +16,7 @@ void DropoutLayer::LayerSetUp(const vector*>& bottom, DCHECK(threshold_ < 1.); scale_ = 1. / (1. - threshold_); uint_thres_ = static_cast(UINT_MAX * threshold_); + scale_train_ = this->layer_param_.dropout_param().scale_train(); } template @@ -37,11 +38,20 @@ void DropoutLayer::Forward_cpu(const vector*>& bottom, if (this->phase_ == TRAIN) { // Create random numbers caffe_rng_bernoulli(count, 1. - threshold_, mask); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] * mask[i] * scale_; + if (scale_train_) { + for (int i = 0; i < count; ++i) { + top_data[i] = bottom_data[i] * mask[i] * scale_; + } + } else { + for (int i = 0; i < count; ++i) { + top_data[i] = bottom_data[i] * mask[i]; + } } } else { caffe_copy(bottom[0]->count(), bottom_data, top_data); + if (!scale_train_) { + caffe_scal( count, 1. / scale_, top_data); + } } } @@ -55,11 +65,20 @@ void DropoutLayer::Backward_cpu(const vector*>& top, if (this->phase_ == TRAIN) { const unsigned int* mask = rand_vec_.cpu_data(); const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - bottom_diff[i] = top_diff[i] * mask[i] * scale_; + if (scale_train_) { + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * mask[i] * scale_; + } + } else { + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * mask[i]; + } } } else { caffe_copy(top[0]->count(), top_diff, bottom_diff); + if (!scale_train_) { + caffe_scal(top[0]->count(), 1. / scale_, bottom_diff); + } } } } diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index 186c10ca..54afbe6c 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -26,11 +26,19 @@ void DropoutLayer::Forward_gpu(const vector*>& bottom, caffe_gpu_rng_uniform(count, mask); // set thresholds // NOLINT_NEXT_LINE(whitespace/operators) - DropoutForward<<>>( - count, bottom_data, mask, uint_thres_, scale_, top_data); + if (scale_train_) { + DropoutForward<<>>( + count, bottom_data, mask, uint_thres_, scale_, top_data); + } else { + DropoutForward<<>>( + count, bottom_data, mask, uint_thres_, 1.f, top_data); + } CUDA_POST_KERNEL_CHECK; } else { caffe_copy(count, bottom_data, top_data); + if (!scale_train_) { + caffe_gpu_scal(count, 1. 
/ scale_, top_data); + } } } @@ -55,12 +63,19 @@ void DropoutLayer::Backward_gpu(const vector*>& top, static_cast(rand_vec_.gpu_data()); const int count = bottom[0]->count(); // NOLINT_NEXT_LINE(whitespace/operators) - DropoutBackward<<>>( - count, top_diff, mask, uint_thres_, scale_, bottom_diff); + if (scale_train_) { + DropoutBackward<<>>( + count, top_diff, mask, uint_thres_, scale_, bottom_diff); + } else { + DropoutBackward<<>>( + count, top_diff, mask, uint_thres_, 1.f, bottom_diff); + } CUDA_POST_KERNEL_CHECK; } else { caffe_copy(top[0]->count(), top_diff, bottom_diff); + if (!scale_train_) { + caffe_gpu_scal(top[0]->count(), 1. / scale_, bottom_diff); + } } } } diff --git a/src/caffe/layers/roi_pooling_layer.cpp b/src/caffe/layers/roi_pooling_layer.cpp new file mode 100644 index 00000000..d2595194 --- /dev/null +++ b/src/caffe/layers/roi_pooling_layer.cpp @@ -0,0 +1,141 @@ +// ------------------------------------------------------------------ +// Fast R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Ross Girshick +// ------------------------------------------------------------------ + +#include + +#include "caffe/fast_rcnn_layers.hpp" + +using std::max; +using std::min; +using std::floor; +using std::ceil; + +namespace caffe { + +template +void ROIPoolingLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + ROIPoolingParameter roi_pool_param = this->layer_param_.roi_pooling_param(); + CHECK_GT(roi_pool_param.pooled_h(), 0) + << "pooled_h must be > 0"; + CHECK_GT(roi_pool_param.pooled_w(), 0) + << "pooled_w must be > 0"; + pooled_height_ = roi_pool_param.pooled_h(); + pooled_width_ = roi_pool_param.pooled_w(); + spatial_scale_ = roi_pool_param.spatial_scale(); + LOG(INFO) << "Spatial scale: " << spatial_scale_; +} + +template +void ROIPoolingLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + top[0]->Reshape(bottom[1]->num(), channels_, pooled_height_, + pooled_width_); + max_idx_.Reshape(bottom[1]->num(), channels_, pooled_height_, + pooled_width_); +} + +template +void ROIPoolingLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* bottom_rois = bottom[1]->cpu_data(); + // Number of ROIs + int num_rois = bottom[1]->num(); + int batch_size = bottom[0]->num(); + int top_count = top[0]->count(); + Dtype* top_data = top[0]->mutable_cpu_data(); + caffe_set(top_count, Dtype(-FLT_MAX), top_data); + int* argmax_data = max_idx_.mutable_cpu_data(); + caffe_set(top_count, -1, argmax_data); + + // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R + for (int n = 0; n < num_rois; ++n) { + int roi_batch_ind = bottom_rois[0]; + int roi_start_w = round(bottom_rois[1] * spatial_scale_); + int roi_start_h = round(bottom_rois[2] * spatial_scale_); + int roi_end_w = round(bottom_rois[3] * spatial_scale_); + int roi_end_h = round(bottom_rois[4] * spatial_scale_); + CHECK_GE(roi_batch_ind, 0); + CHECK_LT(roi_batch_ind, batch_size); + + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + const Dtype bin_size_h = static_cast(roi_height) + / static_cast(pooled_height_); + const Dtype bin_size_w = static_cast(roi_width) + / static_cast(pooled_width_); + + const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind); + + for (int c = 0; c < 
channels_; ++c) { + for (int ph = 0; ph < pooled_height_; ++ph) { + for (int pw = 0; pw < pooled_width_; ++pw) { + // Compute pooling region for this output unit: + // start (included) = floor(ph * roi_height / pooled_height_) + // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) + int hstart = static_cast(floor(static_cast(ph) + * bin_size_h)); + int wstart = static_cast(floor(static_cast(pw) + * bin_size_w)); + int hend = static_cast(ceil(static_cast(ph + 1) + * bin_size_h)); + int wend = static_cast(ceil(static_cast(pw + 1) + * bin_size_w)); + + hstart = min(max(hstart + roi_start_h, 0), height_); + hend = min(max(hend + roi_start_h, 0), height_); + wstart = min(max(wstart + roi_start_w, 0), width_); + wend = min(max(wend + roi_start_w, 0), width_); + + bool is_empty = (hend <= hstart) || (wend <= wstart); + + const int pool_index = ph * pooled_width_ + pw; + if (is_empty) { + top_data[pool_index] = 0; + argmax_data[pool_index] = -1; + } + + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + const int index = h * width_ + w; + if (batch_data[index] > top_data[pool_index]) { + top_data[pool_index] = batch_data[index]; + argmax_data[pool_index] = index; + } + } + } + } + } + // Increment all data pointers by one channel + batch_data += bottom[0]->offset(0, 1); + top_data += top[0]->offset(0, 1); + argmax_data += max_idx_.offset(0, 1); + } + // Increment ROI data pointer + bottom_rois += bottom[1]->offset(1); + } +} + +template +void ROIPoolingLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + NOT_IMPLEMENTED; +} + + +#ifdef CPU_ONLY +STUB_GPU(ROIPoolingLayer); +#endif + +INSTANTIATE_CLASS(ROIPoolingLayer); +REGISTER_LAYER_CLASS(ROIPooling); + +} // namespace caffe diff --git a/src/caffe/layers/roi_pooling_layer.cu b/src/caffe/layers/roi_pooling_layer.cu new file mode 100644 index 00000000..2c42ccea --- /dev/null +++ b/src/caffe/layers/roi_pooling_layer.cu @@ -0,0 +1,188 @@ +// ------------------------------------------------------------------ +// Fast R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Ross Girshick +// ------------------------------------------------------------------ + +#include + +#include "caffe/fast_rcnn_layers.hpp" + +using std::max; +using std::min; + +namespace caffe { + +template +__global__ void ROIPoolForward(const int nthreads, const Dtype* bottom_data, + const Dtype spatial_scale, const int channels, const int height, + const int width, const int pooled_height, const int pooled_width, + const Dtype* bottom_rois, Dtype* top_data, int* argmax_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + bottom_rois += n * 5; + int roi_batch_ind = bottom_rois[0]; + int roi_start_w = round(bottom_rois[1] * spatial_scale); + int roi_start_h = round(bottom_rois[2] * spatial_scale); + int roi_end_w = round(bottom_rois[3] * spatial_scale); + int roi_end_h = round(bottom_rois[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + Dtype bin_size_h = static_cast(roi_height) + / static_cast(pooled_height); + Dtype bin_size_w = static_cast(roi_width) + 
/ static_cast(pooled_width); + + int hstart = static_cast(floor(static_cast(ph) + * bin_size_h)); + int wstart = static_cast(floor(static_cast(pw) + * bin_size_w)); + int hend = static_cast(ceil(static_cast(ph + 1) + * bin_size_h)); + int wend = static_cast(ceil(static_cast(pw + 1) + * bin_size_w)); + + // Add roi offsets and clip to input boundaries + hstart = min(max(hstart + roi_start_h, 0), height); + hend = min(max(hend + roi_start_h, 0), height); + wstart = min(max(wstart + roi_start_w, 0), width); + wend = min(max(wend + roi_start_w, 0), width); + bool is_empty = (hend <= hstart) || (wend <= wstart); + + // Define an empty pooling region to be zero + Dtype maxval = is_empty ? 0 : -FLT_MAX; + // If nothing is pooled, argmax = -1 causes nothing to be backprop'd + int maxidx = -1; + bottom_data += (roi_batch_ind * channels + c) * height * width; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int bottom_index = h * width + w; + if (bottom_data[bottom_index] > maxval) { + maxval = bottom_data[bottom_index]; + maxidx = bottom_index; + } + } + } + top_data[index] = maxval; + argmax_data[index] = maxidx; + } +} + +template +void ROIPoolingLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* bottom_rois = bottom[1]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + int* argmax_data = max_idx_.mutable_gpu_data(); + int count = top[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + ROIPoolForward<<>>( + count, bottom_data, spatial_scale_, channels_, height_, width_, + pooled_height_, pooled_width_, bottom_rois, top_data, argmax_data); + CUDA_POST_KERNEL_CHECK; +} + +template +__global__ void ROIPoolBackward(const int nthreads, const Dtype* top_diff, + const int* argmax_data, const int num_rois, const Dtype spatial_scale, + const int channels, const int height, const int width, + const int pooled_height, const int pooled_width, Dtype* bottom_diff, + const Dtype* bottom_rois) { + CUDA_KERNEL_LOOP(index, nthreads) { + // (n, c, h, w) coords in bottom data + int w = index % width; + int h = (index / width) % height; + int c = (index / width / height) % channels; + int n = index / width / height / channels; + + Dtype gradient = 0; + // Accumulate gradient over all ROIs that pooled this element + for (int roi_n = 0; roi_n < num_rois; ++roi_n) { + const Dtype* offset_bottom_rois = bottom_rois + roi_n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + // Skip if ROI's batch index doesn't match n + if (n != roi_batch_ind) { + continue; + } + + int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); + int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); + int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); + int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); + + // Skip if ROI doesn't include (h, w) + const bool in_roi = (w >= roi_start_w && w <= roi_end_w && + h >= roi_start_h && h <= roi_end_h); + if (!in_roi) { + continue; + } + + int offset = (roi_n * channels + c) * pooled_height * pooled_width; + const Dtype* offset_top_diff = top_diff + offset; + const int* offset_argmax_data = argmax_data + offset; + + // Compute feasible set of pooled units that could have pooled + // this bottom unit + + // Force malformed ROIs to be 1x1 + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + + Dtype bin_size_h = static_cast(roi_height) + / static_cast(pooled_height); + Dtype 
bin_size_w = static_cast(roi_width) + / static_cast(pooled_width); + + int phstart = floor(static_cast(h - roi_start_h) / bin_size_h); + int phend = ceil(static_cast(h - roi_start_h + 1) / bin_size_h); + int pwstart = floor(static_cast(w - roi_start_w) / bin_size_w); + int pwend = ceil(static_cast(w - roi_start_w + 1) / bin_size_w); + + phstart = min(max(phstart, 0), pooled_height); + phend = min(max(phend, 0), pooled_height); + pwstart = min(max(pwstart, 0), pooled_width); + pwend = min(max(pwend, 0), pooled_width); + + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + if (offset_argmax_data[ph * pooled_width + pw] == (h * width + w)) { + gradient += offset_top_diff[ph * pooled_width + pw]; + } + } + } + } + bottom_diff[index] = gradient; + } +} + +template +void ROIPoolingLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (!propagate_down[0]) { + return; + } + const Dtype* bottom_rois = bottom[1]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + caffe_gpu_set(count, Dtype(0.), bottom_diff); + const int* argmax_data = max_idx_.gpu_data(); + // NOLINT_NEXT_LINE(whitespace/operators) + ROIPoolBackward<<>>( + count, top_diff, argmax_data, top[0]->num(), spatial_scale_, channels_, + height_, width_, pooled_height_, pooled_width_, bottom_diff, bottom_rois); + CUDA_POST_KERNEL_CHECK; +} + +INSTANTIATE_LAYER_GPU_FUNCS(ROIPoolingLayer); + +} // namespace caffe diff --git a/src/caffe/layers/smooth_L1_loss_layer.cpp b/src/caffe/layers/smooth_L1_loss_layer.cpp new file mode 100644 index 00000000..4e748368 --- /dev/null +++ b/src/caffe/layers/smooth_L1_loss_layer.cpp @@ -0,0 +1,70 @@ +// ------------------------------------------------------------------ +// Fast R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Ross Girshick +// ------------------------------------------------------------------ + +#include "caffe/fast_rcnn_layers.hpp" + +namespace caffe { + +template +void SmoothL1LossLayer::LayerSetUp( + const vector*>& bottom, const vector*>& top) { + SmoothL1LossParameter loss_param = this->layer_param_.smooth_l1_loss_param(); + sigma2_ = loss_param.sigma() * loss_param.sigma(); + has_weights_ = (bottom.size() >= 3); + if (has_weights_) { + CHECK_EQ(bottom.size(), 4) << "If weights are used, must specify both " + "inside and outside weights"; + } +} + +template +void SmoothL1LossLayer::Reshape( + const vector*>& bottom, const vector*>& top) { + LossLayer::Reshape(bottom, top); + CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); + CHECK_EQ(bottom[0]->height(), bottom[1]->height()); + CHECK_EQ(bottom[0]->width(), bottom[1]->width()); + if (has_weights_) { + CHECK_EQ(bottom[0]->channels(), bottom[2]->channels()); + CHECK_EQ(bottom[0]->height(), bottom[2]->height()); + CHECK_EQ(bottom[0]->width(), bottom[2]->width()); + CHECK_EQ(bottom[0]->channels(), bottom[3]->channels()); + CHECK_EQ(bottom[0]->height(), bottom[3]->height()); + CHECK_EQ(bottom[0]->width(), bottom[3]->width()); + } + diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + errors_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + // vector of ones used to sum + ones_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + for (int i = 
+    ones_.mutable_cpu_data()[i] = Dtype(1);
+  }
+}
+
+template <typename Dtype>
+void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  NOT_IMPLEMENTED;
+}
+
+template <typename Dtype>
+void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  NOT_IMPLEMENTED;
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(SmoothL1LossLayer);
+#endif
+
+INSTANTIATE_CLASS(SmoothL1LossLayer);
+REGISTER_LAYER_CLASS(SmoothL1Loss);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/smooth_L1_loss_layer.cu b/src/caffe/layers/smooth_L1_loss_layer.cu
new file mode 100644
index 00000000..97314223
--- /dev/null
+++ b/src/caffe/layers/smooth_L1_loss_layer.cu
@@ -0,0 +1,117 @@
+// ------------------------------------------------------------------
+// Fast R-CNN
+// Copyright (c) 2015 Microsoft
+// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
+// Written by Ross Girshick
+// ------------------------------------------------------------------
+
+#include "caffe/fast_rcnn_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void SmoothL1Forward(const int n, const Dtype* in, Dtype* out,
+    Dtype sigma2) {
+  // f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
+  //        |x| - 0.5 / sigma / sigma    otherwise
+  CUDA_KERNEL_LOOP(index, n) {
+    Dtype val = in[index];
+    Dtype abs_val = abs(val);
+    if (abs_val < 1.0 / sigma2) {
+      out[index] = 0.5 * val * val * sigma2;
+    } else {
+      out[index] = abs_val - 0.5 / sigma2;
+    }
+  }
+}
+
+template <typename Dtype>
+void SmoothL1LossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  int count = bottom[0]->count();
+  caffe_gpu_sub(
+      count,
+      bottom[0]->gpu_data(),
+      bottom[1]->gpu_data(),
+      diff_.mutable_gpu_data());    // d := b0 - b1
+  if (has_weights_) {
+    // apply "inside" weights
+    caffe_gpu_mul(
+        count,
+        bottom[2]->gpu_data(),
+        diff_.gpu_data(),
+        diff_.mutable_gpu_data());  // d := w_in * (b0 - b1)
+  }
+  SmoothL1Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+      count, diff_.gpu_data(), errors_.mutable_gpu_data(), sigma2_);
+  CUDA_POST_KERNEL_CHECK;
+
+  if (has_weights_) {
+    // apply "outside" weights
+    caffe_gpu_mul(
+        count,
+        bottom[3]->gpu_data(),
+        errors_.gpu_data(),
+        errors_.mutable_gpu_data());  // d := w_out * SmoothL1(w_in * (b0 - b1))
+  }
+
+  Dtype loss;
+  caffe_gpu_dot(count, ones_.gpu_data(), errors_.gpu_data(), &loss);
+  top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num();
+}
+
+template <typename Dtype>
+__global__ void SmoothL1Backward(const int n, const Dtype* in, Dtype* out,
+    Dtype sigma2) {
+  // f'(x) = sigma * sigma * x    if |x| < 1 / sigma / sigma
+  //       = sign(x)              otherwise
+  CUDA_KERNEL_LOOP(index, n) {
+    Dtype val = in[index];
+    Dtype abs_val = abs(val);
+    if (abs_val < 1.0 / sigma2) {
+      out[index] = sigma2 * val;
+    } else {
+      out[index] = (Dtype(0) < val) - (val < Dtype(0));
+    }
+  }
+}
+
+template <typename Dtype>
+void SmoothL1LossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  // after forwards, diff_ holds w_in * (b0 - b1)
+  int count = diff_.count();
+  SmoothL1Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+      count, diff_.gpu_data(), diff_.mutable_gpu_data(), sigma2_);
+  CUDA_POST_KERNEL_CHECK;
+  for (int i = 0; i < 2; ++i) {
+    if (propagate_down[i]) {
+      const Dtype sign = (i == 0) ? 1 : -1;
+      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
+      caffe_gpu_axpby(
+          count,                           // count
+          alpha,                           // alpha
+          diff_.gpu_data(),                // x
+          Dtype(0),                        // beta
+          bottom[i]->mutable_gpu_diff());  // y
+      if (has_weights_) {
+        // Scale by "inside" weight
+        caffe_gpu_mul(
+            count,
+            bottom[2]->gpu_data(),
+            bottom[i]->gpu_diff(),
+            bottom[i]->mutable_gpu_diff());
+        // Scale by "outside" weight
+        caffe_gpu_mul(
+            count,
+            bottom[3]->gpu_data(),
+            bottom[i]->gpu_diff(),
+            bottom[i]->mutable_gpu_diff());
+      }
+    }
+  }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(SmoothL1LossLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 6493a72d..51a8dd3e 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -385,8 +385,10 @@ message LayerParameter {
   optional ReductionParameter reduction_param = 136;
   optional ReLUParameter relu_param = 123;
   optional ReshapeParameter reshape_param = 133;
+  optional ROIPoolingParameter roi_pooling_param = 8266711;
   optional ScaleParameter scale_param = 142;
   optional SigmoidParameter sigmoid_param = 124;
+  optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
   optional SoftmaxParameter softmax_param = 125;
   optional SPPParameter spp_param = 132;
   optional SliceParameter slice_param = 126;
@@ -633,6 +635,7 @@ message DataParameter {
 
 message DropoutParameter {
   optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
+  optional bool scale_train = 2 [default = true];  // scale train or test phase
 }
 
 // DummyDataLayer fills any number of arbitrarily shaped blobs with random
@@ -994,6 +997,17 @@ message ReshapeParameter {
   optional int32 num_axes = 3 [default = -1];
 }
 
+// Message that stores parameters used by ROIPoolingLayer
+message ROIPoolingParameter {
+  // Pad, kernel size, and stride are all given as a single value for equal
+  // dimensions in height and width or as Y, X pairs.
+  optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
+  optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
+  // Multiplicative spatial scale factor to translate ROI coords from their
+  // input scale to the scale used when pooling
+  optional float spatial_scale = 3 [default = 1];
+}
+
 message ScaleParameter {
   // The first axis of bottom[0] (the first input Blob) along which to apply
   // bottom[1] (the second input Blob). May be negative to index from the end
@@ -1040,6 +1054,13 @@ message SigmoidParameter {
   optional Engine engine = 1 [default = DEFAULT];
 }
 
+message SmoothL1LossParameter {
+  // SmoothL1Loss(x) =
+  //   0.5 * (sigma * x) ** 2    -- if x < 1.0 / sigma / sigma
+  //   |x| - 0.5 / sigma / sigma -- otherwise
+  optional float sigma = 1 [default = 1];
+}
+
 message SliceParameter {
   // The axis along which to slice -- may be negative to index from the end
   // (e.g., -1 for the last axis).
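Note (not part of the patch): the two new messages above are normally filled in from the network prototxt, but they can also be set programmatically through the generated protobuf accessors, which is exactly what the tests below do. A minimal sketch, with made-up pooled_h/pooled_w, spatial_scale, and sigma values chosen only for illustration:

    // Illustrative sketch only; values are example settings, not defaults.
    #include "caffe/proto/caffe.pb.h"

    void ConfigureFastRCNNLayers(caffe::LayerParameter* roi_layer,
                                 caffe::LayerParameter* loss_layer) {
      // ROI pooling: 6x6 output bins, feature map at 1/16 of the image scale.
      caffe::ROIPoolingParameter* roi = roi_layer->mutable_roi_pooling_param();
      roi->set_pooled_h(6);
      roi->set_pooled_w(6);
      roi->set_spatial_scale(0.0625f);
      // Smooth L1 loss: sigma controls where the quadratic region ends.
      loss_layer->mutable_smooth_l1_loss_param()->set_sigma(1.0f);
    }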
diff --git a/src/caffe/test/test_roi_pooling_layer.cpp b/src/caffe/test/test_roi_pooling_layer.cpp
new file mode 100644
index 00000000..9290bad2
--- /dev/null
+++ b/src/caffe/test/test_roi_pooling_layer.cpp
@@ -0,0 +1,103 @@
+// ------------------------------------------------------------------
+// Fast R-CNN
+// Copyright (c) 2015 Microsoft
+// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
+// Written by Ross Girshick
+// ------------------------------------------------------------------
+
+#include
+#include
+#include
+#include
+
+#include "boost/scoped_ptr.hpp"
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/fast_rcnn_layers.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+using boost::scoped_ptr;
+
+namespace caffe {
+
+typedef ::testing::Types<GPUDevice<float>, GPUDevice<double> > TestDtypesGPU;
+
+template <typename TypeParam>
+class ROIPoolingLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  ROIPoolingLayerTest()
+      : blob_bottom_data_(new Blob<Dtype>(4, 3, 12, 8)),
+        blob_bottom_rois_(new Blob<Dtype>(4, 5, 1, 1)),
+        blob_top_data_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    filler_param.set_std(10);
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_data_);
+    //for (int i = 0; i < blob_bottom_data_->count(); ++i) {
+    //  blob_bottom_data_->mutable_cpu_data()[i] = i;
+    //}
+    blob_bottom_vec_.push_back(blob_bottom_data_);
+    int i = 0;
+    blob_bottom_rois_->mutable_cpu_data()[0 + 5*i] = 0;  //caffe_rng_rand() % 4;
+    blob_bottom_rois_->mutable_cpu_data()[1 + 5*i] = 1;  // x1 < 8
+    blob_bottom_rois_->mutable_cpu_data()[2 + 5*i] = 1;  // y1 < 12
+    blob_bottom_rois_->mutable_cpu_data()[3 + 5*i] = 6;  // x2 < 8
+    blob_bottom_rois_->mutable_cpu_data()[4 + 5*i] = 6;  // y2 < 12
+    i = 1;
+    blob_bottom_rois_->mutable_cpu_data()[0 + 5*i] = 2;
+    blob_bottom_rois_->mutable_cpu_data()[1 + 5*i] = 6;  // x1 < 8
+    blob_bottom_rois_->mutable_cpu_data()[2 + 5*i] = 2;  // y1 < 12
+    blob_bottom_rois_->mutable_cpu_data()[3 + 5*i] = 7;  // x2 < 8
+    blob_bottom_rois_->mutable_cpu_data()[4 + 5*i] = 11;  // y2 < 12
+    i = 2;
+    blob_bottom_rois_->mutable_cpu_data()[0 + 5*i] = 1;
+    blob_bottom_rois_->mutable_cpu_data()[1 + 5*i] = 3;  // x1 < 8
+    blob_bottom_rois_->mutable_cpu_data()[2 + 5*i] = 1;  // y1 < 12
+    blob_bottom_rois_->mutable_cpu_data()[3 + 5*i] = 5;  // x2 < 8
+    blob_bottom_rois_->mutable_cpu_data()[4 + 5*i] = 10;  // y2 < 12
+    i = 3;
+    blob_bottom_rois_->mutable_cpu_data()[0 + 5*i] = 0;
+    blob_bottom_rois_->mutable_cpu_data()[1 + 5*i] = 3;  // x1 < 8
+    blob_bottom_rois_->mutable_cpu_data()[2 + 5*i] = 3;  // y1 < 12
+    blob_bottom_rois_->mutable_cpu_data()[3 + 5*i] = 3;  // x2 < 8
+    blob_bottom_rois_->mutable_cpu_data()[4 + 5*i] = 3;  // y2 < 12
+
+    blob_bottom_vec_.push_back(blob_bottom_rois_);
+    blob_top_vec_.push_back(blob_top_data_);
+  }
+  virtual ~ROIPoolingLayerTest() {
+    delete blob_bottom_data_;
+    delete blob_bottom_rois_;
+    delete blob_top_data_;
+  }
+  Blob<Dtype>* const blob_bottom_data_;
+  Blob<Dtype>* const blob_bottom_rois_;
+  Blob<Dtype>* const blob_top_data_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(ROIPoolingLayerTest, TestDtypesGPU);
+
+TYPED_TEST(ROIPoolingLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ROIPoolingParameter* roi_pooling_param =
+      layer_param.mutable_roi_pooling_param();
+  roi_pooling_param->set_pooled_h(6);
+  roi_pooling_param->set_pooled_w(6);
+  ROIPoolingLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-4, 1e-2);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+}
+
+}  // namespace caffe
diff --git a/src/caffe/test/test_smooth_L1_loss_layer.cpp b/src/caffe/test/test_smooth_L1_loss_layer.cpp
new file mode 100644
index 00000000..e5bc2bf2
--- /dev/null
+++ b/src/caffe/test/test_smooth_L1_loss_layer.cpp
@@ -0,0 +1,90 @@
+#include
+#include
+#include
+#include
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/fast_rcnn_layers.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+typedef ::testing::Types<GPUDevice<float>, GPUDevice<double> > TestDtypesGPU;
+
+template <typename TypeParam>
+class SmoothL1LossLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  SmoothL1LossLayerTest()
+      : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_bottom_inside_weights_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_bottom_outside_weights_(new Blob<Dtype>(10, 5, 1, 1)),
+        blob_top_loss_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter const_filler_param;
+    const_filler_param.set_value(-1.);
+    ConstantFiller<Dtype> const_filler(const_filler_param);
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+
+    filler.Fill(this->blob_bottom_data_);
+    blob_bottom_vec_.push_back(blob_bottom_data_);
+    filler.Fill(this->blob_bottom_label_);
+    blob_bottom_vec_.push_back(blob_bottom_label_);
+
+    //const_filler.Fill(this->blob_bottom_inside_weights_);
+    filler.Fill(this->blob_bottom_inside_weights_);
+    blob_bottom_vec_.push_back(blob_bottom_inside_weights_);
+    //const_filler.Fill(this->blob_bottom_outside_weights_);
+    filler.Fill(this->blob_bottom_outside_weights_);
+    blob_bottom_vec_.push_back(blob_bottom_outside_weights_);
+
+    blob_top_vec_.push_back(blob_top_loss_);
+  }
+  virtual ~SmoothL1LossLayerTest() {
+    delete blob_bottom_data_;
+    delete blob_bottom_label_;
+    delete blob_bottom_inside_weights_;
+    delete blob_bottom_outside_weights_;
+    delete blob_top_loss_;
+  }
+
+  Blob<Dtype>* const blob_bottom_data_;
+  Blob<Dtype>* const blob_bottom_label_;
+  Blob<Dtype>* const blob_bottom_inside_weights_;
+  Blob<Dtype>* const blob_bottom_outside_weights_;
+  Blob<Dtype>* const blob_top_loss_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(SmoothL1LossLayerTest, TestDtypesGPU);
+
+TYPED_TEST(SmoothL1LossLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  SmoothL1LossParameter* loss_param =
+      layer_param.mutable_smooth_l1_loss_param();
+  loss_param->set_sigma(2.4);
+
+  const Dtype kLossWeight = 3.7;
+  layer_param.add_loss_weight(kLossWeight);
+  SmoothL1LossLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 1);
+}
+
+}  // namespace caffe
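Note (not part of the patch): for readers checking the SmoothL1Forward/SmoothL1Backward kernels above, a minimal scalar CPU sketch of the same piecewise function and its derivative, written directly from the comments in the kernels; sigma2 is assumed to be sigma * sigma, as in the layer:

    // Reference-only sketch for spot-checking kernel outputs on the CPU.
    #include <cmath>

    float smooth_l1(float x, float sigma2) {
      float abs_x = std::fabs(x);
      if (abs_x < 1.0f / sigma2) {
        return 0.5f * x * x * sigma2;    // quadratic region near zero
      }
      return abs_x - 0.5f / sigma2;      // linear region elsewhere
    }

    float smooth_l1_grad(float x, float sigma2) {
      float abs_x = std::fabs(x);
      if (abs_x < 1.0f / sigma2) {
        return sigma2 * x;               // derivative of the quadratic part
      }
      return (x > 0.0f) - (x < 0.0f);    // sign(x)
    }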