Skip to content

Commit

Permalink
HDF5DataLayer shuffle: minor cleanup; clarification in HDF5DataParameter
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffdonahue committed Mar 13, 2015
1 parent 249aba4 commit 6fe2b04
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 27 deletions.
41 changes: 22 additions & 19 deletions src/caffe/layers/hdf5_data_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
#include "hdf5_hl.h"
#include "stdint.h"

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

Expand Down Expand Up @@ -48,23 +48,24 @@ void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {
CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename;

// MinTopBlobs==1 guarantees at least one top blob
int num = hdf_blobs_[0]->num();
CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis.";
const int num = hdf_blobs_[0]->shape(0);
for (int i = 1; i < top_size; ++i) {
CHECK_EQ(hdf_blobs_[i]->num(), num);
CHECK_EQ(hdf_blobs_[i]->shape(0), num);
}
// permutation in file is identity by default
// Default to identity permutation.
data_permutation_.clear();
data_permutation_.resize(hdf_blobs_[0]->num());
for (int i = 0; i < hdf_blobs_[0]->num(); i++)
data_permutation_.resize(hdf_blobs_[0]->shape(0));
for (int i = 0; i < hdf_blobs_[0]->shape(0); i++)
data_permutation_[i] = i;

// shuffle data when asked
// Shuffle if needed.
if (this->layer_param_.hdf5_data_param().shuffle()) {
std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->num()
<< " rows (shuffled)";
DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0)
<< " rows (shuffled)";
} else {
DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->num() << " rows";
DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows";
}
}

Expand Down Expand Up @@ -96,11 +97,12 @@ void HDF5DataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,

file_permutation_.clear();
file_permutation_.resize(num_files_);
// default order-permutation is identity permutation
for (int i = 0; i < num_files_; i++)
// Default to identity permutation.
for (int i = 0; i < num_files_; i++) {
file_permutation_[i] = i;
}

// only change order, when asked
// Shuffle if needed.
if (this->layer_param_.hdf5_data_param().shuffle()) {
std::random_shuffle(file_permutation_.begin(), file_permutation_.end());
}
Expand Down Expand Up @@ -131,25 +133,26 @@ void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
for (int i = 0; i < batch_size; ++i, ++current_row_) {
if (current_row_ == hdf_blobs_[0]->num()) {
if (current_row_ == hdf_blobs_[0]->shape(0)) {
if (num_files_ > 1) {
++current_file_;
if (current_file_ == num_files_) {
current_file_ = 0;
if (this->layer_param_.hdf5_data_param().shuffle())
if (this->layer_param_.hdf5_data_param().shuffle()) {
std::random_shuffle(file_permutation_.begin(),
file_permutation_.end());
file_permutation_.end());
}
DLOG(INFO) << "Looping around to first file.";
}
LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].
c_str());
LoadHDF5FileData(
hdf_filenames_[file_permutation_[current_file_]].c_str());
}
current_row_ = 0;
if (this->layer_param_.hdf5_data_param().shuffle())
std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
}
for (int j = 0; j < this->layer_param_.top_size(); ++j) {
int data_dim = top[j]->count() / top[j]->num();
int data_dim = top[j]->count() / top[j]->shape(0);
caffe_copy(data_dim,
&hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_]
* data_dim], &top[j]->mutable_cpu_data()[i * data_dim]);
Expand Down
15 changes: 8 additions & 7 deletions src/caffe/layers/hdf5_data_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ TODO:
#include "hdf5.h"
#include "hdf5_hl.h"

#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

Expand All @@ -21,25 +21,26 @@ void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
for (int i = 0; i < batch_size; ++i, ++current_row_) {
if (current_row_ == hdf_blobs_[0]->num()) {
if (current_row_ == hdf_blobs_[0]->shape(0)) {
if (num_files_ > 1) {
current_file_ += 1;
if (current_file_ == num_files_) {
current_file_ = 0;
if (this->layer_param_.hdf5_data_param().shuffle())
if (this->layer_param_.hdf5_data_param().shuffle()) {
std::random_shuffle(file_permutation_.begin(),
file_permutation_.end());
file_permutation_.end());
}
DLOG(INFO) << "Looping around to first file.";
}
LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]]
.c_str());
LoadHDF5FileData(
hdf_filenames_[file_permutation_[current_file_]].c_str());
}
current_row_ = 0;
if (this->layer_param_.hdf5_data_param().shuffle())
std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
}
for (int j = 0; j < this->layer_param_.top_size(); ++j) {
int data_dim = top[j]->count() / top[j]->num();
int data_dim = top[j]->count() / top[j]->shape(0);
caffe_copy(data_dim,
&hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_]
* data_dim], &top[j]->mutable_gpu_data()[i * data_dim]);
Expand Down
7 changes: 6 additions & 1 deletion src/caffe/proto/caffe.proto
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,12 @@ message HDF5DataParameter {
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 2;
// Specify shuffling the order of training data

// Specify whether to shuffle the data.
// If shuffle == true, the ordering of the HDF5 files is shuffled,
// and the ordering of data within any given HDF5 file is shuffled,
// but data between different files are not interleaved; all of a file's
// data are output (in a random order) before moving on to another file.
optional bool shuffle = 3 [default = false];
}

Expand Down

0 comments on commit 6fe2b04

Please sign in to comment.