diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index ba6c413819e4..e57712549e61 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -382,7 +382,8 @@ static inline bool SupportMKLDNNBN(const NDArray &input, const BatchNormParam &p TShape shape = input.shape(); return SupportMKLDNN(input) && shape.ndim() == 4 && param.axis == mxnet::op::batchnorm::DEFAULT_AXIS - && shape[param.axis] % 8 == 0; + && shape[param.axis] % 8 == 0 + && !mxnet::op::batchnorm::disable_mkl; } void BatchNormComputeExCPU(const nnvm::NodeAttrs &attrs, diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h index 019b5c932ac8..53712a6f921e 100644 --- a/tests/cpp/include/test_core_op.h +++ b/tests/cpp/include/test_core_op.h @@ -19,11 +19,12 @@ #ifndef TEST_CORE_OP_H_ #define TEST_CORE_OP_H_ +#include #include #include #include #include -#include +#include #include "./test_op.h" #include "../../../src/imperative/imperative_utils.h" @@ -61,37 +62,6 @@ template class CoreOpExecutor : public test::op::OperatorDataInitializer , public test::op::OperatorExecutorTiming { /*! \brief Performance timing categories */ - /*! - * \brief Access data blob as if on the CPU via a callback - * \tparam Type of callback Function to call with CPU-data NDArray - * \param src Source NDArray (on GPU or CPU) - * \param run_ctx Run context - * \param cb Callback Function to call with CPU-data NDArray - */ - template - static inline void AccessAsCPU(const NDArray &src, - const RunContext &run_ctx, - CallbackFunction cb) { -#if MXNET_USE_CUDA - if (src.ctx().dev_type == Context::kCPU) { - cb(src); - } else { - Context cpu_ctx, gpu_ctx = src.ctx(); - cpu_ctx.dev_type = Context::kCPU; - cpu_ctx.dev_id = 0; - NDArray on_cpu(src.shape(), cpu_ctx); - on_cpu.CheckAndAlloc(); - TBlob tmp1 = on_cpu.data(); - mxnet::ndarray::Copy(src.data(), &tmp1, cpu_ctx, gpu_ctx, run_ctx); - cb(on_cpu); - TBlob tmp2 = src.data(); - mxnet::ndarray::Copy(on_cpu.data(), &tmp2, gpu_ctx, cpu_ctx, run_ctx); - } -#else - cb(src); -#endif - } - /*! 
* \brief Parse additional arguments into NodeAttrs structure * \param op Pointer to operator object @@ -119,6 +89,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer */ static inline std::vector& CollectBlobs(const std::vector& src, std::vector *dest) { + dest->resize(0); dest->reserve(dest->size() + src.size()); for (size_t i = 0, n = src.size(); i < n; ++i) { dest->emplace_back(src[i].data()); @@ -132,13 +103,11 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param ctx Context to use when creating the array/tensor * \return The created NDArray */ - NDArray CreateRandArray(const TShape& shape, const Context& ctx) const { + NDArray CreateRandArray(const TShape& shape, const RunContext& run_ctx, int dtype) const { CHECK_GT(shape.Size(), 0); // Check it's a valid shape - NDArray array(shape, ctx, true, mshadow::DataType::kFlag); + NDArray array(shape, run_ctx.ctx, true, dtype); array.CheckAndAlloc(); - AccessAsCPU(array, ctx_.run_ctx, [this](const NDArray &arr) { - test::op::OperatorDataInitializer::FillRandom(arr.data()); - }); + test::op::OperatorDataInitializer::FillRandom(run_ctx, array.data()); return array; } @@ -148,13 +117,11 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param ctx Context to use when creating the array/tensor * \return The created NDArray */ - NDArray CreateZeroArray(const TShape& shape, const Context& ctx) const { + NDArray CreateZeroArray(const TShape& shape, const RunContext& run_ctx, int dtype) const { CHECK_GT(shape.Size(), 0); // Check it's a valid shape - NDArray array(shape, ctx, true, mshadow::DataType::kFlag); + NDArray array(shape, run_ctx.ctx, true, dtype); array.CheckAndAlloc(); - AccessAsCPU(array, ctx_.run_ctx, [this](const NDArray &arr) { - test::op::OperatorDataInitializer::FillZero(arr.data()); - }); + test::op::OperatorDataInitializer::FillZero(run_ctx, array.data()); return array; } @@ -225,40 +192,6 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer } public: - enum BlobVectorType { - kInput, - kOutput, - kAux, - kInGrad, - kOutGrad, - kBlobVectorTypeCount - }; - -#define CASE_STR(__v$) case (__v$): return #__v$ - - /*! \brief Convert BlobVectorType enum into a string */ - static inline const char *bvt2String(const BlobVectorType bvt) { - switch (bvt) { - CASE_STR(kInput); - CASE_STR(kOutput); - CASE_STR(kAux); - CASE_STR(kInGrad); - CASE_STR(kOutGrad); - default: - CHECK(false); - return ""; - } - } -#undef CASE_STR - - inline const std::vector& getBlobVect(const BlobVectorType bvt) const { - // Not implemented - CHECK(false); - static std::vector dummy; - return dummy; - } - - typedef DType DataType; typedef AccReal AccRealType; @@ -327,73 +260,55 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer #endif } - static nnvm::NodePtr GetBackwardDependency(const nnvm::NodePtr& node, - uint32_t num_inputs, - uint32_t num_outputs - //std::vector *p_save_inputs, - //std::vector *p_save_outputs - ) { + /*! 
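A minimal usage sketch of the two factory helpers as re-signed above (illustration only, not part of the patch; `exec` is an assumed CoreOpExecutor instance and `rctx` a valid RunContext). The dtype is now an explicit runtime argument instead of being fixed by the executor's DType template parameter:

  NDArray rnd  = exec.CreateRandArray(TShape({1, 3, 28, 28}), rctx, mshadow::kFloat32);
  NDArray zero = exec.CreateZeroArray(TShape({1, 3, 28, 28}), rctx, mshadow::kFloat16);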
+ * \brief Get the operator context + * \return Reference to this operator's context object + */ + const OpContext& ctx() const { + return ctx_; + } - const Op* op = node->op(); - if(op) { - if(!op->name.empty()) { - if(op->name == "BatchNorm") { - std::cout << "Imperative::GetBackwardDependency( " << op->name << " )" << std::endl; - } - } - } + static inline int default_dtype() { + using DTypeInfo = typename mshadow::DataType<DType>; + return DTypeInfo::kFlag; + } + nnvm::NodePtr GetBackwardDependency(const nnvm::NodePtr& node, + std::map<int, const NDArray*>* index2array) const { + index2array->clear(); static auto& fgradient = nnvm::Op::GetAttr<nnvm::FGradient>("FGradient"); -// std::vector<bool>& save_inputs = *p_save_inputs; -// std::vector<bool>& save_outputs = *p_save_outputs; -// save_inputs.resize(num_inputs); -// save_outputs.resize(num_outputs); -// std::fill(save_inputs.begin(), save_inputs.end(), false); -// std::fill(save_outputs.begin(), save_outputs.end(), false); + + const uint32_t num_inputs = inputs().size(); + const uint32_t num_outputs = outputs().size(); node->inputs.clear(); node->inputs.reserve(num_inputs); for (uint32_t i = 0; i < num_inputs; ++i) { node->inputs.emplace_back(nnvm::NodeEntry{nullptr, i, 0}); + (*index2array)[i] = &inputs()[i]; } if (fgradient.count(node->op())) { std::vector<nnvm::NodeEntry> ograd_entries; ograd_entries.reserve(num_outputs); for (uint32_t i = 0; i < num_outputs; ++i) { - ograd_entries.emplace_back(nnvm::NodeEntry{nullptr, i, 1}); + const uint32_t index = num_inputs + i; + ograd_entries.emplace_back(nnvm::NodeEntry{nullptr, index, 1}); + (*index2array)[index] = &outputs()[i]; } const std::vector<nnvm::NodeEntry> igrad_entries = fgradient[node->op()](node, ograd_entries); - if(!igrad_entries.empty()) { + if (!igrad_entries.empty()) { return igrad_entries[0].node; } - -// for (const auto& i : igrad_entries) { -// if (i.node == nullptr && i.version == 0) { -// save_inputs[i.index] = true; -// } else if (i.node == node) { -// save_outputs[i.index] = true; -// } -// } -// DFSVisit(igrad_entries, [&](const nnvm::NodePtr& gnode) { -// if (!gnode || gnode == node) return; -// for (const auto& i : gnode->inputs) { -// if (i.node == nullptr && i.version == 0) { -// save_inputs[i.index] = true; -// } else if (i.node == node) { -// save_outputs[i.index] = true; -// } -// } -// }); } return nullptr; } - nnvm::NodePtr CalcBackwardPass() const { + nnvm::NodePtr CalcBackwardPass(std::map<int, const NDArray*> *index2array) const { nnvm::NodePtr node = nnvm::Node::Create(); node->attrs = attrs_; - return GetBackwardDependency(node, inputs().size(), outputs().size()); + return GetBackwardDependency(node, index2array); } /*! 
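GetBackwardDependency now records, for every synthetic NodeEntry index it hands to FGradient, which forward-pass array that index stands for: forward inputs occupy [0, num_inputs) and the ograd entries occupy [num_inputs, num_inputs + num_outputs). A hypothetical consumer (illustration only; `fwd` is an assumed forward executor):

  std::map<int, const NDArray*> index2array;
  nnvm::NodePtr bwd = fwd.CalcBackwardPass(&index2array);
  for (const nnvm::NodeEntry& e : bwd->inputs) {
    // Resolve each backward input back to the forward array feeding it
    const NDArray* feeding = index2array.at(e.index);
    LOG(INFO) << "bwd input <- forward array with shape " << feeding->shape();
  }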
@@ -424,9 +339,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer op_ = nnvm::Op::Get(op_name); CHECK_NOTNULL(op_); + std::map index2array; nnvm::NodePtr bwd_node_ptr; - if(backward_for_op) { - bwd_node_ptr = backward_for_op->CalcBackwardPass(); + if (backward_for_op) { + bwd_node_ptr = backward_for_op->CalcBackwardPass(&index2array); } // Set up forward @@ -435,57 +351,33 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer int num_inputs = op_->num_inputs; if (op_->get_num_inputs) { num_inputs = op_->get_num_inputs(attrs_); - } else if(backward_for_op) { - CHECK_NOTNULL(bwd_node_ptr.get()); - num_inputs = static_cast(bwd_node_ptr->inputs.size()); + } else if (backward_for_op) { + if(bwd_node_ptr) { + num_inputs = static_cast(bwd_node_ptr->inputs.size()); + } } -// if(backward_for_op) { -// const int num_fwd_outputs = backward_for_op->outputs().size(); -// num_inputs = std::max(num_fwd_outputs, num_inputs); -// } - if (!inputs.empty()) { CHECK_EQ(inputs.size(), static_cast(num_inputs)); } int inferred_num_outputs /*, num_visible_outputs*/; -// imperative::SetNumOutputs(op_, attrs_, num_inputs, &inferred_num_outputs, -// &num_visible_outputs); - if (op_->get_num_outputs) { inferred_num_outputs = op_->get_num_outputs(attrs_); } else { inferred_num_outputs = op_->num_outputs; } -// static auto& finput_names = Op::GetAttr("FListInputNames"); -// if(finput_names.count(op_)) { -// std::vector i_names = finput_names[op_](attrs_); -// const int i_name_count = i_names.size(); -// num_inputs = std::max(i_name_count, num_inputs); -// } - //using FListInputNames = std::function (const NodeAttrs& attrs)>; - -// static auto& grad_fun_map = Op::GetAttr("FGradient"); -// if(grad_fun_map.count(op_)) { -// auto grad_fun = grad_fun_map[op_]; -// nnvm::NodePtr nodeptr = std::make_shared(); -// nodeptr->attrs = attrs_; -// std::vector out_grads; -// std::vector entries = grad_fun(nodeptr, out_grads); -// const int grad_count = entries.size(); -// num_inputs = std::max(grad_count, num_inputs); -// } - - //CHECK_GE(inferred_num_outputs, num_visible_outputs); // Generic, all shapes the same. Probably this will need to be adjusted for more complex // operators such as dot - std::vector input_shapes; - for (size_t i = 0, n = num_inputs; i < n; ++i) { - input_shapes.emplace_back(i < input_shapes_.size() ? input_shapes_[i] - : input_shapes_[input_shapes_.size() - 1]); + std::vector input_shapes; + if (!input_shapes_.empty()) { + for (size_t i = 0, n = num_inputs; i < n; ++i) { + input_shapes.emplace_back(i < input_shapes_.size() ? input_shapes_[i] + : input_shapes_[input_shapes_.size() + - 1]); + } } std::vector inputs_p, outputs_p; @@ -498,52 +390,115 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer outputs_.reserve(inferred_num_outputs); outputs_p.reserve(inferred_num_outputs); - for (size_t i = 0; i < static_cast(num_inputs); ++i) { - CHECK_LT(i, static_cast(input_shapes.size())); - inputs_.emplace_back(i < inputs.size() ? 
inputs[i] : CreateRandArray(input_shapes[i], - ctx_.run_ctx.ctx)); - inputs_p.emplace_back(&*inputs_.rbegin()); + std::vector<int> input_types; + input_types.reserve(num_inputs); + std::vector<int> output_types; + output_types.reserve(inferred_num_outputs); + + static auto& finfer_type = Op::GetAttr<nnvm::FInferType>("FInferType"); + if (finfer_type.count(op_)) { + input_types.resize(num_inputs, -1); + input_types[0] = default_dtype(); // Set first input to default type + output_types.resize(inferred_num_outputs, -1); + finfer_type[op_](attrs_, &input_types, &output_types); + CHECK_EQ(input_types.size(), num_inputs); + CHECK_EQ(output_types.size(), inferred_num_outputs); + } else { + if (backward_for_op) { + if (bwd_node_ptr) { + CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs); + input_types.resize(bwd_node_ptr->inputs.size(), -1); + for (size_t i = 0; i < num_inputs; ++i) { + const int map_key = bwd_node_ptr->inputs[i].index; + CHECK(index2array.find(map_key) != index2array.end()); + input_types[i] = index2array[map_key]->dtype(); + } + for (const auto &fwd_inp : backward_for_op->inputs()) { + output_types.emplace_back(fwd_inp.data().type_flag_); + } + } else { + for (size_t x = 0; x < num_inputs; ++x) { + input_types.emplace_back(default_dtype()); + } + for (const auto &fwd_inp : backward_for_op->inputs()) { + output_types.emplace_back(fwd_inp.data().type_flag_); + } + } + } else { + LOG(FATAL) << "Unreachable: an operator without FInferType is only expected on the backward pass"; + } } // Output arrays - if(outputs_.empty()) { + if (outputs_.empty()) { std::vector<TShape> output_shapes; static auto& finfer_shape = Op::GetAttr<nnvm::FInferShape>("FInferShape"); if (finfer_shape.count(op_)) { nnvm::FInferShape call_infer_shapes = finfer_shape[op_]; output_shapes.resize(inferred_num_outputs); call_infer_shapes(attrs_, &input_shapes, &output_shapes); + input_shapes_ = input_shapes; } else { - // TODO: this should be only if outputs param is empty - output_shapes = input_shapes; - output_shapes.resize(inferred_num_outputs); + if (backward_for_op) { + // BWD Input shapes + if (bwd_node_ptr) { + input_shapes.clear(); + CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs); + for (size_t i = 0; i < num_inputs; ++i) { + const int map_key = bwd_node_ptr->inputs[i].index; + CHECK(index2array.find(map_key) != index2array.end()); + input_shapes.push_back(index2array[map_key]->shape()); + } + } + input_shapes_ = input_shapes; + // BWD Output shapes + output_shapes = backward_for_op->input_shapes_; + CHECK_EQ(output_shapes.size(), inferred_num_outputs); + } else { + output_shapes = input_shapes; + output_shapes.resize(inferred_num_outputs); + } } CHECK_EQ(output_shapes.size(), inferred_num_outputs); + for (size_t i = 0; i < static_cast<size_t>(inferred_num_outputs); ++i) { // If supplied and valid, pass from the supplied outputs vector // Otherwise use empty for forward pass, or zero-filled for backward pass outputs_.emplace_back(i < outputs.size() ? outputs[i] : (backward_for_op ? 
CreateZeroArray(output_shapes[i], - ctx_.run_ctx.ctx) + ctx_.run_ctx, + output_types[i]) : NDArray())); outputs_p.emplace_back(&*outputs_.rbegin()); } } + for (size_t i = 0; i < static_cast(num_inputs); ++i) { + CHECK_LT(i, static_cast(input_shapes.size())); + inputs_.emplace_back(i < inputs.size() + ? inputs[i] : CreateRandArray(input_shapes[i], + ctx_.run_ctx, + input_types[i])); + inputs_p.emplace_back(&*inputs_.rbegin()); + } + if (!backward_for_op) { DispatchMode dispatch_mode = DispatchMode::kUndefined; imperative::SetShapeType(ctx_.run_ctx.ctx, attrs_, inputs_p, outputs_p, &dispatch_mode); - } else { - // Backward op, so set based upon inputs - //CHECK_EQ(static_cast(num_visible_outputs), backward_for_op->inputs().size()); -// for (int i = 0; i < num_visible_outputs; ++i) { -// CHECK_LT(static_cast(i), input_shapes.size()); -// // backward outputs should look like forward inputs -// // TODO(cjolivier01): This check fails for dot product... -// // Need better inference of backward shapes -// // CHECK_EQ(backward_for_op->inputs()[i].shape(), outputs_[i].shape()); -// } } std::vector req; @@ -591,11 +546,15 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer template inline bool initForward(const OpProp &opProp, std::vector *in_type) { Init(opProp.GetArgs()); + resetForward(); return true; } template - inline bool initBackward(const OpProp &opProp, std::vector *in_type) { return true; } + inline bool initBackward(const OpProp &opProp, std::vector *in_type) { + resetBackward(); + return true; + } inline void forward(const size_t count) { perf::TimingItem timeF(&OperatorExecutorTiming::GetTiming(), kForward, "Forward", count); @@ -620,6 +579,8 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer void Execute() { CHECK_EQ(initialized_, true); CHECK_NOTNULL(function_); + CollectBlobs(inputs_, &blob_inputs_); + CollectBlobs(outputs_, &blob_outputs_); function_(attrs_, ctx_, blob_inputs_, req_, blob_outputs_); } @@ -668,14 +629,6 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer return false; } - /*! - * \brief Get the operator context - * \return Reference to this operator's context object - */ - const OpContext& ctx() const { - return ctx_; - } - /*! * \brief Access input NDArray vector * \return reference to NDArray vector of forward inputs @@ -726,13 +679,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer verbose_ = verbose; } - virtual void resetForward() { - CHECK(false) << "Not implemented, generally inits forward-pass data"; - } + virtual void resetForward() {} - virtual void resetBackward() { - CHECK(false) << "Not implemented, generally inits backward-pass data"; - } + virtual void resetBackward() {} private: /*! 
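The call order implied by the changes above, summarized as a sketch (harness-internal names):

  // Init(args)      -> build inputs_/outputs_, infer shapes and dtypes
  // resetForward()  -> seed forward-pass data (base implementation is now a no-op)
  // resetBackward() -> seed backward-pass data (base implementation is now a no-op)
  // Execute()       -> CollectBlobs() again, then invoke the cached function_
  //
  // Re-collecting blob_inputs_/blob_outputs_ inside Execute() keeps the raw
  // TBlob views in sync if an NDArray was re-allocated after initialization.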
diff --git a/tests/cpp/include/test_legacy_op.h b/tests/cpp/include/test_legacy_op.h index 498fa06650a1..e4c5b3e1febb 100644 --- a/tests/cpp/include/test_legacy_op.h +++ b/tests/cpp/include/test_legacy_op.h @@ -376,16 +376,6 @@ class LegacyOperatorExecutor : public OperatorDataInitializer<DType> copy(blob, sourceData, 0, sourceDataSize); } - void FillRandom() { - for (size_t j = 0, jn = this->c_.all_blob_vects_.size(); j < jn; ++j) { - std::vector<TBlob> *data_vect = this->c_.all_blob_vects_[j]; - if (data_vect) { - for (size_t i = 0, n = data_vect->size(); i < n; ++i) { - OperatorDataInitializer<DType>::FillRandom((*data_vect)[i]); - } - } - } - } std::vector<TBlob>& inputs() { return c_.blob_input_vec_; } const std::vector<TBlob>& inputs() const { return c_.blob_input_vec_; } diff --git a/tests/cpp/include/test_op.h b/tests/cpp/include/test_op.h index 066168e2623f..7a0c6d3878ee 100644 --- a/tests/cpp/include/test_op.h +++ b/tests/cpp/include/test_op.h @@ -100,12 +100,12 @@ class OperatorDataInitializer { * \brief Fill a blob with random values * \param blob Blob which to fill with random values */ - void FillRandom(const TBlob& blob) const { + void FillRandom(const RunContext& run_ctx, const TBlob& blob) const { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wabsolute-value" std::uniform_real_distribution<> dis_real(-5.0, 5.0); std::uniform_int_distribution<> dis_int(-128, 127); - test::patternFill<DType>(&blob, [this, &dis_real, &dis_int]() -> DType { + test::patternFill(run_ctx, &blob, [this, &dis_real, &dis_int]() -> DType { if (!std::is_integral<DType>::value) { DType val; do { @@ -123,8 +123,8 @@ class OperatorDataInitializer { #pragma clang diagnostic pop } - void FillZero(const TBlob& blob) const { - test::patternFill<DType>(&blob, []() -> DType { return DType(0); }); + void FillZero(const RunContext& run_ctx, const TBlob& blob) const { + test::patternFill(run_ctx, &blob, []() -> DType { return DType(0); }); } private: @@ -223,8 +223,8 @@ class Validator { /*! \brief Compare blob data */ static bool compare(const TBlob& b1, const TBlob& b2) { if (b1.shape_ == b2.shape_) { + CHECK_EQ(b1.type_flag_, b2.type_flag_) << "Can't compare blobs of different data types"; MSHADOW_REAL_TYPE_SWITCH(b1.type_flag_, DTypeX, { - CHECK_EQ(b1.type_flag_, b2.type_flag_) << "Can't compare blobs of different data types"; const DTypeX *d1 = b1.dptr<DTypeX>(); const DTypeX *d2 = b2.dptr<DTypeX>(); CHECK_NE(d1, d2); // don't compare the same memory @@ -255,7 +255,7 @@ const DTypeX v2 = *valuePtr++; EXPECT_NEAR(v1, v2, kErrorBound); if (!isNear(v1, v2, kErrorBound) && !warningCount++) { - LOG(WARNING) << "Near test failure: " << i << ", " << n << std::endl << std::flush; + on_failure(i, n, v1, v2, kErrorBound); } } return true; diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index e33b9a6e68c7..1e4faebfad99 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -34,6 +34,8 @@ #include #include +#include "../../../src/ndarray/ndarray_function.h" + #if MXNET_USE_VTUNE #include #endif @@ -132,56 +134,136 @@ class StandaloneBlob : public TBlob { std::shared_ptr memory_; }; +/*! 
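Usage sketch for the re-signed initializers (illustration only; `init`, `rctx`, and `blob` are assumed to be an OperatorDataInitializer-derived object, a RunContext, and a TBlob). Threading the RunContext through lets the helpers stage GPU data on the CPU:

  init.FillRandom(rctx, blob);  // pattern-based random fill, CPU or GPU blob
  init.FillZero(rctx, blob);    // zero fill, CPU or GPU blob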
+ * \brief Access a TBlob's data on the CPU within the scope of this object + * Overloaded () operator returns the CPU-bound TBlob + * RAII will copy the data back to the GPU (if it was a GPU blob) + */ +class CAccessAsCPU { + public: + CAccessAsCPU(const RunContext& run_ctx, const TBlob& src, bool copy_back_result = true) + : run_ctx_(run_ctx) + , src_(src) + , copy_back_result_(copy_back_result) { #if MXNET_USE_CUDA -/*! \brief Return blob in CPU memory */ -inline StandaloneBlob BlobOnCPU(const RunContext &rctx, const TBlob& src) { - StandaloneBlob res(src.shape_, false, src.type_flag_); - if (src.dev_mask() == cpu::kDevMask) { - LOG(WARNING) << "BlobOnCPU() is safe, but try not to call this with a CPU blob" - << " because it is inefficient"; - memcpy(res.dptr_, src.dptr_, res.MemorySize()); - } else { - mshadow::Stream *stream = rctx.get_stream(); - MSHADOW_TYPE_SWITCH(src.type_flag_, DType, { - mshadow::Copy(res.FlatTo1D(), src.FlatTo1D(stream), stream); - }); + if (run_ctx.ctx.dev_type == Context::kCPU) { + blob_ = src; + } else { + Context cpu_ctx, gpu_ctx = run_ctx.ctx; + cpu_ctx.dev_type = Context::kCPU; + cpu_ctx.dev_id = 0; + NDArray on_cpu(src.shape_, cpu_ctx, false, src_.type_flag_); + on_cpu.CheckAndAlloc(); + blob_ = on_cpu.data(); + run_ctx.get_stream()->Wait(); + mxnet::ndarray::Copy(src, &blob_, cpu_ctx, gpu_ctx, run_ctx); + run_ctx.get_stream()->Wait(); + on_cpu_ = on_cpu; + } +#else + blob_ = src; +#endif + } + ~CAccessAsCPU() { +#if MXNET_USE_CUDA + if (copy_back_result_) { + // Copy back from GPU to CPU + if (run_ctx_.ctx.dev_type == Context::kGPU) { + Context cpu_ctx, gpu_ctx = run_ctx_.ctx; + cpu_ctx.dev_type = Context::kCPU; + cpu_ctx.dev_id = 0; + run_ctx_.get_stream()->Wait(); + mxnet::ndarray::Copy(blob_, &src_, gpu_ctx, cpu_ctx, run_ctx_); + run_ctx_.get_stream()->Wait(); + } + } +#endif + } + inline const TBlob& operator ()() const { + return blob_; } - return res; -} -#endif // MXNET_USE_CUDA -constexpr const size_t MPRINT_PRECISION = 5; + private: + const RunContext run_ctx_; + TBlob src_; + const bool copy_back_result_; + NDArray on_cpu_; + TBlob blob_; +}; -template -inline void fill(const TBlob& blob, const DType val) { - DType *p1 = blob.dptr(); - for (size_t i = 0, n = blob.Size(); i < n; ++i) { - *p1++ = val; +/*! + * \brief Access data blob as if on the CPU via a callback + * \tparam Type of callback Function to call with CPU-data NDArray + * \param src Source NDArray (on GPU or CPU) + * \param run_ctx Run context + * \param cb Callback Function to call with CPU-data NDArray + */ +template +inline void AccessAsCPU(const NDArray &src, + const RunContext &run_ctx, + CallbackFunction cb) { +#if MXNET_USE_CUDA + if (src.ctx().dev_type == Context::kCPU) { + cb(src); + } else { + Context cpu_ctx, gpu_ctx = src.ctx(); + cpu_ctx.dev_type = Context::kCPU; + cpu_ctx.dev_id = 0; + NDArray on_cpu(src.shape(), cpu_ctx, false, src.dtype()); + on_cpu.CheckAndAlloc(); + TBlob tmp1 = on_cpu.data(); + run_ctx.get_stream()->Wait(); + mxnet::ndarray::Copy(src.data(), &tmp1, cpu_ctx, gpu_ctx, run_ctx); + run_ctx.get_stream()->Wait(); + cb(on_cpu); + TBlob tmp2 = src.data(); + mxnet::ndarray::Copy(on_cpu.data(), &tmp2, gpu_ctx, cpu_ctx, run_ctx); + run_ctx.get_stream()->Wait(); } +#else + cb(src); +#endif } -template -inline void try_fill(const TBlob *blob, const DType val) { - if(blob) { - DType *p1 = blob->dptr(); - for (size_t i = 0, n = blob->Size(); i < n; ++i) { - *p1++ = val; - } +/*! 
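A minimal usage sketch of the RAII wrapper defined above (illustration only; `rctx` and `maybe_gpu_blob` are assumed):

  {
    test::CAccessAsCPU cpu(rctx, maybe_gpu_blob, /*copy_back_result=*/true);
    const TBlob& host = cpu();  // CPU-resident view of the data
    // ... read or mutate host here ...
  }  // destructor copies the data back to the source blob (CUDA builds only)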
+ * \brief Access data blob as if on the CPU via a callback + * \tparam CallbackFunction Type of callback function to call with the CPU-data TBlob + * \param src Source TBlob (on GPU or CPU) + * \param run_ctx Run context + * \param cb Callback function to call with the CPU-data TBlob + */ +template <typename CallbackFunction> +inline void AccessAsCPU(const TBlob& src, + const RunContext &run_ctx, + CallbackFunction cb) { +#if MXNET_USE_CUDA + if (run_ctx.ctx.dev_type == Context::kCPU) { + cb(src); + } else { + cb(CAccessAsCPU(run_ctx, src, true)()); + } +#else + cb(src); +#endif +} constexpr const size_t MPRINT_PRECISION = 5; template <typename DType> -inline void fill(const TBlob& blob, const DType *valArray) { - DType *p1 = blob.dptr<DType>(); - for (size_t i = 0, n = blob.Size(); i < n; ++i) { - *p1++ = *valArray++; - } +inline void fill(const RunContext &run_ctx, const TBlob& _blob, const DType val) { + AccessAsCPU(_blob, run_ctx, [val](const TBlob& blob) { + MSHADOW_TYPE_SWITCH(blob.type_flag_, DTypeX, { + DTypeX *p1 = blob.dptr<DTypeX>(); + for (size_t i = 0, n = blob.Size(); i < n; ++i) { + *p1++ = val; + } + }); + }); } template <typename DType> -inline void try_fill(const std::vector<TBlob>& container, size_t index, const DType value) { - if (index < container.size()) { - test::fill(container[index], value); +inline void try_fill(const RunContext &run_ctx, const TBlob *blob, const DType val) { + if (blob) { + fill(run_ctx, *blob, val); } } @@ -292,7 +374,8 @@ inline StreamType& print_blob_(const RunContext& ctx, const bool add_endl = true) { #if MXNET_USE_CUDA if (blob.dev_mask() == gpu::kDevMask) { - return print_blob_<DType>(ctx, _os, BlobOnCPU(ctx, blob), doChannels, doBatches, add_endl); + return print_blob_<DType>(ctx, _os, CAccessAsCPU(ctx, blob, false)(), doChannels, + doBatches, add_endl); } #endif // MXNET_USE_CUDA @@ -407,9 +490,10 @@ inline StreamType& print_blob_(const RunContext& ctx, if (add_endl) { os << std::endl; } - } - if (!add_endl) { + } else if (!add_endl) { os << " "; + } else { + os << std::endl; } os << std::flush; return os; } @@ -553,62 +637,76 @@ inline std::string type_name() { return demangle(typeid(T).name()); } * 2D: batch item -> channel -> row -> col * 3D: batch item -> channel -> col */ -template <typename DType, typename GetNextData> -static inline void patternFill(const TBlob *blob, GetNextData getNextData) { - const size_t dim = blob->ndim(); - CHECK_LE(dim, 5U) << "Will need to handle above 3 dimensions (another for loop)"; - const size_t num = blob->size(0); - const size_t channels = dim > 1 ? blob->size(1) : 1; - const size_t depth = dim > 2 ? blob->size(2) : 1; - const size_t height = dim > 3 ? blob->size(3) : 1; - const size_t width = dim > 4 ? 
blob->size(4) : 1; - const size_t numberOfIndexes = blob->shape_.Size(); - for (size_t n = 0; n < num; ++n) { - if (dim > 1) { - for (size_t ch = 0; ch < channels; ++ch) { - if (dim > 2) { - for (size_t d = 0; d < depth; ++d) { - if (dim > 3) { - for (size_t row = 0; row < height; ++row) { - if (dim > 4) { - for (size_t col = 0; col < width; ++col) { - if (dim == 5) { - const size_t idx = test::offset(blob->shape_, {n, ch, d, row, col}); - CHECK_LT(idx, numberOfIndexes); - DType &f = blob->dptr()[idx]; - f = getNextData(); - } else { - CHECK(dim <= 5) << "Unimplemented dimension: " << dim; +template +static inline void patternFill(const RunContext& run_ctx, + const TBlob *_blob, + GetNextData getNextData) { + AccessAsCPU(*_blob, run_ctx, [getNextData](const TBlob& blob) { + const size_t dim = static_cast(blob.ndim()); + CHECK_LE(dim, 5U) << "Will need to handle above 3 dimensions (another for loop)"; + const size_t num = blob.size(0); + const size_t channels = dim > 1 ? blob.size(1) : 1; + const size_t depth = dim > 2 ? blob.size(2) : 1; + const size_t height = dim > 3 ? blob.size(3) : 1; + const size_t width = dim > 4 ? blob.size(4) : 1; + const size_t numberOfIndexes = blob.shape_.Size(); + for (size_t n = 0; n < num; ++n) { + if (dim > 1) { + for (size_t ch = 0; ch < channels; ++ch) { + if (dim > 2) { + for (size_t d = 0; d < depth; ++d) { + if (dim > 3) { + for (size_t row = 0; row < height; ++row) { + if (dim > 4) { + for (size_t col = 0; col < width; ++col) { + if (dim == 5) { + const size_t idx = test::offset(blob.shape_, {n, ch, d, row, col}); + CHECK_LT(idx, numberOfIndexes); + MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { + ThisDataType &f = blob.dptr()[idx]; + f = getNextData(); + }); + } else { + CHECK(dim <= 5) << "Unimplemented dimension: " << dim; + } } + } else { + const size_t idx = test::offset(blob.shape_, {n, ch, d, row}); + CHECK_LT(idx, numberOfIndexes); + MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { + ThisDataType &f = blob.dptr()[idx]; + f = getNextData(); + }); } - } else { - const size_t idx = test::offset(blob->shape_, {n, ch, d, row}); - CHECK_LT(idx, numberOfIndexes); - DType &f = blob->dptr()[idx]; - f = getNextData(); } + } else { + const size_t idx = test::offset(blob.shape_, {n, ch, d}); + CHECK_LT(idx, numberOfIndexes); + MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { + ThisDataType &f = blob.dptr()[idx]; + f = getNextData(); + }); } - } else { - const size_t idx = test::offset(blob->shape_, {n, ch, d}); - CHECK_LT(idx, numberOfIndexes); - DType &f = blob->dptr()[idx]; - f = getNextData(); } + } else { + const size_t idx = test::offset(blob.shape_, {n, ch}); + CHECK_LT(idx, numberOfIndexes); + MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { + ThisDataType &f = blob.dptr()[idx]; + f = getNextData(); + }); } - } else { - const size_t idx = test::offset(blob->shape_, {n, ch}); - CHECK_LT(idx, numberOfIndexes); - DType &f = blob->dptr()[idx]; - f = getNextData(); } + } else { + const size_t idx = test::offset(blob.shape_, {n}); + CHECK_LT(idx, numberOfIndexes); + MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { + ThisDataType &f = blob.dptr()[idx]; + f = getNextData(); + }); } - } else { - const size_t idx = test::offset(blob->shape_, {n}); - CHECK_LT(idx, numberOfIndexes); - DType &f = blob->dptr()[idx]; - f = getNextData(); } - } + }); } /*! 
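Usage sketch for the reworked patternFill (illustration only; `rctx` and `blob` are assumed). The blob's dtype is now resolved internally via MSHADOW_TYPE_SWITCH, so the generator can simply produce doubles:

  double v = 0;
  test::patternFill(rctx, &blob, [&v]() -> double { return v += 1; });  // 1, 2, 3, ...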
\brief Return a random number within a given range (inclusive) */ diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index aaa1add21b5f..4b08d985de3e 100644 --- a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -18,9 +18,9 @@ */ /*! - * Copyright (c) 2017 by Contributors + * Copyright (c) 2018 by Contributors * \file batchnorm_test.cc - * \brief batchnorm operator unit test utility functions + * \brief batchnorm operator unit tests and utility functions * \author Chris Olivier */ @@ -28,14 +28,14 @@ #include #include "../../src/operator/nn/batch_norm-inl.h" #include "../../src/operator/batch_norm_v1-inl.h" +#include "../../src/operator/operator_common.h" #include "./test_legacy_op.h" #include "./test_core_op.h" #include "executor/exec_pass.h" using namespace mxnet; -#define SIMPLE_DIMENSIONS 1 -#define MXNET_DUMP_C 0 +#define SIMPLE_DIMENSIONS 0 #define DISABLE_VALIDATION 0 // If performance profiling, may do things // that cause validation to fail @@ -49,8 +49,8 @@ static constexpr int DW = 3; static constexpr int BATCH_SIZE = 1; static constexpr int CHANNELS = 1; static constexpr int DEPTH = 1; -static constexpr int DH = 2; -static constexpr int DW = 1; +static constexpr int DH = 3; +static constexpr int DW = 2; #endif static constexpr int TIMING_BATCH_SIZE = 128; @@ -59,11 +59,52 @@ static constexpr int TIMING_DEPTH = 2; static constexpr int TIMING_DH = 28; static constexpr int TIMING_DW = 28; +#define PRT(__lbl$, __var$) \ + test::print(ctx.run_ctx, &(std::cout << (__lbl$) << ": "), (__var$), true) + +/*! + * \brief Forward + */ +enum ForwardInputs { + /* in_data */ kForInData, kForGamma, kForBeta, + /* aux_states */ kForMovingMean, kForMovingVar +}; +enum ForwardOutputs { + /* outputs */ kForOutData, kForOutMean, kForOutVar +}; + +/*! + * \brief Backward + */ +enum BackwardInputs { + /* out_grad */ bwd_out_grad_Grad, bwd_out_grad_Mean, bwd_out_grad_Var, + /* in_data */ bwd_in_data_Data, bwd_in_data_Gamma, bwd_in_data_Beta, + /* aux_states */ bwd_aux_states_MovingMean, bwd_aux_states_MovingVar, + /* out_data */ bwd_out_data_Data, bwd_out_data_Mean, bwd_out_data_Var +}; +enum BackwardOutputs { + /* in_grad */ bwd_in_grad_Data /* Original input data */, + /* weight, bias */ bwd_in_grad_Gamma, bwd_in_grad_Beta +}; + +/** + * _____ _ _____ _ _ + * | __ \ | | |_ _| (_)| | + * | | | | __ _| |_ __ _ | | _ __ _ | |_ + * | | | |/ _` | __|/ _` | | | | '_ \| || __| + * | |__| | (_| | |_| (_| | _| |_| | | | || |_ + * |_____/ \__,_|\__|\__,_| |_____|_| |_|_| \__| + * + * + */ /*! 
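Spelled out, the BackwardInputs enum above flattens the four argument groups of BatchNorm's gradient into the single index space used by bwd_inputs() (this is the test harness's convention, not an nnvm API):

  //   out_grad   : Grad, Mean, Var        -> bwd_inputs()[0..2]
  //   in_data    : Data, Gamma, Beta      -> bwd_inputs()[3..5]
  //   aux_states : MovingMean, MovingVar  -> bwd_inputs()[6..7]
  //   out_data   : Data, Mean, Var        -> bwd_inputs()[8..10]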
\brief BatchNorm-specific test data */ template class BNOperatorExecutor : public test::op::CoreOpExecutor { using Super = typename test::op::CoreOpExecutor; + public: + using Super::ctx; + BNOperatorExecutor(const bool isGPU, const TShape& inputShape, const test::op::kwargs_t& kwargs, const bool hasWeightAndBias = false) @@ -72,132 +113,78 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { param_.Init(kwargs); } - //using BlobVectorType = typename test::op::CoreOpExecutor::BlobVectorType; - - enum ForwardInputs { kForInData, kForGamma, kForBeta, kForMovingMean, kForMovingVar }; - enum ForwardOutputs { kForOutData, kForOutMean, kForOutVar }; - - enum BackwardInputs { kBackOutGrad, kBackOutGradMean, kBackOutGradVar, kBackData, - kBackGamma, kBackBeta, kBackInMovingMean, kBackInMovingVar, kBackOutData, kBackOutMean, - kBackOutVar }; - - enum WhichArray { - kForwardIn, - kForwardOut, - kBackwardIn, - kBackwardOut - }; - - const NDArray *GetForwardInArray(const int idx) const { + const NDArray *GetForwardInArray(const ForwardInputs idx) const { const std::vector &arrs = Super::inputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetForwardOutArray(const int idx) const { + const NDArray *GetForwardOutArray(const ForwardOutputs idx) const { const std::vector &arrs = Super::outputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardOutArray(const int idx) const { + const NDArray *GetBackwardInArray(const BackwardInputs idx) { + const std::vector &arrs = Super::bwd_inputs(); + CHECK_LT(idx, arrs.size()); + return &arrs[idx]; + } + + const NDArray *GetBackwardOutArray(const BackwardOutputs idx) const { const std::vector &arrs = Super::bwd_outputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardInArray(const int idx) const { - const std::vector &arrs = Super::bwd_inputs(); - switch (idx) { - case kBackOutGrad: - CHECK_LT(kBackOutGrad, arrs.size()); - return &arrs[kBackOutGrad]; - case kBackOutGradMean: - if (param_.output_mean_var) { - CHECK_LT(kBackOutGradMean, arrs.size()); - return &arrs[kBackOutGradMean]; - } else { - CHECK(false); - return nullptr; - } - case kBackOutGradVar: - if (param_.output_mean_var) { - return &arrs[kBackOutGradVar]; - } else { - CHECK(false); - return nullptr; - } - default: { - const size_t index = param_.output_mean_var ? 
idx : idx - 2; - if(index < arrs.size()) { - return &arrs[index]; - } - return nullptr; - } - } - } + NDArray *GetArray(const ForwardInputs idx) { + return const_cast<NDArray *>(GetForwardInArray(idx)); } - const TBlob *GetBackwardInBlob(const int idx) const { - const NDArray * arr = GetBackwardInArray(idx); - if(arr) { - return &arr->data(); - } - return nullptr; + NDArray *GetArray(const ForwardOutputs idx) { + return const_cast<NDArray *>(GetForwardOutArray(idx)); } - const NDArray *GetArray(const WhichArray wa, const int idx) const { - switch(wa) { - case kForwardIn: - return GetForwardInArray(idx); - case kForwardOut: - return GetForwardOutArray(idx); - case kBackwardIn: - return GetBackwardOutArray(idx); - case kBackwardOut: - default: - CHECK(false); // need to check params - return nullptr; - } + NDArray *GetArray(const BackwardOutputs idx) { + return const_cast<NDArray *>(GetBackwardOutArray(idx)); } - inline const TBlob& Blob(const NDArray *arr) const { return arr->data(); } + NDArray *GetArray(const BackwardInputs idx) { + return const_cast<NDArray *>(GetBackwardInArray(idx)); + } + + inline const TBlob& Blob(const NDArray *arr) { return arr->data(); } template <typename EnumType> - const TBlob& GetBlob(const WhichArray wa, const EnumType idx) const { - return GetArray(wa, idx)->data(); + const TBlob& GetBlob(const EnumType idx) const { + return const_cast<BNOperatorExecutor<DType, AccReal> *>(this)->GetArray(idx)->data(); } void resetForward() override { - // Start by filling all inputs and outputs with an arbitrary value + Super::resetForward(); + + // Start by filling all inputs and outputs with arbitrary values for (size_t i = 0, n = Super::inputs().size(); i < n; ++i) { - const TBlob& out = Blob(&Super::inputs()[i]); - const int dtype = out.type_flag_; - MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.1234)); }); + test::try_fill(ctx().run_ctx, &Super::inputs()[i].data(), 0.1234); } for (size_t i = 0, n = Super::outputs().size(); i < n; ++i) { - const TBlob& out = Blob(&Super::outputs()[i]); - const int dtype = out.type_flag_; - MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.1234)); }); + test::try_fill(ctx().run_ctx, &Super::outputs()[i].data(), 0.5678); + } + for (size_t i = 0, n = Super::bwd_inputs().size(); i < n; ++i) { + test::try_fill(ctx().run_ctx, &Super::bwd_inputs()[i].data(), 0.9012); + } + for (size_t i = 0, n = Super::bwd_outputs().size(); i < n; ++i) { + test::try_fill(ctx().run_ctx, &Super::bwd_outputs()[i].data(), 0.3456); } // Init input data - MSHADOW_TYPE_SWITCH( - Blob(GetForwardInArray(kForInData)).type_flag_, - //this->c_.blob_input_vec_[mxnet::op::batchnorm::kData].type_flag_, - DTypeX, - { - DTypeX val = 0; - test::patternFill( - &Blob(GetForwardInArray(kForInData)), - //&this->c_.blob_input_vec_[mxnet::op::batchnorm::kData], - [&val]{ return val += 1; }); }); + double val = 0; + test::patternFill(ctx().run_ctx, &GetBlob(kForInData), [&val]() -> double { return val += 1; }); MSHADOW_TYPE_SWITCH( - Blob(GetForwardInArray(kForGamma)).type_flag_, - //this->c_.blob_input_vec_[mxnet::op::batchnorm::kGamma].type_flag_, + GetBlob(kForGamma).type_flag_, DTypeX, { - //const TBlob& blob = this->c_.blob_input_vec_[mxnet::op::batchnorm::kGamma]; - const TBlob& blob = Blob(GetForwardInArray(kForGamma)); - test::fill(blob, DTypeX(1)); + const TBlob& blob = GetBlob(kForGamma); + test::fill(ctx().run_ctx, blob, DTypeX(1)); if (hasWeightAndBias_) { if (blob.size(0) > 1) { blob.dptr<DTypeX>()[1] = DTypeX(3); } } }); MSHADOW_TYPE_SWITCH( - 
Blob(GetForwardInArray(kForBeta)).type_flag_, - //this->c_.blob_input_vec_[mxnet::op::batchnorm::kBeta].type_flag_, + GetBlob(kForBeta).type_flag_, DTypeX, { - //const TBlob& blob = this->c_.blob_input_vec_[mxnet::op::batchnorm::kBeta]; - const TBlob& blob = Blob(GetForwardInArray(kForBeta)); + const TBlob& blob = GetBlob(kForBeta); if (!hasWeightAndBias_) { - test::fill(blob, DTypeX(0)); + test::fill(ctx().run_ctx, blob, DTypeX(0)); } else { // This will cause forward pass check to fail when calculating sum == 0 - test::fill(blob, DTypeX(1)); + test::fill(ctx().run_ctx, blob, DTypeX(1)); if (blob.size(0) > 0) { blob.dptr()[0] = DTypeX(3); } @@ -221,93 +206,77 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { }); // Init the moving data (all mean = 0, all var = 1) - MSHADOW_TYPE_SWITCH( - //this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingMean].type_flag_, - Blob(GetForwardInArray(kForMovingMean)).type_flag_, - DTypeX, { - test::fill(Blob(GetForwardInArray(kForMovingMean)), DTypeX(0)); - //test::fill(this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingMean], DTypeX(0)); - }); - MSHADOW_TYPE_SWITCH( - Blob(GetForwardInArray(kForMovingVar)).type_flag_, - //this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingVar].type_flag_, - DTypeX, { - //test::fill(this->c_.blob_aux_states_[mxnet::op::batchnorm::kMovingVar], DTypeX(1));}); - test::fill(Blob(GetForwardInArray(kForMovingVar)), DTypeX(1)); - }); + test::try_fill(ctx().run_ctx, &GetBlob(kForMovingMean), 0); + test::try_fill(ctx().run_ctx, &GetBlob(kForMovingVar), 1); + test::try_fill(ctx().run_ctx, &GetBlob(kForOutMean), 0); + test::try_fill(ctx().run_ctx, &GetBlob(kForOutVar), 1); } void resetBackward() override { - // Start by filling all backward inputs and outputs with an arbitrary value - for (size_t i = 0, n = Super::bwd_inputs().size(); i < n; ++i) { - const TBlob& out = Blob(&Super::bwd_inputs()[i]); - const int dtype = out.type_flag_; - MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.5678)); }); - } - for (size_t i = 0, n = Super::bwd_outputs().size(); i < n; ++i) { - const TBlob& out = Blob(&Super::bwd_outputs()[i]); - const int dtype = out.type_flag_; - MSHADOW_TYPE_SWITCH(dtype, DTypeX, { test::fill(out, DTypeX(0.5678)); }); - } - DType val = -.001; + Super::resetBackward(); + + // Join forward input and in_data array + double val = 0; + test::patternFill(ctx().run_ctx, &GetBlob(bwd_in_data_Data), [&val]() -> double { + return val += 1; + }); + MSHADOW_TYPE_SWITCH( - GetBlob(kBackwardIn, kBackOutGrad).type_flag_, - //this->c_.blob_out_grad_[mxnet::op::batchnorm::kOut].type_flag_, + GetBlob(bwd_in_data_Gamma).type_flag_, DTypeX, { - test::patternFill( - &GetBlob(kBackwardIn, kBackOutGrad), - //&this->c_.blob_out_grad_[mxnet::op::batchnorm::kOut], - [&val]{ return val += 1; }); + const TBlob& blob = GetBlob(bwd_in_data_Gamma); + test::fill(ctx().run_ctx, blob, DTypeX(1)); + if (hasWeightAndBias_) { + if (blob.size(0) > 1) { + blob.dptr()[1] = DTypeX(3); + } + } }); - - // out-grad weights - //if (mxnet::op::batchnorm::kGamma < this->c_.blob_out_grad_.size()) { - if (GetBackwardInBlob(kBackGamma)) { - MSHADOW_TYPE_SWITCH( - GetBackwardInBlob(kBackGamma)->type_flag_, - //this->c_.blob_out_grad_[mxnet::op::batchnorm::kGamma].type_flag_, - DTypeX, - { test::try_fill(GetBackwardInBlob(kBackGamma), DTypeX(0.1)); }); - } - - // out-grad biases - if (GetBackwardInBlob(kBackBeta)) { - MSHADOW_TYPE_SWITCH( - GetBackwardInBlob(kBackBeta)->type_flag_, - 
//this->c_.blob_out_grad_[mxnet::op::batchnorm::kGamma].type_flag_, - DTypeX, - { test::try_fill(GetBackwardInBlob(kBackBeta), DTypeX(0.1)); }); - } - - /* - // in-grad MSHADOW_TYPE_SWITCH( - this->c_.blob_in_grad_[mxnet::op::batchnorm::kData].type_flag_, - DTypeX, - { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kData, DTypeX(0)); }); - - // in-grad weights - if (mxnet::op::batchnorm::kGamma < this->c_.blob_in_grad_.size()) { - MSHADOW_TYPE_SWITCH( - this->c_.blob_in_grad_[mxnet::op::batchnorm::kGamma].type_flag_, - DTypeX, - { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kGamma, DTypeX(0)); }); - } + GetBlob(bwd_in_data_Beta).type_flag_, + DTypeX, { + const TBlob& blob = GetBlob(bwd_in_data_Beta); + if (!hasWeightAndBias_) { + test::fill(ctx().run_ctx, blob, DTypeX(0)); + } else { // This will cause forward pass check to fail when calculating sum == 0 + test::fill(ctx().run_ctx, blob, DTypeX(1)); + if (blob.size(0) > 0) { + blob.dptr()[0] = DTypeX(3); + } + } + }); - // in-grad biases - if (mxnet::op::batchnorm::kBeta < this->c_.blob_in_grad_.size()) { - MSHADOW_TYPE_SWITCH( - this->c_.blob_in_grad_[mxnet::op::batchnorm::kBeta].type_flag_, - DTypeX, - { test::try_fill(this->c_.blob_in_grad_, mxnet::op::batchnorm::kBeta, DTypeX(0)); }); - } - */ + // Join aux arrays + test::try_fill(ctx().run_ctx, &GetBlob(bwd_aux_states_MovingMean), 0); + test::try_fill(ctx().run_ctx, &GetBlob(bwd_aux_states_MovingVar), 1); + + val = -.101; + test::patternFill(ctx().run_ctx, &GetBlob(bwd_out_data_Data), [&val]() -> double { + return val += 1; }); + test::try_fill(ctx().run_ctx, &GetBlob(bwd_out_data_Mean), 0.0); + test::try_fill(ctx().run_ctx, &GetBlob(bwd_out_data_Var), 1.0); + + val = -.001; + test::patternFill(ctx().run_ctx, &GetBlob(bwd_out_grad_Grad), [&val]() -> double { + return val += 0.01; }); + test::try_fill(ctx().run_ctx, &GetBlob(bwd_out_grad_Mean), 0.0); + test::try_fill(ctx().run_ctx, &GetBlob(bwd_out_grad_Var), 1.0); } const bool hasWeightAndBias_; // This will cause forward pass validation to fail op::BatchNormParam param_; }; +/** + * __ __ _ _ _ _ + * \ \ / / | |(_) | | | | + * \ \ / /__ _| | _ __| | __ _| |_ ___ _ __ + * \ \/ // _` | || |/ _` |/ _` | __|/ _ \| '__| + * \ /| (_| | || | (_| | (_| | |_| (_) | | + * \/ \__,_|_||_|\__,_|\__,_|\__|\___/|_| + * + * + */ /*! 
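The distinct constants used above (0.1234 forward inputs, 0.5678 forward outputs, 0.9012 backward inputs, 0.3456 backward outputs) act as per-buffer sentinels; a hypothetical check for an unwritten gradient buffer (illustration only; float data, `rctx`, and `grad_blob` assumed):

  bool written = true;
  test::AccessAsCPU(grad_blob, rctx, [&written](const TBlob& b) {
    const float *p = b.dptr<float>();
    for (size_t i = 0; i < b.Size(); ++i) {
      if (p[i] == 0.3456f) { written = false; break; }  // sentinel survived
    }
  });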
\brief Validate batch norm test outputs */ template class BatchNormValidator : public test::op::Validator { @@ -350,14 +319,14 @@ class BatchNormValidator : public test::op::Validator { // expect zero mean EXPECT_NEAR(0, sum, kErrorBound); if (!Super::isNear(AccReal(0), sum, kErrorBound)) { - LOG(WARNING) << "Sum is not close enough to zero " + LOG(WARNING) << "Sum is not close enough to zero: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } // expect unit variance EXPECT_NEAR(1, var, kErrorBound); if (!Super::isNear(AccReal(1), var, kErrorBound)) { - LOG(WARNING) << "Variance is not close enough to 1 " + LOG(WARNING) << "Variance is not close enough to 1: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } @@ -375,7 +344,7 @@ class BatchNormValidator : public test::op::Validator { const size_t height = blob->shape_[2]; const size_t width = blob->shape_[3]; - size_t itemCount = 0; + size_t itemCount = 0, nonZero = 0; for (size_t j = 0; j < channels; ++j) { AccReal sum = 0, var = 0; @@ -386,10 +355,16 @@ class BatchNormValidator : public test::op::Validator { sum += data; var += data * data; ++itemCount; + if (data != 0) { + ++nonZero; + } } } } + CHECK_GT(itemCount, 1U); // Not a valid check for one item + CHECK_NE(nonZero, 0); + const AccReal saveSum = sum, saveVar = var; // not channels @@ -401,16 +376,18 @@ class BatchNormValidator : public test::op::Validator { // expect zero mean EXPECT_NEAR(0, sum, kErrorBound); if (!Super::isNear(AccReal(0), sum, kErrorBound)) { - LOG(WARNING) << "Sum is not close enough to zero " + LOG(WARNING) << "Sum is not close enough to zero: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; + test::print(RunContext(), &(std::cerr << "Mean problem:" << std::endl), *blob); } // expect unit variance EXPECT_NEAR(1, var, kErrorBound); if (!Super::isNear(AccReal(1), var, kErrorBound)) { - LOG(WARNING) << "Variance is not close enough to 1" + LOG(WARNING) << "Variance is not close enough to 1: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; + test::print(RunContext(), &(std::cerr << "Variance problem:" << std::endl), *blob); } } } @@ -473,96 +450,110 @@ class BatchNormValidator : public test::op::Validator { template static inline bool compare(const ExecutorType1& i1, const ExecutorType2& i2, - const typename ExecutorType1::WhichArray wa, const EnumType idx, bool print = false) { - const TBlob& b1 = i1.GetBlob(wa, idx); - const TBlob& b2 = i2.GetBlob(wa, idx); + test::CAccessAsCPU cpu1(i1.ctx().run_ctx, i1.GetBlob(idx), false), + cpu2(i2.ctx().run_ctx, i2.GetBlob(idx), false); + const TBlob& b1 = cpu1(); + const TBlob& b2 = cpu2(); if (print && test::debug_output) { - test::print(RunContext(), &(std::cout << "Blob 1:"), b1, true, true); - test::print(RunContext(), &(std::cout << "Blob 2:"), b2, true, true); + test::print(i1.ctx().run_ctx, &(std::cout << "Blob 1:"), b1, true, true); + test::print(i2.ctx().run_ctx, &(std::cout << "Blob 2:"), b2, true, true); + } + const bool rc = test::op::Validator::compare(b1, b2); + if (!rc) { + test::print(i1.ctx().run_ctx, &(std::cerr << "ERROR Blob 1:"), b1, true, true); + test::print(i2.ctx().run_ctx, &(std::cerr << "ERROR Blob 2:"), b2, true, true); } - return test::op::Validator::compare(b1, b2); + return rc; } /*! 
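Usage sketch for the enum-addressed compare overload above (illustration only; `info_cpu` and `info_gpu` are assumed OpInfo pairs for the same operator run on different devices):

  const bool ok = BatchNormValidator<float, float>::compare(
      *info_cpu.executor_, *info_gpu.executor_,
      ForwardOutputs::kForOutData, /*print=*/true);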
\brief Check batch norm output */ template - static void validateForward(const BNOperatorProp& data) { - //const TBlob& outputBlob = data.output_blobs()[mxnet::op::batchnorm::kData]; - const TBlob& outputBlob = data.GetBlob(BNOperatorProp::kForwardOut, - BNOperatorProp::kForOutData); - test::print(RunContext(), &(std::cout << "Fwd Output Blob:"), outputBlob, true, true); - switch (outputBlob.ndim()) { - case 3: - checkBatchNorm1D(&outputBlob); - break; - case 4: - checkBatchNorm2D(&outputBlob); - break; - case 5: - checkBatchNorm3D(&outputBlob); - break; - default: - CHECK(false) << "Supplied shape is not supported for this test"; - break; + static void validateForward(const RunContext& run_ctx, const BNOperatorProp& data) { + const TBlob &outputBlob = data.GetBlob(ForwardOutputs::kForOutData); + if (test::debug_output) { + test::print(run_ctx, &(std::cout << "Fwd Output Blob:"), outputBlob, true, true); } + test::AccessAsCPU(outputBlob, run_ctx, [](const TBlob& blob) { + switch (blob.ndim()) { + case 3: + checkBatchNorm1D(&blob); + break; + case 4: + checkBatchNorm2D(&blob); + break; + case 5: + checkBatchNorm3D(&blob); + break; + default: + CHECK(false) << "Supplied shape is not supported for this test"; + break; + } + }); } +#define TEST_ISTRUE(__args$) \ + do { \ + bool _rc; \ + EXPECT_TRUE((_rc = (__args$))); \ + if (!_rc) { \ + rc = false; \ + } \ + } while (0) + /*! \brief Compare entire operator data between two test sets */ template - static void compare( + static bool compare( const test::op::OpInfo>& info_1, const test::op::OpInfo>& info_2) { + bool rc = true; // Input - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - BNOperatorExecutor::kForwardIn, - BNOperatorExecutor::kForInData)); - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - BNOperatorExecutor::kForwardIn, - BNOperatorExecutor::kForGamma)); - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - BNOperatorExecutor::kForwardIn, - BNOperatorExecutor::kForBeta)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardInputs::kForInData)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardInputs::kForGamma)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardInputs::kForBeta)); // Output - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - BNOperatorExecutor::kForwardOut, - BNOperatorExecutor::kForOutData)); - CHECK_EQ(info_2.prop_->getParam().use_global_stats, - info_1.prop_->getParam().use_global_stats); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardOutputs::kForOutData)); + CHECK_EQ(info_2.prop_->getParam().use_global_stats, info_1.prop_->getParam().use_global_stats); -#if 0 #if MXNET_USE_CUDNN != 1 /* CUDNN takes a different approach here on first pass */ // Aux - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kAux, - mxnet::op::batchnorm::kMovingMean)); - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kAux, - mxnet::op::batchnorm::kMovingVar)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardOutputs::kForOutMean)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardOutputs::kForOutVar)); #endif + if (!info_2.prop_->getParam().use_global_stats) { - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kOutput, - mxnet::op::batchnorm::kMean)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardInputs::bwd_out_data_Mean)); + 
TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardInputs::bwd_out_data_Var)); // InGrad - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInGrad, - mxnet::op::batchnorm::kData)); - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInGrad, - mxnet::op::batchnorm::kGamma)); - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kInGrad, - mxnet::op::batchnorm::kBeta)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardOutputs::bwd_in_grad_Data)); +#if 0 + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardOutputs::bwd_in_grad_Gamma)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardOutputs::bwd_in_grad_Beta)); +#endif // OutGrad - EXPECT_TRUE(compare(*info_1.executor_, *info_2.executor_, - test::op::CoreOpExecutor::kOutGrad, - mxnet::op::batchnorm::kData)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, + BackwardInputs::bwd_out_grad_Grad)); } -#endif + return rc; } }; +/** + * _____ _ + * | __ \ | | + * | |__) |__ _ _ __ __ _ _ __ ___ ___ | |_ ___ _ __ ___ + * | ___// _` | '__|/ _` | '_ ` _ \ / _ \| __|/ _ \| '__|/ __| + * | | | (_| | | | (_| | | | | | | __/| |_| __/| | \__ \ + * |_| \__,_|_| \__,_|_| |_| |_|\___| \__|\___||_| |___/ + * + * + */ static const test::op::kwargs_t blank_kwargs; static const test::op::kwargs_t blank_kwargs_nocudnn = { {"cudnn_off", "True"} }; @@ -591,39 +582,49 @@ static bool isUGS(const test::op::kwargs_t& kwargs) { } #endif // DISABLE_VALIDATION -template -static StreamType& PRT(StreamType *os, const OperatorExecutor& obj, - const typename OperatorExecutor::BlobVectorType bvt, const size_t idx) { - *os << OperatorExecutor::bvt2String(bvt) << ": " << idx - << ": "; - const TBlob& blob = obj.getBlobVect(bvt)[idx]; - - test::print(RunContext(), os, blob); +/** + * _____ _ ____ _ _ + * | __ \ | | / __ \ | | | | + * | | | | ___ | |__ _ _ __ _ | | | |_ _| |_ _ __ _ _| |_ + * | | | |/ _ \| '_ \| | | |/ _` | | | | | | | | __| '_ \| | | | __| + * | |__| | __/| |_) | |_| | (_| | | |__| | |_| | |_| |_) | |_| | |_ + * |_____/ \___||_.__/ \__,_|\__, | \____/ \__,_|\__| .__/ \__,_|\__| + * __/ | | | + * |___/ |_| + */ +template +static StreamType& _DBPRT(const RunContext& run_ctx, const char *label, + StreamType *os, const OperatorExecutor& obj, const BlobType type) { + *os << label << ": "; + test::print(RunContext(), os, test::CAccessAsCPU(run_ctx, obj.GetBlob(type), false)()); return *os; } +#define DBPRT(__os, __obj, __type$) _DBPRT(run_ctx, #__type$, __os, __obj, __type$) + template static StreamType& dumpF(StreamType *os, const test::op::OpInfo& prop, - const size_t x = 0) { - if (test::debug_output) { + const size_t x = 0, + const bool force = test::debug_output) { + if (force) { *os << std::endl; if (x) { *os << "=============================" << std::endl; *os << "= " << x << std::endl; *os << "=============================" << std::endl; } -// typedef typename OperatorExecutor::BlobVectorType BlobVectorType; -// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kData); -// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kGamma); -// PRT(os, *prop.executor_, BlobVectorType::kInput, mxnet::op::batchnorm::kBeta); -// -// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); -// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); -// -// PRT(os, *prop.executor_, 
BlobVectorType::kOutput, mxnet::op::batchnorm::kOut); -// PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kMean); -// PRT(os, *prop.executor_, BlobVectorType::kOutput, mxnet::op::batchnorm::kVar); + const RunContext run_ctx = prop.executor_->ctx().run_ctx; + DBPRT(os, *prop.executor_, ForwardInputs::kForInData); + DBPRT(os, *prop.executor_, ForwardInputs::kForGamma); + DBPRT(os, *prop.executor_, ForwardInputs::kForBeta); + + DBPRT(os, *prop.executor_, ForwardInputs::kForMovingMean); + DBPRT(os, *prop.executor_, ForwardInputs::kForMovingVar); + + DBPRT(os, *prop.executor_, ForwardOutputs::kForOutData); + DBPRT(os, *prop.executor_, ForwardOutputs::kForOutMean); + DBPRT(os, *prop.executor_, ForwardOutputs::kForOutVar); } return *os; } @@ -631,8 +632,9 @@ static StreamType& dumpF(StreamType *os, template static StreamType& dumpB(StreamType *os, const test::op::OpInfo& prop, - const size_t x = 0) { - if (test::debug_output) { + const size_t x = 0, + const bool force = test::debug_output) { + if (force) { *os << std::endl; if (x) { *os << "=============================" << std::endl; @@ -640,31 +642,29 @@ static StreamType& dumpB(StreamType *os, *os << "=============================" << std::endl; } -// typedef typename OperatorExecutor::BlobVectorType BlobVectorType; -// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kData); -// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kGamma); -// PRT(os, *prop.executor_, BlobVectorType::kInGrad, mxnet::op::batchnorm::kBeta); -// -// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingMean); -// PRT(os, *prop.executor_, BlobVectorType::kAux, mxnet::op::batchnorm::kMovingVar); -// -// PRT(os, *prop.executor_, BlobVectorType::kOutGrad, mxnet::op::batchnorm::kOut); - } - return *os; -} + const RunContext run_ctx = prop.executor_->ctx().run_ctx; + DBPRT(os, *prop.executor_, BackwardOutputs::bwd_in_grad_Data); + DBPRT(os, *prop.executor_, BackwardOutputs::bwd_in_grad_Gamma); + DBPRT(os, *prop.executor_, BackwardOutputs::bwd_in_grad_Beta); -template -static StreamType& dumpF(StreamType *os, - const test::op::OpInfoPair& bi) { - return dumpF(&dumpF(os, bi.info_1_, 1), bi.info_2_, 2); -} + DBPRT(os, *prop.executor_, BackwardInputs::bwd_aux_states_MovingMean); + DBPRT(os, *prop.executor_, BackwardInputs::bwd_aux_states_MovingVar); -template -static StreamType& dumpB(StreamType *os, - const test::op::OpInfoPair& bi) { - return dumpB(&dumpB(os, bi.info_1_, 1), bi.info_2_, 2); + DBPRT(os, *prop.executor_, BackwardInputs::bwd_out_grad_Grad); + } + return *os; } +/** + * _______ _ ______ _ _ + * |__ __| | | | ____| | | (_) + * | | ___ ___ | |_ | |__ _ _ _ __ ___| |_ _ ___ _ __ ___ + * | |/ _ \/ __|| __| | __| | | | '_ \ / __| __| |/ _ \| '_ \ / __| + * | | __/\__ \| |_ | | | |_| | | | | (__| |_| | (_) | | | |\__ \ + * |_|\___||___/ \__| |_| \__,_|_| |_|\___|\__|_|\___/|_| |_||___/ + * + * + */ /*! 
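A sketch of what the DBPRT macro above expands to at a call site (illustration only; note a RunContext named run_ctx must be in scope, which is why dumpF/dumpB bind it from the executor first):

  // DBPRT(os, *prop.executor_, ForwardInputs::kForInData) expands to roughly:
  _DBPRT(run_ctx, "ForwardInputs::kForInData", os, *prop.executor_,
         ForwardInputs::kForInData);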
 /*!
  * \brief Test batch norm operator forward pass
  */
 template<typename OperatorProp, typename OperatorExecutor>
 static test::op::OpInfo<OperatorProp, OperatorExecutor> TestBatchNormOperatorForward(
@@ -692,7 +692,8 @@ static test::op::OpInfo<OperatorProp, OperatorExecutor> TestBatchNormOperatorForward(
 #if !DISABLE_VALIDATION
   if (!isUGS(kwargs)) {
-    BatchNormValidator<typename OperatorExecutor::DataType,
-                       typename OperatorExecutor::AccRealType>::validateForward(*info.executor_);
+    BatchNormValidator<typename OperatorExecutor::DataType,
+                       typename OperatorExecutor::AccRealType>::validateForward(
+                         info.executor_->ctx().run_ctx, *info.executor_);
   }
 #endif

@@ -718,7 +719,6 @@ static test::op::OpInfoPair<OperatorProp1, OperatorProp2, OperatorExecutor> testForwardAndBackward(
                            const bool isGPU2,
                            const TShape &inputShape,
                            const test::op::kwargs_t& kwargs,
-                           const bool dumpC,
                            const size_t count = 1,
                            const size_t cycleCount = CYCLE_COUNT) {
   test::op::OpInfo<OperatorProp1, OperatorExecutor> info_1 =
@@ -748,14 +748,15 @@ static test::op::OpInfoPair<OperatorProp1, OperatorProp2, OperatorExecutor> testForwardAndBackward(
   }

   // Check that everything is the same after the forward pass
-  BatchNormValidator<DType, AccReal>::compare(info_1, info_2);
-
-  BatchNormValidator<DType, AccReal>::compare(
-    *info_1.executor_, *info_2.executor_,
-    OperatorExecutor::kForwardIn, OperatorExecutor::kForInData,
-    //test::op::CoreOpExecutor<DType, AccReal>::kInput,
-    //mxnet::op::batchnorm::kData,
-    false);
+  const bool b1 = BatchNormValidator<DType, AccReal>::compare(info_1, info_2);
+
+  const bool b2 = BatchNormValidator<DType, AccReal>::compare(*info_1.executor_,
+                                                              *info_2.executor_,
+                                                              kForInData, false);
+  if (!b1 || !b2) {
+    dumpF(&std::cout, info_1, 1, true);
+    dumpF(&std::cout, info_2, 2, true);
+  }

   if (!thisCount) {
     // return backward
@@ -772,13 +773,14 @@
     }

     // Check that everything is the same after the backward pass
-    BatchNormValidator<DType, AccReal>::compare(info_1, info_2);
+    if (!BatchNormValidator<DType, AccReal>::compare(info_1, info_2)) {
+      dumpF(&std::cout, info_1, 1, true);
+      dumpF(&std::cout, info_2, 2, true);
+      dumpB(&std::cout, info_1, 1, true);
+      dumpB(&std::cout, info_2, 2, true);
+    }
   } while (++thisCount < cycleCount);

-//  if (dumpC) {
-//    info_1.executor_->dumpC(&std::cerr, "BN_testForwardAndBackward");
-//  }
-
   return { info_1, info_2 };
 }

 template<typename OperatorProp, typename OperatorExecutor>
@@ -786,7 +788,6 @@ static test::op::OpInfoPair<OperatorProp, OperatorProp, OperatorExecutor>
 testForwardAndBackward(const bool isGPU,
                        const TShape &inputShape,
                        const test::op::kwargs_t kwargs,
-                       const bool dumpC = false,
                        const size_t count = 1,
                        const size_t cycleCount = CYCLE_COUNT
 ) {
@@ -795,14 +796,23 @@ testForwardAndBackward(const bool isGPU,
     isGPU,
     inputShape,
     kwargs,
-    dumpC,
     count,
     cycleCount);
 }

+/**
+ *   ____          _____
+ *  / __ \        |  __ \
+ * | |  | |_ __   | |__) |_ __ ___  _ __
+ * | |  | | '_ \  |  ___/| '__/ _ \| '_ \
+ * | |__| | |_) | | |    | | | (_) | |_) |
+ *  \____/| .__/  |_|    |_|  \___/| .__/
+ *        | |                      | |
+ *        |_|                      |_|
+ */
+
+// NOTE: This should know which version to use (V1, mkl, etc)
 struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp {
-
   void Init(const mxnet::test::op::kwargs_t& kwargs) override {
     mxnet::test::op::CoreOpProp::Init(kwargs);
     params_.Init(kwargs, dmlc::parameter::kAllowUnknown);
@@ -817,77 +827,80 @@
 template<typename OperatorExecutor>
 static test::op::OpInfoPair<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>
 testBNForwardAndBackward2D(const bool isGPU,
                            const TShape &inputShape,
-                           const test::op::kwargs_t& kwargs,
-                           const bool dumpC = false) {
+                           const test::op::kwargs_t& kwargs) {
   CHECK_EQ(inputShape.ndim(), 4);  // V1 can only handle 2D
-  return testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>(
-    isGPU,
-    isGPU,
-    inputShape,
-    kwargs,
-    dumpC);
+  return testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>(
+    isGPU, isGPU, inputShape, kwargs);
 }

-/*
- * Forward tests
- */
+template<typename OperatorExecutor>
+static test::op::OpInfoPair<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>
+testBNForwardAndBackward(const bool isGPU,
+                         const TShape &inputShape,
+                         const test::op::kwargs_t& kwargs) {
+  return testForwardAndBackward<BatchNormCoreOpProp, BatchNormCoreOpProp, OperatorExecutor>(
+    isGPU, isGPU, inputShape, kwargs);
+}
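The rewritten testForwardAndBackward above moves from assert-as-you-go to "compare, then dump both executors only when a mismatch is found", so the expensive blob printing happens only on failure. A compact, stand-alone sketch of that pattern (FakeExec and the dump format are illustrative stand-ins, not this patch's types):

    #include <iostream>
    #include <vector>

    struct FakeExec { std::vector<float> out; };  // stand-in for an op executor

    static bool compare(const FakeExec &a, const FakeExec &b) {
      return a.out == b.out;  // element-wise equality is enough for the sketch
    }

    static void dump(std::ostream *os, const FakeExec &e, size_t tag) {
      *os << "= " << tag << " =" << std::endl;
      for (float v : e.out) *os << v << ' ';
      *os << std::endl;
    }

    int main() {
      FakeExec e1{{1.0f, 2.0f}}, e2{{1.0f, 2.5f}};
      if (!compare(e1, e2)) {  // only pay the printing cost on mismatch
        dump(&std::cout, e1, 1);
        dump(&std::cout, e2, 2);
      }
      return 0;
    }
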
+/**
+ *   _____             _ _
+ *  / ____|           (_) |
+ * | (___   __ _ _ __  _| |_ _   _
+ *  \___ \ / _` | '_ \| | __| | | |
+ *  ____) | (_| | | | | | |_| |_| |
+ * |_____/ \__,_|_| |_|_|\__|\__, |
+ *                            __/ |
+ *                           |___/
+ */
-TEST(BATCH_NORM, Test2DForwardV1V2) {
+TEST(BATCH_NORM, TestSanityForwardAndBackward) {
   MSHADOW_REAL_TYPE_SWITCH_EX(
     mshadow::kFloat32,
-    DType,
-    AccReal,
-    {
-      // Have to specify somehow v1 and v2
-      auto infoA = testBNForwardAndBackward2D<BNOperatorExecutor<DType, AccReal>>(
-        false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
-    });
+    DType, AccReal, {
+      testBNForwardAndBackward2D<BNOperatorExecutor<DType, AccReal>>(
+        false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
+    });
 }

-#if 0
-
-static const std::vector<mshadow::TypeFlag> v2_types = {mshadow::kFloat32,
-                                                        mshadow::kFloat64,
-                                                        mshadow::kFloat16};
+/**
+ *   _____                          _                           _            _
+ *  / ____|                        | |                         | |          | |
+ * | |     ___  _ __ _ __ ___  ___ | |_ _ __   ___  ___ ___    | |_ ___  ___| |_ ___
+ * | |    / _ \| '__| '__/ _ \/ __|| __| '_ \ / _ \/ __/ __|   | __/ _ \/ __|| __/ __|
+ * | |____| (_) | |  | | |  __/| (__| |_| | | |  __/\__ \\__ \  | ||  __/\__ \| |_\__ \
+ *  \_____\___/|_|  |_|  \___| \___|\__|_| |_|\___||___/|___/   \__\___||___/ \__|___/
+ *
+ */
+static const std::vector<mshadow::TypeFlag> v2_types = {
+  mshadow::kFloat32,
+  mshadow::kFloat64,
+  mshadow::kFloat16
+};

 TEST(BATCH_NORM, Test1DForward) {
-  for (int type : v2_types) {
-    MSHADOW_REAL_TYPE_SWITCH_EX(
-      type, DType, AccReal,
-      {
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs);
-      });
+  for (const mshadow::TypeFlag type : v2_types) {
+    MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
+      testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+        false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs);
+    });
   }
 }

-TEST(BATCH_NORM, Test2DForwardV1) {
-  TestBatchNormOperatorForward<mxnet::op::BatchNormV1Prop, BNOperatorExecutor<float, float>>(
-    false,
-    {BATCH_SIZE, CHANNELS, DH, DW},
-    blank_kwargs);
-}
-
 TEST(BATCH_NORM, Test2DForward) {
   for (int type : v2_types) {
-    MSHADOW_REAL_TYPE_SWITCH_EX(
-      type, DType, AccReal,
-      {
-        auto opInfoFloatH = TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
-      });
+    MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
+      testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+        false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs);
+    });
   }
 }

 TEST(BATCH_NORM, Test3DForward) {
-  for (int type : v2_types) {
-    MSHADOW_REAL_TYPE_SWITCH_EX(
-      type, DType, AccReal,
-      {
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs);
-      });
+  for (const mshadow::TypeFlag type : v2_types) {
+    MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, {
+      testBNForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+        false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs);
+    });
   }
 }
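These tests walk a runtime list of dtype flags and let MSHADOW_REAL_TYPE_SWITCH_EX instantiate the templated body once per type. A simplified sketch of that runtime-flag-to-template dispatch, with illustrative flag values (the real macro also covers float16 and more):

    #include <cstdio>

    enum TypeFlag { kFloat32 = 0, kFloat64 = 1 };  // illustrative flag values

    template <typename DType>
    static void run_case() {
      std::printf("sizeof(DType) = %zu\n", sizeof(DType));
    }

    // Stand-in for MSHADOW_REAL_TYPE_SWITCH_EX: map a runtime flag to a
    // compile-time template instantiation.
    static void dispatch(int flag) {
      switch (flag) {
        case kFloat32: run_case<float>(); break;
        case kFloat64: run_case<double>(); break;
        default: break;
      }
    }

    int main() {
      const int v2_types[] = {kFloat32, kFloat64};
      for (int t : v2_types) dispatch(t);  // mirrors the v2_types loops above
      return 0;
    }
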
@@ -976,16 +989,16 @@ TEST(BATCH_NORM, TestStochasticTiming_2D) {
   MSHADOW_REAL_TYPE_SWITCH_EX(
     mshadow::kFloat32, DType, AccReal,
     {
-      timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-        "RANDOM: BatchNormProp", false, true,
+      timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
+        "RANDOM: BatchNormCoreOpProp", false, true,
         blank_kwargs_nocudnn, GPU_TEST_DIMENSIONS);
     });
 #if MXNET_USE_CUDA
   if (test::unitTestsWithCuda) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       mshadow::kFloat32, DType, AccReal,
       {
-        timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          "RANDOM: BatchNormProp", true, true,
+        timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
+          "RANDOM: BatchNormCoreOpProp", true, true,
           blank_kwargs_nocudnn, GPU_TEST_DIMENSIONS);
       });
   }
 #endif

@@ -1004,28 +1017,32 @@ TEST(BATCH_NORM, TestTiming_2D) {
   }
   MSHADOW_REAL_TYPE_SWITCH_EX(
     mshadow::kFloat32, DType, AccReal, {
-#if defined(MXNET_USE_MKL2017) && (MXNET_USE_MKL2017 == 1)
-      timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+#if MXNET_USE_MKLDNN
+      // MKL
+      timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
         "MKL BatchNormProp 2D", false, false,
         blank_kwargs_nocudnn, 2, THISCOUNT);
 #endif
+      // CPU
       test::ScopeSet<volatile bool> disableMKL(&mxnet::op::batchnorm::disable_mkl, true);
-      timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+      timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
         "BatchNormProp 2D", false, false,
         blank_kwargs_nocudnn, 2, THISCOUNT);
 #if MXNET_USE_CUDA
       if (test::unitTestsWithCuda) {
-        timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+        // CUDA
+        timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
          "BatchNormProp 2D", true, false,
          blank_kwargs_nocudnn, 2, THISCOUNT);
 #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
-        timingTest<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+        // CUDA-CUDNN
+        timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
          "CUDNN BatchNormProp 2D", true, false,
          blank_kwargs,
@@ -1037,16 +1054,29 @@
          2, THISCOUNT);
 #endif
       }
 #endif
     });
 }
 #endif  // _WIN32

-/**
- * Backward tests (generally include forward tests as well)
- */
+inline std::ostream& operator << (std::ostream& os, const test::op::kwargs_t& kwargs) {
+  if (!kwargs.empty()) {
+    os << "[";
+    size_t count = 0;
+    for (const auto &item : kwargs) {
+      if (count++) {
+        os << ", ";
+      }
+      os << item.first << "=" << item.second;
+    }
+    os << "]";
+  }
+  return os;
+}
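A quick usage sketch for the kwargs_t stream operator added above, assuming kwargs_t is the vector-of-string-pairs alias the tests use (the typedef below is a stand-in for test::op::kwargs_t):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::vector<std::pair<std::string, std::string>> kwargs_t;

    // Same shape as the operator added in the patch: [key=value, key=value].
    std::ostream& operator << (std::ostream& os, const kwargs_t& kwargs) {
      if (!kwargs.empty()) {
        os << "[";
        size_t count = 0;
        for (const auto &item : kwargs) {
          if (count++) os << ", ";
          os << item.first << "=" << item.second;
        }
        os << "]";
      }
      return os;
    }

    int main() {
      const kwargs_t kwargs = {{"fix_gamma", "True"}, {"cudnn_off", "False"}};
      std::cout << kwargs << std::endl;  // prints: [fix_gamma=True, cudnn_off=False]
      return 0;
    }
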
+#if 0
 TEST(BATCH_NORM, TestIterAll) {
   TShape shapes[] = {
     TShape({BATCH_SIZE, CHANNELS, DH}),
     TShape({BATCH_SIZE, CHANNELS, DH, DW}),
     TShape({BATCH_SIZE, CHANNELS, DEPTH, DH, DW})
   };
+  int pass = 0;
   const char *tof[2] = { "False", "True" };
   test::op::kwargs_t kwargs;
   for (size_t x1 = 0; x1 < 2U; ++x1) {
@@ -1058,19 +1088,25 @@ TEST(BATCH_NORM, TestIterAll) {
       kwargs.push_back({ "cudnn_off", "True" });
     }
     for (TShape shape : shapes) {
-      for (int g1 = 0; g1 < 2; ++g1) {
-        for (int g2 = 0; g2 < 2; ++g2) {
+      for (bool g1 : { false, true }) {
+        for (bool g2 : { false, true }) {
           for (int type : v2_types) {
+            std::cout << shape << ", " << op::type_string(type) << ", "
+                      << kwargs << ", g1 = "
+                      << g1 << ", g2 = " << g2 << std::endl;
+            std::cout << "." << std::flush;
             MSHADOW_REAL_TYPE_SWITCH_EX(
               type, DType, AccReal,
               {
-                test::op::OpInfoPair<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>
-                  bi = testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-                    g1 != 0, g2 != 0, shape, kwargs, false);  // Keep it simple
+                testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+                  g1, g2, shape, kwargs);  // Keep it simple
               });
+            std::cout << std::endl;
+            ++pass;
           }
         }
       }
@@ -1084,95 +1120,19 @@ TEST(BATCH_NORM, TestIterAll) {
      kwargs.pop_back();
    }
  }
-
-TEST(BATCH_NORM, Test2DBackward2DPlusLoadAndCompareLogic) {
-  test::ScopeSet<volatile bool> disableMKL(&mxnet::op::batchnorm::disable_mkl, true);
-  MSHADOW_REAL_TYPE_SWITCH_EX(
-    mshadow::kFloat32, DType, AccReal,
-    {
-      Test2DBackward2DPlusLoadAndCompareLogicUtil::test<DType, AccReal>();
-    });
-}
-
-template<typename PropType, typename OperatorExecutor>
-void compare(const bool isGPU,
-             const test::op::OpInfo<PropType, OperatorExecutor>& object,
-             const std::vector<
-               std::vector< std::vector<typename OperatorExecutor::DataType> > >& values) {
-  test::op::OpInfo<PropType, OperatorExecutor> info_checkLoad =
-    test::op::createOpAndInfoF<PropType, OperatorExecutor>(
-      blank_kwargs, isGPU, object.executor_->inputs()[0].shape_);
-  info_checkLoad.executor_->initForward(*info_checkLoad.prop_, &info_checkLoad.in_type_);
-  info_checkLoad.executor_->initBackward(*info_checkLoad.prop_, &info_checkLoad.in_type_);
-  info_checkLoad.executor_->load(values);
-  BatchNormValidator<
-    typename OperatorExecutor::DataType,
-    typename OperatorExecutor::AccRealType>::compare(object, info_checkLoad);
-}
-
-
-#ifndef _WIN32
-TEST(BATCH_NORM, TestBackward1D_Simple) {
-  MSHADOW_REAL_TYPE_SWITCH_EX(
-    mshadow::kFloat32, DTypeX, AccReal,
-    {
-      const TShape inputShape({1, 1, 2});
-      test::op::OpInfo<mxnet::op::BatchNormProp, BNOperatorExecutor<DTypeX, AccReal>> info =
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DTypeX, AccReal>>(
-          false, inputShape, blank_kwargs);
-      info.executor_->initBackward(*info.prop_, &info.in_type_);
-      runOperatorBackward(&info);
-
-#if MXNET_DUMP_C
-      info.executor_->dumpC(&std::cerr, "BN_TestBackward1D_Simple");
-#endif
-
-      // Expected data state when running forward+backward starting with default values
-      // Note: This data structure generated by dumpC()
-      static const std::vector< std::vector< std::vector<float> > >
-        ___BN_TestBackward1D_Simple_data_shape_1_1_2___ = {
-          { /* kInput */
-            { 1.0f, 2.0f },
-            { 1.0f },
-            { 0.0f }
-          },
-          { /* kOutput */
-            { -0.998006f, 0.998006f },
-            { 1.5f },
-            { 0.25f }
-          },
-          { /* kAux */
-            { 0.15f },
-            { 0.925f }
-          },
-          { /* kInGrad */
-            { -0.00397621f, 0.00397609f },
-            { 0.0f },
-            { 2.998f }
-          },
-          { /* kOutGrad */
-            { 0.999f, 1.999f }
-          }
-        };
-      compare(false, info, ___BN_TestBackward1D_Simple_data_shape_1_1_2___);
-    });
-}
-#endif  // _WIN32
+#endif

 #ifndef _WIN32
 TEST(BATCH_NORM, TestBackward3D) {
   MSHADOW_REAL_TYPE_SWITCH_EX(
     mshadow::kFloat32, DType, AccReal,
     {
       const TShape inputShape({2, 3, 2, 3, 5});
-      test::op::OpInfo<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>> info =
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+      test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info =
+        TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
           false, inputShape, blank_kwargs);
       info.executor_->initBackward(*info.prop_, &info.in_type_);
       runOperatorBackward(&info);
-#if MXNET_DUMP_C
-      info.executor_->dumpC(&std::cerr, "TestBackward3D");
-#endif
     });
 }
 #endif  // _WIN32

@@ -1182,8 +1142,9 @@ class ChannelAxisTestData {
  protected:
   enum Mode { LOAD, SAVE };

-  void loadOrSave(const TBlob& blob, int channel_axis, const Mode mode) {
-    mxnet::op::batchnorm::BNTensor3<DType> tensor3(blob, channel_axis);
+  void loadOrSave(const RunContext& run_ctx, const TBlob& blob, int channel_axis, const Mode mode) {
+    test::CAccessAsCPU cpu_blob(run_ctx, blob, true);
+    mxnet::op::batchnorm::BNTensor3<DType> tensor3(cpu_blob(), channel_axis);
     const TShape &shape = blob.shape_;
     CHECK_GT(shape.ndim(), 0);
     if (channel_axis < 0) {
@@ -1233,14 +1194,15 @@ class ChannelAxisTestData {
     }
   }

-  static void print(const std::string& label, const TBlob& blob) {
+  static void print(const RunContext& run_ctx, const std::string& label, const TBlob& blob) {
     if (test::debug_output) {
       if (!label.empty()) {
         std::cout << label << ": ";
       }
+      test::CAccessAsCPU cpu_blob(run_ctx, blob, true);
       const size_t totalSize = blob.Size();
       for (size_t i = 0; i < totalSize; ++i) {
-        const float val = blob.dptr<DType>()[i];
+        const float val = cpu_blob().dptr<DType>()[i];
         if (i) {
           std::cout << ", ";
         }
@@ -1251,25 +1213,26 @@ class ChannelAxisTestData {
     }
   }

-  void save(const TBlob& blob, const int channel_axis) {
-    loadOrSave(blob, channel_axis, SAVE);
+  void save(const RunContext& run_ctx, const TBlob& blob, const int channel_axis) {
+    loadOrSave(run_ctx, blob, channel_axis, SAVE);
   }

-  void load(const TBlob& blob, const int channel_axis) {
-    loadOrSave(blob, channel_axis, LOAD);
+  void load(const RunContext& run_ctx, const TBlob& blob, const int channel_axis) {
+    loadOrSave(run_ctx, blob, channel_axis, LOAD);
   }
 };

 template<typename DType, typename AccReal>
-static void compare(const TBlob& blob, const std::vector<DType>& vals) {
+static void compare(const RunContext& run_ctx, const TBlob& blob, const std::vector<DType>& vals) {
   CHECK_EQ(blob.Size(), vals.size());
-  const DType *v = blob.dptr<DType>();
+  test::CAccessAsCPU cpu_blob(run_ctx, blob, false);
+  const DType *v = cpu_blob().dptr<DType>();
   for (size_t i = 0, n = vals.size(); i < n; ++i) {
     const DType vBlob = v[i];
     const DType vVect = vals[i];
     const bool near = BatchNormValidator<DType, AccReal>::isNear(
-      vBlob, vVect, BatchNormValidator<DType, AccReal>::ErrorBound(&blob));
-    EXPECT_TRUE(near);
+      vBlob, vVect, BatchNormValidator<DType, AccReal>::ErrorBound(&cpu_blob()));
     if (!near) {
       LOG(WARNING) << vBlob << " is not near enough to " << vVect << std::endl;
+      ASSERT_TRUE(near);
     }
   }
 }

@@ -1290,9 +1253,9 @@ static void compare(const std::vector<std::vector<DType>>& d1,
       const DType v2 = vec2[i];
       const bool near = BatchNormValidator<DType, AccReal>::isNear(
         v1, v2, BatchNormValidator<DType, AccReal>::ERROR_BOUND());
-      EXPECT_TRUE(near);
       if (!near) {
         LOG(WARNING) << v1 << " is not near enough to " << v2 << std::endl;
+        ASSERT_TRUE(near);
       }
     }
   }
 }
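The refactor threads a RunContext through these helpers so blobs can be inspected from the CPU no matter where they live. Judging only from the call sites above, test::CAccessAsCPU looks like a scoped copy-to-CPU guard whose operator() yields the CPU view. A minimal sketch of that RAII shape; the copy-back flag semantics are an assumption, and Blob is a stand-in for TBlob:

    #include <cstring>
    #include <iostream>
    #include <vector>

    struct Blob { float *ptr; size_t size; };  // illustrative stand-in for TBlob

    class AccessAsCPU {
     public:
      // 'copy_back' is assumed to mean: flush CPU-side writes to the source
      // blob when the guard leaves scope (true at write sites, false at read sites).
      AccessAsCPU(const Blob &src, bool copy_back)
          : src_(src), copy_back_(copy_back), local_(src.size) {
        std::memcpy(local_.data(), src.ptr, src.size * sizeof(float));
      }
      ~AccessAsCPU() {
        if (copy_back_)
          std::memcpy(src_.ptr, local_.data(), src_.size * sizeof(float));
      }
      // operator() yields the CPU-resident view, matching the cpu_blob() call sites.
      Blob operator()() { return Blob{local_.data(), local_.size()}; }

     private:
      Blob src_;
      bool copy_back_;
      std::vector<float> local_;
    };

    int main() {
      float storage[2] = {1.0f, 2.0f};
      Blob blob{storage, 2};
      {
        AccessAsCPU guard(blob, true);
        guard().ptr[0] = 5.0f;  // mutate the CPU view
      }                         // destructor copies the change back
      std::cout << storage[0] << std::endl;  // prints 5
      return 0;
    }
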
@@ -1311,13 +1274,17 @@ static void testSaveAndLoad(const std::vector<index_t>& dims,
     shape[i] = index_t(dims[i]);
   }

+  RunContext cpu_run_ctx;
+  cpu_run_ctx.ctx.dev_type = Context::kCPU;
+  cpu_run_ctx.ctx.dev_id = 0;
+  cpu_run_ctx.stream = nullptr;
   std::unique_ptr<test::StandaloneBlob> blob(new test::StandaloneBlob(
     shape, false, mshadow::DataType<DType>::kFlag));

-  data.save(*blob, channelAxis);
-  ChannelAxisTestData<DType>::print("saved to blob", *blob);
-  compare<DType, AccReal>(*blob, expectedBlobData);
-  data.load(*blob, channelAxis);
+  data.save(cpu_run_ctx, *blob, channelAxis);
+  ChannelAxisTestData<DType>::print(cpu_run_ctx, "saved to blob", *blob);
+  compare<DType, AccReal>(cpu_run_ctx, *blob, expectedBlobData);
+  data.load(cpu_run_ctx, *blob, channelAxis);
   compare<DType, AccReal>(data.channel_data_, inputChannelData);
 }

@@ -1369,7 +1336,6 @@ static TShape MakeShape(const std::vector<index_t>& shape,
   return newShape;
 }

-
 /*! \brief Create and arrange equivalent data with different channel axes, then compare
  *  normalized results */
 static void runChannelAxisTest(
@@ -1430,17 +1396,18 @@ static void runChannelAxisTest(
   // Create operator 1 with channelAxis1 (normally the experimental one)
   kwargs.push_back({"axis", std::to_string(channelAxis1)});
-  test::op::OpInfo<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>> info_c1 =
-    test::op::createOpAndInfoF<
-      mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-        kwargs, isGPU1, shape_c1);
+  test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info_c1 =
+    test::op::createOpAndInfoF<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
+      BNOperatorExecutor<DType, AccReal>::ArgsWithOpName(
+        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_c1, kwargs);
+  kwargs.pop_back();

   // Create operator 2 with channelAxis2 (normally the control one)
-  kwargs.pop_back();
   kwargs.push_back({"axis", std::to_string(channelAxis2)});
-  test::op::OpInfo<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>> info_c2 =
-    test::op::createOpAndInfoF<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-      kwargs, isGPU2, shape_c2);
+  test::op::OpInfo<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>> info_c2 =
+    test::op::createOpAndInfoF<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
+      BNOperatorExecutor<DType, AccReal>::ArgsWithOpName(
+        kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_c2, kwargs);
   kwargs.pop_back();

   // Init operators
@@ -1450,47 +1417,69 @@ static void runChannelAxisTest(
   info_c2.executor_->initBackward(*info_c2.prop_, &info_c2.in_type_);

   // Save input data to blob with new shape 1
-  data_c1.save(info_c1.executor_->inputs()[0], channelAxis1);
-  ChannelAxisTestData<DType>::print("blob 1 input", info_c1.executor_->inputs()[0]);
+  data_c1.save(info_c1.executor_->ctx().run_ctx,
+               info_c1.executor_->GetBlob(ForwardInputs::kForInData), channelAxis1);
+  ChannelAxisTestData<DType>::print(info_c1.executor_->ctx().run_ctx,
+                                    "blob 1 input",
+                                    info_c1.executor_->GetBlob(ForwardInputs::kForInData));

   // Save input data to blob with new shape 2
-  data_c2.save(info_c2.executor_->inputs()[0], channelAxis2);
-  ChannelAxisTestData<DType>::print("blob 2 input", info_c2.executor_->inputs()[0]);
+  data_c2.save(info_c2.executor_->ctx().run_ctx,
+               info_c2.executor_->GetBlob(ForwardInputs::kForInData), channelAxis2);
+  ChannelAxisTestData<DType>::print(info_c2.executor_->ctx().run_ctx,
+                                    "blob 2 input",
+                                    info_c2.executor_->GetBlob(ForwardInputs::kForInData));

   // Save output grad to blob with new shape 1
-  grad_c1.save(info_c1.executor_->bwd_inputs()[0], channelAxis1);
-  ChannelAxisTestData<DType>::print("blob 1 output grad", info_c1.executor_->bwd_inputs()[0]);
+  grad_c1.save(info_c1.executor_->ctx().run_ctx,
+               info_c1.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), channelAxis1);
+  ChannelAxisTestData<DType>::print(info_c1.executor_->ctx().run_ctx,
+                                    "blob 1 output grad",
+                                    info_c1.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad));

   // Save output grad to blob with new shape 2
-  grad_c2.save(info_c2.executor_->bwd_inputs()[0], channelAxis2);
-  ChannelAxisTestData<DType>::print("blob 2 output grad", info_c2.executor_->bwd_inputs()[0]);
+  grad_c2.save(info_c2.executor_->ctx().run_ctx,
+               info_c2.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), channelAxis2);
+  ChannelAxisTestData<DType>::print(info_c2.executor_->ctx().run_ctx,
+                                    "blob 2 output grad",
+                                    info_c2.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad));
   // Run both operators forward and backwards several times
   for (index_t x = 0; x < numberOfPasses; ++x) {
-    info_c1.executor_->forward();
-    info_c2.executor_->forward();
-
-    info_c1.executor_->backward();
-    info_c2.executor_->backward();
+    info_c1.executor_->forward(1);
+    info_c2.executor_->forward(1);
+    info_c1.executor_->backward(1);
+    info_c2.executor_->backward(1);
   }

+  //
+  // Check forward pass
+  //
   // Transform operator 1's blob output to a normalized shape
-  data_c1.load(info_c1.executor_->outputs()[0], channelAxis1);
+  data_c1.load(info_c1.executor_->ctx().run_ctx,
+               info_c1.executor_->GetBlob(ForwardOutputs::kForOutData), channelAxis1);
   ChannelAxisTestData<DType>::print("channel data 1", data_c1.channel_data_);

   // Transform operator 2's blob output to a normalized shape
-  data_c2.load(info_c2.executor_->outputs()[0], channelAxis2);
+  data_c2.load(info_c2.executor_->ctx().run_ctx,
+               info_c2.executor_->GetBlob(ForwardOutputs::kForOutData), channelAxis2);
   ChannelAxisTestData<DType>::print("channel data 2", data_c2.channel_data_);

   // Compare the operators' output data while they're in a normalized shape
   compare<DType, AccReal>(data_c1.channel_data_, data_c2.channel_data_);

+  //
+  // Check backward pass
+  //
   // Transform operator 1's input-grad blob to a normalized shape
-  grad_c1.load(info_c1.executor_->bwd_outputs()[0], channelAxis1);
+  grad_c1.load(info_c1.executor_->ctx().run_ctx,
+               info_c1.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), channelAxis1);
   ChannelAxisTestData<DType>::print("input grad 1", grad_c1.channel_data_);

   // Transform operator 2's input-grad blob to a normalized shape
-  grad_c2.load(info_c2.executor_->bwd_outputs()[0], channelAxis2);
+  grad_c2.load(info_c2.executor_->ctx().run_ctx,
+               info_c2.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), channelAxis2);
   ChannelAxisTestData<DType>::print("input grad 2", grad_c2.channel_data_);

   // Compare the operators' input grad data while they're in a normalized shape
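runChannelAxisTest's equivalence check works because any placement of the channel axis can be collapsed to the same (outer, channel, inner) coordinates, which is what BNTensor3 does for the validator. A small illustrative sketch of that collapsing (the shape and axis values are made up for the example):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Collapse dims before 'axis' into 'outer' and dims after it into 'inner'.
    static void to_outer_channel_inner(const std::vector<size_t> &shape, size_t axis,
                                       size_t *outer, size_t *channel, size_t *inner) {
      *outer = 1;
      *inner = 1;
      for (size_t i = 0; i < axis; ++i) *outer *= shape[i];
      *channel = shape[axis];
      for (size_t i = axis + 1; i < shape.size(); ++i) *inner *= shape[i];
    }

    int main() {
      size_t outer, channel, inner;
      // The 5D test shape used above, with the default channel axis 1:
      to_outer_channel_inner({2, 3, 2, 3, 5}, 1, &outer, &channel, &inner);
      std::cout << outer << " x " << channel << " x " << inner << std::endl;  // 2 x 3 x 30
      return 0;
    }
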
@@ -1521,6 +1510,7 @@ TEST(BATCH_NORM, TestChannelAxisSimple) {
  * Channel position 1 (default) is checked everywhere else, so forward and
  * backward result equivalence here implies correctness for other channel positions
  */
+#if 0
 TEST(BATCH_NORM, TestChannelAxis) {
   test::ScopeSet<bool> noDebugOutput(&test::debug_output, false);

@@ -1532,14 +1522,16 @@ TEST(BATCH_NORM, TestChannelAxis) {
     {1, 2, 3, 4}};

   const char *tof[2] = {"False", "True"};
+  size_t pass = 0;

   for (size_t x1 = 0; x1 < 2U; ++x1) {
     kwargs.push_back({"fix_gamma", tof[x1]});
     for (size_t x2 = 0; x2 < 2U; ++x2) {
       kwargs.push_back({"use_global_stats", tof[x2]});
       for (size_t x3 = 0; x3 < 2U; ++x3) {
         kwargs.push_back({"cudnn_off", tof[x3]});
-        for (index_t g1 = 0; g1 < 2U; ++g1) {
-          for (index_t g2 = 0; g2 < 2U; ++g2) {
+        for (bool g1 : { false, true }) {
+          for (bool g2 : { false, true }) {
             for (const std::vector<index_t> &simpleShape : shapes) {
               const int dim = static_cast<int>(simpleShape.size());
               for (signed int channelAxis = -dim, shapeDim = dim;
@@ -1547,8 +1539,9 @@
                    ++channelAxis) {
                 for (size_t channelCount = 1; channelCount <= 3; ++channelCount) {
                   // Check against base-case of channel axis position 1
-                  runChannelAxisTest(g1 != 0, g2 != 0, kwargs, simpleShape,
+                  runChannelAxisTest(g1, g2, kwargs, simpleShape,
                                      1, channelAxis, channelCount, false);
+                  ++pass;
                 }
               }
             }
           }
         }
+#endif

@@ -1570,11 +1563,11 @@ TEST(BATCH_NORM, Test2DForward2D_gpu) {
     MSHADOW_REAL_TYPE_SWITCH_EX(
       type, DType, AccReal,
       {
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+        TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
           true,
           {BATCH_SIZE, CHANNELS, DH, DW},
           blank_kwargs);
-        TestBatchNormOperatorForward<mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
+        TestBatchNormOperatorForward<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
           true,
           {BATCH_SIZE, CHANNELS, DH, DW},
           blank_kwargs_nocudnn);
@@ -1588,12 +1581,12 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu) {
       type, DType, AccReal,
       {
         const TShape inputShape({1, 1, 2, 1});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs_nocudnn, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, blank_kwargs);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, blank_kwargs_nocudnn);
       });
   }
 }
@@ -1604,12 +1597,12 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) {
       type, DType, AccReal,
       {
         const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, blank_kwargs_nocudnn, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, blank_kwargs);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, blank_kwargs_nocudnn);
       });
   }
 }
@@ -1622,12 +1615,12 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_nfg) {
       type, DType, AccReal,
       {
         const TShape inputShape({1, 1, 2, 1});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs_nocudnn, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, nonfixgamma_kwargs);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, nonfixgamma_kwargs_nocudnn);
       });
   }
 }
@@ -1638,12 +1631,12 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) {
       type, DType, AccReal,
      {
         const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, nonfixgamma_kwargs_nocudnn, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, nonfixgamma_kwargs);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, nonfixgamma_kwargs_nocudnn);
       });
   }
 }
@@ -1656,12 +1649,12 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_ugs) {
       type, DType, AccReal,
       {
         const TShape inputShape({2, 3, 2, 2});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs_nocudnn, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, useglobalstats_kwargs_nocudnn);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, useglobalstats_kwargs);
       });
   }
 }
@@ -1672,12 +1665,12 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) {
       type, DType, AccReal,
       {
         const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs, false);
-        testForwardAndBackward<mxnet::op::BatchNormProp, mxnet::op::BatchNormProp, BNOperatorExecutor<DType, AccReal>>(
-          false, true, inputShape, useglobalstats_kwargs_nocudnn, false);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, useglobalstats_kwargs);
+        testForwardAndBackward<BNOperatorExecutor<DType, AccReal>>(
+          false, true, inputShape, useglobalstats_kwargs_nocudnn);
       });
   }
 }
diff --git a/tests/cpp/operator/dropout_perf.cc b/tests/cpp/operator/dropout_perf.cc
index 4132fcb22c62..4afd56fe586a 100644
--- a/tests/cpp/operator/dropout_perf.cc
+++ b/tests/cpp/operator/dropout_perf.cc
@@ -45,7 +45,6 @@ TEST(DROPOUT_PERF, ExecuteBidirectional) {
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
                                                            "_backward_Dropout");
   runner.set_verbose(true);
-  //runner.RunGenericOperatorForward(false, { shape }, kwargs, 1);
   runner.RunBidirectional(false, { shape }, kwargs, 1);
 }
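The perf tests that follow share a warm-up-then-measure shape: one untimed bidirectional pass first (so lazy initialization and cache effects do not pollute the numbers), then timed passes over a list of shapes. A generic sketch of that pattern with a stand-in workload (nothing below is from the runner API itself):

    #include <chrono>
    #include <cstdio>
    #include <vector>

    static void run_op(size_t n) {  // stand-in for a forward+backward pass
      volatile double acc = 0;
      for (size_t i = 0; i < n; ++i) acc += i * 0.5;
    }

    int main() {
      run_op(1 << 20);  // warm-up: excluded from measurements
      const std::vector<size_t> shapes = {1 << 16, 1 << 18, 1 << 20};
      for (size_t n : shapes) {
        const auto start = std::chrono::steady_clock::now();
        run_op(n);
        const std::chrono::duration<double, std::milli> ms =
            std::chrono::steady_clock::now() - start;
        std::printf("n=%zu: %.3f ms\n", n, ms.count());
      }
      return 0;
    }
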
@@ -60,7 +59,7 @@ TEST(DROPOUT_PERF, TimingCPU) {
   test::op::CoreOperatorRunner<float> runner;
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
                                                            "_backward_Dropout");
-  runner.RunGenericOperatorForward(false, { shape }, kwargs, 1);
+  runner.RunBidirectional(false, { shape }, kwargs, 1);
   std::vector<TShape> shapes;
   if (test::performance_run) {
     shapes = {
@@ -95,7 +94,7 @@ TEST(DROPOUT_PERF, TimingGPU) {
   test::op::CoreOperatorRunner<float> runner;
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
                                                            "_backward_Dropout");
-  runner.RunGenericOperatorForward(true, { shape }, kwargs, 1);
+  runner.RunBidirectional(true, { shape }, kwargs, 1);
   std::vector<TShape> shapes = {
     {1, 1, 28, 28},
     {1, 3, 28, 28},
diff --git a/tests/cpp/operator/fully_conn_perf.cc b/tests/cpp/operator/fully_conn_perf.cc
index 2283562dea2b..e574ae2b4379 100644
--- a/tests/cpp/operator/fully_conn_perf.cc
+++ b/tests/cpp/operator/fully_conn_perf.cc
@@ -47,7 +47,7 @@ TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) {
   runner.set_verbose(true);
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected",
                                                            "_backward_FullyConnected");
-  runner.RunGenericOperatorForward(false, { shape1, shape2 }, kwargs, 1);
+  runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1);
 }

 /*!
@@ -60,7 +60,7 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) {
   test::op::CoreOperatorRunner<float> runner;
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected",
                                                            "_backward_FullyConnected");
-  runner.RunGenericOperatorForward(false, { shape1, shape2 }, kwargs, 1);
+  runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1);
   std::vector<TShape> shapes;
   if (test::performance_run) {
     shapes = {
@@ -96,7 +96,7 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) {
   test::op::CoreOperatorRunner<float> runner;
   kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected",
                                                            "_backward_FullyConnected");
-  runner.RunGenericOperatorForward(true, { shape1, shape2 }, kwargs, 1);
+  runner.RunBidirectional(true, { shape1, shape2 }, kwargs, 1);
   std::vector<TShape> shapes;
   if (test::performance_run) {
     shapes = {
diff --git a/tests/cpp/operator/tune/operator_tune_test.cc b/tests/cpp/operator/tune/operator_tune_test.cc
index f404e4faa923..7d84e47dbf16 100644
--- a/tests/cpp/operator/tune/operator_tune_test.cc
+++ b/tests/cpp/operator/tune/operator_tune_test.cc
@@ -18,13 +18,14 @@
  */
 #include
 #include
-#include
 #include "../../src/operator/nn/activation-inl.h"
 #include "../../src/operator/operator_tune-inl.h"
 #include "../include/test_op_runner.h"
 #include "../include/test_core_op.h"
 #include "../include/test_tune.h"

+#if MXNET_USE_OPERATOR_TUNING
+
 using namespace mxnet;

 /*!
@@ -173,3 +174,4 @@ TEST(OMP_TUNING, EvaluateTuneTestInt64) {
   std::cout << "Success rate for type " << test::type_name<DType>() << ": "
             << result << std::endl;
 }
+#endif  // MXNET_USE_OPERATOR_TUNING
\ No newline at end of file
diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc
index 5556d7bf1c2e..fc46dff1d9b9 100644
--- a/tests/cpp/test_main.cc
+++ b/tests/cpp/test_main.cc
@@ -64,7 +64,7 @@ static bool checkForWorkingCuda() {
       }
     }
   }
-  std::fprintf(stderr, "Warning: Could not find working CUDA driver\n");
+  std::cerr << "Warning: Could not find working CUDA driver" << std::endl;
   return false;
 }
 #else
@@ -89,19 +89,20 @@ int main(int argc, char ** argv) {
   mxnet::test::unitTestsWithCuda = checkForWorkingCuda();  // auto-determine

   for (int x = 1; x < argc; ++x) {
+    const char *arg = argv[x];
     // force checks with CUDA
-    if (!strcmp(argv[x], "--with-cuda")) {
+    if (!strcmp(arg, "--with-cuda")) {
       // override (ie force attempt CUDA)
       mxnet::test::unitTestsWithCuda = true;
-    } else if (!strcmp(argv[x], "--debug")) {
+    } else if (!strcmp(arg, "--debug") || !strcmp(arg, "-d")) {
       mxnet::test::debug_output = true;
-    } else if (!strcmp(argv[x], "--perf")) {
+    } else if (!strcmp(arg, "--perf") || !strcmp(arg, "-p")) {
       mxnet::test::performance_run = true;
-    } else if (!strcmp(argv[x], "--csv")) {
+    } else if (!strcmp(arg, "--csv")) {
       mxnet::test::csv = true;
-    } else if (!strcmp(argv[x], "--quick") || !strcmp(argv[x], "-q")) {
+    } else if (!strcmp(arg, "--quick") || !strcmp(arg, "-q")) {
       mxnet::test::quick_test = true;
-    } else if (!strcmp(argv[x], "--backtrace")) {
+    } else if (!strcmp(arg, "--backtrace")) {
       backtrace_test();
       return 0;
     }