
Commit

Merge branch 'master' into addops
yajiedesign committed Nov 30, 2015
2 parents 2e1fd25 + 562ded3 commit 6c1dd42
Showing 5 changed files with 226 additions and 3 deletions.
5 changes: 2 additions & 3 deletions example/autoencoder/solver.py
@@ -27,6 +27,7 @@ def backward_end(self, i, weights, grads, metric=None):
                logging.log(self.level, 'Iter:%d param:%s\t\tstat(%s):%s\t\tgrad_stat:%s'%(i, key, self.stat.__name__, str(self.stat(weights[key].asnumpy())), str(self.stat(arr.asnumpy()))))
        if i%self.interval == 0 and metric is not None:
            logging.log(logging.INFO, 'Iter:%d metric:%f'%(i, metric.get()[1]))
            metric.reset()
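The added metric.reset() means the monitor now clears the metric right after logging it, so each reported value covers only the most recent interval rather than the whole run.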

class Solver(object):
def __init__(self, optimizer, **kwargs):
@@ -108,12 +109,10 @@ def solve(self, xpu, sym, args, args_grad,
                output_dict[key].copyto(output_buff[key])

            exe.backward()
            self.optimizer.begin_epoch(i)
            for key, arr in update_dict.items():
                self.updater(key, arr, args[key])

            if self.metric is not None:
                self.metric.reset()
                self.metric.update([input_buffs[-1].asnumpy()],
                                   [output_buff[output_names[0]].asnumpy()])
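Correspondingly, this hunk drops the per-iteration self.metric.reset() from solve() (the reset now lives in the monitor callback above) together with the self.optimizer.begin_epoch(i) call.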

@@ -129,5 +128,5 @@ def solve(self, xpu, sym, args, args_grad,





2 changes: 2 additions & 0 deletions python/mxnet/initializer.py
@@ -39,6 +39,8 @@ def __call__(self, name, arr):
            self._init_zero(name, arr)
        elif name.endswith("moving_var"):
            self._init_zero(name, arr)
        elif name.endswith("moving_avg"):
            self._init_zero(name, arr)
        else:
            self._init_default(name, arr)
    # pylint: disable=no-self-use, missing-docstring, invalid-name
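As a quick sanity sketch of the dispatch above (hypothetical state name; assumes the Uniform initializer and NDArray API from this module's sibling files):

import numpy as np
from mxnet import initializer, ndarray

init = initializer.Uniform(0.07)
arr = ndarray.empty((128,))
init('sparse_encoder_moving_avg', arr)  # name ends with "moving_avg" -> zero-initialized
assert np.all(arr.asnumpy() == 0)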
177 changes: 177 additions & 0 deletions src/operator/identity_attach_KL_sparse_reg-inl.h
@@ -0,0 +1,177 @@
/*!
 * Copyright (c) 2015 by Contributors
 * \file identity_attach_KL_sparse_reg-inl.h
 * \brief Identity operator that attaches a KL sparseness penalty to the gradient
 */
#ifndef MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
#define MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
#include <dmlc/logging.h>
#include <mxnet/operator.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "./mshadow_op.h"
#include "./operator_common.h"

namespace mxnet {
namespace op {

namespace sparsereg {
enum IdentityAttachKLSparseRegOpInputs {kData};
enum IdentityAttachKLSparseRegOpOutputs {kOut};
enum IdentityAttachKLSparseRegOpAuxiliary {kMovingAvg};
enum IdentityAttachKLSparseRegBackResource {kTempSpace};
} // namespace sparsereg

struct IdentityAttachKLSparseRegParam : public dmlc::Parameter<IdentityAttachKLSparseRegParam> {
  float penalty;
  float sparseness_target;
  float momentum;
  DMLC_DECLARE_PARAMETER(IdentityAttachKLSparseRegParam) {
    DMLC_DECLARE_FIELD(sparseness_target).set_default(0.1)
    .set_range(0, 1)
    .describe("The sparseness target");
    DMLC_DECLARE_FIELD(penalty).set_default(0.001)
    .describe("The tradeoff parameter for the sparseness penalty");
    DMLC_DECLARE_FIELD(momentum).set_default(0.9)
    .set_range(0, 1)
    .describe("The momentum for running average");
  }
};  // struct IdentityAttachKLSparseRegParam

// This op regularizes the output of a sigmoid activation function.
// In forward, it simply copies the input.
// In backward, it adds a sparseness penalty to the gradient.
// The regularization is based on the KL divergence between the mean activation
// and the sparseness target.
// More details: page 11 of https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
// Pair it only with a sigmoid activation; otherwise NaN may occur.
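// Concretely, for target sparseness rho = sparseness_target and per-unit
// moving-average activation rho_hat, the penalty is
//   KL(rho || rho_hat) = rho*log(rho/rho_hat) + (1-rho)*log((1-rho)/(1-rho_hat)),
// and its derivative with respect to rho_hat,
//   -rho/rho_hat + (1-rho)/(1-rho_hat),
// is the term Backward adds to the gradient, scaled by `penalty`.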
template<typename xpu>
class IdentityAttachKLSparseRegOp : public Operator {
 public:
  explicit IdentityAttachKLSparseRegOp(IdentityAttachKLSparseRegParam param) {
    this->param_ = param;
  }
  virtual void Forward(const OpContext &ctx,
                       const std::vector<TBlob> &in_data,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &out_data,
                       const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(in_data.size(), 1);
    CHECK_EQ(out_data.size(), 1);
    Stream<xpu> *s = ctx.get_stream<xpu>();
    Tensor<xpu, 2> data = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 2> out = out_data[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
    Assign(out, req[sparsereg::kData], F<mshadow_op::identity>(data));
  }

  virtual void Backward(const OpContext &ctx,
                        const std::vector<TBlob> &out_grad,
                        const std::vector<TBlob> &in_data,
                        const std::vector<TBlob> &out_data,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &in_grad,
                        const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    Stream<xpu> *s = ctx.get_stream<xpu>();
    Tensor<xpu, 2> grad_in = in_grad[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 2> data_in = in_data[sparsereg::kData].FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 2> grad_out = out_grad[sparsereg::kOut].FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 1> moving_avg = aux_args[sparsereg::kMovingAvg].get<xpu, 1, real_t>(s);
    Tensor<xpu, 1> avg = ctx.requested[sparsereg::kTempSpace].get_space<xpu>(
        mshadow::Shape1(moving_avg.shape_[0]), s);
    // Mean activation of each hidden unit over the current batch.
    avg = sumall_except_dim<1>(data_in);
    avg /= data_in.shape_[0];
    // Exponential moving average of the per-unit mean activation.
    moving_avg = param_.momentum * moving_avg + (1 - param_.momentum) * avg;
    // Add the gradient of the KL sparseness penalty to the incoming gradient.
    Assign(grad_in, req[sparsereg::kData], grad_out + param_.penalty *
           (-param_.sparseness_target / broadcast<1>(moving_avg, data_in.shape_) +
            ((1 - param_.sparseness_target) / (1 - broadcast<1>(moving_avg, data_in.shape_)))));
  }

 private:
  IdentityAttachKLSparseRegParam param_;
};  // class IdentityAttachKLSparseRegOp

template<typename xpu>
Operator *CreateOp(IdentityAttachKLSparseRegParam param);

#if DMLC_USE_CXX11
class IdentityAttachKLSparseRegProp : public OperatorProperty {
 public:
  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
    param_.Init(kwargs);
  }

  std::map<std::string, std::string> GetParams() const override {
    return param_.__DICT__();
  }

  bool InferShape(std::vector<TShape> *in_shape,
                  std::vector<TShape> *out_shape,
                  std::vector<TShape> *aux_shape) const override {
    using namespace mshadow;
    CHECK_EQ(in_shape->size(), 1);
    const TShape &dshape = in_shape->at(sparsereg::kData);
    if (dshape.ndim() == 0) return false;
    out_shape->clear();
    out_shape->push_back(dshape);
    aux_shape->clear();
    // One moving-average slot per hidden unit.
    aux_shape->push_back(Shape1(dshape[1]));
    return true;
  }

  OperatorProperty* Copy() const override {
    auto ptr = new IdentityAttachKLSparseRegProp();
    ptr->param_ = param_;
    return ptr;
  }

  std::string TypeString() const override {
    return "IdentityAttachKLSparseReg";
  }

  std::vector<int> DeclareBackwardDependency(
    const std::vector<int> &out_grad,
    const std::vector<int> &in_data,
    const std::vector<int> &out_data) const override {
    return {out_grad[sparsereg::kOut], in_data[sparsereg::kData]};
  }

  std::vector<std::pair<int, void*> > ForwardInplaceOption(
    const std::vector<int> &in_data,
    const std::vector<void*> &out_data) const override {
    return {{in_data[sparsereg::kData], out_data[sparsereg::kOut]}};
  }

  std::vector<std::pair<int, void*> > BackwardInplaceOption(
    const std::vector<int> &out_grad,
    const std::vector<int> &in_data,
    const std::vector<int> &out_data,
    const std::vector<void*> &in_grad) const override {
    return {{out_grad[sparsereg::kOut], in_grad[sparsereg::kData]}};
  }

  std::vector<std::string> ListAuxiliaryStates() const override {
    return {"moving_avg"};
  }

  std::vector<ResourceRequest> BackwardResource(
    const std::vector<TShape> &in_shape) const override {
    return {ResourceRequest::kTempSpace};
  }

  Operator* CreateOperator(Context ctx) const override;

 private:
  IdentityAttachKLSparseRegParam param_;
};  // class IdentityAttachKLSparseRegProp

#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_IDENTITY_ATTACH_KL_SPARSE_REG_INL_H_
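For intuition, here is a hedged NumPy transcription of what Backward computes, using made-up toy values (illustrative only, not part of the commit):

import numpy as np

penalty, sparseness_target, momentum = 0.001, 0.1, 0.9  # defaults from the param struct
data_in = np.array([[0.2, 0.8],
                    [0.4, 0.6]])        # sigmoid outputs: batch of 2, 2 hidden units
moving_avg = np.array([0.3, 0.7])       # auxiliary state, one slot per hidden unit
grad_out = np.ones_like(data_in)        # incoming gradient

avg = data_in.mean(axis=0)              # sumall_except_dim<1> divided by batch size
moving_avg = momentum * moving_avg + (1 - momentum) * avg
# broadcast<1> replays moving_avg across the batch; NumPy broadcasting does the same.
grad_in = grad_out + penalty * (-sparseness_target / moving_avg
                                + (1 - sparseness_target) / (1 - moving_avg))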
29 changes: 29 additions & 0 deletions src/operator/identity_attach_KL_sparse_reg.cc
@@ -0,0 +1,29 @@
/*!
 * Copyright (c) 2015 by Contributors
 * \file identity_attach_KL_sparse_reg.cc
 * \brief
 */
#include "./identity_attach_KL_sparse_reg-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(IdentityAttachKLSparseRegParam param) {
  return new IdentityAttachKLSparseRegOp<cpu>(param);
}

Operator *IdentityAttachKLSparseRegProp::CreateOperator(Context ctx) const {
  DO_BIND_DISPATCH(CreateOp, param_);
}

DMLC_REGISTER_PARAMETER(IdentityAttachKLSparseRegParam);

MXNET_REGISTER_OP_PROPERTY(IdentityAttachKLSparseReg, IdentityAttachKLSparseRegProp)
.describe("Apply a sparse regularization to the output of a sigmoid activation function.")
.add_argument("data", "Symbol", "Input data.")
.add_arguments(IdentityAttachKLSparseRegParam::__FIELDS__());


} // namespace op
} // namespace mxnet
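Once registered, the operator should be reachable from the Python symbol API under the same name; a minimal, hypothetical usage sketch for a sparse autoencoder layer (names and parameter values are illustrative):

import mxnet as mx

data = mx.symbol.Variable('data')
fc = mx.symbol.FullyConnected(data=data, num_hidden=128, name='encoder')
act = mx.symbol.Activation(data=fc, act_type='sigmoid', name='encoder_act')
# Forward is identity; backward attaches the KL sparseness gradient.
sparse = mx.symbol.IdentityAttachKLSparseReg(data=act, name='sparse_encoder',
                                             sparseness_target=0.05,
                                             penalty=0.001, momentum=0.9)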

16 changes: 16 additions & 0 deletions src/operator/identity_attach_KL_sparse_reg.cu
@@ -0,0 +1,16 @@
/*!
 * Copyright (c) 2015 by Contributors
 * \file identity_attach_KL_sparse_reg.cu
 * \brief
 */
#include "./identity_attach_KL_sparse_reg-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<gpu>(IdentityAttachKLSparseRegParam param) {
  return new IdentityAttachKLSparseRegOp<gpu>(param);
}

} // namespace op
} // namespace mxnet
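The GPU build only needs this CreateOp<gpu> specialization; the shared IdentityAttachKLSparseRegProp::CreateOperator in the .cc file picks between the cpu and gpu versions through DO_BIND_DISPATCH.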
