Commit
Merge pull request #2231 from antinucleon/softmax_caffe
[Op] Add softmax norm option (mimic caffe)
antinucleon committed May 25, 2016
2 parents 5f0a403 + 4ef6972 commit 2d97cac
Showing 1 changed file with 60 additions and 2 deletions: src/operator/softmax_output-inl.h
@@ -23,13 +23,16 @@ namespace op {
namespace softmaxout_enum {
enum SoftmaxOutputOpInputs {kData, kLabel};
enum SoftmaxOutputOpOutputs {kOut};
+ enum SoftmaxOutputNormType {kNull, kBatch, kValid};
+ enum SoftmaxOutputOpResource {kTempSpace};
} // namespace softmaxout_enum

struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
float grad_scale;
float ignore_label;
bool multi_output;
bool use_ignore;
+ int normalization;
DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) {
DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
.describe("Scale the gradient by a float factor");
@@ -43,6 +43,14 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
DMLC_DECLARE_FIELD(use_ignore).set_default(false)
.describe("If set to true, the ignore_label value will not contribute "
"to the backward gradient");
+ DMLC_DECLARE_FIELD(normalization)
+ .add_enum("null", softmaxout_enum::kNull)
+ .add_enum("batch", softmaxout_enum::kBatch)
+ .add_enum("valid", softmaxout_enum::kValid)
+ .set_default(softmaxout_enum::kNull)
+ .describe("If set to null, op will do nothing on output gradient. "
+ "If set to batch, op will normalize gradient by dividing by batch size. "
+ "If set to valid, op will normalize gradient by dividing by the number of samples not ignored.");
};
};
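For intuition, here is a minimal standalone C++ sketch (not part of the patch) of how the three modes map to an effective gradient scale factor; GradScale and the sample numbers are illustrative only. Note the multi_output path below additionally divides by the per-sample spatial size s3[2] in the null and batch modes, which this sketch omits.

#include <cstdio>

enum NormType { kNull, kBatch, kValid };

// Effective scale applied to the softmax gradient under each mode, given
// grad_scale, the batch size, and the count of labels not ignored.
float GradScale(NormType norm, float grad_scale, int batch_size, int valid_cnt) {
  switch (norm) {
    case kBatch: return grad_scale / batch_size;                   // divide by batch size
    case kValid: return grad_scale / (valid_cnt ? valid_cnt : 1);  // divide by non-ignored count
    default:     return grad_scale;                                // "null": scale only
  }
}

int main() {
  // e.g. a batch of 8 samples, 2 of which carry the ignore_label
  std::printf("null=%g batch=%g valid=%g\n",
              GradScale(kNull, 1.0f, 8, 6),
              GradScale(kBatch, 1.0f, 8, 6),
              GradScale(kValid, 1.0f, 8, 6));
  return 0;
}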

@@ -91,6 +102,7 @@ class SoftmaxOutputOp : public Operator {
CHECK_GE(in_grad.size(), 1);
CHECK_GE(req.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();

if (param_.multi_output) {
int n = out_data[softmaxout_enum::kOut].size(0);
int k = out_data[softmaxout_enum::kOut].size(1);
@@ -100,24 +112,65 @@ class SoftmaxOutputOp : public Operator {
out_data[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
Tensor<xpu, 3, DType> grad =
in_grad[softmaxout_enum::kData].get_with_shape<xpu, 3, DType>(s3, s);

+ index_t valid_cnt = label.shape_.Size();
if (param_.use_ignore) {
SoftmaxGrad(grad, out, label, static_cast<DType>(param_.ignore_label));
} else {
SoftmaxGrad(grad, out, label);
}
- grad *= DType(param_.grad_scale/s3[2]);
+ if (param_.normalization == softmaxout_enum::kBatch) {
+ valid_cnt = label.size(0);
+ } else if (param_.normalization == softmaxout_enum::kValid) {
+ int i_label = static_cast<int>(param_.ignore_label);
+ Tensor<cpu, 2, DType> workspace =
+ ctx.requested[softmaxout_enum::kTempSpace].get_host_space_typed<2, DType>(
+ label.shape_);
+ Copy(workspace, label, label.stream_);
+ for (index_t i = 0; i < workspace.size(0); ++i) {
+ for (index_t j = 0; j < workspace.size(1); ++j) {
+ if (static_cast<int>(workspace[i][j]) == i_label) {
+ valid_cnt--;
+ }
+ }
+ }
+ valid_cnt = valid_cnt == 0 ? 1 : valid_cnt;
+ } else {
+ valid_cnt = 1;
+ }
+ grad *= DType(param_.grad_scale /
+ (param_.normalization == softmaxout_enum::kValid ? 1 : s3[2]) /
+ valid_cnt);
} else {
const TShape& label_shape = in_data[softmaxout_enum::kLabel].shape_;
Tensor<xpu, 1, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 1, DType>(
Shape1(label_shape.ProdShape(0, label_shape.ndim())), s);
Tensor<xpu, 2, DType> out = out_data[softmaxout_enum::kOut].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> grad = in_grad[softmaxout_enum::kData].FlatTo2D<xpu, DType>(s);
+ index_t valid_cnt = label.shape_.Size();
if (param_.use_ignore) {
SoftmaxGrad(grad, out, label, static_cast<DType>(param_.ignore_label));
} else {
SoftmaxGrad(grad, out, label);
}
- grad *= DType(param_.grad_scale);
+ if (param_.normalization == softmaxout_enum::kBatch) {
+ valid_cnt = label.size(0);
+ } else if (param_.normalization == softmaxout_enum::kValid) {
+ int i_label = static_cast<int>(param_.ignore_label);
+ Tensor<cpu, 1, DType> workspace =
+ ctx.requested[softmaxout_enum::kTempSpace].get_host_space_typed<1, DType>(
+ label.shape_);
+ Copy(workspace, label, label.stream_);
+ for (index_t i = 0; i < label.size(0); ++i) {
+ if (static_cast<int>(workspace[i]) == i_label) {
+ valid_cnt--;
+ }
+ }
+ valid_cnt = valid_cnt == 0 ? 1 : valid_cnt;
+ } else {
+ valid_cnt = 1;
+ }
+ grad *= DType(param_.grad_scale / valid_cnt);
}
}
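As a companion to the valid branch above, a minimal host-side sketch of the counting step in the equivalent count-up form (the patch counts down from the total instead); CountValid and its signature are illustrative names, not part of the patch.

#include <cstdio>
#include <vector>

// Count labels that differ from ignore_label; clamp to 1 so the later
// division never hits zero (mirrors the valid_cnt logic above).
int CountValid(const std::vector<float>& labels, int ignore_label) {
  int valid_cnt = 0;
  for (float l : labels) {
    if (static_cast<int>(l) != ignore_label) ++valid_cnt;
  }
  return valid_cnt == 0 ? 1 : valid_cnt;
}

int main() {
  // 6 labels, two of them set to the ignore value -1
  std::vector<float> labels = {0, 1, -1, 2, -1, 3};
  std::printf("valid = %d\n", CountValid(labels, -1));  // prints "valid = 4"
  return 0;
}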

@@ -216,6 +269,11 @@ class SoftmaxOutputProp : public OperatorProperty {
return {{in_data[softmaxout_enum::kData], out_data[softmaxout_enum::kOut]}};
}

+ std::vector<ResourceRequest> BackwardResource(
+ const std::vector<TShape> &in_shape) const override {
+ return {ResourceRequest::kTempSpace};
+ }

Operator* CreateOperator(Context ctx) const override {
LOG(FATAL) << "Not Implemented.";
return NULL;
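Note that the new BackwardResource override is what makes ctx.requested[softmaxout_enum::kTempSpace] available in the valid branch of Backward above: the label tensor is copied into host temp space so the ignore-label count can be taken with a plain CPU loop.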
