[caffe2] allow dropout to take 1.0 as dropout ratio to zero-out a layer (pytorch#72741)

Summary:
Pull Request resolved: pytorch#72741

as titled.

Context:
This is useful for quickly mitigating feature-induced overfitting: we can omni-transfer a trained model and apply dropout with ratio = 1 to the features that cause the overfitting. Directly removing those features is not feasible in omni-transfer scenarios, since the downstream FC sizes would change. See the sketch below for the intuition.
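For intuition only, a minimal NumPy sketch (the shapes, names, and two-feature setup are hypothetical, not taken from this PR): zeroing an overfitting feature's embedding with ratio = 1 preserves the concatenated width, so a downstream FC trained on the original width can be reused unchanged.

```python
import numpy as np

# Hypothetical embeddings for two sparse features (illustrative shapes only).
emb_keep = np.random.rand(4, 16).astype(np.float32)  # feature we keep
emb_drop = np.random.rand(4, 16).astype(np.float32)  # feature that overfits

# Dropout with ratio = 1.0 in training mode: scale is forced to 0,
# so the whole slice becomes zeros while keeping its shape.
emb_drop_zeroed = np.zeros_like(emb_drop)

fc_input = np.concatenate([emb_keep, emb_drop_zeroed], axis=1)  # width unchanged
fc_weight = np.random.rand(32, 8).astype(np.float32)            # trained on width 32
out = fc_input @ fc_weight
assert out.shape == (4, 8)  # downstream FC still applies without resizing
```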

Experimental records:
https://fb.quip.com/npIkAgRc8jl9#temp:C:DWC050ceaba14424d23a78462c01
Applying dropout = 1 on the selected features improves the eval NE over the next few hours (compared to the v0 baseline), as shown in the figures.

Test Plan:
```
buck test caffe2/caffe2/python/operator_test:dropout_op_test
```

Reviewed By: ustctf

Differential Revision: D34178732

fbshipit-source-id: 533feebe21bc582eefd756de397d5c7807c7438d
(cherry picked from commit 5dabf9c)
Xiaohan Wei authored and pytorchmergebot committed Feb 15, 2022
1 parent a7cac05 commit ca0ac3a
Showing 3 changed files with 34 additions and 5 deletions.
caffe2/operators/dropout_op.cc (2 additions, 3 deletions)
```diff
@@ -15,13 +15,12 @@ bool DropoutOp<float, CPUContext>::RunOnDevice() {
     return true;
   } else {
     // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
-    float scale = 1. / (1. - ratio_);
+    float scale = ratio_ >= 1.0 ? 0.0:1. / (1. - ratio_);
     // mask=true means keep, and mask=false means not keep, so we will
     // generate probability depending on 1-ratio.
     at::bernoulli_distribution<double> dist(1. - ratio_);
     const float* Xdata = X.data<float>();
     float* Ydata = Y->template mutable_data<float>();
-
     auto mask = Output(1, X.sizes(), at::dtype<bool>());
     bool* mask_data = mask->template mutable_data<bool>();
     auto* gen = context_.RandGenerator();
@@ -52,7 +51,7 @@ bool DropoutGradientOp<float, CPUContext>::RunOnDevice() {
   const bool* mask_data = mask.data<bool>();
   float* dXdata = dX->template mutable_data<float>();
   // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
-  float scale = 1. / (1. - ratio_);
+  float scale = ratio_ >= 1.0 ? 0.0:1. / (1. - ratio_);
   for (int i = 0; i < dY.numel(); ++i) {
     // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
     dXdata[i] = dYdata[i] * mask_data[i] * scale;
```
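The only semantic change above is that the scale factor is forced to 0 when `ratio_ >= 1.0`, so the `1 / (1 - ratio_)` division is never evaluated at ratio 1. A minimal NumPy sketch of that arithmetic (not the C++ kernels themselves; function names are illustrative):

```python
import numpy as np

def dropout_forward(x, ratio, is_test=False, rng=np.random.default_rng(0)):
    """Mirror of the patched forward math: scale is 0 when ratio >= 1."""
    if is_test:
        return x, None
    scale = 0.0 if ratio >= 1.0 else 1.0 / (1.0 - ratio)
    mask = rng.random(x.shape) < (1.0 - ratio)  # keep-probability is 1 - ratio
    return x * mask * scale, mask

def dropout_backward(dy, mask, ratio):
    """Mirror of the patched gradient math."""
    scale = 0.0 if ratio >= 1.0 else 1.0 / (1.0 - ratio)
    return dy * mask * scale

x = np.ones((2, 3), dtype=np.float32)
y, mask = dropout_forward(x, ratio=1.0)
dx = dropout_backward(np.ones_like(x), mask, ratio=1.0)
assert np.all(y == 0) and np.all(dx == 0)  # ratio = 1 zeroes output and gradient
```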
caffe2/operators/dropout_op.h (0 additions, 2 deletions)
```diff
@@ -19,7 +19,6 @@ class DropoutOp final : public Operator<Context> {
         is_test_(
             this->template GetSingleArgument<int>(OpSchema::Arg_IsTest, 0)) {
     CAFFE_ENFORCE_GE(ratio_, 0);
-    CAFFE_ENFORCE_LT(ratio_, 1);
   }
 
   bool RunOnDevice() override;
@@ -41,7 +40,6 @@ class DropoutGradientOp final : public Operator<Context> {
         is_test_(
             this->template GetSingleArgument<int>(OpSchema::Arg_IsTest, 0)) {
     CAFFE_ENFORCE_GE(ratio_, 0);
-    CAFFE_ENFORCE_LT(ratio_, 1);
   }
 
   bool RunOnDevice() override;
```
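With the upper-bound enforce removed, constructing and running a Dropout op with ratio=1.0 should no longer throw. A hedged usage sketch with the caffe2 Python bindings (blob names and shapes are arbitrary):

```python
import numpy as np
from caffe2.python import core, workspace

workspace.FeedBlob("X", np.random.rand(4, 8).astype(np.float32))
op = core.CreateOperator(
    "Dropout", ["X"], ["Y", "mask"],
    ratio=1.0,  # previously rejected by CAFFE_ENFORCE_LT(ratio_, 1)
    is_test=0)
workspace.RunOperatorOnce(op)
print(workspace.FetchBlob("Y"))  # expected to be all zeros at ratio = 1
```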
caffe2/python/operator_test/dropout_op_test.py (32 additions, 0 deletions)
```diff
@@ -74,3 +74,35 @@ def reference_dropout_ratio0(x):
             gc, op, [X], reference_dropout_ratio0,
             # Don't check the mask with cuDNN because it's packed data
             outputs_to_check=None if engine != 'CUDNN' else [0])
+
+
+    @given(X=hu.tensor(),
+           in_place=st.booleans(),
+           output_mask=st.booleans(),
+           engine=st.sampled_from(["", "CUDNN"]),
+           **hu.gcs)
+    @settings(deadline=10000)
+    def test_dropout_ratio1(self, X, in_place, output_mask, engine, gc, dc):
+        """Test with ratio=1 for a deterministic reference impl."""
+        if in_place:
+            # Skip if trying in-place on GPU
+            assume(gc.device_type not in {caffe2_pb2.CUDA, caffe2_pb2.HIP})
+            # If in-place on CPU, don't compare with GPU
+            dc = dc[:1]
+        is_test = not output_mask
+        op = core.CreateOperator("Dropout", ["X"],
+                                 ["X" if in_place else "Y"] +
+                                 (["mask"] if output_mask else []),
+                                 ratio=1.0, engine=engine,
+                                 is_test=is_test)
+
+        self.assertDeviceChecks(dc, op, [X], [0])
+        if not is_test:
+            self.assertGradientChecks(gc, op, [X], 0, [0])
+
+        def reference_dropout_ratio1(x):
+            return (x,) if is_test else (np.zeros(x.shape, dtype=np.float), np.zeros(x.shape, dtype=np.bool))
+        self.assertReferenceChecks(
+            gc, op, [X], reference_dropout_ratio1,
+            # Don't check the mask with cuDNN because it's packed data
+            outputs_to_check=None if engine != 'CUDNN' else [0])
```
