nn.clearState
szagoruyko committed Feb 9, 2016
1 parent 4a43346 commit b4ebdf2
Showing 50 changed files with 380 additions and 67 deletions.
11 changes: 11 additions & 0 deletions BatchNormalization.lua
@@ -154,3 +154,14 @@ function BN:accGradParameters(input, gradOutput, scale)
self.gradBias:add(scale, self.buffer)
end
end

function BN:clearState()
nn.utils.clear(self, {
'buffer',
'buffer2',
'centered',
'std',
'normalized',
})
return parent.clearState(self)
end
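
All of the new clearState implementations below lean on nn.utils.clear, which is defined in utils.lua and is not among the files shown on this page. Judging from how it is called throughout this diff (with either a table of field names or plain vararg strings, and with its result returned directly), a minimal sketch of such a helper might look like the following; treat it as a hypothetical reconstruction, not the actual utils.lua code:

-- Hypothetical sketch of an nn.utils.clear-style helper: release tensor fields
-- in place, empty table fields, and nil out anything else, then return self.
function nn.utils.clear(self, ...)
   local fields = {...}
   if #fields > 0 and type(fields[1]) == 'table' then
      fields = fields[1]                 -- accept nn.utils.clear(self, {...}) too
   end
   local function clear(f)
      if self[f] then
         if torch.isTensor(self[f]) then
            self[f]:set()                -- keep the tensor object, drop its storage
         elseif type(self[f]) == 'table' then
            self[f] = {}
         else
            self[f] = nil
         end
      end
   end
   for _, f in ipairs(fields) do clear(f) end
   return self
end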
5 changes: 5 additions & 0 deletions Bilinear.lua
@@ -140,3 +140,8 @@ function Bilinear:__tostring__()
(self.bias == nil and ' without bias' or '')
)
end

function Bilinear:clearState()
if self.buff then self.buff:set() end
return parent.clearState(self)
end
21 changes: 14 additions & 7 deletions CMul.lua
@@ -116,13 +116,20 @@ end

function CMul:type(type, tensorCache)
if type then
-      self._input = nil
-      self._output = nil
-      self._weight = nil
-      self._gradWeight = nil
-      self._expand = nil
-      self._repeat = nil
-      self._sum = nil
+      self:clearState()
end
return parent.type(self, type, tensorCache)
end

function CMul:clearState()
nn.utils.clear(self, {
'_input',
'_output',
'_weight',
'_gradWeight',
'_expand',
'_repeat',
'_sum',
})
return parent.clearState(self)
end
5 changes: 5 additions & 0 deletions CMulTable.lua
@@ -48,3 +48,8 @@ function CMulTable:updateGradInput(input, gradOutput)

return self.gradInput
end

function CMulTable:clearState()
if self.tout then self.tout:set() end
return parent.clearState(self)
end
23 changes: 23 additions & 0 deletions Container.lua
@@ -75,3 +75,26 @@ function Container:parameters()
end
return w,gw
end

function Container:clearState()
-- don't call set because it might reset referenced tensors
local function clear(f)
if self[f] then
if torch.isTensor(self[f]) then
self[f] = self[f].new()
elseif type(self[f]) == 'table' then
self[f] = {}
else
self[f] = nil
end
end
end
clear('output')
clear('gradInput')
if self.modules then
for i,module in pairs(self.modules) do
module:clearState()
end
end
return self
end
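
Because Container:clearState() also walks self.modules, clearing the outermost container clears every child module as well; the fresh new() tensors (instead of :set()) presumably avoid resetting tensors that children or callers still reference, as the comment notes. A small usage sketch with arbitrary layer sizes:

-- Clearing a whole network through its container (layer sizes are arbitrary).
local net = nn.Sequential()
net:add(nn.Linear(10, 20))
net:add(nn.Tanh())

net:forward(torch.randn(8, 10))
print(net:get(1).output:nElement())   -- 160: the first layer still holds activations
net:clearState()
print(net:get(1).output:nElement())   -- 0: children were cleared recursively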
12 changes: 12 additions & 0 deletions Cosine.lua
@@ -161,3 +161,15 @@ function Cosine:type(type, tensorCache)
end
return parent.type(self, type, tensorCache)
end

function Cosine:clearState()
nn.utils.clear(self, {
'_input',
'_weight',
'_gradOutput',
'_sum',
'_inputNorm',
'_weightNorm',
})
return parent.clearState(self)
end
17 changes: 17 additions & 0 deletions CosineDistance.lua
@@ -73,6 +73,11 @@ function CosineDistance:updateGradInput(input, gradOutput)
not_batch = true
end

if #self.gradInput ~= 2 then
self.gradInput[1] = self.gradInput[1] or v1.new()
self.gradInput[2] = self.gradInput[2] or v1.new()
end

local gw1 = self.gradInput[1]
local gw2 = self.gradInput[2]
gw1:resizeAs(v1):copy(v2)
@@ -97,3 +102,15 @@ function CosineDistance:updateGradInput(input, gradOutput)

return self.gradInput
end

function CosineDistance:clearState()
nn.utils.clear(self, {
'buffer',
'w1',
'w22',
'w',
'w32',
'ones',
})
return parent.clearState(self)
end
10 changes: 10 additions & 0 deletions DotProduct.lua
@@ -26,6 +26,11 @@ function DotProduct:updateGradInput(input, gradOutput)
local v2 = input[2]
local not_batch = false

if #self.gradInput ~= 2 then
self.gradInput[1] = self.gradInput[1] or input[1].new()
self.gradInput[2] = self.gradInput[2] or input[2].new()
end

if v1:dim() == 1 then
v1 = v1:view(1,-1)
v2 = v2:view(1,-1)
@@ -49,3 +54,8 @@ function DotProduct:updateGradInput(input, gradOutput)

return self.gradInput
end

function DotProduct:clearState()
if self.buffer then self.buffer:set() end
return parent.clearState(self)
end
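
The guards added above in CosineDistance and DotProduct appear to be needed because clearState() resets the table-valued gradInput to an empty table, so the two per-input slots must be re-created on the next backward pass. A hypothetical round-trip exercising that path (sizes made up):

-- Hypothetical round-trip: backward still works after clearState thanks to the guard.
local m = nn.DotProduct()
local x = {torch.randn(5), torch.randn(5)}
m:forward(x)
m:backward(x, torch.Tensor{1})
m:clearState()                        -- gradInput becomes an empty table
m:forward(x)
m:backward(x, torch.Tensor{1})        -- guard re-creates gradInput[1] and gradInput[2]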
8 changes: 8 additions & 0 deletions Dropout.lua
@@ -64,3 +64,11 @@ end
function Dropout:__tostring__()
return string.format('%s(%f)', torch.type(self), self.p)
end


function Dropout:clearState()
if self.noise then
self.noise:set()
end
return Parent.clearState(self)
end
29 changes: 18 additions & 11 deletions Euclidean.lua
@@ -174,17 +174,24 @@ end
function Euclidean:type(type, tensorCache)
if type then
-- prevent premature memory allocations
-      self._input = nil
-      self._output = nil
-      self._gradOutput = nil
-      self._weight = nil
-      self._div = nil
-      self._sum = nil
-      self._expand = nil
-      self._expand2 = nil
-      self._expand3 = nil
-      self._repeat = nil
-      self._repeat2 = nil
+      self:clearState()
end
return parent.type(self, type, tensorCache)
end

function Euclidean:clearState()
nn.utils.clear(self, {
'_input',
'_output',
'_gradOutput',
'_weight',
'_div',
'_sum',
'_expand',
'_expand2',
'_expand3',
'_repeat',
'_repeat2',
})
return parent.clearState(self)
end
6 changes: 5 additions & 1 deletion FlattenTable.lua
@@ -97,6 +97,10 @@ end
function FlattenTable:type(type, tensorCache)
-- This function just stores references so we don't need to do any type
-- conversions. Just force the tables to be empty.
-   self.output = {}
+   self:clearState()
end

function FlattenTable:clearState()
self.input_map = {}
return parent.clearState(self)
end
2 changes: 1 addition & 1 deletion GradientReversal.lua
@@ -1,7 +1,7 @@
local GradientReversal = torch.class('nn.GradientReversal', 'nn.Module')

function GradientReversal:updateOutput(input)
-   self.output = input
+   self.output:set(input)
return self.output
end

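The GradientReversal change swaps a plain assignment for :set(input): the module keeps its own tensor object that merely shares the input's storage, so a later clearState(), which releases self.output, cannot wipe the caller's tensor. A hypothetical sketch of the difference:

-- Hypothetical sketch: assignment aliases the object, :set() only shares storage.
local inputA = torch.randn(3)
local out = torch.Tensor()
out = inputA             -- 'out' is now the very same tensor object as 'inputA'
out:set()                -- clearing it also empties 'inputA'

local inputB = torch.randn(3)
local out2 = torch.Tensor()
out2:set(inputB)         -- a separate tensor object viewing inputB's storage
out2:set()               -- releases only out2; 'inputB' keeps its data
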
18 changes: 18 additions & 0 deletions Identity.lua
@@ -10,3 +10,21 @@ function Identity:updateGradInput(input, gradOutput)
self.gradInput = gradOutput
return self.gradInput
end

function Identity:clearState()
-- don't call set because it might reset referenced tensors
local function clear(f)
if self[f] then
if torch.isTensor(self[f]) then
self[f] = self[f].new()
elseif type(self[f]) == 'table' then
self[f] = {}
else
self[f] = nil
end
end
end
clear('output')
clear('gradInput')
return self
end
1 change: 1 addition & 0 deletions Jacobian.lua
@@ -305,6 +305,7 @@ function nn.Jacobian.testIO(module,input, minval, maxval)
-- write module
local filename = os.tmpname()
local f = torch.DiskFile(filename, 'w'):binary()
module:clearState()
f:writeObject(module)
f:close()
-- read module
5 changes: 5 additions & 0 deletions L1Cost.lua
@@ -23,3 +23,8 @@ function L1Cost:updateGradInput(input)
)
return self.gradInput
end

function L1Cost:clearState()
if self.output_tensor then self.output_tensor:set() end
return parent.clearState(self)
end
4 changes: 4 additions & 0 deletions L1Penalty.lua
@@ -41,3 +41,7 @@ function L1Penalty:updateGradInput(input, gradOutput)
return self.gradInput
end

function L1Penalty:clearState()
if self.loss then self.loss:set() end
return parent.clearState(self)
end
4 changes: 4 additions & 0 deletions Linear.lua
@@ -95,6 +95,10 @@ end
-- we do not need to accumulate parameters when sharing
Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters

function Linear:clearState()
if self.addBuffer then self.addBuffer:set() end
return parent.clearState(self)
end

function Linear:__tostring__()
return torch.type(self) ..
6 changes: 6 additions & 0 deletions LogSigmoid.lua
@@ -23,3 +23,9 @@ function LogSigmoid:updateGradInput(input, gradOutput)
)
return self.gradInput
end

function LogSigmoid:clearState()
if self.buffer then self.buffer:set() end
return parent.clearState(self)
end

4 changes: 4 additions & 0 deletions LookupTable.lua
@@ -100,5 +100,9 @@ function LookupTable:type(type, tensorCache)
return self
end

function LookupTable:clearState()
return self
end

-- we do not need to accumulate parameters when sharing
LookupTable.sharedAccUpdateGradParameters = LookupTable.accUpdateGradParameters
5 changes: 5 additions & 0 deletions Max.lua
@@ -63,3 +63,8 @@ function Max:type(type, tensorCache)
end
return self
end

function Max:clearState()
nn.utils.clear(self, '_indices', '_output')
return parent.clearState(self)
end
5 changes: 5 additions & 0 deletions Min.lua
@@ -63,3 +63,8 @@ function Min:type(type, tensorCache)
end
return self
end

function Min:clearState()
nn.utils.clear(self, '_indices', '_output')
return parent.clearState(self)
end
12 changes: 12 additions & 0 deletions MixtureTable.lua
@@ -156,3 +156,15 @@ function MixtureTable:type(type, tensorCache)
self._expertView2 = nil
return parent.type(self, type, tensorCache)
end

function MixtureTable:clearState()
nn.utils.clear(self, {
'_gaterView',
'_expert',
'_expertView',
'_sum',
'_expert2',
'_expertView2',
})
return parent.clearState(self)
end
4 changes: 4 additions & 0 deletions Module.lua
@@ -364,3 +364,7 @@ function Module:listModules()
end
return modules
end

function Module:clearState()
return nn.utils.clear(self, 'output', 'gradInput')
end
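
Module:clearState() is the generic fallback that the overrides above delegate to through parent.clearState: it drops output and gradInput, and each module clears its own scratch buffers on top. The typical use, as in the Jacobian.testIO change earlier in this diff, is to strip intermediate state before serializing a trained module; a sketch with a made-up file name:

-- Sketch: clear intermediate state before saving (file name is made up).
local model = nn.Linear(100, 10)
model:forward(torch.randn(32, 100))   -- output now holds a 32x10 activation buffer

model:clearState()                    -- drops output, gradInput and addBuffer
torch.save('model.t7', model)         -- the saved file no longer carries activations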
13 changes: 13 additions & 0 deletions Normalize.lua
@@ -140,3 +140,13 @@ function Normalize:type(type, tensorCache)
end
return self
end

function Normalize:clearState()
nn.utils.clear(self, {
'_output',
'_indices',
'_gradInput',
'buffer',
'norm',
'normp',
'cross',
})
return parent.clearState(self)
end
5 changes: 5 additions & 0 deletions PReLU.lua
@@ -45,3 +45,8 @@ function PReLU:accGradParameters(input, gradOutput, scale)
)
return self.gradWeight
end

function PReLU:clearState()
nn.utils.clear(self, 'gradWeightBuf', 'gradWeightBuf2')
return parent.clearState(self)
end
6 changes: 6 additions & 0 deletions PairwiseDistance.lua
@@ -10,6 +10,7 @@ function PairwiseDistance:__init(p)
end

function PairwiseDistance:updateOutput(input)
self.output:resize(1)
if input[1]:dim() == 1 then
self.output:resize(1)
self.output[1]=input[1]:dist(input[2],self.norm)
@@ -83,3 +84,8 @@ function PairwiseDistance:updateGradInput(input, gradOutput)
self.gradInput[2]:zero():add(-1, self.gradInput[1])
return self.gradInput
end

function PairwiseDistance:clearState()
nn.utils.clear(self, 'diff', 'outExpand', 'grad', 'ones')
return parent.clearState(self)
end
5 changes: 5 additions & 0 deletions RReLU.lua
@@ -43,3 +43,8 @@ end
function RReLU:__tostring__()
return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper)
end

function RReLU:clearState()
if self.noise then self.noise:set() end
return parent.clearState(self)
end
(The remaining changed files in this commit are not shown on this page.)