
Commit 90c7ea9

Merge pull request pytorch#198 from chsasank/new-fixes

Multiple small fixes

2 parents 705a883 + 8e534fc

File tree: 8 files changed, +53 -27 lines changed

advanced_source/numpy_extensions_tutorial.py

Lines changed: 11 additions & 7 deletions

@@ -86,17 +86,21 @@ def incorrect_fft(input):


 class ScipyConv2dFunction(Function):
-
-    def forward(self, input, filter):
+    @staticmethod
+    def forward(ctx, input, filter):
         result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
-        self.save_for_backward(input, filter)
+        ctx.save_for_backward(input, filter)
         return torch.FloatTensor(result)

-    def backward(self, grad_output):
-        input, filter = self.saved_tensors
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, filter = ctx.saved_tensors
+        grad_output = grad_output.data
         grad_input = convolve2d(grad_output.numpy(), filter.t().numpy(), mode='full')
         grad_filter = convolve2d(input.numpy(), grad_output.numpy(), mode='valid')
-        return torch.FloatTensor(grad_input), torch.FloatTensor(grad_filter)
+
+        return Variable(torch.FloatTensor(grad_input)), \
+            Variable(torch.FloatTensor(grad_filter))


 class ScipyConv2d(Module):

@@ -106,7 +110,7 @@ def __init__(self, kh, kw):
         self.filter = Parameter(torch.randn(kh, kw))

     def forward(self, input):
-        return ScipyConv2dFunction()(input, self.filter)
+        return ScipyConv2dFunction.apply(input, self.filter)

 ###############################################################
 # **Example usage:**
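
Note (not part of the commit): the change above moves to the static forward/backward style, where state is stashed on a ctx object and the Function is invoked through .apply. A minimal sketch of that pattern in isolation, using a hypothetical DoubleFn instead of the scipy-based convolution, and assuming a PyTorch version where tensors take requires_grad directly:

import torch
from torch.autograd import Function

class DoubleFn(Function):
    @staticmethod
    def forward(ctx, input):
        # stash what backward needs on ctx, not on self
        ctx.save_for_backward(input)
        return input * 2

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors   # retrieved from the context object
        return grad_output * 2       # d(2x)/dx = 2

x = torch.randn(3, requires_grad=True)
y = DoubleFn.apply(x).sum()          # Function.apply, as in the updated tutorial
y.backward()
print(x.grad)                        # expected: a tensor of 2s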

beginner_source/blitz/neural_networks_tutorial.py

Lines changed: 8 additions & 0 deletions

@@ -253,3 +253,11 @@ def num_flat_features(self, x):
 loss = criterion(output, target)
 loss.backward()
 optimizer.step()    # Does the update
+
+
+###############################################################
+# .. Note::
+#
+#       Observe how gradient buffers had to be manually set to zero using
+#       ``optimizer.zero_grad()``. This is because gradients are accumulated
+#       as explained in `Backprop`_ section.
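
Note (not part of the commit): the added note is about gradient accumulation. A minimal sketch showing the behavior it describes, written against current torch.nn/torch.optim rather than the tutorial's exact code:

import torch
import torch.nn as nn
import torch.optim as optim

lin = nn.Linear(3, 1)
opt = optim.SGD(lin.parameters(), lr=0.01)
x = torch.randn(2, 3)

loss = lin(x).sum()
loss.backward()
first = lin.weight.grad.clone()

loss = lin(x).sum()
loss.backward()                                      # without zero_grad, gradients add up
print(torch.allclose(lin.weight.grad, 2 * first))    # True

opt.zero_grad()            # reset the buffers before the next step
print(lin.weight.grad)     # zeroed (or None on newer versions, where set_to_none is the default)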

beginner_source/blitz/tensor_tutorial.py

Lines changed: 7 additions & 0 deletions

@@ -79,6 +79,13 @@

 print(x[:, 1])

+###############################################################
+# Resizing: If you want to resize/reshape tensor, you can use ``torch.view``:
+x = torch.randn(4, 4)
+y = x.view(16)
+z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
+print(x.size(), y.size(), z.size())
+
 ###############################################################
 # **Read later:**
 #
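
Note (not part of the commit): one property worth knowing alongside the added view example is that a view shares storage with the original tensor. A small sketch:

import torch

x = torch.randn(4, 4)
z = x.view(-1, 8)      # -1 is inferred: 16 elements / 8 columns = 2 rows
print(z.size())        # torch.Size([2, 8])

z[0, 0] = 100.0        # a view shares storage with the original tensor,
print(x[0, 0])         # so the change is visible through x as well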

beginner_source/data_loading_tutorial.py

Lines changed: 7 additions & 6 deletions

@@ -66,9 +66,9 @@
 landmarks_frame = pd.read_csv('faces/face_landmarks.csv')

 n = 65
-img_name = landmarks_frame.ix[n, 0]
-landmarks = landmarks_frame.ix[n, 1:].as_matrix().astype('float')
-landmarks = landmarks.reshape(-1, 2)
+img_name = landmarks_frame.iloc[n, 0]
+landmarks = landmarks_frame.iloc[n, 1:].as_matrix()
+landmarks = landmarks.astype('float').reshape(-1, 2)

 print('Image name: {}'.format(img_name))
 print('Landmarks shape: {}'.format(landmarks.shape))

@@ -136,10 +136,11 @@ def __len__(self):
         return len(self.landmarks_frame)

     def __getitem__(self, idx):
-        img_name = os.path.join(self.root_dir, self.landmarks_frame.ix[idx, 0])
+        img_name = os.path.join(self.root_dir,
+                                self.landmarks_frame.iloc[idx, 0])
         image = io.imread(img_name)
-        landmarks = self.landmarks_frame.ix[idx, 1:].as_matrix().astype('float')
-        landmarks = landmarks.reshape(-1, 2)
+        landmarks = self.landmarks_frame.iloc[idx, 1:].as_matrix()
+        landmarks = landmarks.astype('float').reshape(-1, 2)
         sample = {'image': image, 'landmarks': landmarks}

         if self.transform:
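
Note (not part of the commit): the change swaps the deprecated .ix indexer for positional .iloc. A self-contained sketch of the same indexing on a hypothetical frame standing in for faces/face_landmarks.csv; .to_numpy() is used here because .as_matrix() was removed in newer pandas:

import pandas as pd

df = pd.DataFrame({'image_name': ['a.jpg', 'b.jpg'],
                   'part_0_x': [1, 3], 'part_0_y': [2, 4]})

img_name = df.iloc[0, 0]                         # positional indexing, as in the diff
coords = df.iloc[0, 1:].to_numpy()               # .values / .to_numpy() in newer pandas
coords = coords.astype('float').reshape(-1, 2)
print(img_name, coords)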

beginner_source/examples_autograd/two_layer_net_custom_function.py

Lines changed: 12 additions & 9 deletions

@@ -23,22 +23,25 @@ class MyReLU(torch.autograd.Function):
     which operate on Tensors.
     """

-    def forward(self, input):
+    @staticmethod
+    def forward(ctx, input):
         """
-        In the forward pass we receive a Tensor containing the input and return a
-        Tensor containing the output. You can cache arbitrary Tensors for use in the
-        backward pass using the save_for_backward method.
+        In the forward pass we receive a Tensor containing the input and return
+        a Tensor containing the output. ctx is a context object that can be used
+        to stash information for backward computation. You can cache arbitrary
+        objects for use in the backward pass using the ctx.save_for_backward method.
         """
-        self.save_for_backward(input)
+        ctx.save_for_backward(input)
         return input.clamp(min=0)

-    def backward(self, grad_output):
+    @staticmethod
+    def backward(ctx, grad_output):
         """
         In the backward pass we receive a Tensor containing the gradient of the loss
         with respect to the output, and we need to compute the gradient of the loss
         with respect to the input.
         """
-        input, = self.saved_tensors
+        input, = ctx.saved_tensors
         grad_input = grad_output.clone()
         grad_input[input < 0] = 0
         return grad_input

@@ -61,8 +64,8 @@ def backward(self, grad_output):

 learning_rate = 1e-6
 for t in range(500):
-    # Construct an instance of our MyReLU class to use in our network
-    relu = MyReLU()
+    # To apply our Function, we use Function.apply method. We alias this as 'relu'.
+    relu = MyReLU.apply

     # Forward pass: compute predicted y using operations on Variables; we compute
     # ReLU using our custom autograd operation.
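
Note (not part of the commit): a quick way to sanity-check the rewritten static backward is torch.autograd.gradcheck, which compares it against numerical gradients. A sketch assuming double-precision inputs (it should print True for inputs away from zero, where ReLU is differentiable):

import torch
from torch.autograd import gradcheck

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

x = torch.randn(5, dtype=torch.double, requires_grad=True)
print(gradcheck(MyReLU.apply, (x,)))   # numerically checks backward()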

beginner_source/examples_nn/two_layer_net_optim.py

Lines changed: 4 additions & 2 deletions

@@ -47,8 +47,10 @@
     print(t, loss.data[0])

     # Before the backward pass, use the optimizer object to zero all of the
-    # gradients for the variables it will update (which are the learnable weights
-    # of the model)
+    # gradients for the variables it will update (which are the learnable
+    # weights of the model). This is because by default, gradients are
+    # accumulated in buffers( i.e, not overwritten) whenever .backward()
+    # is called. Checkout docs of torch.autograd.backward for more details.
     optimizer.zero_grad()

     # Backward pass: compute gradient of the loss with respect to model
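
Note (not part of the commit): the expanded comment describes the standard per-step ordering. A minimal sketch of that ordering, written against current torch.nn/torch.optim rather than the tutorial's Variable-era code:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 1)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
x, y = torch.randn(16, 10), torch.randn(16, 1)

for t in range(5):
    optimizer.zero_grad()                          # clear accumulated gradients first
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                                # populate .grad for each parameter
    optimizer.step()                               # apply the update
    print(t, loss.item())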

beginner_source/nlp/pytorch_tutorial.py

Lines changed: 2 additions & 2 deletions

@@ -244,8 +244,8 @@
 y = torch.randn((2, 2))
 z = x + y  # These are Tensor types, and backprop would not be possible

-var_x = autograd.Variable(x)
-var_y = autograd.Variable(y)
+var_x = autograd.Variable(x, requires_grad=True)
+var_y = autograd.Variable(y, requires_grad=True)
 # var_z contains enough information to compute gradients, as we saw above
 var_z = var_x + var_y
 print(var_z.grad_fn)
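
Note (not part of the commit): the fix adds requires_grad=True so the result actually carries a grad_fn. The Variable wrapper has since merged into Tensor; a sketch of the same effect with plain tensors:

import torch

x = torch.randn(2, 2)
y = torch.randn(2, 2)
print((x + y).grad_fn)           # None: no graph is recorded for plain tensors

vx = torch.randn(2, 2, requires_grad=True)
vy = torch.randn(2, 2, requires_grad=True)
vz = vx + vy
print(vz.grad_fn)                # an AddBackward node: gradients can flow
vz.sum().backward()
print(vx.grad)                   # populated only because requires_grad=True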

intermediate_source/char_rnn_classification_tutorial.py

Lines changed: 2 additions & 1 deletion

@@ -170,7 +170,8 @@ def lineToTensor(line):
 # as regular feed-forward layers.
 #
 # This RNN module (mostly copied from `the PyTorch for Torch users
-# tutorial <https://github.com/pytorch/tutorials/blob/master/Introduction%20to%20PyTorch%20for%20former%20Torchies.ipynb>`__)
+# tutorial <http://pytorch.org/tutorials/beginner/former_torchies/
+# nn_tutorial.html#example-2-recurrent-net>`__)
 # is just 2 linear layers which operate on an input and hidden state, with
 # a LogSoftmax layer after the output.
 #
