Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into lewlu/inceptionv3-py
Browse files Browse the repository at this point in the history
  • Loading branch information
ottolu committed Aug 7, 2017
2 parents c97f34c + 6cdd482 commit 61c8202
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Examples/Image/Detection/FasterRCNN/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ If you require other versions please follow the instructions at [https://github.

We use a toy dataset of images captured from a refrigerator to demonstrate Faster R-CNN. Both the dataset and the pre-trained AlexNet model can be downloaded by running the following Python command:

`python install_data-and-model.py`
`python install_data_and_model.py`

After running the script, the toy dataset will be installed under the `Image/DataSets/Grocery` folder. And the AlexNet model will be downloaded to the `Image/PretrainedModels` folder.
We recommend keeping the downloaded data in the respective folder, as the configuration files in this folder assume that location by default.
Expand Down
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Latest news

***2017-08-04.*** CNTK August iteration plan posted [here](https://github.com/Microsoft/CNTK/issues/2194).

***2017-07-31.* CNTK 2.1**
Release of Cognitive Toolkit v.2.1.

Expand All @@ -20,11 +22,6 @@ Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/

***2017-07-07.*** CNTK July iteration plan posted [here](https://github.com/Microsoft/CNTK/issues/2064).

***2017-06-26.*** A great class for getting started with both Deep Learning and CNTK, [Deep Learning Explained](https://www.edx.org/course/deep-learning-explained-microsoft-dat236x) is now available on edX.

***2017-06-01.* CNTK 2.0 is released.**
The first production release of Cognitive Toolkit v.2. See more in the [Release Notes](https://docs.microsoft.com/en-us/cognitive-toolkit/ReleaseNotes/CNTK_2_0_Release_Notes), and get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases).

See [all news](https://docs.microsoft.com/en-us/cognitive-toolkit/news)

## Introduction
Expand Down
4 changes: 2 additions & 2 deletions Source/ComputationNetworkLib/ReshapingNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1944,8 +1944,8 @@ class GatherNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<
virtual bool OutputUsedInComputingInputNodesGradients() const override {
return false;
}
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override {
return false;
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override {
return childIndex == 0;
}

virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
Expand Down
1 change: 1 addition & 0 deletions Source/Math/ColumnQuantizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class ColumnQuantizer
// quantize
size_t ij = ColMIDX(i, colIdx, M);
ElemType val = inMat[ij] + inResidual[ij];

// 'template' keyword to compile with GCC
QWordVal qval = valQ.template Quantize<ZeroThresholdFor1Bit>(val);

Expand Down
28 changes: 28 additions & 0 deletions bindings/python/cntk/ops/tests/reshaping_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,34 @@ def test_gather_op(device_id, precision):
expectd2 = np.asarray([[[[0., 1.],[4.,5.]],[[2., 3.],[6., 7.]]],[[[4., 5.],[8.,9.]],[[6., 7.], [10., 11.]]]])
assert np.array_equal(res2, expectd2)

# The following small model tests the memory-reuse issue of the gather node.
x = C.input((3, 4))
x1 = C.to_sequence(x)
w = C.parameter((5, 6), init=1)
z = C.gather(w, x1)
assert z.shape == (4, 6)
#need the unpack node to trigger memory reuse.
f = C.sequence.unpack(z, 0, no_mask_output=True)
y = C.input((3, 4, 6))
loss = C.reduce_mean(C.square(f - y), axis=-1)
loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

g = C.constant(0, shape=w.shape)
u = C.assign(w, g + 1)
learner = C.cntk_py.universal_learner([w], [g], u)
trainer = C.trainer.Trainer(loss, [loss], [learner])
indices = np.asarray([[[1, 2, 1, 2]]])
input = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
lable = np.full((10, 3, 4, 6), 2)
trainer.train_minibatch({x: input, y: lable})
# the 2nd and 3rd rows should be updated by gradients.
assert np.mean(w.value[1, :]) < 1
assert np.mean(w.value[2, :]) < 1
# the other three rows should keep as 1
assert np.isclose(np.mean(w.value[0, :]), 1)
assert np.isclose(np.mean(w.value[3, :]), 1)
assert np.isclose(np.mean(w.value[4, :]), 1)

def test_convert_dynamic_axis():
#test fix batch size
batch_size = 4
Expand Down

0 comments on commit 61c8202

Please sign in to comment.