Merge with upstream/master to submit a PR for 101 tutorial
michhar committed Jun 14, 2017
2 parents c1e8600 + dd1bc6b commit 7f6beb1
Showing 160 changed files with 863 additions and 382 deletions.
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -1,7 +1,7 @@
You want to contribute to CNTK? We're really excited to work together!

-Please, follow the steps from the Wiki Article at
+Please, follow the steps from the documentation:

-https://github.com/Microsoft/CNTK/wiki/Contributing-to-CNTK
+https://docs.microsoft.com/en-us/cognitive-toolkit/contributing-to-cntk

Your CNTK team.
4 changes: 2 additions & 2 deletions Dependencies/CNTKCustomMKL/README.md
@@ -6,8 +6,8 @@ for usage by CNTK ("CNTK custom MKL" for short).
By default, a CNTK binary with Intel® MKL support includes a prebuilt CNTK
custom MKL.
If you want to build CNTK with Intel® MKL support yourself, you can install a
-prebuilt CNTK custom MKL, available for download from the [CNTK web site](https://www.cntk.ai/mkl).
-See [CNTK's setup instructions](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine)
+prebuilt CNTK custom MKL, available for download [here](https://www.microsoft.com/en-us/cognitive-toolkit/download-math-kernel-library/).
+See [CNTK's setup instructions](https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-your-machine)
for more details.

If you want to add new Intel® MKL functions to be used by CNTK you will have to
2 changes: 1 addition & 1 deletion Documentation/Documents/Configuration Files.md
@@ -232,7 +232,7 @@ stderr=c:\cntk\log\cntk # “_mnistTrain_mnistTest.log” would be appended
traceLevel=0 # larger values mean more output
```

-The **traceLevel** parameter is uniformly used by the code in CNTK to specify how much extra output (verbosity) is desired. The default value is 0 (zero) and specifies minimal output, the higher the number the more output can be expected. Currently 0-limited output, 1-medium ouput, 2-verbose output are the only values supported.
+The **traceLevel** parameter is uniformly used by the code in CNTK to specify how much extra output (verbosity) is desired. The default value is 0 (zero) and specifies minimal output, the higher the number the more output can be expected. Currently 0-limited output, 1-medium output, 2-verbose output are the only values supported.
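
For Python-API users, the same verbosity knob is exposed programmatically. A minimal sketch, assuming CNTK v2's `cntk.logging` module (enum member names can vary slightly across versions):

```python
import cntk as C

# Mirrors the traceLevel config setting above: Error is the quietest level,
# Warning and Info print progressively more output.
C.logging.set_trace_level(C.logging.TraceLevel.Info)
```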

### Top Level Parameters

2 changes: 1 addition & 1 deletion Examples/1stSteps/MNIST_Complex_Training.py
@@ -165,7 +165,7 @@ def get_probability(data):
X_check, Y_check = X_test[0:10000:400].copy(), Y_test[0:10000:400] # a small subsample of 25 examples
result = get_probability(X_check)

print("Label :", [label.argmax() for label in Y_check])
print("Label :", [label.todense().argmax() for label in Y_check])
print("Predicted:", [result[i,:].argmax() for i in range(len(result))])

# Must call MPI finalize when process exit without exceptions
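
The `.todense()` change above matters because the subsampled labels come back as sparse rows, where a plain `argmax` is unavailable or ambiguous depending on the SciPy version. A minimal sketch of the pattern, assuming SciPy-style one-hot rows (class index 3 is illustrative):

```python
import numpy as np
from scipy import sparse

label = sparse.csr_matrix(np.eye(10, dtype=np.float32)[3])  # one-hot row for class 3
print(int(label.todense().argmax()))  # 3 -- densify first, then take argmax
```
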
@@ -72,7 +72,7 @@ public static void EvaluationSingleImage(DeviceDescriptor device)
var inputVal = Value.CreateBatch(inputVar.Shape, resizedCHW, device);
inputDataMap.Add(inputVar, inputVal);

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
// Alternatively, create a Value object and add it to the data map.
outputDataMap.Add(outputVar, null);

@@ -158,7 +158,7 @@ public static void EvaluationBatchOfImages(DeviceDescriptor device)
// Create input data map.
inputDataMap.Add(inputVar, inputVal);

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
// Alternatively, create a Value object and add it to the data map.
outputDataMap.Add(outputVar, null);

@@ -248,7 +248,7 @@ public static void EvaluateMultipleImagesInParallel(DeviceDescriptor device)
var inputVal = Value.CreateBatch(inputVar.Shape, resizedCHW, device);
inputDataMap.Add(inputVar, inputVal);

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
// Alternatively, create a Value object and add it to the data map.
outputDataMap.Add(outputVar, null);

@@ -326,7 +326,7 @@ public static void LoadModelFromMemory(DeviceDescriptor device)
var inputVal = Value.CreateBatch(inputVar.Shape, resizedCHW, device);
inputDataMap.Add(inputVar, inputVal);

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
// Alternatively, create a Value object and add it to the data map.
outputDataMap.Add(outputVar, null);

@@ -444,7 +444,7 @@ public static void EvaluationSingleSequenceUsingOneHot(DeviceDescriptor device)
// Prepare output
Variable outputVar = modelFunc.Output;

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
var outputDataMap = new Dictionary<Variable, Value>();
outputDataMap.Add(outputVar, null);

@@ -567,7 +567,7 @@ public static void EvaluationBatchOfSequencesUsingOneHot(DeviceDescriptor device

// Prepare output
Variable outputVar = modelFunc.Output;
-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
var outputDataMap = new Dictionary<Variable, Value>();
outputDataMap.Add(outputVar, null);

@@ -687,7 +687,7 @@ public static void EvaluationSingleSequenceUsingSparse(DeviceDescriptor device)
// Prepare output
Variable outputVar = modelFunc.Output;

-// Create ouput data map. Using null as Value to indicate using system allocated memory.
+// Create output data map. Using null as Value to indicate using system allocated memory.
var outputDataMap = new Dictionary<Variable, Value>();
outputDataMap.Add(outputVar, null);

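The comment corrected throughout this file describes a C# convention: seeding `outputDataMap` with `null` tells CNTK to allocate the output `Value` itself. For comparison, a hedged sketch of the equivalent flow in the Python API, where `eval` allocates outputs automatically; the model path and zero-filled input are hypothetical:

```python
import numpy as np
import cntk as C

z = C.load_model("model.dnn")                            # hypothetical model file
data = np.zeros(z.arguments[0].shape, dtype=np.float32)  # dummy single input
result = z.eval({z.arguments[0]: data})  # like null in C#: CNTK allocates the output
print(result.shape)
```
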
4 changes: 2 additions & 2 deletions Examples/Evaluation/CSEvalClient/Program.cs
@@ -26,7 +26,7 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
/// There are four cases shown in this program related to model loading, network creation and evaluation.
///
/// To run this program from the CNTK binary drop, you must add the NuGet package for model evaluation first.
-/// Refer to <see cref="https://github.com/Microsoft/CNTK/wiki/NuGet-Package"/> for information regarding the NuGet package for model evaluation.
+/// Refer to <see cref="https://docs.microsoft.com/en-us/cognitive-toolkit/NuGet-Package"/> for information regarding the NuGet package for model evaluation.
///
/// EvaluateModelSingleLayer and EvaluateModelMultipleLayers
/// --------------------------------------------------------
@@ -367,7 +367,7 @@ private static void EvaluateMultipleModels()
{
Interlocked.Increment(ref count);

-// The file format correspond to the CNTK Text Format Reader format (https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader)
+// The file format corresponds to the CNTK Text Format Reader format (https://docs.microsoft.com/en-us/cognitive-toolkit/Brainscript-CNTKTextFormat-Reader)
var sets = line.Split('|');
var labels = sets[1].Trim().Split(' ').Skip(1);
var features = sets[2].Trim().Split(' ').Skip(1);
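
The splitting the C# code above performs maps directly onto the CNTK Text Format: each line carries `|`-delimited streams whose first token is the stream name. A small hedged sketch (the stream names and values are illustrative):

```python
line = "|labels 0 1 |features 0.5 0.25 1.0"
sets = line.split('|')                   # sets[0] is anything before the first '|'
labels = sets[1].strip().split(' ')[1:]  # drop the "labels" stream name
features = sets[2].strip().split(' ')[1:]
print(labels)    # ['0', '1']
print(features)  # ['0.5', '0.25', '1.0']
```
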
23 changes: 13 additions & 10 deletions Examples/Evaluation/README.md
@@ -1,25 +1,28 @@
#EvalClients

-The folder contains some examples using the CNTK to evalaute a trained model in your application. Please note that Visual Studio 2015 update 3 is required, and only the 64-bit target is supported.
+The folder contains some examples using CNTK to evaluate a trained model in your application. Please note that Visual Studio 2015 update 3 is required, and only the 64-bit target is supported.

-The [CNTK Eval Examples](https://github.com/Microsoft/CNTK/wiki/CNTK-Eval-Examples) page provides more details of these examples.
+The [CNTK Eval Examples](https://docs.microsoft.com/en-us/cognitive-toolkit/CNTK-Eval-Examples) page provides more details of these examples.

# CNTK Library Eval C++/C# Examples

The CNTKLibraryEvalExamples.sln contains code samples demonstrating how to use the CNTK Library API in C++ and C#.
-- CNTKLibraryCSEvalCPUOnlyExamples uses the CNTK Library CPU-Only Nuget package to evaluate models on CPU-only devices in C#.
-- CNTKLibraryCSEvalGPUExamples uses the CNTK Library GPU Nuget package to evaluate models on GPU devices in C#.
-- CNTKLibraryCPPEvalCPUOnlyExamples uses the CNTK Library C++ API to evaluate models on CPU-only devices. It uses the CNTK Library CPU-Only Nuget package.
-- CNTKLibraryCPPEvalGPUExamples uses the CNTK Library C++ API to evaluate models on GPU devices. It uses the CNTK Library GPU Nuget package.
+* CNTKLibraryCSEvalCPUOnlyExamples uses the CNTK Library CPU-Only Nuget package to evaluate models on CPU-only devices in C#.
+* CNTKLibraryCSEvalGPUExamples uses the CNTK Library GPU Nuget package to evaluate models on GPU devices in C#.
+* CNTKLibraryCPPEvalCPUOnlyExamples uses the CNTK Library C++ API to evaluate models on CPU-only devices. It uses the CNTK Library CPU-Only Nuget package.
+* CNTKLibraryCPPEvalGPUExamples uses the CNTK Library C++ API to evaluate models on GPU devices. It uses the CNTK Library GPU Nuget package.

After a successful build, the executable is saved under the $(SolutionDir)..\..$(Platform)$(ProjectName).$(Configuration)\ folder, e.g. ..\..\X64\CNTKLibraryCSEvalCPUOnlyExamples.Release\CNTKLibraryCSEvalCPUOnlyExamples.exe.
On Linux, only C++ is supported. Please refer to Makefile for building samples. The target name CNTKLIBRARY_CPP_EVAL_EXAMPLES is used to build CNTKLibraryCPPEvalExamples.

# EvalDll Eval C++/C# Examples

The EvalClients.sln contains the following projects demonstrating how to use the EvalDll library in C++ and C#.
-- CPPEvalClient: this sample uses the C++ EvalDll.
-- CPPEvalExtendedClient: this sample uses the C++ extended Eval interface in EvalDll to evaluate a RNN model.
-- CSEvalClient: this sample uses the C# EvalDll (only for Windows). It uses the CNTK EvalDll Nuget Package.
+* CPPEvalClient: this sample uses the C++ EvalDll.
+* CPPEvalExtendedClient: this sample uses the C++ extended Eval interface in EvalDll to evaluate an RNN model.
+* CSEvalClient: this sample uses the C# EvalDll (only for Windows). It uses the CNTK EvalDll Nuget Package.

After a successful build, the executable is saved under the $(SolutionDir)..\..$(Platform)$(ProjectName).$(Configuration)\ folder, e.g. ..\..\X64\CPPEvalClient.Release\CppEvalClient.exe.
On Linux, please refer to Makefile for building samples. The target name EVAL_CLIENT, and EVAL_EXTENDED_CLIENT are used to build these projects.
@@ -31,7 +31,7 @@ class BinaryConvolveFunction final : public Function

private:
// simple convolve function that pulls out raw data buffers and passes them into our halide function
-static void Convolve(const NDArrayViewPtr& weights, const NDArrayViewPtr& input, const int size, const int stride, const int pad, const int w, const int h, const int channels, const int num_filters, NDArrayViewPtr& output)
+static void Convolve(const NDArrayViewPtr& weights, const NDArrayViewPtr& input, const int size, const int stride, const bool pad, const int w, const int h, const int channels, const int num_filters, NDArrayViewPtr& output)
{
auto weightBuffer = weights->DataBuffer<float>();
auto inputBuffer = input->DataBuffer<float>();
@@ -53,7 +53,7 @@ class BinaryConvolveFunction final : public Function
auto kernelRank = leftOperandData->Shape().Rank();
long unsigned int num_filters;
if (kernelRank >= 4) {
-num_filters = leftOperandData->Shape()[3];
+num_filters = (long unsigned int)leftOperandData->Shape()[3];
} else {
num_filters = 1;
}
@@ -70,15 +70,15 @@
auto& outputValue = outputs[this->Output()];
if (outputValue == nullptr)
{
-auto numOutCols = pad == 0 ? (w - size)/stride + 1 : (w - 1)/stride + 1;
-auto numOutRows = pad == 0 ? (h - size)/stride + 1 : (h - 1)/stride + 1;
+auto numOutCols = !pad ? (w - size)/stride + 1 : (w - 1)/stride + 1;
+auto numOutRows = !pad ? (h - size)/stride + 1 : (h - 1)/stride + 1;
outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({ numOutRows , numOutCols, num_filters }), computeDevice));
}

// extract the output data
auto outputData = outputValue->Data();
// pass everything to Halide to compute the result, outputs are directly stored in the outputData buffer
-Convolve(leftOperandData, rightOperandData, size, stride, pad, w, h, channels, num_filters, outputData);
+Convolve(leftOperandData, rightOperandData, size, stride, pad, (int)w, (int)h, (int)channels, (int)num_filters, outputData);

// Let's save the right input's Value in the BackPropSate to be used in the backward pass for computing gradients
return MakeSharedObject<BackPropState>(this->shared_from_this(), computeDevice, std::unordered_map<Variable, ValuePtr>({ {Inputs()[1], inputValues[1] } }));
@@ -89,6 +89,7 @@ class BinaryConvolveFunction final : public Function
const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override
{
+state; rootGradientValues; backPropagatedGradientValuesForInputs;
std::runtime_error("Binary Convolution does not currently support backprop");
}

@@ -117,7 +118,7 @@ class BinaryConvolveFunction final : public Function
long unsigned int num_filters;
// determine the number of filters
if (kernelRank >= 4) {
-num_filters = leftOperand.Shape()[3];
+num_filters = (long unsigned int)leftOperand.Shape()[3];
} else {
num_filters = 1;
}
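
The hunks above (and the Halide helper in the next file) compute the convolution output size from `pad`; only `pad`'s type changes from `int` to `bool`, the arithmetic stays the same. A hedged sketch of that formula in isolation:

```python
def conv_out_dim(w, size, stride, pad):
    """Output width for a convolution, matching the numOutCols/out_w expressions."""
    return (w - 1) // stride + 1 if pad else (w - size) // stride + 1

print(conv_out_dim(32, 3, 1, pad=False))  # 30: a valid convolution shrinks the edges
print(conv_out_dim(32, 3, 1, pad=True))   # 32: padding preserves the spatial size
```
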
@@ -9,8 +9,8 @@

// perform all the boilerplate needed by halide. Basically takes a bunch of input parameters and packages them up into halide structs
void invoke_halide_convolve(const float *filter, const float *input, int num_filters, int size, int channels, bool pad, int stride, int w, int h, const float *output) {
-int out_w = pad == 0 ? (w - size)/stride + 1 : (w - 1)/stride + 1;
-int out_h = pad == 0 ? (h - size)/stride + 1 : (h - 1)/stride + 1;
+int out_w = !pad ? (w - size)/stride + 1 : (w - 1)/stride + 1;
+int out_h = !pad ? (h - size)/stride + 1 : (h - 1)/stride + 1;

// package up the filter buffer
halide_buffer_t halide_filter_buf = {0};
@@ -347,7 +347,11 @@ struct halide_type_t {
* bits: The bit size of one element.
* lanes: The number of vector elements in the type. */
HALIDE_ALWAYS_INLINE halide_type_t(halide_type_code_t code, uint8_t bits, uint16_t lanes = 1)
+#if __cplusplus >= 201103L
: code(code), bits(bits), lanes(lanes) {
+#else
+: code((uint8_t)code), bits(bits), lanes(lanes) {
+#endif
}

/** Default constructor is required e.g. to declare halide_trace_event
2 changes: 1 addition & 1 deletion Examples/Image/Classification/AlexNet/Python/README.md
@@ -8,6 +8,6 @@ Our AlexNet model is a slight variation of the Caffe implementation of AlexNet (

`python AlexNet_ImageNet_Distributed.py`

-You may use this python script to train AlexNet on multiple GPUs or machines. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python). For example, the command for distributed training on the same machine (with multiple GPUs) with Windows is:
+You may use this python script to train AlexNet on multiple GPUs or machines. For a reference on distributed training, please check [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Multiple-GPUs-and-machines). For example, the command for distributed training on the same machine (with multiple GPUs) with Windows is:

`mpiexec -n <#workers> python AlexNet_ImageNet_Distributed.py`
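
Under the hood, a distributed script of this kind wraps its learner before handing it to the trainer. A minimal sketch, assuming CNTK v2's `cntk.train.distributed` module; the toy model and learning rate are illustrative, not the AlexNet script's actual values:

```python
import cntk as C

x = C.input_variable(10)                 # toy stand-in for the real network input
z = C.layers.Dense(2)(x)                 # toy stand-in for AlexNet
lr = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
local_learner = C.sgd(z.parameters, lr)
learner = C.train.distributed.data_parallel_distributed_learner(local_learner)
# ...build the Trainer with `learner` and train as usual, then before exiting:
C.train.distributed.Communicator.finalize()  # required when running under mpiexec
```
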
@@ -25,7 +25,7 @@ The network achieves an error rate of around `18%` after 30 epochs. This is comp
### ConvNet_CIFAR10_DataAug.cntk

The third example uses the same CNN as the previous example, but it improves by adding data augmentation to training. For this purpose, we use the `ImageReader` instead of the `CNTKTextFormatReader` to load the data. The ImageReader currently supports crop, flip, scale, color jittering, and mean subtraction.
-For a reference on image reader and transforms, please check [here](https://github.com/Microsoft/CNTK/wiki/Image-reader).
+For a reference on image reader and transforms, please check [here](https://docs.microsoft.com/en-us/cognitive-toolkit/BrainScript-Image-Reader).
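
For the Python counterpart of these transforms, `cntk.io.transforms` exposes roughly the same operations. A hedged sketch (parameter values and the mean file name are illustrative):

```python
import cntk.io.transforms as xforms

transforms = [
    xforms.crop(crop_type='randomside', side_ratio=0.8),  # random crop augmentation
    xforms.scale(width=32, height=32, channels=3, interpolations='linear'),
    xforms.color(brightness_radius=0.2, contrast_radius=0.2, saturation_radius=0.2),
    xforms.mean('CIFAR-10_mean.xml'),                     # mean subtraction
]
# Pass `transforms` to an ImageDeserializer stream to apply them during training.
```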

Run the example from the current folder using:

2 changes: 1 addition & 1 deletion Examples/Image/Classification/ConvNet/Python/README.md
@@ -49,7 +49,7 @@ All settings are identical to the previous example. The accuracy of the network

### ConvNet_CIFAR10_DataAug_Distributed.py

-The fifth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python).
+The fifth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Multiple-GPUs-and-machines).
Note that this example will run with a CPU-only build.

`mpiexec -n <#workers> python ConvNet_CIFAR10_DataAug_Distributed.py`
@@ -24,7 +24,7 @@ For more parameter definitions, please use `-h` command to see the help text:

### BN_Inception_CIFAR10_Distributed.py

-[This example](./BN_Inception_CIFAR10_Distributed.py) is similar to BN_Inception_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found in [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python).
+[This example](./BN_Inception_CIFAR10_Distributed.py) is similar to BN_Inception_CIFAR10.py, but it adds support for distributed training via [MPI](https://en.wikipedia.org/wiki/Message_Passing_Interface). Details can be found [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Multiple-GPUs-and-machines#42-running-parallel-training-with-python).
Note this example requires a multi-GPU machine or mpi hosts file to distribute to multiple machines.

Simple aggregation, BN-Inception, with a 2-GPU machine:
@@ -49,9 +49,9 @@ For more parameter definitions, please use `-h` command to see the help text:

### BN_Inception_ImageNet_Distributed.py

-[This example](./BN_Inception_ImageNet_Distributed.py) is similar to BN_Inception_ImageNet.py, but it adds distributed training support.
+[This example](./BN_Inception_ImageNet_Distributed.py) is similar to BN_Inception_ImageNet.py, but it adds distributed training support.

-To run it in a distributed manner, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines#32-python). For example, the command for distributed training on the same machine (with multiple GPUs) with Windows is:
+To run it in a distributed manner, please check [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Multiple-GPUs-and-machines#42-running-parallel-training-with-python). For example, the command for distributed training on the same machine (with multiple GPUs) with Windows is:

`mpiexec -n <#workers> python BN_Inception_ImageNet_Distributed.py`

