Merge remote-tracking branch 'upstream/master' into bugfix/dtype-cast

apache · Sep 12, 2018 · 324390f · 324390f
2 parents 9c09a2e + 8ff50c9
commit 324390f
Show file tree

Hide file tree

Showing 58 changed files with 2,959 additions and 207 deletions.
diff --git a/ci/docker/install/tensorrt.sh b/ci/docker/install/tensorrt.sh
@@ -30,9 +30,12 @@ apt-get install -y automake libtool
 git clone --recursive -b 3.5.1.1 https://github.com/google/protobuf.git
 cd protobuf
 ./autogen.sh
-./configure
+./configure --disable-shared CXXFLAGS=-fPIC
 make -j$(nproc)
 make install
+rm -rf /usr/local/lib/libprotobuf-lite.so*
+rm -rf /usr/local/lib/libprotobuf.so*
+rm -rf /usr/local/lib/libprotoc.so*
 ldconfig
 popd
 

diff --git a/ci/docker/install/ubuntu_tvm.sh b/ci/docker/install/ubuntu_tvm.sh
@@ -25,14 +25,14 @@ cd tvm
 # This is a stable tag that support MXNet TVM bridge.
 # We use this since support for mxnet bridge just checked
 # into master and there is yet a version tag
-git checkout 30eaf463e34d7c301357c31a010945d11df16537
+git checkout v0.4
+
+cp cmake/config.cmake .
+echo set\(USE_CUDA /usr/local/cuda\) >> config.cmake
+echo set\(USE_LLVM llvm-config-5.0\) >> config.cmake
+echo set\(USE_RPC ON\) >> config.cmake
+echo set\(USE_GRAPH_RUNTIME ON\) >> config.cmake
 
-cp make/config.mk
-echo USE_CUDA=1 >> config.mk
-echo LLVM_CONFIG=llvm-config-5.0 >> config.mk
-echo USE_RPC=1 >> config.mk
-echo USE_GRAPH_RUNTIME=1 >> config.mk
-echo CUDA_PATH=/usr/local/cuda >> config.mk
 make -j$(nproc)
 
 cd python

diff --git a/ci/util.py b/ci/util.py
@@ -17,6 +17,7 @@
 
 import os
 import contextlib
+import logging
 import requests
 
 def get_mxnet_root() -> str:

diff --git a/docs/api/python/contrib/svrg_optimization.md b/docs/api/python/contrib/svrg_optimization.md
@@ -0,0 +1,86 @@
+# SVRG Optimization in Python Module API
+
+## Overview
+SVRG, which stands for Stochastic Variance Reduced Gradients, is an optimization technique that was first introduced in the 
+paper _Accelerating Stochastic Gradient Descent using Predictive Variance Reduction_ in 2013. It is a complement to SGD 
+(Stochastic Gradient Descent), which is known for large scale optimization but suffers from slow convergence 
+asymptotically due to its inherent variance. SGD approximates the full gradients using a small batch of data or 
+a single data sample, which introduces variance and thus requires starting with a small learning rate in order to 
+ensure convergence. SVRG remedies the problem by keeping track of a version of estimated weights that is close to the 
+optimal parameter values and maintaining an average of full gradients over a full pass of data. The average of full 
+gradients is calculated with respect to the weights from the last m-th epoch in the training. SVRG uses a different 
+update rule: gradients w.r.t. current parameter values minus gradients w.r.t. parameters from the last m-th epoch 
+plus the average of full gradients over all data. 
+
+Key Characteristics of SVRG:
+* Employs explicit variance reduction by using a different update rule compared to SGD.
+* Ability to use a relatively large learning rate, which leads to faster convergence compared to SGD.
+* Guarantees fast convergence for smooth and strongly convex functions.
+
+SVRG optimization is implemented as a SVRGModule in `mxnet.contrib.svrg_optimization`, which is an extension of the 
+existing `mxnet.module.Module` APIs and encapsulates SVRG optimization logic within several new functions. SVRGModule 
+API changes compared to Module API to end users are minimal. 
+
+In distributed training, each worker gets the same special weights from the last m-th epoch and calculates the full 
+gradients with respect to its own shard of data. The standard SVRG optimization requires building global full 
+gradients, which are calculated by aggregating the full gradients from each worker and averaging over the number of 
+workers. The workaround is to keep an additional set of keys in the KVStore that maps to full gradients. 
+The `_SVRGOptimizer` is designed to wrap two optimizers, an `_AssignmentOptimizer` which is used for full gradients 
+accumulation in the KVStore and a regular optimizer that applies the actual update rule to the parameters. 
+The `_SVRGOptimizer` and `_AssignmentOptimizer` are designed to be used in `SVRGModule` only.
+
+```eval_rst
+.. warning:: This package contains experimental APIs and may change in the near future.
+``` 
+
+This document lists the SVRGModule APIs in MXNet/Contrib package:
+
+```eval_rst
+.. autosummary::
+    :nosignatures:
+
+    mxnet.contrib.svrg_optimization.svrg_module
+```
+
+### Intermediate Level API for SVRGModule
+
+The only extra step to use a SVRGModule compared to using a Module is to check if the current epoch should update the
+full gradients over all data. The code snippet below demonstrates the suggested usage of SVRGModule using intermediate 
+level APIs.
+
+```python
+>>> mod = SVRGModule(symbol=model, update_freq=2, data_names=['data'], label_names=['lin_reg_label'])
+>>> mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)
+>>> mod.init_params()
+>>> mod.init_optimizer(optimizer='sgd', optimizer_params=(('learning_rate', 0.01), ), kvstore='local')
+>>> for epoch in range(num_epochs):
+...     if epoch % mod.update_freq == 0:
+...         mod.update_full_grads(di)
+...     di.reset()
+...     for batch in di:
+...         mod.forward_backward(data_batch=batch)
+...         mod.update()
+```
+
+### High Level API for SVRGModule
+
+The high level API usage of SVRGModule remains exactly the same as the Module API. The code snippet below gives an example of
+the suggested usage of the high level API.
+
+```python
+>>> mod = SVRGModule(symbol=model, update_freq=2, data_names=['data'], label_names=['lin_reg_label'])
+>>> mod.fit(di, num_epochs=100, optimizer='sgd', optimizer_params=(('learning_rate', 0.01), ))
+```
+
+## API reference
+
+<script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
+
+```eval_rst
+
+.. automodule:: mxnet.contrib.svrg_optimization.svrg_module
+.. autoclass:: mxnet.contrib.svrg_optimization.svrg_module.SVRGModule
+    :members: init_optimizer, bind, forward, backward, reshape, update, update_full_grads, fit, prepare
+ 
+```
+<script>auto_index("api-reference");</script>
diff --git a/docs/api/python/index.md b/docs/api/python/index.md
@@ -52,6 +52,7 @@ Code examples are placed throughout the API documentation and these can be run a
    contrib/contrib.md
    contrib/text.md
    contrib/onnx.md
+   contrib/svrg_optimization.md
 ```
 
 ## Gluon API
@@ -177,3 +178,13 @@ Code examples are placed throughout the API documentation and these can be run a
 
    symbol_in_pictures/symbol_in_pictures.md
 ```
+
+## Tools
+
+```eval_rst
+.. toctree::
+    :maxdepth: 1
+
+    tools/test_utils.md
+    tools/visualization.md
+```
diff --git a/docs/api/python/module/module.md b/docs/api/python/module/module.md
@@ -207,4 +207,4 @@ additional functionality. We summarize them in this section.
     :members:
 ```
 
-<script>auto_index("api-reference");</script>
+<script>auto_index("api-reference");</script>
diff --git a/docs/api/python/tools/test_utils.md b/docs/api/python/tools/test_utils.md
@@ -0,0 +1,27 @@
+# Test Utilities
+
+This module has a variety of tools that help with using and testing MXNet.
+
+```eval_rst
+    .. currentmodule:: mxnet.test_utils
+```
+
+```eval_rst
+.. autosummary::
+    :nosignatures:
+
+    mxnet.test_utils
+```
+
+## API Reference
+
+<script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
+
+```eval_rst
+
+.. automodule:: mxnet.test_utils
+    :members:
+
+```
+
+<script>auto_index("api-reference");</script>
diff --git a/docs/api/python/tools/visualization.md b/docs/api/python/tools/visualization.md
@@ -0,0 +1,27 @@
+# Visualization
+
+This module contains visualization features.
+
+```eval_rst
+    .. currentmodule:: mxnet.visualization
+```
+
+```eval_rst
+.. autosummary::
+    :nosignatures:
+
+    mxnet.visualization
+```
+
+## API Reference
+
+<script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
+
+```eval_rst
+
+.. automodule:: mxnet.visualization
+    :members:
+
+```
+
+<script>auto_index("api-reference");</script>
diff --git a/docs/architecture/rnn_interface.md b/docs/architecture/rnn_interface.md
@@ -1,6 +1,6 @@
 # Survey of Existing Interfaces and Implementations
 
-Commonly used deep learning libraries with good RNN/LSTM support include [Theano](http://deeplearning.net/software/theano/library/scan.html) and its wrappers [Lasagne](http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html) and [Keras](http://keras.io/layers/recurrent/); [CNTK](https://cntk.codeplex.com/); [TensorFlow](https://www.tensorflow.org/versions/master/tutorials/recurrent/index.html); and various implementations in Torch, such as [this well-known character-level language model tutorial](https://github.com/karpathy/char-rnn), [this](https://github.com/Element-Research/rnn).
+Commonly used deep learning libraries with good RNN/LSTM support include [Theano](http://deeplearning.net/software/theano/library/scan.html) and its wrappers [Lasagne](http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html) and [Keras](http://keras.io/layers/recurrent/); [CNTK](https://cntk.codeplex.com/); [TensorFlow](https://www.tensorflow.org/tutorials/sequences/recurrent); and various implementations in Torch, such as [this well-known character-level language model tutorial](https://github.com/karpathy/char-rnn), [this](https://github.com/Element-Research/rnn).
 
 In this document, we present a comparative analysis of the approaches taken by these libraries.
 
@@ -93,7 +93,7 @@ The low-level API for recurrent connection seem to be a *delay node*. But I'm no
 
 ## TensorFlow
 
-The [current example of RNNLM](https://www.tensorflow.org/versions/master/tutorials/recurrent/index.html#recurrent-neural-networks) in TensorFlow uses explicit unrolling for a predefined number of time steps. The white-paper mentions that an advanced control flow API (Theano's scan-like) is planned.
+The [current example of RNNLM](https://www.tensorflow.org/tutorials/sequences/recurrent#recurrent-neural-networks) in TensorFlow uses explicit unrolling for a predefined number of time steps. The white-paper mentions that an advanced control flow API (Theano's scan-like) is planned.
 
 ## Next Steps
 

diff --git a/docs/build_version_doc/AddVersion.py b/docs/build_version_doc/AddVersion.py
@@ -74,7 +74,8 @@
                     version_tag_mobile.extract()
                 navbar.append(version_str)
                 navbar_mobile.append(version_str_mobile)
-                outstr = str(content).replace('&lt;', '<').replace('&gt;', '>')
+                # The following causes rendering errors in code blocks; refer to #12168
+                #outstr = str(content).replace('&lt;', '<').replace('&gt;', '>')
             # Fix link
             if args.current_version == tag_list[0]:
                 print("Fixing " + os.path.join(path, name))

diff --git a/docs/community/mxnet_channels.md b/docs/community/mxnet_channels.md
@@ -2,9 +2,9 @@
 
 Converse with the MXNet community via the following channels:
 
-- [Forum](https://discuss.mxnet.io/): [discuss.mxnet.io](https://discuss.mxnet.io/) <i class="fas fa-external-link-alt"></i>
-- [MXNet Apache developer mailing list](https://lists.apache.org/list.html?dev@mxnet.apache.org) (dev@mxnet.apache.org): To subscribe, send an email to <a href="mailto:user-subscribe@mxnet.apache.org">dev-subscribe@mxnet.apache.org</a> <i class="far fa-envelope"></i>
-- [MXNet Apache user mailing list](https://lists.apache.org/list.html?user@mxnet.apache.org) (user@mxnet.apache.org): To subscribe, send an email to <a href="mailto:dev-subscribe@mxnet.apache.org">user-subscribe@mxnet.apache.org</a> <i class="far fa-envelope"></i>
-- [MXNet Slack channel](https://apache-mxnet.slack.com): To request an invitation to the channel please subscribe to the mailing list above and then email: <a href="mailto:dev@mxnet.apache.org">dev@mxnet.apache.org</a> <i class="far fa-envelope"></i>
+- [Forum](https://discuss.mxnet.io/): [discuss.mxnet.io](https://discuss.mxnet.io/)
+- [MXNet Apache developer mailing list](https://lists.apache.org/list.html?dev@mxnet.apache.org) (dev@mxnet.apache.org): To subscribe, send an email to <a href="mailto:dev-subscribe@mxnet.apache.org">dev-subscribe@mxnet.apache.org</a>
+- [MXNet Apache user mailing list](https://lists.apache.org/list.html?user@mxnet.apache.org) (user@mxnet.apache.org): To subscribe, send an email to <a href="mailto:user-subscribe@mxnet.apache.org">user-subscribe@mxnet.apache.org</a>
+- [MXNet Slack channel](https://the-asf.slack.com/) (Channel: #mxnet): To request an invitation to the channel please subscribe to the mailing list above and then email: <a href="mailto:dev@mxnet.apache.org">dev@mxnet.apache.org</a>
 
 Note: if you have an email address with apache.org, you do not need an approval to join the MXNet Slack channel.
diff --git a/docs/install/index.md b/docs/install/index.md
@@ -272,7 +272,7 @@ Follow the four steps in this [docker documentation](https://docs.docker.com/eng
 
 If you skip this step, you need to use *sudo* each time you invoke Docker.
 
-**Step 3** Install *nvidia-docker-plugin* following the [installation instructions](https://github.com/NVIDIA/nvidia-docker/wiki/Installation). *nvidia-docker-plugin* is required to enable the usage of GPUs from the docker containers.
+**Step 3** Install *nvidia-docker-plugin* following the [installation instructions](https://github.com/NVIDIA/nvidia-docker/wiki). *nvidia-docker-plugin* is required to enable the usage of GPUs from the docker containers.
 
 **Step 4** Pull the MXNet docker image.
 

diff --git a/docs/install/windows_setup.md b/docs/install/windows_setup.md
@@ -55,7 +55,7 @@ These commands produce a library called ```mxnet.dll``` in the ```./build/Releas
 Next, we install ```graphviz``` library that we use for visualizing network graphs you build on MXNet. We will also install [Jupyter Notebook](http://jupyter.readthedocs.io/)  used for running MXNet tutorials and examples.
 - Install ```graphviz``` by downloading MSI installer from [Graphviz Download Page](https://graphviz.gitlab.io/_pages/Download/Download_windows.html).
 **Note** Make sure to add graphviz executable path to PATH environment variable. Refer [here for more details](http://stackoverflow.com/questions/35064304/runtimeerror-make-sure-the-graphviz-executables-are-on-your-systems-path-aft)
-- Install ```Jupyter``` by installing [Anaconda for Python 2.7](https://www.continuum.io/downloads)
+- Install ```Jupyter``` by installing [Anaconda for Python 2.7](https://www.anaconda.com/download/)
 **Note** Do not install Anaconda for Python 3.5. MXNet has few compatibility issue with Python 3.5.
 
 &nbsp;
@@ -69,7 +69,7 @@ We have installed MXNet core library. Next, we will install MXNet interface pack
 ## Install MXNet for Python
 
 1. Install ```Python``` using windows installer available [here](https://www.python.org/downloads/release/python-2712/).
-2. Install ```Numpy``` using windows installer available [here](http://scipy.org/install.html).
+2. Install ```Numpy``` using windows installer available [here](https://scipy.org/index.html).
 3. Next, we install Python package interface for MXNet. You can find the Python interface package for [MXNet on GitHub](https://github.com/dmlc/mxnet/tree/master/python/mxnet).
 
 ```bash

diff --git a/docs/tutorials/onnx/export_mxnet_to_onnx.md b/docs/tutorials/onnx/export_mxnet_to_onnx.md
@@ -55,7 +55,7 @@ Help on function export_model in module mxnet.contrib.onnx.mx2onnx.export_model:
 export_model(sym, params, input_shape, input_type=<type 'numpy.float32'>, onnx_file_path=u'model.onnx', verbose=False)
     Exports the MXNet model file, passed as a parameter, into ONNX model.
     Accepts both symbol,parameter objects as well as json and params filepaths as input.
-    Operator support and coverage - https://cwiki.apache.org/confluence/display/MXNET/ONNX
+    Operator support and coverage - https://cwiki.apache.org/confluence/display/MXNET/MXNet-ONNX+Integration
 
     Parameters
     ----------

diff --git a/example/README.md b/example/README.md
@@ -95,6 +95,7 @@ If your tutorial depends on specific packages, simply add them to this provision
 * [Gluon Examples](gluon) - several examples using the Gluon API
   * [Style Transfer](gluon/style_transfer) - a style transfer example using gluon
   * [Word Language Model](gluon/word_language_model) - an example that trains a multi-layer RNN on the Penn Treebank language modeling benchmark
+  * [SN-GAN](gluon/sn_gan) - an example that uses spectral normalization to train a GAN (Generative Adversarial Network) using the Gluon API
 * [Image Classification with R](image-classification) - image classification on MNIST,CIFAR,ImageNet-1k,ImageNet-Full, with multiple GPU and distributed training.
 * [Kaggle 1st national data science bowl](kaggle-ndsb1) - a MXnet example for Kaggle Nation Data Science Bowl 1
 * [Kaggle 2nd national data science bowl](kaggle-ndsb2) - a tutorial for Kaggle Second Nation Data Science Bowl

diff --git a/example/gluon/sn_gan/README.md b/example/gluon/sn_gan/README.md
@@ -0,0 +1,44 @@
+# Spectral Normalization GAN
+
+This example implements [Spectral Normalization for Generative Adversarial Networks](https://arxiv.org/abs/1802.05957) based on the [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset.
+
+## Usage
+
+Example runs and the results:
+
+```bash
+python train.py --use-gpu --data-path=data
+```
+
+* Note that the program will download the CIFAR10 dataset for you.
+
+`python train.py --help` gives the following arguments:
+
+```bash
+optional arguments:
+  -h, --help            show this help message and exit
+  --data-path DATA_PATH
+                        path of data.
+  --batch-size BATCH_SIZE
+                        training batch size. default is 64.
+  --epochs EPOCHS       number of training epochs. default is 100.
+  --lr LR               learning rate. default is 0.0001.
+  --lr-beta LR_BETA     learning rate for the beta in margin based loss.
+                        default is 0.5.
+  --use-gpu             use gpu for training.
+  --clip_gr CLIP_GR     Clip the gradient by projecting onto the box. default
+                        is 10.0.
+  --z-dim Z_DIM         dimension of the latent z vector. default is 100.
+```
+
+## Result
+
+![SN-GAN](sn_gan_output.png)
+
+## Learned Spectral Normalization
+
+![alt text](https://github.com/taki0112/Spectral_Normalization-Tensorflow/blob/master/assests/sn.png)
+
+## Reference
+
+[Simple Tensorflow Implementation](https://github.com/taki0112/Spectral_Normalization-Tensorflow)
diff --git a/example/gluon/sn_gan/data.py b/example/gluon/sn_gan/data.py
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This example is inspired by https://github.com/jason71995/Keras-GAN-Library,
+# https://github.com/kazizzad/DCGAN-Gluon-MxNet/blob/master/MxnetDCGAN.ipynb
+# https://github.com/apache/incubator-mxnet/blob/master/example/gluon/dcgan.py
+
+import numpy as np
+
+import mxnet as mx
+from mxnet import gluon
+from mxnet.gluon.data.vision import CIFAR10
+
+IMAGE_SIZE = 64
+
+def transformer(data, label):
+    """ data preparation """
+    data = mx.image.imresize(data, IMAGE_SIZE, IMAGE_SIZE)
+    data = mx.nd.transpose(data, (2, 0, 1))
+    data = data.astype(np.float32) / 128.0 - 1
+    return data, label
+
+
+def get_training_data(batch_size):
+    """ helper function to get dataloader"""
+    return gluon.data.DataLoader(
+        CIFAR10(train=True, transform=transformer),
+        batch_size=batch_size, shuffle=True, last_batch='discard')