gluon docs (apache#7038)
* some fixes

* loss not rendered

* fix

* fox
Roshrini authored and piiswrong committed Jul 14, 2017
1 parent 1529c5c commit 7e24755
Showing 10 changed files with 260 additions and 250 deletions.
18 changes: 8 additions & 10 deletions docs/api/python/gluon.md
@@ -196,18 +196,16 @@ in Python and then deploy with symbolic graph in C++ and Scala.

```eval_rst
.. currentmodule:: mxnet.gluon.loss
.. autoclass:: mxnet.gluon.loss.L2Loss
:members:
.. autoclass:: mxnet.gluon.loss.L1Loss
:members:
.. autoclass:: mxnet.gluon.loss.SoftmaxCrossEntropyLoss
:members:
.. autoclass:: mxnet.gluon.loss.KLDivLoss
:members:
```


```eval_rst
.. automethod:: mxnet.gluon.loss.custom_loss
.. automethod:: mxnet.gluon.loss.multitask_loss
.. automethod:: mxnet.gluon.loss.l1_loss
.. automethod:: mxnet.gluon.loss.l2_loss
.. automethod:: mxnet.gluon.loss.softmax_cross_entropy_loss
```
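
The classes listed above are ordinary Gluon `HybridBlock`s: you instantiate one and call it with `(output, label)`. A minimal usage sketch (array shapes here are arbitrary and for illustration only):

```python
import mxnet as mx
from mxnet.gluon import loss

# Instantiate a loss Block once, then call it like a layer.
l1 = loss.L1Loss()
output = mx.nd.random_uniform(shape=(4, 10))   # e.g. network predictions
label = mx.nd.random_uniform(shape=(4, 10))    # targets of the same shape
print(l1(output, label))                       # one loss value per sample, shape (4,)
```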


## Utilities

```eval_rst
39 changes: 21 additions & 18 deletions python/mxnet/gluon/block.py
@@ -20,7 +20,7 @@ def __init__(self, block):

@staticmethod
def create(prefix, params, hint):
"""Create prefix and params for new `Block`."""
"""Creates prefix and params for new `Block`."""
current = _BlockScope._current
if current is None:
if prefix is None:
@@ -91,8 +91,8 @@ class Block(object):
"""Base class for all neural network layers and models. Your models should
subclass this class.
`Block`s can be nested recursively in a tree structure. You can create and
assign child `Block`s as regular attributes::
`Block` can be nested recursively in a tree structure. You can create and
assign child `Block` as regular attributes::
from mxnet.gluon import Block, nn
from mxnet import ndarray as F
@@ -115,7 +115,7 @@ def forward(self, x):
model(F.zeros((10, 10), ctx=mx.cpu(0)))
Child `Block`s assigned this way will be registered and `collect_params`
Child `Block` assigned this way will be registered and `collect_params`
will collect their Parameters recursively.
Parameters
@@ -125,7 +125,7 @@ def forward(self, x):
Parameters and child `Block`s in this `Block`'s `name_scope`. Prefix
should be unique within one model to prevent name collisions.
params : ParameterDict or None
ParameterDict for sharing weights with the new `Block`. For example,
`ParameterDict` for sharing weights with the new `Block`. For example,
if you want `dense1` to share `dense0`'s weights, you can do::
dense0 = nn.Dense(20)
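
To make the two patterns in this docstring concrete — registering child `Block`s by attribute assignment and sharing weights through a `ParameterDict` — here is a minimal sketch. It follows the docstring's own example; the layer sizes are arbitrary, and the final `params=dense0.collect_params()` line is one plausible completion of the truncated sharing example rather than text from this diff.

```python
import mxnet as mx
from mxnet.gluon import Block, nn
from mxnet import ndarray as F

class Model(Block):
    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        with self.name_scope():
            # Blocks assigned as attributes are registered as children.
            self.dense0 = nn.Dense(20)
            self.dense1 = nn.Dense(20)

    def forward(self, x):
        x = F.relu(self.dense0(x))
        return F.relu(self.dense1(x))

model = Model()
model.initialize(ctx=mx.cpu(0))
model(F.zeros((10, 10), ctx=mx.cpu(0)))

# Weight sharing: pass dense0's ParameterDict as params for dense1.
dense0 = nn.Dense(20)
dense1 = nn.Dense(20, params=dense0.collect_params())
```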
@@ -152,7 +152,7 @@ def params(self):
return self._params

def collect_params(self):
"""Returns a ParameterDict containing this `Block` and all of its
"""Returns a `ParameterDict` containing this `Block` and all of its
children's Parameters."""
ret = ParameterDict(self._params.prefix)
ret.update(self.params)
@@ -162,32 +162,32 @@ def collect_params(self):

@property
def prefix(self):
"""Prefix of this Block."""
"""Prefix of this `Block`."""
return self._prefix

@property
def name(self):
"""Name of this Block, without '_' in the end."""
"""Name of this `Block`, without '_' in the end."""
if self.prefix.endswith('_'):
return self.prefix[:-1]
return self.prefix

def name_scope(self):
"""Returns a name space object managing child `Block` and parameter
names. Should be used by a `with` statement::
"""Returns a name space object managing a child `Block` and parameter
names. Should be used within a `with` statement::
with self.name_scope():
self.dense = nn.Dense(20)
"""
return self._scope

def register_child(self, block):
"""Register block as a child of self. `Block`s assigned to self as
"""Registers block as a child of self. `Block`s assigned to self as
attributes will be registered automatically."""
self._children.append(block)

def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False):
"""Initialize `Parameter`s of this Block and its children.
"""Initializes `Parameter`s of this `Block` and its children.
Equivalent to `block.collect_params().initialize(...)`
"""
@@ -210,7 +210,7 @@ def __call__(self, *args):
return self.forward(*args)

def forward(self, *args):
"""Override to implement forward computation using NDArray. Only
"""Overrides to implement forward computation using `NDArray`. Only
accepts positional arguments.
Parameters
@@ -231,10 +231,13 @@ class HybridBlock(Block):
expressions like random numbers or intermediate results, since they change
the graph structure for each iteration.
Before activated with `hybridize()`, `HybridBlock` works just like normal
Before activating with `hybridize()`, `HybridBlock` works just like normal
`Block`. After activation, `HybridBlock` will create a symbolic graph
representing the forward computation and cache it. On subsequent forwards
representing the forward computation and cache it. On subsequent forwards,
the cached graph will be used instead of `hybrid_forward`.
Refer `Hybrid tutorial <http://mxnet.io/tutorials/gluon/hybrid.html>`_ to see
the end-to-end usage.
"""
def __init__(self, prefix=None, params=None):
super(HybridBlock, self).__init__(prefix=prefix, params=params)
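
To make the caching behaviour concrete, a minimal `HybridBlock` sketch (layer sizes and input shape are arbitrary; the hybrid tutorial linked above walks through the same workflow in more detail):

```python
import mxnet as mx
from mxnet.gluon import HybridBlock, nn

class HybridMLP(HybridBlock):
    def __init__(self, **kwargs):
        super(HybridMLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(20)
            self.dense1 = nn.Dense(10)

    def hybrid_forward(self, F, x):
        # F is mxnet.ndarray when running imperatively and mxnet.symbol
        # once a symbolic graph has been built.
        return self.dense1(F.relu(self.dense0(x)))

net = HybridMLP()
net.initialize()
x = mx.nd.random_uniform(shape=(8, 32))
net(x)             # imperative execution through hybrid_forward
net.hybridize()    # activate: build and cache the symbolic graph
net(x)             # subsequent calls reuse the cached graph
```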
@@ -288,7 +291,7 @@ def _get_graph(self, *args):
return self._cached_graph

def infer_shape(self, *args):
"""Infer shape of Parameters from inputs."""
"""Infers shape of Parameters from inputs."""
syms, out = self._get_graph(*args)
args, _, = _flatten(args)
arg_shapes, _, aux_shapes = out.infer_shape(
@@ -324,7 +327,7 @@ def _call_cached_op(self, *args):

def forward(self, x, *args):
"""Defines the forward computation. Arguments can be either
NDArray or Symbol."""
`NDArray` or `Symbol`."""
if isinstance(x, NDArray):
if self._active and self._cached_op is None:
self._build_cache(x, *args)
@@ -348,7 +351,7 @@ def forward(self, x, *args):
return self.hybrid_forward(symbol, x, *args, **params)

def hybrid_forward(self, F, x, *args, **kwargs):
"""Override to construct symbolic graph for this `Block`.
"""Overrides to construct symbolic graph for this `Block`.
Parameters
----------
67 changes: 34 additions & 33 deletions python/mxnet/gluon/loss.py
@@ -13,20 +13,20 @@ def _apply_weighting(F, loss, weight=None, sample_weight=None):
Parameters
----------
loss : Symbol
the loss to be weighted.
The loss to be weighted.
weight : float or None
global scalar weight for loss
Global scalar weight for loss.
sample_weight : Symbol or None
per sample weighting. Must be broadcastable to
Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch separately, sample_weight should have
shape (64, 1)
in the batch separately, `sample_weight` should have
shape (64, 1).
Returns
-------
loss : Symbol
weighted loss
Weighted loss
"""
if sample_weight is not None:
loss = F.broadcast_mul(loss, sample_weight)
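
The `sample_weight` broadcasting described here can be seen in isolation with `broadcast_mul`; a standalone sketch with made-up shapes:

```python
import mxnet as mx

loss = mx.nd.ones((4, 3))                               # per-element loss, batch of 4
sample_weight = mx.nd.array([[1.], [.5], [0.], [2.]])   # one weight per sample, shape (4, 1)

# Each row of `loss` is scaled by its sample's weight.
print(mx.nd.broadcast_mul(loss, sample_weight))
```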
@@ -39,23 +39,23 @@ def _apply_weighting(F, loss, weight=None, sample_weight=None):


class L2Loss(HybridBlock):
"""Calculate the mean squared error between output and label:
"""Calculates the mean squared error between output and label:
.. math::
L = \\frac{1}{2}\\sum_i \\Vert {output}_i - {label}_i \\Vert^2.
output and label can have arbitrary shape as long as they have the same
Output and label can have arbitrary shape as long as they have the same
number of elements.
Parameters
----------
weight : float or None
global scalar weight for loss
Global scalar weight for loss.
sample_weight : Symbol or None
per sample weighting. Must be broadcastable to
Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, sample_weight should have shape (64, 1)
in the batch, `sample_weight` should have shape (64, 1).
batch_axis : int, default 0
The axis that represents mini-batch.
"""
@@ -77,22 +77,22 @@ def hybrid_forward(self, F, output, label, sample_weight=None):


class L1Loss(HybridBlock):
"""Calculate the mean absolute error between output and label:
"""Calculates the mean absolute error between output and label:
.. math::
L = \\frac{1}{2}\\sum_i \\vert {output}_i - {label}_i \\vert.
output and label must have the same shape.
Output and label must have the same shape.
Parameters
----------
weight : float or None
global scalar weight for loss
Global scalar weight for loss.
sample_weight : Symbol or None
per sample weighting. Must be broadcastable to
Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, sample_weight should have shape (64, 1)
in the batch, `sample_weight` should have shape (64, 1).
batch_axis : int, default 0
The axis that represents mini-batch.
"""
@@ -114,19 +114,19 @@ def hybrid_forward(self, F, output, label, sample_weight=None):


class SoftmaxCrossEntropyLoss(HybridBlock):
"""Compute the softmax cross entropy loss.
"""Computes the softmax cross entropy loss.
If sparse_label is True, label should contain integer category indicators:
If `sparse_label` is `True`, label should contain integer category indicators:
.. math::
p = {softmax}({output})
L = -\\sum_i {log}(p_{i,{label}_i})
label's shape should be output's shape without the `axis` dimension. i.e. for
output.shape = (1,2,3,4) and axis = 2, label.shape should be (1,2,4)
Label's shape should be output's shape without the `axis` dimension. i.e. for
`output.shape` = (1,2,3,4) and axis = 2, `label.shape` should be (1,2,4).
If sparse_label is False, label should cantain probability distribution
If `sparse_label` is `False`, label should contain probability distribution
with the same shape as output:
.. math::
Expand All @@ -137,19 +137,19 @@ class SoftmaxCrossEntropyLoss(HybridBlock):
Parameters
----------
axis : int, default -1
The axis to sum over when computing softmax and entropy
The axis to sum over when computing softmax and entropy.
sparse_label : bool, default True
whether label is a integer array instead of probability distribution
Whether label is an integer array instead of probability distribution.
from_logits : bool, default False
whether input is log probability (usually from log_softmax) instead
Whether input is a log probability (usually from log_softmax) instead
of unnormalized numbers.
weight : float or None
global scalar weight for loss
Global scalar weight for loss.
sample_weight : Symbol or None
per sample weighting. Must be broadcastable to
Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, sample_weight should have shape (64, 1)
in the batch, `sample_weight` should have shape (64, 1).
batch_axis : int, default 0
The axis that represents mini-batch.
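
A sketch of both labelling modes described above (class indices versus full distributions); shapes and values are made up:

```python
import mxnet as mx
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss

output = mx.nd.random_uniform(shape=(4, 5))   # unnormalized scores for 5 classes

# sparse_label=True (default): integer class indices, shape (4,)
ce_sparse = SoftmaxCrossEntropyLoss()
print(ce_sparse(output, mx.nd.array([0, 2, 1, 4])))

# sparse_label=False: a full distribution with the same shape as output
ce_dense = SoftmaxCrossEntropyLoss(sparse_label=False)
label_dist = mx.nd.softmax(mx.nd.random_uniform(shape=(4, 5)))
print(ce_dense(output, label_dist))
```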
"""
Expand Down Expand Up @@ -184,20 +184,21 @@ class KLDivLoss(HybridBlock):
https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
.. math::
L = 1/n \\sum_i (label_i * (log(label_i) - output_i))
label's shape should be the same as output's.
Label's shape should be the same as output's.
Parameters
----------
from_logits : bool, default True
whether input is log probability (usually from log_softmax) instead
from_logits : bool, default is `True`
Whether the input is log probability (usually from log_softmax) instead
of unnormalized numbers.
weight : float or None
global scalar weight for loss
Global scalar weight for loss.
sample_weight : Symbol or None
per sample weighting. Must be broadcastable to
Per sample weighting. Must be broadcastable to
the same shape as loss. For example, if loss has
shape (64, 10) and you want to weight each sample
in the batch, sample_weight should have shape (64, 1)
in the batch, `sample_weight` should have shape (64, 1).
batch_axis : int, default 0
The axis that represents mini-batch.
"""
