Nn utils spectral norm new #9967

Draft
wants to merge 4 commits into master
2 changes: 2 additions & 0 deletions docs/source/nn.rst
@@ -355,6 +355,8 @@ From the ``oneflow.nn.utils`` module
clip_grad_value_
weight_norm
remove_weight_norm
spectral_norm
remove_spectral_norm

Utility functions in other modules

2 changes: 2 additions & 0 deletions python/oneflow/nn/utils/__init__.py
@@ -16,6 +16,8 @@
from oneflow.nn.utils.clip_grad import clip_grad_norm_, clip_grad_value_
from oneflow.nn.utils.weight_norm import weight_norm
from oneflow.nn.utils.weight_norm import remove_weight_norm
from oneflow.nn.utils.spectral_norm import spectral_norm
from oneflow.nn.utils.spectral_norm import remove_spectral_norm
from oneflow.nn.utils.convert_parameters import (
parameters_to_vector,
vector_to_parameters,
268 changes: 268 additions & 0 deletions python/oneflow/nn/utils/spectral_norm.py
@@ -0,0 +1,268 @@
"""
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


import oneflow as flow
from oneflow.framework.tensor import Tensor
from oneflow.nn.functional import normalize
from typing import Any, Optional, TypeVar
from oneflow.nn.modules.module import Module


class SpectralNorm:
# Invariant before and after each forward call:
# u = normalize(W @ v)
# NB: At initialization, this invariant is not enforced

name: str
dim: int
n_power_iterations: int
eps: float

def __init__(
self,
name: str = "weight",
n_power_iterations: int = 1,
dim: int = 0,
eps: float = 1e-12,
) -> None:
self.name = name
self.dim = dim
if n_power_iterations <= 0:
raise ValueError(
"Expected n_power_iterations to be positive, but "
"got n_power_iterations={}".format(n_power_iterations)
)
self.n_power_iterations = n_power_iterations
self.eps = eps

def reshape_weight_to_matrix(self, weight: Tensor) -> Tensor:
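# Flattens `weight` into a 2D matrix with `self.dim` as the row dimension.
# Illustrative example (the shapes are an assumption for illustration, not
# taken from this PR): a Conv2d weight of shape (out_ch, in_ch, kH, kW)
# with dim=0 becomes a matrix of shape (out_ch, in_ch * kH * kW).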
weight_mat = weight
if self.dim != 0:
# permute dim to front
weight_mat = weight_mat.permute(
self.dim, *[d for d in range(weight_mat.dim()) if d != self.dim]
)
height = weight_mat.size(0)
return weight_mat.reshape(height, -1)

def compute_weight(self, module: Module, do_power_iteration: bool) -> Tensor:
# NB: If `do_power_iteration` is set, the `u` and `v` vectors are
# updated in power iteration **in-place**. This is very important
# because in `DataParallel` forward, the vectors (being buffers) are
# broadcast from the parallelized module to each module replica,
# which is a new module object created on the fly. And each replica
# runs its own spectral norm power iteration. So simply assigning
# the updated vectors to the module this function runs on will cause
# the update to be lost forever. And the next time the parallelized
# module is replicated, the same randomly initialized vectors are
# broadcast and used!
#
# Therefore, to make the change propagate back, we rely on two
# important behaviors (also enforced via tests):
# 1. `DataParallel` doesn't clone storage if the broadcast tensor
# is already on correct device; and it makes sure that the
# parallelized module is already on `device[0]`.
# 2. If the out tensor in `out=` kwarg has correct shape, it will
# just fill in the values.
# Therefore, since the same power iteration is performed on all
# devices, simply updating the tensors in-place will make sure that
# the module replica on `device[0]` will update the _u vector on the
# parallelized module (by shared storage).
#
# However, after we update `u` and `v` in-place, we need to **clone**
# them before using them to normalize the weight. This is to support
# backpropagating through two forward passes, e.g., the common pattern in
# GAN training: loss = D(real) - D(fake). Otherwise, the autograd engine
# will complain that variables needed for backward of the first forward
# pass (i.e., the `u` and `v` vectors) were changed in the second forward.
weight = getattr(module, self.name + "_orig")
u = getattr(module, self.name + "_u")
v = getattr(module, self.name + "_v")
weight_mat = self.reshape_weight_to_matrix(weight)

if do_power_iteration:
with flow.no_grad():
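# Power iteration: alternately update v <- normalize(W^T u) and
# u <- normalize(W v); u and v approximate the leading left and right
# singular vectors of `weight_mat`, respectively.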
for _ in range(self.n_power_iterations):
v = normalize(flow.mv(weight_mat.t(), u), dim=0, eps=self.eps)
u = normalize(flow.mv(weight_mat, v), dim=0, eps=self.eps)
if self.n_power_iterations > 0:
# See above on why we need to clone
u = u.clone()
v = v.clone()

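# Rayleigh-quotient estimate of the largest singular value of `weight_mat`:
# sigma ~= u^T W v for the current power-iteration vectors u and v.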
sigma = flow.dot(u, flow.mv(weight_mat, v))
weight = weight / sigma
setattr(module, self.name + "_u", u)
setattr(module, self.name + "_v", v)
return weight

def remove(self, module: Module) -> None:
with flow.no_grad():
weight = self.compute_weight(module, do_power_iteration=False)
delattr(module, self.name)
delattr(module, self.name + "_u")
delattr(module, self.name + "_v")
delattr(module, self.name + "_orig")
module.register_parameter(self.name, flow.nn.Parameter(weight.detach()))

def __call__(self, module: Module, inputs: Any) -> None:
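# Runs as a forward pre-hook: recompute the weight from `<name>_orig`,
# refining u/v via power iteration only in training mode, and assign the
# result to `<name>` as a plain (non-Parameter) attribute.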
setattr(
module,
self.name,
self.compute_weight(module, do_power_iteration=module.training),
)

@staticmethod
def apply(
module: Module, name: str, n_power_iterations: int, dim: int, eps: float
) -> "SpectralNorm":
for k, hook in module._forward_pre_hooks.items():
if isinstance(hook, SpectralNorm) and hook.name == name:
raise RuntimeError(
"Cannot register two spectral_norm hooks on "
"the same parameter {}".format(name)
)

fn = SpectralNorm(name, n_power_iterations, dim, eps)
weight = module._parameters[name]
if weight is None:
raise ValueError(
f"`SpectralNorm` cannot be applied as parameter `{name}` is None"
)

with flow.no_grad():
weight_mat = fn.reshape_weight_to_matrix(weight)

h, w = weight_mat.size()
# randomly initialize `u` and `v`
u = normalize(weight.new_empty(h).normal_(0, 1), dim=0, eps=fn.eps)
v = normalize(weight.new_empty(w).normal_(0, 1), dim=0, eps=fn.eps)
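# u (length h) and v (length w) are the starting vectors for power
# iteration; they are registered as buffers below so the estimates are
# carried across forward calls.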

delattr(module, fn.name)
module.register_parameter(fn.name + "_orig", weight)
# We still need to assign weight back as fn.name because all sorts of
# things may assume that it exists, e.g., when initializing weights.
# However, we can't assign it directly, since it may be an nn.Parameter and
# would then be registered as a parameter again. Instead, we set weight.data
# as a plain attribute.
setattr(module, fn.name, weight.data)
module.register_buffer(fn.name + "_u", u)
module.register_buffer(fn.name + "_v", v)

module.register_forward_pre_hook(fn)
return fn


T_module = TypeVar("T_module", bound=Module)


def spectral_norm(
module: T_module,
name: str = "weight",
n_power_iterations: int = 1,
eps: float = 1e-12,
dim: Optional[int] = None,
) -> T_module:
r"""Applies spectral normalization to a parameter in the given module.

.. math::
\mathbf{W}_{SN} = \dfrac{\mathbf{W}}{\sigma(\mathbf{W})},
\sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \ne 0} \dfrac{\|\mathbf{W} \mathbf{h}\|_2}{\|\mathbf{h}\|_2}

Spectral normalization stabilizes the training of discriminators (critics)
in Generative Adversarial Networks (GANs) by rescaling the weight tensor
with the spectral norm :math:`\sigma` of the weight matrix, computed using
the power iteration method. If the weight tensor has more than two
dimensions, it is reshaped to a 2D matrix for power iteration to obtain the
spectral norm. This is implemented via a hook that computes the spectral
norm and rescales the weight before every :meth:`~Module.forward` call.

See `Spectral Normalization for Generative Adversarial Networks`_ .

.. _`Spectral Normalization for Generative Adversarial Networks`: https://arxiv.org/abs/1802.05957

Args:
module (nn.Module): containing module
name (str, optional): name of weight parameter
n_power_iterations (int, optional): number of power iterations to
calculate spectral norm
eps (float, optional): epsilon for numerical stability in
calculating norms
dim (int, optional): dimension corresponding to number of outputs,
the default is ``0``, except for modules that are instances of
ConvTranspose{1,2,3}d, when it is ``1``

Returns:
The original module with the spectral norm hook

.. note::
This implementation follows the hook-based ``torch.nn.utils.spectral_norm``
from PyTorch. In PyTorch, that function has been reimplemented as
``torch.nn.utils.parametrizations.spectral_norm`` on top of the
parametrization functionality in
``torch.nn.utils.parametrize.register_parametrization``, and the
hook-based version is slated for deprecation there.

For example:

.. code-block:: python

>>> import oneflow as flow
>>> m = flow.nn.utils.spectral_norm(flow.nn.Linear(20, 40))
>>> m
Linear(in_features=20, out_features=40, bias=True)
>>> m.weight_u.size()
oneflow.Size([40])
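
A sketch of typical GAN-style usage (the discriminator layout, layer sizes,
and ``LeakyReLU`` activation below are illustrative assumptions, not part of
this function):

.. code-block:: python

>>> disc = flow.nn.Sequential(
...     flow.nn.utils.spectral_norm(flow.nn.Linear(64, 128)),
...     flow.nn.LeakyReLU(0.2),
...     flow.nn.utils.spectral_norm(flow.nn.Linear(128, 1)),
... )
>>> out = disc(flow.randn(8, 64))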

"""
if dim is None:
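# ConvTranspose{1,2,3}d store their weight as (in_channels, out_channels, ...),
# so the output dimension is dim=1 for them; all other modules use dim=0.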
if isinstance(
module,
(flow.nn.ConvTranspose1d, flow.nn.ConvTranspose2d, flow.nn.ConvTranspose3d),
):
dim = 1
else:
dim = 0
SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
return module


def remove_spectral_norm(module: T_module, name: str = "weight") -> T_module:
r"""Removes the spectral normalization reparameterization from a module.

Args:
module (Module): containing module
name (str, optional): name of weight parameter

For example:

.. code-block:: python

>>> import oneflow as flow
>>> m = flow.nn.utils.spectral_norm(flow.nn.Linear(40, 10))
>>> flow.nn.utils.remove_spectral_norm(m)
Linear(in_features=40, out_features=10, bias=True)

"""
for k, hook in module._forward_pre_hooks.items():
if isinstance(hook, SpectralNorm) and hook.name == name:
hook.remove(module)
del module._forward_pre_hooks[k]
break
else:
raise ValueError("spectral_norm of '{}' not found in {}".format(name, module))

return module