Merge pull request #705 from JuliaGPU/tb/compat_cudnn
Backports for Julia 1.5
maleadt authored Feb 10, 2021
2 parents 02ac36a + b52b41d commit dad01a5
Showing 39 changed files with 3,426 additions and 1,209 deletions.
54 changes: 27 additions & 27 deletions .buildkite/pipeline.yml
@@ -3,11 +3,11 @@ steps:

- label: "Julia 1.5"
plugins:
- JuliaCI/julia#v0.5:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.3:
- JuliaCI/julia-test#v1:
test_args: "--quickfail"
- JuliaCI/julia-coverage#v0.3:
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -23,11 +23,11 @@ steps:

- label: "Julia 1.5 (debug)"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2:
- JuliaCI/julia-test#v1:
julia_args: "-g2"
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -50,10 +50,10 @@ steps:

- label: "CUDA 11.2"
plugins:
- JuliaCI/julia#v0.6:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.3: ~
- JuliaCI/julia-coverage#v0.3:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -70,10 +70,10 @@ steps:

- label: "CUDA 11.1"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2: ~
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -92,10 +92,10 @@ steps:

- label: "CUDA 11.0"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2: ~
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -114,10 +114,10 @@ steps:

- label: "CUDA 10.2"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2: ~
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -136,10 +136,10 @@ steps:

- label: "CUDA 10.1"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2: ~
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -199,10 +199,10 @@ steps:

- label: "Split memory pool"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
- JuliaCI/julia-test#v0.2: ~
- JuliaCI/julia-coverage#v0.2:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
@@ -225,7 +225,7 @@ steps:
# so they can run on any system in the juliagpu queue.
- label: "Benchmarks (dry run)"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
command: |
julia --project -e '
@@ -249,7 +249,7 @@ steps:
# be running on the same system each time
- label: "Benchmarks"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
env:
CODESPEED_PROJECT: "$BUILDKITE_PIPELINE_NAME"
@@ -278,7 +278,7 @@ steps:

- label: "Documentation"
plugins:
- JuliaCI/julia#v0.4:
- JuliaCI/julia#v1:
version: 1.5
command: |
julia --project -e '
19 changes: 19 additions & 0 deletions deps/discovery.jl
@@ -158,6 +158,21 @@ const cuda_releases = [v"1.0", v"1.1",
v"11.0", v"11.1"]

const cuda_library_versions = Dict(
v"11.0.1" => Dict(
# NOTE: encountered this version in a Docker container; not sure where it came from.
"cudart" => v"11.0.171",
"cupti" => "2020.1.0", # wtf
"nvrtc" => v"11.0.167",
"nvtx" => v"11.0.167",
"nvvp" => v"11.0.167",
"cublas" => v"11.0.0", #.191
"cufft" => v"10.1.3", #.191
"curand" => v"10.2.0", #.191
"cusolver" => v"10.4.0", #.191
"cusparse" => v"11.0.0", #.191
"npp" => v"11.0.0", #.191
"nvjpeg" => v"11.0.0", #.191
),
v"11.0.2" => Dict(
"cudart" => v"11.0.171",
"cupti" => "2020.1.0", # wtf
@@ -250,6 +265,10 @@ const cuda_library_names = Dict(

# only for nvdisasm, to discover the CUDA toolkit version
const cuda_binary_versions = Dict(
v"11.0.1" => Dict(
# NOTE: encountered this version in a Docker container; not sure where it came from.
"nvdisasm" => v"11.0.167"
),
v"11.0.2" => Dict(
"nvdisasm" => v"11.0.194"
),
12 changes: 8 additions & 4 deletions lib/cudnn/CUDNN.jl
@@ -19,20 +19,24 @@ include("libcudnn_deprecated.jl")
# low-level wrappers
include("util.jl")
include("base.jl")
include("descriptors.jl")
include("tensor.jl")
include("conv.jl")
include("inplace.jl")
include("optensor.jl")
include("reduce.jl")
include("convolution.jl")
include("pooling.jl")
include("activation.jl")
include("filter.jl")
include("softmax.jl")
include("batchnorm.jl")
include("dropout.jl")
include("rnn.jl")
include("multiheadattn.jl")
include("normalization.jl")

# high-level integrations
include("nnlib.jl")
include("batchnorm.jl")

include("compat.jl")

function math_mode(mode=CUDA.math_mode())
if mode == CUDA.PEDANTIC_MATH
91 changes: 91 additions & 0 deletions lib/cudnn/README.md
@@ -0,0 +1,91 @@
## High level interface to cuDNN functions
Deniz Yuret, Nov 6, 2020

The goal of the high-level interface is to map the low level cuDNN calls to more natural
Julia functions. Here are some design choices I followed:

**Naming:** We try to keep the same function, argument, and type names from the cuDNN
library in the high level interface. The wrappers for descriptors drop the `_t` suffix,
e.g. `cudnnPoolingDescriptor_t => cudnnPoolingDescriptor`.

**Descriptors:** The cuDNN functions take data and operator descriptors. Most of these
descriptors are relatively fast to create (~500 ns for a cudnnTensorDescriptor) so they may
not be worth preallocating for the user, but we provide keyword options anyway. We cache
descriptors (a cache lookup takes ~100 ns) so we can use them as hash keys for memoization,
which also saves a bit of memory and time. All descriptor fields are `isbits` types with the exception of the
`cudnnDropoutDescriptor` which points to a random number generator state and is used as a
field of some other descriptors.
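
For illustration, here is a minimal sketch of the caching idea, not the actual
implementation (the cache container and helper below are made up for this example):

    # Illustrative only: reuse a descriptor whose (isbits) construction arguments
    # have been seen before instead of recreating it on every call.
    const DESCRIPTOR_CACHE = Dict{Any,Any}()

    function cached_descriptor(constructor, args...)
        get!(DESCRIPTOR_CACHE, (constructor, args)) do
            constructor(args...)   # pay the ~500 ns construction cost only on a miss
        end
    end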

**Operator descriptors:** Descriptors such as `cudnnPoolingDescriptor` specify the options
for an operator such as stride and padding. For operators with descriptors we have one
method that takes keyword arguments with reasonable defaults to construct the descriptor and
another method that takes a pre-initialized descriptor as its last argument. This way a
casual user can call the first method without worrying about the descriptor format, only
specifying non-default options, whereas a layer architect can keep a preset descriptor in
the layer that gets passed to the function using the second method. We try to use generic
Julia types for keyword arguments that specify default descriptor fields and convert these
to the appropriate cudnn types during descriptor construction.
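
For example, with the activation wrapper documented later in this PR (a hedged usage
sketch: it assumes the `CUDA.CUDNN` submodule, a functional GPU, and that these names
are reachable as written):

    using CUDA
    import CUDA: CUDNN

    x = CUDA.rand(Float32, 8, 8, 4, 2)

    # casual use: pass options as keywords, the descriptor is built (and cached) internally
    y1 = CUDNN.cudnnActivationForward(x; mode=CUDNN.CUDNN_ACTIVATION_TANH)

    # layer-architect use: build the descriptor once and reuse it on every call
    d = CUDNN.cudnnActivationDescriptor(CUDNN.CUDNN_ACTIVATION_TANH,
                                        CUDNN.CUDNN_NOT_PROPAGATE_NAN, Cdouble(1))
    y2 = CUDNN.cudnnActivationForward(x, d)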

**Output arrays:** The low level cuDNN functions take pre-allocated output arrays. The high
level interface has one Julia function that allocates its own output array
(e.g. `cudnnPoolingForward`) and another with an exclamation mark that takes a pre-allocated
output array as its first argument (e.g. `cudnnPoolingForward!`).

**Methods:** Each cuDNN forward function may have up to four methods depending on whether
the descriptor and the output array are specified:

cudnnPoolingForward(x; kwargs...)
cudnnPoolingForward(x, d::cudnnPoolingDescriptor; kwargs...)
cudnnPoolingForward!(y, x; kwargs...)
cudnnPoolingForward!(y, x, d::cudnnPoolingDescriptor; kwargs...)

The conventional order of arguments for these public methods is:

([output], weights, inputs, [descriptor]; kwargs...)
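
Continuing the sketch above, the in-place variants take the pre-allocated output as their
first argument (again hedged, assuming the same names and setup):

    y = similar(x)
    CUDNN.cudnnActivationForward!(y, x; mode=CUDNN.CUDNN_ACTIVATION_TANH)  # caller-supplied output
    CUDNN.cudnnActivationForward!(y, x, d)                                 # same, with a preset descriptor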

**AD method:** Sometimes neither the high level nor the low level interface is
appropriate for gradient definitions: e.g. the low level API may not return a value, and the
high level API may take some gradient target parameters as keyword arguments. To solve this
issue the API exposes an intermediate function with an AD suffix,
e.g. `cudnnPoolingForwardAD`, that is called by the high level method and that in turn makes
the low level library call. These methods may not seem to do anything useful,
but they should not be removed: automatic gradient packages can hook into them.

**Backward functions:** The point of a high level interface is to give the user appropriate
defaults for the many options of typical cudnn functions. Backward functions do not have
meaningful defaults because they need to copy their options from the corresponding forward
function. Therefore we do not need high level APIs for backward functions unless they are
useful in some other way. See Knet/src/cudnn for example uses.

**Types:** Do not specify types for array arguments. Leave the high level functions generic
so they can be called with CuArray, KnetArray, AutoGrad.Param etc. Types can and should be
specified for non-array arguments. In the API we use `nothing` to indicate unspecified array
argument values and convert these to `C_NULL` or `CU_NULL` as appropriate only at the low-level
call. Similarly, for numbers the API should accept generic types like `Integer` or `Real` and
convert these to the appropriate specific type, e.g. `Cint` or `Cdouble`, only at the
low-level call.
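
A minimal sketch of this convention (the function names below are placeholders, not
actual CUDA.jl or cuDNN wrappers): the public signature stays generic and the narrowing
to C types happens only right before the library call.

    # placeholder standing in for a generated low-level ccall wrapper
    liblevel_call(ptr, n::Cint, coef::Cdouble) = (ptr, n, coef)

    function api_call(x, n::Integer, coef::Real)
        ptr = x === nothing ? C_NULL : pointer(x)   # `nothing` becomes C_NULL only here
        liblevel_call(ptr, Cint(n), Cdouble(coef))  # Integer/Real narrowed only here
    end

    api_call(nothing, 3, 1)         # unspecified array argument
    api_call(zeros(4), Int8(3), 1)  # generic Integer/Real inputs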

**Workspace:** Some functions need a temporary allocated workspace whose required size is
determined by another cudnn call. Unfortunately, the required size may depend on factors
other than the current inputs (see [this
issue](https://github.com/FluxML/Flux.jl/issues/923#issuecomment-558671966)), so the
`@workspace` macro is used at a point as close to the library call as possible. The one
exception is when the same workspace must be passed to the backward call, in
which case we allocate a regular CuArray.
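
A rough sketch of the pattern the macro captures, with stand-in names (`required_bytes`
and `library_call` are not real cudnn calls): the buffer is sized immediately before the
call and only lives for that call.

    required_bytes() = 1 << 20                   # stand-in for a cudnn *GetWorkspaceSize query
    library_call(workspace, nbytes) = nbytes     # stand-in for the actual cudnn call

    function with_workspace(f, nbytes::Integer)
        buf = Vector{UInt8}(undef, nbytes)       # in CUDA.jl this would be device memory
        f(buf)                                   # the macro additionally reclaims the buffer eagerly
    end

    with_workspace(required_bytes()) do ws
        library_call(ws, sizeof(ws))             # workspace handed straight to the library call
    end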

**Training vs Inference:** cuDNN does not distinguish training from inference calls in a consistent way:
* BatchNormalization and Normalization have two separate functions: `cudnnNormalizationForwardTraining / Inference`
* RNN has an indicator argument: `fwdMode` in `cudnnRNNForward`
* MultiHeadAttn looks at the `reserveSpace` argument to decide: if `NULL` inference mode, otherwise training mode
* Dropout always runs in training mode with a non-NULL `reserveSpace` (it doesn't make sense in inference mode)
* Activation, convolution, pooling, softmax, optensor, addtensor, reducetensor do not make a distinction between the two modes

In the high level API we assume inference by default and let the gradient packages override when necessary.
See the gradient implementations in Knet/src/cudnn for examples.

**TODO:**
* Keyword arg descriptor constructors.
* Test forw fns with descriptors: check for desc vs kwarg incompatibility.
* Find out about cudnnRNNSetClip_v8.
* Test with Knet.Ops20.
* Command used to test: julia17 --project -e 'using Pkg; Pkg.API.test(; test_args=`--memcheck --jobs=1 cudnn`)'
87 changes: 50 additions & 37 deletions lib/cudnn/activation.jl
@@ -1,44 +1,57 @@
# descriptor

mutable struct ActivationDesc
ptr::cudnnActivationDescriptor_t
"""
cudnnActivationForward(x; mode, nanOpt, coef, alpha)
cudnnActivationForward(x, d::cudnnActivationDescriptor; alpha)
cudnnActivationForward!(y, x; mode, nanOpt, coef, alpha, beta)
cudnnActivationForward!(y, x, d::cudnnActivationDescriptor; alpha, beta)
Return the result of the specified elementwise activation operation applied to `x`.
Optionally `y` holds the result and `d` specifies the operation. `y` should be similar to
`x` if specified. Keyword arguments `alpha=1, beta=0` can be used for scaling, i.e. `y .=
alpha*op.(x1) .+ beta*y`. The following keyword arguments specify the operation if `d` is
not given:
* `mode = CUDNN_ACTIVATION_RELU`: Options are SIGMOID, RELU, TANH, CLIPPED_RELU, ELU, IDENTITY
* `nanOpt = CUDNN_NOT_PROPAGATE_NAN`: NAN propagation policy, the other option is `CUDNN_PROPAGATE_NAN`
* `coef=1`: When the activation mode is set to CUDNN_ACTIVATION_CLIPPED_RELU, this input specifies the clipping threshold; and when the activation mode is set to CUDNN_ACTIVATION_ELU, this input specifies the α parameter.
"""
cudnnActivationForward, cudnnActivationForward!


# Public methods
cudnnActivationForward(x; o...) = cudnnActivationForwardWithDefaults(x; o...)
cudnnActivationForward!(y, x; o...) = cudnnActivationForwardWithDefaults(x; y, o...)
cudnnActivationForward(x, d::cudnnActivationDescriptor; o...) = cudnnActivationForwardWithDefaults(x; activationDesc=d, o...)
cudnnActivationForward!(y, x, d::cudnnActivationDescriptor; o...) = cudnnActivationForwardWithDefaults(x; y, activationDesc=d, o...)


# Private method
function cudnnActivationForwardWithDefaults(
x;
y = similar(x),
mode::cudnnActivationMode_t = CUDNN_ACTIVATION_RELU,
nanOpt::cudnnNanPropagation_t = CUDNN_NOT_PROPAGATE_NAN,
coef::Real=1,
activationDesc::cudnnActivationDescriptor = cudnnActivationDescriptor(mode, nanOpt, Cdouble(coef)),
alpha::Real=1,
beta::Real=0,
xDesc::cudnnTensorDescriptor = cudnnTensorDescriptor(x),
yDesc::cudnnTensorDescriptor = xDesc,
)
T = eltype(x)
alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta)
cudnnActivationForwardAD(x; activationDesc, alpha, xDesc, beta, yDesc, y)
end

unsafe_free!(ad::ActivationDesc)=cudnnDestroyActivationDescriptor(ad.ptr)

Base.unsafe_convert(::Type{cudnnActivationDescriptor_t}, ad::ActivationDesc)=ad.ptr

function ActivationDesc(mode, coeff, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN)
ad = Ref{cudnnActivationDescriptor_t}()
cudnnCreateActivationDescriptor(ad)
cudnnSetActivationDescriptor(ad[],mode,reluNanOpt,coeff)
this = ActivationDesc(ad[])
finalizer(unsafe_free!, this)
return this
# AD method:
function cudnnActivationForwardAD(x; activationDesc, alpha, xDesc, beta, yDesc, y)
cudnnActivationForward(handle(), activationDesc, alpha, xDesc, x, beta, yDesc, y)
return y
end


# wrappers

function cudnnActivationForward(x::DenseCuArray{T,N}, y::DenseCuArray{T,N}=x;
mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=true,
beta=false) where {T,N}
cudnnActivationForward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
scalingParameter(T, alpha), TensorDesc(x), x,
scalingParameter(T, beta ), TensorDesc(y), y)
return y
end

function cudnnActivationBackward(x::DenseCuArray{T,N}, dx::DenseCuArray{T,N},
y::DenseCuArray{T,N}, dy::DenseCuArray{T,N}=dx;
mode=CUDNN_ACTIVATION_RELU, # CUDNN_ACTIVATION_IDENTITY will not work
coeff=false, reluNanOpt=CUDNN_NOT_PROPAGATE_NAN, alpha=1,
beta=false) where {T,N}
cudnnActivationBackward(handle(), ActivationDesc(mode, T(coeff), reluNanOpt),
scalingParameter(T, alpha), TensorDesc( y), y,
TensorDesc(dy), dy,
TensorDesc( x), x,
scalingParameter(T, beta ), TensorDesc(dx), dx)
return dx
# Deprecated:
function cudnnActivationForward(x::DenseCuArray{T,N}, y::DenseCuArray{T,N}; o...) where {T,N}
@warn "`cudnnActivationForward(x,y)` is deprecated, please use one of the methods in `@doc cudnnActivationForward`." maxlog=1
cudnnActivationForward!(y, x; o...)
end