
create NNlibCUDA sub-package #286

Merged: 17 commits, Mar 12, 2021
52 changes: 52 additions & 0 deletions .buildkite/pipeline.yml
@@ -0,0 +1,52 @@
steps:
- label: "GPU julia v1.6"
plugins:
- JuliaCI/julia#v1:
version: "1.6"
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
- src
- lib
commands:
- julia .ci/develop_nnlibcuda.jl
- julia .ci/test_nnlibcuda.jl
agents:
queue: "juliagpu"
cuda: "*"
timeout_in_minutes: 60

## Add these once Julia 1.7 is out
# - label: "GPU julia v1"
# plugins:
# - JuliaCI/julia#v1:
# version: "1"
# - JuliaCI/julia-coverage#v1:
# codecov: true
# dirs:
# - src
# - lib
# commands:
# - julia .ci/develop.jl
# - julia .ci/test.jl
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

# - label: "GPU julia nightly"
# plugins:
# - JuliaCI/julia#v1:
# version: "nightly"
# - JuliaCI/julia-coverage#v1:
# codecov: true
# dirs:
# - src
# - lib
# commands:
# - julia .ci/develop.jl
# - julia .ci/test.jl
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60
7 changes: 7 additions & 0 deletions .ci/develop.jl
@@ -0,0 +1,7 @@
import Pkg

root_directory = dirname(@__DIR__)

nnlib = Pkg.PackageSpec(path = root_directory)
Pkg.develop(nnlib)
Pkg.precompile()
15 changes: 15 additions & 0 deletions .ci/develop_nnlibcuda.jl
@@ -0,0 +1,15 @@
import Pkg

root_directory = dirname(@__DIR__)

nnlib = Pkg.PackageSpec(path = root_directory)
nnlibcuda = Pkg.PackageSpec(path = joinpath(root_directory, "lib", "NNlibCUDA"))

Pkg.develop(nnlib)
Pkg.develop(nnlibcuda)

## Do this for the time being since `Pkg.test` doesn't pick up the manifest
## for some reason. This and the manifests can be removed once CUDA 3.0 is released.
Pkg.add(url="https://github.com/JuliaGPU/CUDA.jl.git", rev="master")

Pkg.precompile()
5 changes: 5 additions & 0 deletions .ci/test.jl
@@ -0,0 +1,5 @@
import Pkg

pkgs = ["NNlib"]

Pkg.test(pkgs; coverage = true)
5 changes: 5 additions & 0 deletions .ci/test_nnlibcuda.jl
@@ -0,0 +1,5 @@
import Pkg

pkgs = ["NNlibCUDA"]

Pkg.test(pkgs; coverage = true)
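For readers who want to reproduce the Buildkite steps locally, the two scripts above amount to developing the root package and the sub-package, then testing the latter. A minimal local sketch, assuming a plain checkout of the repository, the repository root as the working directory, and a CUDA-capable machine:

# Rough local equivalent of .ci/develop_nnlibcuda.jl followed by .ci/test_nnlibcuda.jl.
# Assumes the current directory is the repository root and a CUDA GPU is available.
import Pkg

Pkg.develop(Pkg.PackageSpec(path = "."))                           # NNlib itself
Pkg.develop(Pkg.PackageSpec(path = joinpath("lib", "NNlibCUDA")))  # the new sub-package
Pkg.precompile()

Pkg.test("NNlibCUDA"; coverage = true)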
56 changes: 33 additions & 23 deletions .github/workflows/ci.yml
@@ -1,9 +1,17 @@
name: CI

# env:
# JULIA_NUM_THREADS: 2
on:
push:
branches:
- master
- staging
- trying
tags: '*'
pull_request:

on: [push, pull_request]
defaults:
run:
shell: bash

jobs:
test:
@@ -44,13 +52,10 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
## `allow-failure` not available yet https://github.com/actions/toolkit/issues/399
#continue-on-error: ${{ matrix.julia-version == 'nightly' }}
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
# continue-on-error: ${{ matrix.julia-version == 'nightly' }}
env:
cache-name: cache-artifacts
with:
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
# continue-on-error: ${{ matrix.julia-version == 'nightly' }}
- uses: julia-actions/julia-runtest@v1
# continue-on-error: ${{ matrix.julia-version == 'nightly' }}
- run: julia .ci/develop.jl
- run: julia .ci/test.jl
- uses: julia-actions/julia-processcoverage@v1
# continue-on-error: ${{ matrix.julia-version == 'nightly' }}
- uses: codecov/codecov-action@v1
# continue-on-error: ${{ matrix.version == 'nightly' }}
with:
file: lcov.info

# - uses: julia-actions/setup-julia@v1
# with:
# version: '1'
# - run: |
# julia --project=docs -e '
# using Pkg
# Pkg.develop(PackageSpec(path=pwd()))
# Pkg.instantiate()'
# - run: |
# julia --project=docs -e '
# using Documenter: DocMeta, doctest
# using NNlib
# DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib); recursive=true)
# doctest(NNlib)'
# - run: julia --project=docs .ci/develop.jl
# - run: julia --project=docs docs/make.jl
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}

# doctests:
# name: Doctests
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: julia-actions/setup-julia@v1
# with:
# version: '1'
# - run: julia --project=docs .ci/develop.jl
# - run: |
# julia --project=docs -e '
# using Documenter
# using NNlib
# # using NNlibCUDA
# DocMeta.setdocmeta!(NNlib, :DocTestSetup, :(using NNlib); recursive=true)
# # DocMeta.setdocmeta!(NNlibCUDA, :DocTestSetup, :(using NNlib, CUDA); recursive=true)
# doctest(NNlib)
# # doctest(NNlibCUDA)'
2 changes: 1 addition & 1 deletion .gitignore
@@ -10,4 +10,4 @@
deps/usr
deps.jl
*.log
Manifest.toml
./Manifest.toml
3 changes: 1 addition & 2 deletions Project.toml
@@ -17,7 +17,6 @@ Requires = "0.5, 1.0"
julia = "1.3"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["ChainRulesTestUtils", "CUDA", "FiniteDifferences", "Random", "StableRNGs", "Test", "Zygote"]
test = ["ChainRulesTestUtils", "FiniteDifferences", "Random", "StableRNGs", "Test", "Zygote"]
5 changes: 5 additions & 0 deletions docs/Project.toml
@@ -0,0 +1,5 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

[compat]
Documenter = "0.24"
1 change: 1 addition & 0 deletions lib/NNlibCUDA/.gitignore
@@ -0,0 +1 @@
#Manifest.toml
23 changes: 23 additions & 0 deletions lib/NNlibCUDA/Project.toml
@@ -0,0 +1,23 @@
name = "NNlibCUDA"
uuid = "a00861dc-f156-4864-bf3c-e6376f28a68d"
version = "0.1.0"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
CUDA = "2.6, 3"
NNlib = "0.7"
julia = "1.6"

[extras]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["ForwardDiff", "Test", "Zygote"]
5 changes: 5 additions & 0 deletions lib/NNlibCUDA/README.md
@@ -0,0 +1,5 @@
# NNlibCUDA

CUDA compatibility for NNlib.jl.

Julia GPU kernels are in `src/`, while wrappers around cuDNN are in `src/cudnn/`.
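To make that division of labour concrete, here is a small usage sketch (not part of this diff, and assuming a working CUDA setup): NNlibCUDA adds CuArray methods to the existing NNlib functions, so the ordinary NNlib API is used unchanged on GPU arrays.

# Illustrative sketch only (not from this PR); assumes a functional CUDA installation.
using CUDA, NNlib, NNlibCUDA

x = CUDA.rand(Float32, 28, 28, 3, 16)    # WHCN image batch
w = CUDA.rand(Float32, 3, 3, 3, 8)       # 3×3 kernels, 3 input / 8 output channels

y = NNlib.conv(x, w)                     # handled by the cuDNN conv wrapper
p = NNlib.maxpool(y, (2, 2))             # cuDNN pooling wrapper
s = NNlib.softmax(CUDA.rand(Float32, 10, 16); dims = 1)  # cuDNN softmax wrapper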
18 changes: 18 additions & 0 deletions lib/NNlibCUDA/src/NNlibCUDA.jl
@@ -0,0 +1,18 @@
module NNlibCUDA

using NNlib
using CUDA
using CUDA: @cufunc
using Random, Statistics

include("upsample.jl")
include("activations.jl")
include("batchedmul.jl")
include("cudnn/cudnn.jl")
include("cudnn/conv.jl")
include("cudnn/pooling.jl")
include("cudnn/softmax.jl")
include("cudnn/activations.jl")
include("cudnn/batchnorm.jl")

end # module
24 changes: 24 additions & 0 deletions lib/NNlibCUDA/src/activations.jl
@@ -0,0 +1,24 @@
# Activation functions

# Some of the activation functions need a wrapper for GPU support
# https://github.com/JuliaGPU/CuArrays.jl/issues/614

# @cufunc softplus(x::Real) = ifelse(x > 0, x + log1p(exp(-x)), log1p(exp(x)))

# @cufunc logσ(x::Real) = -softplus(-x)

# @cufunc function gelu(x::Real)
# p = oftype(x / 1, π)
# λ = oftype(x / 1, √(2 / p))
# α = oftype(x / 1, 0.044715)
# h = oftype(x / 1, 0.5)
# h * x * (one(x) + tanh(λ * (x + α * x^3)))
# end

# @cufunc lisht(x::Real) = x * tanh(x)

# @cufunc logcosh(x::Real) = x + softplus(-2x) - log(oftype(x, 2))

# @cufunc mish(x::Real) = x * tanh(softplus(x))

# @cufunc tanhshrink(x::Real) = x - tanh(x)
7 changes: 7 additions & 0 deletions lib/NNlibCUDA/src/batchedmul.jl
@@ -0,0 +1,7 @@
# Batched matrix multiplication
# 1st argument is produced by NNlib.storage_type(A)
NNlib._batched_gemm!(::Type{<:CuArray}, transA::Char, transB::Char, α::Number, A, B, β::Number, C) =
CUBLAS.gemm_strided_batched!(transA, transB, α, A, B, β, C)

Base.unsafe_convert(::Type{CuPtr{T}}, A::NNlib.BatchedAdjOrTrans{T}) where {T} =
Base.unsafe_convert(CuPtr{T}, parent(A))
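Because only the internal `_batched_gemm!` hook is specialised here, the public `NNlib.batched_mul` works on `CuArray`s without further changes. A small illustrative call (assumed usage, not part of the diff):

# batched_mul treats the third dimension as the batch dimension; with CuArray inputs
# the method above routes the work to CUBLAS.gemm_strided_batched!.
using CUDA, NNlib, NNlibCUDA

A = CUDA.rand(Float32, 2, 3, 5)
B = CUDA.rand(Float32, 3, 4, 5)
C = NNlib.batched_mul(A, B)     # size (2, 4, 5): one 2×3 times 3×4 product per slice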
39 changes: 39 additions & 0 deletions lib/NNlibCUDA/src/cudnn/activations.jl
@@ -0,0 +1,39 @@

# Activation

using Base.Broadcast
using CUDA.CUDNN: cudnnActivationForward!, cudnnOpTensor!,
CUDNN_ACTIVATION_TANH,CUDNN_ACTIVATION_SIGMOID,CUDNN_ACTIVATION_ELU,
CUDNN_ACTIVATION_RELU,CUDNN_ACTIVATION_CLIPPED_RELU,CUDNN_OP_TENSOR_MAX

for (f, op) in [
CUDA.tanh => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_TANH),
NNlib.σ => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_SIGMOID),
NNlib.elu => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_ELU),
NNlib.relu => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_RELU),
# NNlib.relu6 => (src,dst)->cudnnActivationForward!(dst, src, mode=CUDNN_ACTIVATION_CLIPPED_RELU, coef=6.0),
# NNlib.leakyrelu => (src,dst)->cudnnOpTensor!(dst, src, src; op=CUDNN_OP_TENSOR_MAX, alpha1=0.01),
]

@eval begin
# in-place
function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat},
bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
$op(bc.args[1], dst)
return dst
end

# out of place
function Base.materialize(bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
ElType = Broadcast.combine_eltypes(bc.f, bc.args)
dst = similar(bc, ElType)
$op(bc.args[1], dst)
return dst
end
end
end

# CUDNN_ACTIVATION_IDENTITY does not work with cudnnActivationForward
# FIXME: put this optimization in GPUArrays' `copyto!` (like Base.Broadcast's `copyto!`)
Base.broadcasted(::typeof(identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
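These overloads only intercept the simplest broadcast pattern: a single `DenseCuArray` argument with no fusion. A sketch of the calls they catch (assumed usage, not part of the diff):

# Sketch only: single-argument activation broadcasts over a CuArray are rerouted to cuDNN.
using CUDA, NNlib, NNlibCUDA

x = CUDA.rand(Float32, 128, 32)
y = similar(x)

z = NNlib.relu.(x)    # out-of-place: Base.materialize → cudnnActivationForward!
y .= NNlib.elu.(x)    # in-place:     Base.materialize! → cudnnActivationForward!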
