CuArrays -> CUDA #1204

Closed
wants to merge 21 commits
4 changes: 2 additions & 2 deletions .gitlab-ci.yml
@@ -25,9 +25,9 @@ image: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
# tags:
# - nvidia

julia:1.3:
julia:1.4:
extends:
- .julia:1.3
- .julia:1.4
- .test
tags:
- nvidia
2 changes: 1 addition & 1 deletion .travis.yml
@@ -6,7 +6,7 @@ os:
# - osx

julia:
- 1.3
- 1.4
- 1
- nightly

93 changes: 35 additions & 58 deletions Manifest.toml
@@ -14,9 +14,9 @@ version = "0.3.3"

[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "fd04049c7dd78cfef0b06cdc1f0f181467655712"
git-tree-sha1 = "9e31b9f5d3f9b5ec32c1d01fd8c05270bad10dc0"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "1.1.0"
version = "2.0.0"

[[ArrayLayouts]]
deps = ["FillArrays", "LinearAlgebra"]
@@ -34,33 +34,17 @@ uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.10"

[[CEnum]]
git-tree-sha1 = "1b77a77c3b28e0b3f413f7567c9bb8dd9bdccd14"
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.3.0"

[[CUDAapi]]
deps = ["Libdl", "Logging"]
git-tree-sha1 = "831b825d10104bd29e28f6da93312a976830717b"
uuid = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
version = "4.0.0"

[[CUDAdrv]]
deps = ["CEnum", "CUDAapi", "Printf"]
git-tree-sha1 = "f56bbf18c86bcff7a961a32a4947a5abb2963a29"
uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde"
version = "6.3.0"

[[CUDAnative]]
deps = ["Adapt", "BinaryProvider", "CEnum", "CUDAapi", "CUDAdrv", "ExprTools", "GPUCompiler", "LLVM", "Libdl", "Pkg", "Printf"]
git-tree-sha1 = "ac86db2b05fdfec96b011e25a504ffe7476e8a68"
uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
version = "3.1.0"

[[CodeTracking]]
deps = ["InteractiveUtils", "UUIDs"]
git-tree-sha1 = "cab4da992adc0a64f63fa30d2db2fd8bec40cab4"
uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
version = "0.5.11"
version = "0.4.1"

[[CUDA]]
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "IntervalTrees", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"]
git-tree-sha1 = "fb162ef4a593c6cc919face4a572f94c6f27b685"
repo-rev = "master"
repo-url = "https://github.com/JuliaGPU/CUDA.jl.git"
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "1.0.2"

[[CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
@@ -70,9 +54,9 @@ version = "0.7.0"

[[ColorTypes]]
deps = ["FixedPointNumbers", "Random"]
git-tree-sha1 = "c73d9cfc2a9d8433dc77f5bff4bddf46b1d78c20"
git-tree-sha1 = "27eb374570946a02aa184ef5b403dabaa7380693"
uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
version = "0.10.3"
version = "0.10.4"

[[Colors]]
deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"]
@@ -92,28 +76,16 @@ git-tree-sha1 = "7c4f882c41faa72118841185afc58a2eb00ef612"
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "0.3.3+0"

[[Cthulhu]]
deps = ["CodeTracking", "InteractiveUtils", "REPL", "UUIDs", "Unicode"]
git-tree-sha1 = "f3643e78353199d3097821e806348bd83f364155"
uuid = "f68482b8-f384-11e8-15f7-abe071a5a75f"
version = "1.1.1"

[[CuArrays]]
deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"]
git-tree-sha1 = "1582b74d2322df7dd94549d4ac9d095e0f20e884"
uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
version = "2.2.1"

[[DataAPI]]
git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.3.0"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "af6d9c86e191c917c2276fbede1137e8ea20157f"
git-tree-sha1 = "be680f1ad03c0a03796aa3fda5a2180df7f83b46"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.17"
version = "0.17.18"

[[Dates]]
deps = ["Printf"]
@@ -146,14 +118,14 @@ version = "0.1.1"

[[FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays"]
git-tree-sha1 = "44f561e293987ffc84272cd3d2b14b0b93123d63"
git-tree-sha1 = "bf726ba7ce99e00d10bf63c031285fb9ab3676ae"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.8.10"
version = "0.8.11"

[[FixedPointNumbers]]
git-tree-sha1 = "3ba9ea634d4c8b289d590403b4a06f8e227a6238"
git-tree-sha1 = "8fb797c37a3b7ced4327a05ac4ca0dd6a4f1ba92"
uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
version = "0.8.0"
version = "0.8.1"

[[ForwardDiff]]
deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"]
@@ -173,15 +145,15 @@ uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"

[[GPUArrays]]
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"]
git-tree-sha1 = "d887693eb1bd5e1fd573262a978745481895ec7d"
git-tree-sha1 = "ae20accf251c6da038fe33cbc2c53d5af7f51344"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "3.4.1"
version = "4.0.0"

[[GPUCompiler]]
deps = ["Cthulhu", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs"]
git-tree-sha1 = "5275aa268ecd09640b32560e1eae90c78816e4d1"
deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "65f7395a1245635f0c2279649fdbef09a1b0aa7b"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.2.0"
version = "0.4.0"

[[IRTools]]
deps = ["InteractiveUtils", "MacroTools", "Test"]
@@ -193,6 +165,12 @@ version = "0.3.3"
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[IntervalTrees]]
deps = ["InteractiveUtils", "Profile", "Random", "Test"]
git-tree-sha1 = "6c9fcd87677231ae293f6806fad928c216ab6658"
uuid = "524e6230-43b7-53ae-be76-1e9e4d08d11b"
version = "1.0.0"

[[Juno]]
deps = ["Base64", "Logging", "Media", "Profile"]
git-tree-sha1 = "a686b0cf235fa3e491b79b4783c2d2382292b436"
@@ -201,12 +179,11 @@ version = "0.8.2"

[[LLVM]]
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "dd3f584c3dbefe39b2a8fbafa1a3b77e31e21255"
git-tree-sha1 = "72fc0a39d5899091ff2d4cdaa64cb5e4862cf813"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "1.5.1"
version = "1.5.2"

[[LibGit2]]
deps = ["Printf"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"

[[Libdl]]
@@ -370,9 +347,9 @@ version = "0.9.2"

[[Zlib_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "a2e0d558f6031002e380a90613b199e37a8565bf"
git-tree-sha1 = "64b39656c75e67f85b4ac2b336c54674a39f599d"
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
version = "1.2.11+10"
version = "1.2.11+11"

[[Zygote]]
deps = ["AbstractFFTs", "ArrayLayouts", "DiffRules", "FillArrays", "ForwardDiff", "Future", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
1 change: 1 addition & 0 deletions NEWS.md
@@ -4,6 +4,7 @@
* Error if `Dense` layer weights and biases are not [arrays](https://github.com/FluxML/Flux.jl/pull/1218).

# v0.10.5
* CUDA compatibility now uses [CUDA.jl instead of CuArrays.jl](https://github.com/FluxML/Flux.jl/pull/1204).
* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
* Added option to set `bias` to [Flux.Zeros](https://github.com/FluxML/Flux.jl/pull/873) to exclude `bias` from being trained.
* Added `GlobalMaxPool` and `GlobalMeanPool` [layers](https://github.com/FluxML/Flux.jl/pull/950) for performing global pooling operations.
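A hedged sketch of how the `SamePad` and global-pooling additions above fit together (layer sizes are illustrative and the constructor signatures follow this Flux release):

```julia
using Flux

# SamePad() keeps the output's spatial size equal to the input's.
c = Conv((3, 3), 1 => 8, relu; pad = SamePad())

# GlobalMaxPool collapses each feature map to a single value.
m = Chain(c, GlobalMaxPool(), x -> reshape(x, :, size(x, 4)), Dense(8, 2))

x = rand(Float32, 28, 28, 1, 4)   # a WHCN batch of four 28×28 images
size(m(x))                        # (2, 4)
```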
4 changes: 2 additions & 2 deletions Project.toml
@@ -5,9 +5,9 @@ version = "0.11.0-DEV"
[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
Juno = "e5e0dc1b-0480-54bc-9374-aad01c23163d"
@@ -30,7 +30,7 @@ AbstractTrees = "0.2, 0.3"
Adapt = "1, 2.0"
CodecZlib = "0.5, 0.6, 0.7"
Colors = "0.8, 0.9, 0.10, 0.11, 0.12"
CuArrays = "2"
CUDA = "1"
Functors = "0.1"
Juno = "0.5, 0.6, 0.7, 0.8"
MacroTools = "0.3, 0.4, 0.5"
14 changes: 7 additions & 7 deletions docs/src/gpu.md
@@ -1,17 +1,17 @@
# GPU Support

NVIDIA GPU support should work out of the box on systems with CUDA and CUDNN installed. For more details see the [CuArrays](https://github.com/JuliaGPU/CuArrays.jl) readme.
NVIDIA GPU support should work out of the box on systems with CUDA and CUDNN installed. For more details see the [CUDA](https://github.com/JuliaGPU/CUDA.jl) readme.

## GPU Usage

Support for array operations on other hardware backends, like GPUs, is provided by external packages like [CuArrays](https://github.com/JuliaGPU/CuArrays.jl). Flux is agnostic to array types, so we simply need to move model weights and data to the GPU and Flux will handle it.
Support for array operations on other hardware backends, like GPUs, is provided by external packages like [CUDA](https://github.com/JuliaGPU/CUDA.jl). Flux is agnostic to array types, so we simply need to move model weights and data to the GPU and Flux will handle it.

For example, we can use `CuArrays` (with the `cu` converter) to run our [basic example](models/basics.md) on an NVIDIA GPU.
For example, we can use `CUDA` (with the `cu` converter) to run our [basic example](models/basics.md) on an NVIDIA GPU.

(Note that you need to have CUDA available to use CuArrays – please see the [CuArrays.jl](https://github.com/JuliaGPU/CuArrays.jl) instructions for more details.)
(Note that you need a working CUDA installation to use CUDA.jl – please see the [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) instructions for more details.)

```julia
using CuArrays
using CUDA

W = cu(rand(2, 5)) # a 2×5 CuArray
b = cu(rand(2))
@@ -38,10 +38,10 @@ m = fmap(cu, m)
d(cu(rand(10)))
```

As a convenience, Flux provides the `gpu` function to convert models and data to the GPU if one is available. By default, it'll do nothing, but loading `CuArrays` will cause it to move data to the GPU instead.
As a convenience, Flux provides the `gpu` function to convert models and data to the GPU if one is available. By default, it'll do nothing, but loading `CUDA` will cause it to move data to the GPU instead.

```julia
julia> using Flux, CuArrays
julia> using Flux, CUDA

julia> m = Dense(10,5) |> gpu
Dense(10, 5)
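For completeness, a minimal end-to-end sketch of the renamed workflow, assuming a functional CUDA.jl installation (model and batch sizes are illustrative):

```julia
using Flux, CUDA   # CUDA.jl replaces the former `using CuArrays`

m = Chain(Dense(10, 5, relu), Dense(5, 2)) |> gpu   # move the weights to the GPU
x = cu(rand(Float32, 10, 16))                       # a 10×16 batch as a CuArray

y  = m(x)                                 # forward pass runs on the GPU
gs = gradient(() -> sum(m(x)), params(m)) # gradients stay on the GPU
```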
10 changes: 5 additions & 5 deletions src/Flux.jl
@@ -25,7 +25,7 @@ export Descent, ADAM, Momentum, Nesterov, RMSProp,
ClipValue, ClipNorm


using CuArrays
using CUDA
const use_cuda = Ref(false)

include("utils.jl")
@@ -46,10 +46,10 @@ include("deprecations.jl")
include("cuda/cuda.jl")

function __init__()
use_cuda[] = CuArrays.functional() # Can be overridden after load with `Flux.use_cuda[] = false`
if CuArrays.functional()
if !CuArrays.has_cudnn()
@warn "CuArrays.jl found cuda, but did not find libcudnn. Some functionality will not be available."
use_cuda[] = CUDA.functional() # Can be overridden after load with `Flux.use_cuda[] = false`
if CUDA.functional()
if !CUDA.has_cudnn()
@warn "CUDA.jl found cuda, but did not find libcudnn. Some functionality will not be available."
end
end
end
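As the comment in `__init__` notes, the detection result is a plain `Ref` and can be overridden after loading Flux; a hedged sketch:

```julia
using Flux, CUDA

# Force CPU execution even on a CUDA-capable machine, e.g. while debugging.
# `gpu` then becomes a no-op because it checks use_cuda[].
Flux.use_cuda[] = false

m = Dense(3, 2) |> gpu   # stays on the CPU
```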
6 changes: 3 additions & 3 deletions src/cuda/cuda.jl
@@ -1,8 +1,8 @@
module CUDA
module CUDAint

using ..CuArrays
using ..CUDA

using CuArrays: CUDNN
using CUDA: CUDNN
include("curnn.jl")
include("cudnn.jl")

2 changes: 1 addition & 1 deletion src/cuda/cudnn.jl
@@ -1,5 +1,5 @@
import ..Flux: data
import CuArrays.CUDNN: batchnorm, ∇batchnorm
import CUDA.CUDNN: batchnorm, ∇batchnorm

(BN::Flux.BatchNorm)(x::Union{CuArray{T,2},CuArray{T,4},CuArray{T,5}}, cache = nothing) where T<:Union{Float32, Float64} =
BN.λ.(batchnorm(BN.γ, BN.β, x, BN.μ, BN.σ², BN.momentum; cache = cache, alpha = 1, beta = 0, eps = BN.ϵ, training = Flux.istraining()))
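With the import switched to `CUDA.CUDNN`, a `BatchNorm` applied to a 2-, 4- or 5-dimensional `CuArray` takes the fused cuDNN path defined above; a hedged sketch:

```julia
using Flux, CUDA

bn = BatchNorm(8) |> gpu              # parameters (γ, β, μ, σ²) become CuArrays
x  = CUDA.rand(Float32, 4, 4, 8, 2)   # WHCN input already on the GPU
bn(x)                                 # dispatches to CUDNN.batchnorm via the method above
```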
3 changes: 1 addition & 2 deletions src/cuda/curnn.jl
@@ -1,5 +1,4 @@
import ..Flux: Flux, relu
using CuArrays.CUDAnative

CuRNN{T} = Flux.RNNCell{<:Union{typeof(tanh),typeof(relu)},<:CuArray{T,2},<:CuArray{T,1}}
CuGRU{T} = Flux.GRUCell{<:CuArray{T,2},<:CuArray{T,1}}
@@ -55,7 +54,7 @@ unbroadcast(x::AbstractArray, Δ) =
coerce_cuda(x::Union{CuArray,Nothing}) = x
coerce_cuda(x::Tuple) = coerce_cuda.(x)

coerce_cuda(x::AbstractArray) = x .+ CuArrays.fill(0)
coerce_cuda(x::AbstractArray) = x .+ CUDA.fill(0)

function struct_grad!(cx::Zygote.Context, x, x̄)
for f in fieldnames(typeof(x))
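The `CuRNN`/`CuGRU` aliases above match recurrent cells whose parameters live on the GPU; a hedged sketch of hitting that path (assuming the cuDNN-backed kernels are available):

```julia
using Flux, CUDA

rnn = RNN(8, 4) |> gpu           # an RNNCell with CuArray weights, i.e. a CuRNN{Float32}
x   = CUDA.rand(Float32, 8, 3)   # a batch of three length-8 inputs
rnn(x)                           # forward pass through the CUDNN-backed code in this file
```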
2 changes: 1 addition & 1 deletion src/functor.jl
@@ -70,7 +70,7 @@ end

cpu(m) = fmap(x -> adapt(Array, x), m)

gpu(x) = use_cuda[] ? fmap(CuArrays.cu, x) : x
gpu(x) = use_cuda[] ? fmap(CUDA.cu, x) : x

# Precision

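`gpu` and `cpu` are symmetric `fmap`-based converters; a short sketch of the round trip under the same `use_cuda[]` condition (field names follow this Flux version):

```julia
using Flux, CUDA

m  = Dense(4, 2)
gm = gpu(m)    # weights become CuArrays when use_cuda[] is true
cm = cpu(gm)   # adapt(Array, _) copies them back to host memory

typeof(gm.W), typeof(cm.W)   # expect (CuArray{Float32,2}, Array{Float32,2})
```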
12 changes: 6 additions & 6 deletions src/layers/stateless.jl
@@ -126,8 +126,8 @@ julia> Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
"""
binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)

# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
# Re-definition to fix interaction with CUDA.
CUDA.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)

"""
logitbinarycrossentropy(ŷ, y)
@@ -148,8 +148,8 @@ julia> Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
"""
logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)

# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
# Re-definition to fix interaction with CUDA.
CUDA.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)

"""
normalise(x; dims=1)
@@ -272,7 +272,7 @@ function xlogx(x)
result = x * log(x)
ifelse(iszero(x), zero(result), result)
end
CuArrays.@cufunc function xlogx(x)
CUDA.@cufunc function xlogx(x)
result = x * log(x)
ifelse(iszero(x), zero(result), result)
end
@@ -285,7 +285,7 @@ function xlogy(x, y)
result = x * log(y)
ifelse(iszero(x), zero(result), result)
end
CuArrays.@cufunc function xlogy(x, y)
CUDA.@cufunc function xlogy(x, y)
result = x * log(y)
ifelse(iszero(x), zero(result), result)
end
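`@cufunc` registers a GPU-compatible twin that gets substituted when the function is broadcast over `CuArray`s, which is why the scalar definitions above are mirrored rather than replaced. A hedged sketch of the same pattern for a user-defined function, assuming the macro behaves as it did in CuArrays:

```julia
using CUDA

mysoftplus(x) = log(1 + exp(x))

# GPU-safe twin, swapped in automatically inside broadcasts over CuArrays.
CUDA.@cufunc mysoftplus(x) = log(1 + exp(x))

ŷ = CUDA.rand(Float32, 8)
mysoftplus.(ŷ)   # runs as a fused GPU kernel without scalar indexing
```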
2 changes: 1 addition & 1 deletion src/onehot.jl
@@ -38,7 +38,7 @@ import Adapt: adapt, adapt_structure

adapt_structure(T, xs::OneHotMatrix) = OneHotMatrix(xs.height, adapt(T, xs.data))

import .CuArrays: CuArray, CuArrayStyle, cudaconvert
import .CUDA: CuArray, CuArrayStyle, cudaconvert
import Base.Broadcast: BroadcastStyle, ArrayStyle
BroadcastStyle(::Type{<:OneHotMatrix{<:CuArray}}) = CuArrayStyle{2}()
cudaconvert(x::OneHotMatrix{<:CuArray}) = OneHotMatrix(x.height, cudaconvert(x.data))
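The adaptor and broadcast-style methods above let one-hot arrays travel with a model to the GPU; a small hedged sketch:

```julia
using Flux, CUDA

y  = Flux.onehotbatch([1, 3, 2], 1:3)   # a 3×3 OneHotMatrix
gy = gpu(y)                             # the data field becomes a CuArray via adapt_structure

W = gpu(rand(Float32, 5, 3))
W * gy                                  # the product is computed on the GPU
```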