Skip to content

Dispatch even more to BLAS #33743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 84 additions & 64 deletions stdlib/LinearAlgebra/src/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ end
(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a,length(a),1)*B

@inline mul!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat} =
alpha::Number, beta::Number) where {T<:BlasFloat} =
gemv!(y, 'N', A, x, alpha, beta)
# Complex matrix times real vector. Reinterpret the matrix as a real matrix and do real matvec computation.
for elty in (Float32,Float64)
@eval begin
@inline function mul!(y::StridedVector{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, x::StridedVector{$elty},
alpha::Union{$elty, Bool}, beta::Union{$elty, Bool})
Afl = reinterpret($elty,A)
yfl = reinterpret($elty,y)
alpha::Number, beta::Number)
Afl = reinterpret($elty, A)
yfl = reinterpret($elty, y)
mul!(yfl, Afl, x, alpha, beta)
return y
end
Expand All @@ -92,7 +92,7 @@ function *(transA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) w
mul!(similar(x,TS,size(A,2)), transpose(A), x)
end
@inline function mul!(y::StridedVector{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat}
alpha::Number, beta::Number) where {T<:BlasFloat}
A = transA.parent
return gemv!(y, 'T', A, x, alpha, beta)
end
Expand All @@ -114,12 +114,12 @@ function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where
end

@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasReal}
alpha::Number, beta::Number) where {T<:BlasReal}
A = adjA.parent
return mul!(y, transpose(A), x, alpha, beta)
end
@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasComplex}
alpha::Number, beta::Number) where {T<:BlasComplex}
A = adjA.parent
return gemv!(y, 'C', A, x, alpha, beta)
end
Expand Down Expand Up @@ -165,13 +165,8 @@ function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMatrix{<:BlasComplex})
end

@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
α::Number, β::Number) where {T<:BlasFloat}
alpha, beta = promote(α, β, zero(T))
if alpha isa T && beta isa T
return gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
else
return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(α, β))
end
alpha::Number, beta::Number) where {T<:BlasFloat}
return gemm_wrapper!(C, 'N', 'N', A, B, alpha, beta)
end
# Complex matrix times real matrix: we exploit the fact that it is generally faster to
# reinterpret the first matrix as a real matrix and carry out a real matrix-matrix multiply
Expand Down Expand Up @@ -307,12 +302,12 @@ julia> lmul!(F.Q, B)
lmul!(A, B)

@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat}
alpha::Number, beta::Number) where {T<:BlasFloat}
A = transA.parent
if A===B
return syrk_wrapper!(C, 'T', A, MulAddMul(alpha, beta))
return syrk_wrapper!(C, 'T', A, alpha, beta)
else
return gemm_wrapper!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'T', 'N', A, B, alpha, beta)
end
end
@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
Expand All @@ -322,19 +317,19 @@ end
end

@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat}
alpha::Number, beta::Number) where {T<:BlasFloat}
B = transB.parent
if A===B
return syrk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
return syrk_wrapper!(C, 'N', A, alpha, beta)
else
return gemm_wrapper!(C, 'N', 'T', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'N', 'T', A, B, alpha, beta)
end
end
# Complex matrix times transposed real matrix. Reinterpret the first matrix to real for efficiency.
for elty in (Float32,Float64)
@eval begin
@inline function mul!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, transB::Transpose{<:Any,<:StridedVecOrMat{$elty}},
alpha::Union{$elty, Bool}, beta::Union{$elty, Bool})
alpha::Number, beta::Number)
Afl = reinterpret($elty, A)
Cfl = reinterpret($elty, C)
mul!(Cfl, Afl, transB, alpha, beta)
Expand All @@ -354,7 +349,7 @@ end
alpha::Number, beta::Number) where {T<:BlasFloat}
A = transA.parent
B = transB.parent
return gemm_wrapper!(C, 'T', 'T', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'T', 'T', A, B, alpha, beta)
end
@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Transpose{<:Any,<:AbstractVecOrMat},
alpha::Number, beta::Number)
Expand All @@ -364,10 +359,10 @@ end
end

@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, transB::Adjoint{<:Any,<:StridedVecOrMat{T}},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat}
alpha::Number, beta::Number) where {T<:BlasFloat}
A = transA.parent
B = transB.parent
return gemm_wrapper!(C, 'T', 'C', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'T', 'C', A, B, alpha, beta)
end
@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Adjoint{<:Any,<:AbstractVecOrMat},
alpha::Number, beta::Number)
Expand All @@ -377,17 +372,17 @@ end
end

@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasReal}
alpha::Real, beta::Real) where {T<:BlasReal}
A = adjA.parent
return mul!(C, transpose(A), B, alpha, beta)
end
@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasComplex}
alpha::Number, beta::Number) where {T<:BlasComplex}
A = adjA.parent
if A===B
return herk_wrapper!(C, 'C', A, MulAddMul(alpha, beta))
return herk_wrapper!(C, 'C', A, alpha, beta)
else
return gemm_wrapper!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'C', 'N', A, B, alpha, beta)
end
end
@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
Expand All @@ -402,12 +397,12 @@ end
return mul!(C, A, transpose(B), alpha, beta)
end
@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasComplex}
alpha::Number, beta::Number) where {T<:BlasComplex}
B = adjB.parent
if A === B
return herk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
return herk_wrapper!(C, 'N', A, alpha, beta)
else
return gemm_wrapper!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'N', 'C', A, B, alpha, beta)
end
end
@inline function mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
Expand All @@ -417,10 +412,10 @@ end
end

@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
alpha::Union{T, Bool}, beta::Union{T, Bool}) where {T<:BlasFloat}
alpha::Number, beta::Number) where {T<:BlasFloat}
A = adjA.parent
B = adjB.parent
return gemm_wrapper!(C, 'C', 'C', A, B, MulAddMul(alpha, beta))
return gemm_wrapper!(C, 'C', 'C', A, B, alpha, beta)
end
@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
alpha::Number, beta::Number)
Expand Down Expand Up @@ -455,7 +450,7 @@ end
end

function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{T},
alpha::Union{T, Bool} = true, beta::Union{T, Bool} = false) where T<:BlasFloat
α::Number=true, β::Number=false) where {T<:BlasFloat}
mA, nA = lapack_size(tA, A)
if nA != length(x)
throw(DimensionMismatch("second dimension of A, $nA, does not match length of x, $(length(x))"))
Expand All @@ -467,16 +462,19 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::
return y
end
if nA == 0
return _rmul_or_fill!(y, beta)
return _rmul_or_fill!(y, β)
end
if stride(A, 1) == 1 && stride(A, 2) >= size(A, 1)

alpha, beta = promote(α, β, zero(T))
if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && stride(A, 2) >= size(A, 1)
return BLAS.gemv!(tA, alpha, A, x, beta, y)
else
return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
end
return generic_matvecmul!(y, tA, A, x, MulAddMul(alpha, beta))
end

function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T},
_add::MulAddMul = MulAddMul()) where T<:BlasFloat
α::Number=true, β::Number=false) where {T<:BlasFloat}
nC = checksquare(C)
if tA == 'T'
(nA, mA) = size(A,1), size(A,2)
Expand All @@ -488,24 +486,33 @@ function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat
if nC != mA
throw(DimensionMismatch("output matrix has size: $(nC), but should have size $(mA)"))
end
if mA == 0 || nA == 0 || iszero(_add.alpha)
return _rmul_or_fill!(C, _add.beta)
if mA == 0 || nA == 0 || iszero(α)
return _rmul_or_fill!(C, β)
end
if mA == 2 && nA == 2
return matmul2x2!(C, tA, tAt, A, A, _add)
return matmul2x2!(C, tA, tAt, A, A, MulAddMul(α, β))
end
if mA == 3 && nA == 3
return matmul3x3!(C, tA, tAt, A, A, _add)
end

if stride(A, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(C, 2) >= size(C, 1)
return copytri!(BLAS.syrk!('U', tA, _add.alpha, A, _add.beta, C), 'U')
return matmul3x3!(C, tA, tAt, A, A, MulAddMul(α, β))
end

# BLAS.syrk! only updates symmetric C
# alternatively, make non-zero β a show-stopper for BLAS.syrk!
if iszero(β) || issymmetric(C)
alpha, beta = promote(α, β, zero(T))
if (alpha isa Union{Bool,T} &&
beta isa Union{Bool,T} &&
stride(A, 1) == stride(C, 1) == 1 &&
stride(A, 2) >= size(A, 1) &&
stride(C, 2) >= size(C, 1))
return copytri!(BLAS.syrk!('U', tA, alpha, A, beta, C), 'U')
end
end
return generic_matmatmul!(C, tA, tAt, A, A, _add)
return gemm_wrapper!(C, tA, tAt, A, A, α, β)
end

function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
_add::MulAddMul = MulAddMul()) where T<:BlasReal
α::Number=true, β::Number=false) where {T<:BlasReal}
nC = checksquare(C)
if tA == 'C'
(nA, mA) = size(A,1), size(A,2)
Expand All @@ -518,27 +525,34 @@ function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA
throw(DimensionMismatch("output matrix has size: $(nC), but should have size $(mA)"))
end
if mA == 0 || nA == 0
return _rmul_or_fill!(C, _add.beta)
return _rmul_or_fill!(C, β)
end
if mA == 2 && nA == 2
return matmul2x2!(C, tA, tAt, A, A, _add)
return matmul2x2!(C, tA, tAt, A, A, MulAddMul(α, β))
end
if mA == 3 && nA == 3
return matmul3x3!(C, tA, tAt, A, A, _add)
return matmul3x3!(C, tA, tAt, A, A, MulAddMul(α, β))
end

# Result array does not need to be initialized as long as beta==0
# C = Matrix{T}(undef, mA, mA)

if stride(A, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(C, 2) >= size(C, 1)
return copytri!(BLAS.herk!('U', tA, _add.alpha, A, _add.beta, C), 'U', true)
if iszero(β) || issymmetric(C)
alpha, beta = promote(α, β, zero(T))
if (alpha isa Union{Bool,T} &&
beta isa Union{Bool,T} &&
stride(A, 1) == stride(C, 1) == 1 &&
stride(A, 2) >= size(A, 1) &&
stride(C, 2) >= size(C, 1))
return copytri!(BLAS.herk!('U', tA, alpha, A, beta, C), 'U', true)
end
end
return generic_matmatmul!(C, tA, tAt, A, A, _add)
return gemm_wrapper!(C, tA, tAt, A, A, α, β)
end

function gemm_wrapper(tA::AbstractChar, tB::AbstractChar,
A::StridedVecOrMat{T},
B::StridedVecOrMat{T}) where T<:BlasFloat
B::StridedVecOrMat{T}) where {T<:BlasFloat}
mA, nA = lapack_size(tA, A)
mB, nB = lapack_size(tB, B)
C = similar(B, T, mA, nB)
Expand All @@ -547,7 +561,7 @@ end

function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar,
A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
_add::MulAddMul = MulAddMul()) where T<:BlasFloat
α::Number=true, β::Number=false) where {T<:BlasFloat}
mA, nA = lapack_size(tA, A)
mB, nB = lapack_size(tB, B)

Expand All @@ -559,24 +573,30 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
throw(ArgumentError("output matrix must not be aliased with input matrix"))
end

if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
if mA == 0 || nA == 0 || nB == 0 || iszero(α)
if size(C) != (mA, nB)
throw(DimensionMismatch("C has dimensions $(size(C)), should have ($mA,$nB)"))
end
return _rmul_or_fill!(C, _add.beta)
return _rmul_or_fill!(C, β)
end

if mA == 2 && nA == 2 && nB == 2
return matmul2x2!(C, tA, tB, A, B, _add)
return matmul2x2!(C, tA, tB, A, B, MulAddMul(α, β))
end
if mA == 3 && nA == 3 && nB == 3
return matmul3x3!(C, tA, tB, A, B, _add)
return matmul3x3!(C, tA, tB, A, B, MulAddMul(α, β))
end

if stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(B, 2) >= size(B, 1) && stride(C, 2) >= size(C, 1)
return BLAS.gemm!(tA, tB, _add.alpha, A, B, _add.beta, C)
alpha, beta = promote(α, β, zero(T))
if (alpha isa Union{Bool,T} &&
beta isa Union{Bool,T} &&
stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
stride(A, 2) >= size(A, 1) &&
stride(B, 2) >= size(B, 1) &&
stride(C, 2) >= size(C, 1))
return BLAS.gemm!(tA, tB, alpha, A, B, beta, C)
end
generic_matmatmul!(C, tA, tB, A, B, _add)
generic_matmatmul!(C, tA, tB, A, B, MulAddMul(α, β))
end

# blas.jl defines matmul for floats; other integer and mixed precision
Expand Down Expand Up @@ -687,7 +707,7 @@ const Bbuf = [Vector{UInt8}(undef, tilebufsize)]
const Cbuf = [Vector{UInt8}(undef, tilebufsize)]

function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
_add::MulAddMul = MulAddMul())
_add::MulAddMul=MulAddMul())
mA, nA = lapack_size(tA, A)
mB, nB = lapack_size(tB, B)
mC, nC = size(C)
Expand Down
Loading