with current @views based implementation:
|
function LinearAlgebra.mul!(v::AbstractVecOrMat, L::TensorProductOperator, u::AbstractVecOrMat) |
|
@assert L.isset "cache needs to be set up for operator of type $(typeof(L)). |
|
set up cache by calling cache_operator(L::AbstractSciMLOperator, u::AbstractArray)" |
|
|
|
mi, ni = size(L.inner) |
|
mo, no = size(L.outer) |
|
k = size(u, 2) |
|
|
|
C = L.cache |
|
U = _reshape(u, (ni, no*k)) |
|
|
|
""" |
|
v .= kron(B, A) * u |
|
V .= A * U * B' |
|
""" |
|
|
|
# C .= A * U |
|
mul!(C, L.inner, U) |
|
|
|
# V .= U * B' <===> V' .= B * C' |
|
if k>1 |
|
V = _reshape(v, (mi, mo, k)) |
|
C = _reshape(C, (mi, no, k)) |
|
|
|
@views for i=1:k |
|
mul!(transpose(V[:,:,i]), L.outer, transpose(C[:,:,i])) |
|
end |
|
else |
|
V = _reshape(v, (mi, mo)) |
|
C = _reshape(C, (mi, no)) |
|
mul!(transpose(V), L.outer, transpose(C)) |
|
end |
|
|
|
v |
|
end |
using SciMLOperators, LinearAlgebra
using BenchmarkTools
A = TensorProductOperator(rand(12,12), rand(12,12), rand(12,12))
u = rand(12^3, 100)
v = rand(12^3, 100)
A = cache_operator(A, u)
mul!(v, A, u) # dunny
@btime mul!($v, $A, $u); # 16.901 ms (348801 allocations: 45.51 MiB)
B = convert(AbstractMatrix, A)
mul!(v, B, u) # dummy
@btime mul!($v, $B, $u); # 10.337 ms (0 allocations: 0 bytes)
julia> versioninfo()
Julia Version 1.8.0-rc1
Commit 6368fdc6565 (2022-05-27 18:33 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin21.4.0)
CPU: 4 × Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-13.0.1 (ORCJIT, broadwell)
Threads: 4 on 4 virtual cores
Environment:
JULIA_NUM_PRECOMPILE_TASKS = 4
JULIA_DEPOT_PATH = /Users/vp/.julia
JULIA_NUM_THREADS = 4
with current
@viewsbased implementation:SciMLOperators.jl/src/sciml.jl
Lines 584 to 618 in 746c0d5