Skip to content

Commit b8e48e8

Browse files
committed
added LoopVectorization version
1 parent 12c75f4 commit b8e48e8

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

test.jl

+22
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using LoopVectorization: @tvectorize
2+
13
function eval_exp(N)
24
a = range(0, stop=2*pi, length=N)
35
A = Matrix{ComplexF64}(undef, N, N)
@@ -12,11 +14,31 @@ function eval_exp(N)
1214

1315
end
1416

17+
"""
    eval_exp_tvectorize(N)

Allocate and return an `N`×`N` `Matrix{ComplexF64}` whose `(i, j)` entry is
`exp(-r) * (cos(100r) + im * sin(100r))` with `r = sqrt(g[i]^2 + g[j]^2)`, where
`g` is `N` evenly spaced points on `[0, 2π]`.

The loop nest is handed to LoopVectorization's `@tvectorize` macro.
"""
function eval_exp_tvectorize(N)
    grid = range(0, stop=2π, length=N)
    A = Matrix{ComplexF64}(undef, N, N)

    # Real-valued view of `A` with an extra leading dimension: index 1 addresses
    # the real part and index 2 the imaginary part, so the loop body only ever
    # stores plain Float64 values.
    parts = reinterpret(reshape, Float64, A)
    @tvectorize for j in 1:N, i in 1:N
        r = sqrt(grid[i]^2 + grid[j]^2)
        damp = exp(-r)
        sn, cs = sincos(100 * r)

        parts[1, i, j] = damp * cs
        parts[2, i, j] = damp * sn
    end
    return A
end
32+
1533
# Warm up BOTH implementations on a tiny problem so that the timings below
# measure run time rather than first-call compilation.
eval_exp(5)
eval_exp_tvectorize(5)
print(string("running loop on ", Threads.nthreads(), " threads \n"))

# Time each implementation on square problems of side 1000, 2000, …, 10000.
for N in 1000:1000:10000
    @time begin
        A = eval_exp(N)
    end

    @time begin
        A = eval_exp_tvectorize(N)
    end
end
2244

test_loopvec.jl

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
using LoopVectorization: @tvectorize
2+
3+
"""
    eval_exp_tvectorize(N)

Allocate and return an `N`×`N` `Matrix{ComplexF64}` whose `(i, j)` entry is
`exp(-x) * (cos(100x) + im * sin(100x))` with `x = sqrt(a[i]^2 + a[j]^2)`, where
`a` is `N` evenly spaced points on `[0, 2π]`.

The loop nest is handed to LoopVectorization's `@tvectorize` macro, which is the
only macro this file imports.
"""
function eval_exp_tvectorize(N)
    a = range(0, stop=2*pi, length=N)
    A = Matrix{ComplexF64}(undef, N, N)

    # Real-valued view of `A` with an extra leading dimension: index 1 addresses
    # the real part and index 2 the imaginary part, so the loop body only ever
    # stores plain Float64 values.
    _A = reinterpret(reshape, Float64, A)
    # Only `@tvectorize` is applied here; no other macro is in scope, and a second
    # macro wrapping the loop would fail at macro-expansion time.
    @tvectorize for j in 1:N, i in 1:N
        x = sqrt(a[i]^2 + a[j]^2)
        prefac = exp(-x)
        s, c = sincos(100*x)

        _A[1,i,j] = prefac * c
        _A[2,i,j] = prefac * s
    end
    return A
end
18+
19+
# Run once on a tiny problem first so the timed calls below exclude compilation.
eval_exp_tvectorize(5)
print("running loop on ", Threads.nthreads(), " threads \n")

# Time the kernel on square problems of side 1000, 2000, …, 10000.
for sz in 1000:1000:10000
    @time A = eval_exp_tvectorize(sz)
end
26+

0 commit comments

Comments
 (0)