1
1
using ImageFiltering, FFTW, LinearAlgebra, Profile, Random
2
- # using ProfileView
3
2
using ComputationalResources
4
3
5
4
# Thread counts are read from the environment so the script can be run with different
# FFTW/BLAS parallelism without editing it.
FFTW.set_num_threads(parse(Int, get(ENV, "FFTW_NUM_THREADS", "1")))

# BLAS defaults to half of Julia's threads, but never fewer than one: with a single
# Julia thread `Threads.nthreads() ÷ 2` evaluates to 0, which is not a meaningful
# thread count to request.
BLAS.set_num_threads(
    parse(Int, get(ENV, "BLAS_NUM_THREADS", string(max(1, Threads.nthreads() ÷ 2))))
)
7
6
8
- function benchmark (mats)
7
+ function benchmark_new (mats)
9
8
kernel = ImageFiltering. factorkernel (Kernel. LoG (1 ))
10
9
Threads. @threads for mat in mats
11
10
frame_filtered = deepcopy (mat[:, :, 1 ])
@@ -17,6 +16,18 @@ function benchmark(mats)
17
16
return
18
17
end
19
18
end
19
"""
    benchmark_old(mats)

Run `imfilter!` over every slice along the third dimension of each array in `mats`,
using a factored `Kernel.LoG(1)` kernel and a `CPU1(Algorithm.FFT())` resource.

The arrays in `mats` are processed in parallel with `Threads.@threads`. For each array
the result is written into one scratch frame that is overwritten on every iteration and
never returned, so the function is only useful for timing.

# Arguments
- `mats`: an indexable collection of 3-D arrays.

# Returns
`nothing`.
"""
function benchmark_old(mats)
    kernel = ImageFiltering.factorkernel(Kernel.LoG(1))
    Threads.@threads for mat in mats
        # `mat[:, :, 1]` already allocates a fresh array, so it can serve directly as
        # the output buffer without an additional `deepcopy`.
        frame_filtered = mat[:, :, 1]
        r_noncached = CPU1(Algorithm.FFT())
        for i in axes(mat, 3)
            frame = @view mat[:, :, i]
            imfilter!(r_noncached, frame_filtered, frame, kernel)
        end
        # Deliberately no `return` here: `Threads.@threads` wraps the loop body in a
        # per-task closure, so returning from the body would end that task's whole
        # chunk and skip the remaining arrays assigned to it.
    end
    return nothing
end
20
31
21
32
function test (mats)
22
33
kernel = ImageFiltering. factorkernel (Kernel. LoG (1 ))
@@ -26,54 +37,31 @@ function test(mats)
26
37
f2 = deepcopy (mat[:, :, 1 ])
27
38
r_noncached = CPU1 (Algorithm. FFT ())
28
39
for i in axes (mat, 3 )
29
- frame = @view mat[:, :, i]
30
- @info " imfilter! noncached"
31
- imfilter! (r_noncached, f2, frame, kernel)
32
- @info " imfilter! cached"
33
- imfilter! (r_cached, f1, frame, kernel)
40
+ imfilter! (r_noncached, f2, deepcopy (mat[:, :, i]), kernel)
41
+ imfilter! (r_cached, f1, deepcopy (mat[:, :, i]), kernel)
34
42
@show f1[1 : 4 ] f2[1 : 4 ]
35
43
f1 ≈ f2 || error (" f1 !≈ f2" )
36
44
end
37
45
return
38
46
end
39
47
end
40
48
41
- function profile ()
49
"""
    run()

Drive the benchmark script: seed the global RNG with `1`, build `nmats` random
`Float64` 3-D arrays with randomly drawn sizes, call `benchmark_new` once untimed and
then three times under `@time`, do the same for `benchmark_old`, and finally call
`test` on the same arrays.

Returns whatever `test(mats)` returns.
"""
function run()
    Random.seed!(1)
    nmats = 10

    # Sizes are drawn in the same order as the arguments of the final `rand` call so
    # the seeded random stream is consumed identically.
    mats = map(1:nmats) do _
        nrows = rand(80:100)
        ncols = rand(80:100)
        nframes = rand(2000:3000)
        return rand(Float64, nrows, ncols, nframes)
    end

    # One untimed call first, then the timed repetitions.
    benchmark_new(mats)
    for _ in 1:3
        @time "warm run of benchmark_new(mats)" benchmark_new(mats)
    end

    benchmark_old(mats)
    for _ in 1:3
        @time "warm run of benchmark_old(mats)" benchmark_old(mats)
    end

    return test(mats)
end
62
66
63
- profile ()
64
-
65
- using ImageFiltering
66
- using ImageFiltering. RFFT
67
-
68
- function mwe ()
69
- a = rand (Float64, 10 , 10 )
70
- out1 = rfft (a)
71
-
72
- buf = RFFT. RCpair {Float64} (undef, size (a))
73
- rfft_plan = RFFT. plan_rfft! (buf)
74
- copy! (buf, a)
75
- out2 = complex (rfft_plan (buf))
76
-
77
- return out1 ≈ out2
78
- end
79
- mwe ()
67
+ run ()
0 commit comments