-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_fft_correctness_1.cpp
117 lines (97 loc) · 3.42 KB
/
test_fft_correctness_1.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include "sirius.h"
/* test FFT: transform single harmonic and compare with plane wave exp(iGr) */
using namespace sirius;
int test_fft(cmd_args& args, device_t pu__)
{
double cutoff = args.value<double>("cutoff", 10);
matrix3d<double> M = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
FFT3D fft(find_translations(cutoff, M), Communicator::world(), pu__);
Gvec gvec(M, cutoff, Communicator::world(), false);
Gvec_partition gvecp(gvec, Communicator::world(), Communicator::self());
fft.prepare(gvecp);
mdarray<double_complex, 1> f(gvec.num_gvec());
if (pu__ == GPU) {
f.allocate(memory_t::device);
printf("running on GPU\n");
}
mdarray<double_complex, 1> ftmp(gvecp.gvec_count_fft());
int result{0};
for (int ig = 0; ig < gvec.num_gvec(); ig++) {
auto v = gvec.gvec(ig);
f.zero();
f[ig] = 1.0;
/* load local set of PW coefficients */
for (int igloc = 0; igloc < gvecp.gvec_count_fft(); igloc++) {
ftmp[igloc] = f[gvecp.idx_gvec(igloc)];
}
switch (pu__) {
case CPU: {
fft.transform<1>(&ftmp[0]);
break;
}
case GPU: {
//f.copy<memory_t::host, memory_t::device>();
//fft.transform<1, GPU>(gvec.partition(), f.at<GPU>(gvec.partition().gvec_offset_fft()));
fft.transform<1, memory_t::host>(ftmp.at(memory_t::host));
fft.buffer().copy_to(memory_t::host);
break;
}
}
double const twopi = 6.28318530717958647692528676656;
double diff = 0;
/* loop over 3D array (real space) */
for (int j0 = 0; j0 < fft.size(0); j0++) {
for (int j1 = 0; j1 < fft.size(1); j1++) {
for (int j2 = 0; j2 < fft.local_size_z(); j2++) {
/* get real space fractional coordinate */
auto rl = vector3d<double>(double(j0) / fft.size(0),
double(j1) / fft.size(1),
double(fft.offset_z() + j2) / fft.size(2));
int idx = fft.index_by_coord(j0, j1, j2);
/* compare value with the exponent */
diff += std::pow(std::abs(fft.buffer(idx) - std::exp(double_complex(0.0, twopi * dot(rl, v)))), 2);
}
}
}
Communicator::world().allreduce(&diff, 1);
diff = std::sqrt(diff / fft.size());
if (diff > 1e-10) {
result++;
}
}
fft.dismiss();
return result;
}
int run_test(cmd_args& args)
{
int result = test_fft(args, CPU);
#ifdef __GPU
result += test_fft(args, GPU);
#endif
return result;
}
int main(int argn, char **argv)
{
cmd_args args;
args.register_key("--cutoff=", "{double} cutoff radius in G-space");
args.parse_args(argn, argv);
if (args.exist("help")) {
printf("Usage: %s [options]\n", argv[0]);
args.print_help();
return 0;
}
sirius::initialize(true);
if (Communicator::world().rank() == 0) {
printf("running %-30s : ", argv[0]);
}
int result = run_test(args);
if (Communicator::world().rank() == 0) {
if (result) {
printf("\x1b[31m" "Failed" "\x1b[0m" "\n");
} else {
printf("\x1b[32m" "OK" "\x1b[0m" "\n");
}
}
sirius::finalize();
return result;
}