4
4
#include < chrono>
5
5
#include < cmath>
6
6
7
+ constexpr float speedup = 1.0 / RAND_MAX;
8
+
7
9
float frand () {
8
- return (float )rand () / RAND_MAX * 2 - 1 ;
10
+ return (float )rand () / speedup * 2 - 1 ;
9
11
}
12
+ constexpr int length = 48 ;
10
13
11
14
struct Star {
12
- float px, py, pz;
13
- float vx, vy, vz;
14
- float mass;
15
+ float px[length];
16
+ float py[length];
17
+ float pz[length];
18
+ float vx[length];
19
+ float vy[length];
20
+ float vz[length];
21
+ float mass[length];
15
22
};
16
23
17
- std::vector< Star> stars;
24
+ Star stars;
18
25
19
26
void init () {
20
27
for (int i = 0 ; i < 48 ; i++) {
21
- stars.push_back ({
22
- frand (), frand (), frand (),
23
- frand (), frand (), frand (),
24
- frand () + 1 ,
25
- });
28
+ stars.px [i] = frand ();
29
+ stars.py [i] = frand ();
30
+ stars.pz [i] = frand ();
31
+ stars.vx [i] = frand ();
32
+ stars.vy [i] = frand ();
33
+ stars.vz [i] = frand ();
34
+ stars.mass [i] = frand () + 1 ;
26
35
}
27
36
}
28
37
@@ -31,36 +40,84 @@ float eps = 0.001;
31
40
float dt = 0.01 ;
32
41
33
42
void step () {
34
- for (auto &star: stars) {
35
- for (auto &other: stars) {
36
- float dx = other.px - star.px ;
37
- float dy = other.py - star.py ;
38
- float dz = other.pz - star.pz ;
39
- float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
40
- d2 *= sqrt (d2);
41
- star.vx += dx * other.mass * G * dt / d2;
42
- star.vy += dy * other.mass * G * dt / d2;
43
- star.vz += dz * other.mass * G * dt / d2;
43
+ size_t len = length;
44
+ float eps2 = eps * eps;
45
+ float gdt = G * dt;
46
+ #pragma GCC unroll 16
47
+ for (size_t i = 0 ; i < len; i++) {
48
+ float dxs[length];
49
+ float dys[length];
50
+ float dzs[length];
51
+ float d2s[length];
52
+ float ivf_d2s[length];
53
+ #pragma opm simd
54
+ for (size_t j=0 ; j < len; j++)
55
+ {
56
+ dxs[j] = stars.px [j] - stars.px [i];
57
+ }
58
+ #pragma opm simd
59
+ for (size_t j=0 ; j < len; j++)
60
+ {
61
+ dys[j] = stars.py [j] - stars.py [i];
62
+ }
63
+ #pragma opm simd
64
+ for (size_t j=0 ; j < len; j++)
65
+ {
66
+ dzs[j] = stars.pz [j] - stars.pz [i];
67
+ }
68
+ #pragma opm simd
69
+ for (size_t j=0 ; j<len; j++)
70
+ {
71
+ d2s[j] = dxs[j] * dxs[j] + dys[j] * dys[j] + dzs[j] * dzs[j] + eps2;
72
+ }
73
+ #pragma opm simd
74
+ for (size_t j=0 ; j<len; j++){
75
+ ivf_d2s[j] = 1.0 / (d2s[j] * std::sqrt (d2s[j]));
76
+ }
77
+ #pragma opm simd
78
+ for (size_t j=0 ; j<len; j++){
79
+ stars.vx [i] += dxs[j] * stars.mass [j] * (gdt * ivf_d2s[j]);
80
+ }
81
+ #pragma opm simd
82
+ for (size_t j=0 ; j<len; j++){
83
+ stars.vy [i] += dys[j] * stars.mass [j] * (gdt * ivf_d2s[j]);
84
+ }
85
+ #pragma opm simd
86
+ for (size_t j=0 ; j<len; j++){
87
+ stars.vz [i] += dzs[j] * stars.mass [j] * (gdt * ivf_d2s[j]);
88
+ }
89
+ }
90
+ #pragma opm simd
91
+ for (size_t i=0 ; i<len; i++)
92
+ {
93
+ stars.px [i] += stars.vx [i] * dt ;
94
+ }
95
+ #pragma opm simd
96
+ for (size_t i=0 ; i < len; i++)
97
+ {
98
+ stars.py [i] += stars.vy [i] * dt ;
99
+ }
100
+ #pragma opm simd
101
+ for (size_t i = 0 ; i < len; i++)
102
+ {
103
+ stars.pz [i] += stars.vz [i] * dt;
44
104
}
45
- }
46
- for (auto &star: stars) {
47
- star.px += star.vx * dt;
48
- star.py += star.vy * dt;
49
- star.pz += star.vz * dt;
50
- }
51
105
}
52
106
53
107
float calc () {
54
108
float energy = 0 ;
55
- for (auto &star: stars) {
56
- float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz ;
57
- energy += star.mass * v2 / 2 ;
58
- for (auto &other: stars) {
59
- float dx = other.px - star.px ;
60
- float dy = other.py - star.py ;
61
- float dz = other.pz - star.pz ;
109
+ size_t len = length;
110
+ for (size_t i = 0 ; i < len; i++) {
111
+ float v2 = stars.vx [i] * stars.vx [i] + stars.vy [i]* stars.vy [i]+ stars.vz [i]* stars.vz [i];
112
+ energy += stars.mass [i] * v2 / 2 ;
113
+ #pragma GCC unroll 32
114
+ for (size_t j=0 ; j < len; j++) {
115
+ float dx = stars.px [j] - stars.px [i];
116
+ float dy = stars.py [j] - stars.py [i];
117
+ float dz = stars.pz [j] - stars.pz [i];
62
118
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
63
- energy -= other.mass * star.mass * G / sqrt (d2) / 2 ;
119
+ float ivf_d2 = 1.0 / (std::sqrt (d2) * 2 );
120
+ energy -= stars.mass [j] * stars.mass [j] * (G * ivf_d2);
64
121
}
65
122
}
66
123
return energy;
@@ -85,4 +142,4 @@ int main() {
85
142
printf (" Final energy: %f\n " , calc ());
86
143
printf (" Time elapsed: %ld ms\n " , dt);
87
144
return 0 ;
88
- }
145
+ }
0 commit comments