22#include " ../struct/particle.h"
33#include < vector>
44
5+
56// =========================
67// Octree definition
78// =========================
@@ -89,12 +90,6 @@ struct Octree {
8990 }
9091};
9192
92- inline float rsqrt (float x) {
93- float y = _mm_cvtss_f32 (_mm_rsqrt_ss (_mm_set_ss (x)));
94- // optional refinement:
95- return y * (1 .5f - 0 .5f * x * y * y);
96- }
97-
9893// =========================
9994// Barnes–Hut force function
10095// =========================
@@ -106,25 +101,21 @@ inline void bhForce(Octree* node, Particle& p, real theta, real dt)
106101 real dy = node->cy - p.y ;
107102 real dz = node->cz - p.z ;
108103
109- real r2 = dx*dx + dy*dy + dz*dz + real ( 1e-6 ) ;
104+ real dist = sqrt ( dx*dx + dy*dy + dz*dz) + 1e-6 ;
110105
111- // Barnes–Hut acceptance
112- real dist = sqrt (r2);
106+ // Barnes–Hut acceptance criterion
113107 if (node->leaf || (node->size / dist) < theta) {
114-
115- // division-free inverse distance cubed
116- real invDist = rsqrt (r2);
117- real invDist3 = invDist * invDist * invDist;
118-
119- real f = node->m * invDist3 * dt;
120-
108+ real inv = 1.0 / (dist * dist * dist);
109+ real f = node->m * inv * dt;
121110 p.vx += dx * f;
122111 p.vy += dy * f;
123112 p.vz += dz * f;
124113 return ;
125114 }
126115
127- for (int i = 0 ; i < 8 ; i++)
116+ // Otherwise recurse into children
117+ for (int i = 0 ; i < 8 ; i++) {
128118 if (node->child [i])
129119 bhForce (node->child [i], p, theta, dt);
130- }
120+ }
121+ }
0 commit comments