Skip to content

Commit cbee0a1

Browse files
committed
add hw4
1 parent 3ffa387 commit cbee0a1

File tree

3 files changed

+96
-38
lines changed

3 files changed

+96
-38
lines changed

main.cpp

Lines changed: 92 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,34 @@
44
#include <chrono>
55
#include <cmath>
66

7+
constexpr float speedup = 1.0 / RAND_MAX;
8+
79
float frand() {
8-
return (float)rand() / RAND_MAX * 2 - 1;
10+
return (float)rand() / speedup * 2 - 1;
911
}
12+
constexpr int length = 48;
1013

1114
struct Star {
12-
float px, py, pz;
13-
float vx, vy, vz;
14-
float mass;
15+
float px[length];
16+
float py[length];
17+
float pz[length];
18+
float vx[length];
19+
float vy[length];
20+
float vz[length];
21+
float mass[length];
1522
};
1623

17-
std::vector<Star> stars;
24+
Star stars;
1825

1926
void init() {
2027
for (int i = 0; i < 48; i++) {
21-
stars.push_back({
22-
frand(), frand(), frand(),
23-
frand(), frand(), frand(),
24-
frand() + 1,
25-
});
28+
stars.px[i] = frand();
29+
stars.py[i] = frand();
30+
stars.pz[i] = frand();
31+
stars.vx[i] = frand();
32+
stars.vy[i] = frand();
33+
stars.vz[i] = frand();
34+
stars.mass[i] = frand() + 1;
2635
}
2736
}
2837

@@ -31,36 +40,84 @@ float eps = 0.001;
3140
float dt = 0.01;
3241

3342
void step() {
34-
for (auto &star: stars) {
35-
for (auto &other: stars) {
36-
float dx = other.px - star.px;
37-
float dy = other.py - star.py;
38-
float dz = other.pz - star.pz;
39-
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
40-
d2 *= sqrt(d2);
41-
star.vx += dx * other.mass * G * dt / d2;
42-
star.vy += dy * other.mass * G * dt / d2;
43-
star.vz += dz * other.mass * G * dt / d2;
43+
size_t len = length;
44+
float eps2 = eps * eps;
45+
float gdt = G * dt;
46+
#pragma GCC unroll 16
47+
for (size_t i = 0 ; i < len; i++) {
48+
float dxs[length];
49+
float dys[length];
50+
float dzs[length];
51+
float d2s[length];
52+
float ivf_d2s[length];
53+
#pragma opm simd
54+
for(size_t j=0; j < len; j++)
55+
{
56+
dxs[j] = stars.px[j] - stars.px[i];
57+
}
58+
#pragma opm simd
59+
for(size_t j=0; j < len; j++)
60+
{
61+
dys[j] = stars.py[j] - stars.py[i];
62+
}
63+
#pragma opm simd
64+
for(size_t j=0; j < len; j++)
65+
{
66+
dzs[j] = stars.pz[j] - stars.pz[i];
67+
}
68+
#pragma opm simd
69+
for(size_t j=0; j<len; j++)
70+
{
71+
d2s[j] = dxs[j] * dxs[j] + dys[j] * dys[j] + dzs[j] * dzs[j] + eps2;
72+
}
73+
#pragma opm simd
74+
for(size_t j=0; j<len; j++){
75+
ivf_d2s[j] = 1.0 / (d2s[j] * std::sqrt(d2s[j]));
76+
}
77+
#pragma opm simd
78+
for(size_t j=0; j<len; j++){
79+
stars.vx[i] += dxs[j] * stars.mass[j] * (gdt * ivf_d2s[j]);
80+
}
81+
#pragma opm simd
82+
for(size_t j=0; j<len; j++){
83+
stars.vy[i] += dys[j] * stars.mass[j] * (gdt * ivf_d2s[j]);
84+
}
85+
#pragma opm simd
86+
for(size_t j=0; j<len; j++){
87+
stars.vz[i] += dzs[j] * stars.mass[j] * (gdt * ivf_d2s[j]);
88+
}
89+
}
90+
#pragma opm simd
91+
for(size_t i=0; i<len; i++)
92+
{
93+
stars.px[i] += stars.vx[i] * dt ;
94+
}
95+
#pragma opm simd
96+
for(size_t i=0; i < len; i++)
97+
{
98+
stars.py[i] += stars.vy[i] * dt ;
99+
}
100+
#pragma opm simd
101+
for(size_t i = 0; i < len; i++)
102+
{
103+
stars.pz[i] += stars.vz[i] * dt;
44104
}
45-
}
46-
for (auto &star: stars) {
47-
star.px += star.vx * dt;
48-
star.py += star.vy * dt;
49-
star.pz += star.vz * dt;
50-
}
51105
}
52106

53107
float calc() {
54108
float energy = 0;
55-
for (auto &star: stars) {
56-
float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz;
57-
energy += star.mass * v2 / 2;
58-
for (auto &other: stars) {
59-
float dx = other.px - star.px;
60-
float dy = other.py - star.py;
61-
float dz = other.pz - star.pz;
109+
size_t len = length;
110+
for (size_t i = 0; i < len; i++) {
111+
float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i]* stars.vy[i]+ stars.vz[i]* stars.vz[i];
112+
energy += stars.mass[i] * v2 / 2;
113+
#pragma GCC unroll 32
114+
for (size_t j=0; j < len; j++) {
115+
float dx = stars.px[j] - stars.px[i];
116+
float dy = stars.py[j] - stars.py[i];
117+
float dz = stars.pz[j] - stars.pz[i];
62118
float d2 = dx * dx + dy * dy + dz * dz + eps * eps;
63-
energy -= other.mass * star.mass * G / sqrt(d2) / 2;
119+
float ivf_d2 = 1.0 / (std::sqrt(d2) * 2);
120+
energy -= stars.mass[j] * stars.mass[j] * (G * ivf_d2);
64121
}
65122
}
66123
return energy;
@@ -85,4 +142,4 @@ int main() {
85142
printf("Final energy: %f\n", calc());
86143
printf("Time elapsed: %ld ms\n", dt);
87144
return 0;
88-
}
145+
}

opt_main

22.5 KB
Binary file not shown.

run.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/sh
22
set -e
3-
cmake -B build
4-
cmake --build build
5-
build/main
3+
4+
g++ -std=c++17 -march=native -ffast-math -O3 -fopenmp main.cpp -o opt_main
5+
./opt_main
6+

0 commit comments

Comments
 (0)