Skip to content

Commit

Permalink
update cuda backend + cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
samayala22 committed Jan 5, 2025
1 parent 684bd6c commit aee1e8c
Show file tree
Hide file tree
Showing 13 changed files with 685 additions and 129 deletions.
2 changes: 1 addition & 1 deletion data/2dof.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def format_subplot(fig, row, col, xlabel, ylabel):
# Dimensionless parameters
dt_nd = 0.2
# t_final_nd = U_vel * 200.0
t_final_nd = 1000.0
t_final_nd = 100.0
vec_t_nd = np.arange(0, t_final_nd, dt_nd)
n = len(vec_t_nd)

Expand Down
24 changes: 15 additions & 9 deletions data/theodorsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import matplotlib.pyplot as plt
import scipy.integrate as spi
import scipy.special as scsp
from pathlib import Path

EPS_sqrt_f = np.sqrt(1.19209e-07)

Expand All @@ -26,14 +27,18 @@ def uvlm_data(filename):
uvlm_t = []
uvlm_z = []
uvlm_alpha = []
with open(f"build/windows/x64/release/{filename}.txt", "r") as f:
k = float(f.readline()) # get reduced frequency of the simulation
for line in f:
time_step, z, cl, alpha = line.split()
uvlm_t.append(float(time_step))
uvlm_z.append(float(z))
uvlm_cl.append(float(cl))
uvlm_alpha.append(float(alpha))
filepath = f"build/windows/x64/debug/{filename}.txt"
if Path(filepath).exists():
with open(filepath, "r") as f:
k = float(f.readline()) # get reduced frequency of the simulation
for line in f:
time_step, z, cl, alpha = line.split()
uvlm_t.append(float(time_step))
uvlm_z.append(float(z))
uvlm_cl.append(float(cl))
uvlm_alpha.append(float(alpha))
else:
print(f"File {filepath} not found")

uvlm_alpha = np.array(uvlm_alpha)
uvlm_cl = np.array(uvlm_cl)
Expand Down Expand Up @@ -68,7 +73,8 @@ def uvlm_data(filename):
)

files = [
"cl_data",
"cl_data_CPU",
"cl_data_CUDA",
# "cl_data_025",
# "cl_data_050",
# "cl_data_075",
Expand Down
302 changes: 302 additions & 0 deletions mesh/cylinder.x

Large diffs are not rendered by default.

14 changes: 0 additions & 14 deletions tests/ut_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,6 @@

using namespace vlm;

void print3d(const TensorView<f32, 3, Location::Host>& tensor) {
// Print the 3D tensor
for (i64 z = 0; z < tensor.shape(2); z++) {
std::printf("Layer %lld:\n", z);
for (i64 x = 0; x < tensor.shape(0); x++) {
for (i64 y = 0; y < tensor.shape(1); y++) {
std::printf("%6.1f ", tensor(x, y, z));
}
std::printf("\n");
}
std::printf("\n");
}
}

int main(int, char**) {
const std::vector<std::string> backends = get_available_backends();

Expand Down
25 changes: 2 additions & 23 deletions tests/uvlm_2dof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#define COUPLED 1
// #define COUPLED_VTU_IO 1
// #define COUPLED_NOAERO 1
// #define COUPLED_NOAERO_NOHEAVE 1

using namespace vlm;
using namespace linalg::ostream_overloads;
Expand Down Expand Up @@ -274,14 +273,6 @@ void UVLM_2DOF::alloc_buffers() {
// return alpha;
// }

// def alpha_freeplay(alpha, M0 = 0.0, Mf = 0.0, delta = np.radians(0.5), a_f = np.radians(0.25)):
// if (alpha < a_f):
// return M0 + alpha - a_f
// elif (alpha >= a_f and alpha <= (a_f + delta)):
// return M0 + Mf * (alpha - a_f)
// else: # alpha > a_F + delta
// return M0 + alpha - a_f + delta * (Mf - 1)

f32 torsional_func(f32 alpha, f32 M0 = 0.0f, f32 Mf = 0.0f, f32 delta = to_radians(0.5f), f32 a_f = to_radians(0.25f)) {
if (alpha < a_f) {
return M0 + alpha - a_f;
Expand Down Expand Up @@ -367,11 +358,7 @@ void UVLM_2DOF::initialize_structure_data(const UVLM_2DOF_Vars& vars, const i64

M_hv(0, 0) = 1.0f;
M_hv(1, 0) = vars.x_a / pow(vars.r_a, 2);
#ifdef COUPLED_NOAERO_NOHEAVE
M_hv(0, 1) = 0.0f;
#else
M_hv(0, 1) = vars.x_a;
#endif
M_hv(1, 1) = 1.0f;

C_hv(0, 0) = 2.0f * vars.zeta_h * (vars.omega / vars.U_a);
Expand Down Expand Up @@ -494,8 +481,7 @@ void UVLM_2DOF::run(const Assembly& assembly, const UVLM_2DOF_Vars& vars, f32 t_
const f32 dy = verts_first_wing(0, -1, 1) - verts_first_wing(0, -2, 1);
const f32 dz = verts_first_wing(0, -1, 2) - verts_first_wing(0, -2, 2);
const f32 last_panel_chord = std::sqrt(dx*dx + dy*dy + dz*dz);
// const f32 dt_nd = last_panel_chord / linalg::length(assembly.kinematics()->linear_velocity(0.0f, {0.f, 0.f, 0.f}));
const f32 dt_nd = last_panel_chord;
const f32 dt_nd = last_panel_chord; // nd freestream = 1
const i64 t_steps = static_cast<i64>(t_final_nd / dt_nd);

std::cout << "dt_nd: " << dt_nd << "\n";
Expand Down Expand Up @@ -661,13 +647,6 @@ void UVLM_2DOF::run(const Assembly& assembly, const UVLM_2DOF_Vars& vars, f32 t_
dt_nd // dimensionless dt
);

// NOTE: this doesnt work on the GPU backend
#ifdef COUPLED_NOAERO_NOHEAVE
du.view()(0) = 0.0f;
dv.view()(0) = 0.0f;
da.view()(0) = 0.0f;
#endif

du.view().to(du_h.view());

u_d.view().slice(All, i).to(u_d.view().slice(All, i+1));
Expand Down Expand Up @@ -875,7 +854,7 @@ int main() {

const f32 flutter_speed = 6.285f;
const f32 flutter_ratio = 0.2f;
const f32 t_final_nd = 1000.f;
const f32 t_final_nd = 100.f;

UVLM_2DOF_Vars vars;
vars.b = 0.5f;
Expand Down
2 changes: 1 addition & 1 deletion tests/uvlm_theodorsen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using namespace vlm;
using namespace linalg::ostream_overloads;

int main() {
const std::vector<std::string> meshes = {"../../../../mesh/infinite_rectangular_5x10.x"};
const std::vector<std::string> meshes = {"../../../../mesh/infinite_rectangular_10x5.x"};
const std::vector<std::string> backends = get_available_backends();

auto solvers = tiny::make_combination(meshes, backends);
Expand Down
6 changes: 2 additions & 4 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ void CPU_BLAS::axpy(const f32 alpha, const TensorView<f32, 2, Location::Device>&
}

BackendCPU::BackendCPU() : Backend(create_memory_manager(), create_blas()) {
name = "CPU";
print_cpu_info();
}

Expand Down Expand Up @@ -499,10 +500,7 @@ linalg::float3 BackendCPU::coeff_cm(
const linalg::float3 v1 = {verts_wing(i+1, j, 0), verts_wing(i+1, j, 1), verts_wing(i+1, j, 2)}; // right leading vortex line
const linalg::float3 force = {forces(i, j, 0), forces(i, j, 1), forces(i, j, 2)};

// const linalg::float3 dst_to_ref = ref_pt - 0.5f * (v0 + v1);
// cm += linalg::cross(force, dst_to_ref);

const linalg::float3 f_applied = 0.5f * (v0 + v1);
const linalg::float3 f_applied = 0.5f * (v0 + v1); // force applied at the center of leading edge vortex line
const linalg::float3 lever = f_applied - ref_pt;
cm += linalg::cross(lever, force);
}
Expand Down
43 changes: 40 additions & 3 deletions vlm/backends/cuda/include/vlm_backend_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,37 @@ class BackendCUDA final : public Backend {
void rhs_assemble_wake_influence(TensorView<f32, 1, Location::Device>& rhs, const MultiTensorView2D<Location::Device>& gamma_wake, const MultiTensorView3D<Location::Device>& colloc, const MultiTensorView3D<Location::Device>& normals, const MultiTensorView3D<Location::Device>& verts_wake, i32 iteration) override;
void displace_wake_rollup(MultiTensorView3D<Location::Device>& wake_rollup, const MultiTensorView3D<Location::Device>& verts_wake, const MultiTensorView3D<Location::Device>& verts_wing, const MultiTensorView2D<Location::Device>& gamma_wing, const MultiTensorView2D<Location::Device>& gamma_wake, f32 dt, i32 iteration) override;

// Per mesh kernels
// TODO: deprecate
f32 coeff_steady_cl_single(const TensorView3D<Location::Device>& verts_wing, const TensorView2D<Location::Device>& gamma_delta, const FlowData& flow, f32 area) override;
f32 coeff_steady_cd_single(const TensorView3D<Location::Device>& verts_wake, const TensorView2D<Location::Device>& gamma_wake, const FlowData& flow, f32 area) override;
f32 coeff_unsteady_cl_single(const TensorView3D<Location::Device>& verts_wing, const TensorView2D<Location::Device>& gamma_delta, const TensorView2D<Location::Device>& gamma, const TensorView2D<Location::Device>& gamma_prev, const TensorView3D<Location::Device>& local_velocities, const TensorView2D<Location::Device>& areas, const TensorView3D<Location::Device>& normals, const linalg::float3& freestream, f32 dt, f32 area) override;
void coeff_unsteady_cl_single_forces(const TensorView3D<Location::Device>& verts_wing, const TensorView2D<Location::Device>& gamma_delta, const TensorView2D<Location::Device>& gamma, const TensorView2D<Location::Device>& gamma_prev, const TensorView3D<Location::Device>& velocities, const TensorView2D<Location::Device>& areas, const TensorView3D<Location::Device>& normals, TensorView3D<Location::Device>& forces, const linalg::float3& freestream, f32 dt) override;

void forces_unsteady(
const TensorView3D<Location::Device>& verts_wing,
const TensorView2D<Location::Device>& gamma_delta,
const TensorView2D<Location::Device>& gamma,
const TensorView2D<Location::Device>& gamma_prev,
const TensorView3D<Location::Device>& velocities,
const TensorView2D<Location::Device>& areas,
const TensorView3D<Location::Device>& normals,
const TensorView3D<Location::Device>& forces,
f32 dt
) override;
f32 coeff_cl(
const TensorView3D<Location::Device>& forces,
const linalg::float3& lift_axis,
const linalg::float3& freestream,
const f32 rho,
const f32 area
) override;
linalg::float3 coeff_cm(
const TensorView3D<Location::Device>& forces,
const TensorView3D<Location::Device>& verts_wing,
const linalg::float3& ref_pt,
const linalg::float3& freestream,
const f32 rho,
const f32 area,
const f32 mac
) override;

void mesh_metrics(const f32 alpha_rad, const MultiTensorView3D<Location::Device>& verts_wing, MultiTensorView3D<Location::Device>& colloc, MultiTensorView3D<Location::Device>& normals, MultiTensorView2D<Location::Device>& areas) override;
f32 mesh_mac(const TensorView3D<Location::Device>& verts_wing, const TensorView2D<Location::Device>& areas) override;
Expand All @@ -31,6 +57,17 @@ class BackendCUDA final : public Backend {
// std::unique_ptr<Kernels> create_kernels() override;
std::unique_ptr<LU> create_lu_solver() override;
std::unique_ptr<BLAS> create_blas() override;

// Intermediate values for reduction
// Still not certain if this is the best way
// Currently it makes the functions that use these values not thread safe
// So multiple instances of the same function cannot run in parallel
f32* d_cl;
f32* d_cd;
f32* d_cm_x;
f32* d_cm_y;
f32* d_cm_z;
f32* d_mac;
};

} // namespace vlm
Loading

0 comments on commit aee1e8c

Please sign in to comment.