From d319db454bf0447933c7b6c92991744884204948 Mon Sep 17 00:00:00 2001 From: Alan Kaptanoglu Date: Fri, 23 Aug 2024 16:09:33 -0400 Subject: [PATCH] did a quick parallelization for the c++ functions. Still seems slower than it should be so might need additional debugging. --- examples/2_Intermediate/tokamak_exact.py | 6 +-- src/simsopt/geo/permanent_magnet_grid.py | 6 ++- src/simsoptpp/Bcube_nonVec.cpp | 58 +++++++++++++++--------- src/simsoptpp/Bcube_nonVec.h | 15 +++--- 4 files changed, 52 insertions(+), 33 deletions(-) diff --git a/examples/2_Intermediate/tokamak_exact.py b/examples/2_Intermediate/tokamak_exact.py index 7b80ce7fc..db860b8e0 100755 --- a/examples/2_Intermediate/tokamak_exact.py +++ b/examples/2_Intermediate/tokamak_exact.py @@ -23,14 +23,14 @@ nphi = 4 # nphi = ntheta >= 64 needed for accurate full-resolution runs ntheta = nphi else: - nphi = 64 # nphi = ntheta >= 64 needed for accurate full-resolution runs + nphi = 16 # nphi = ntheta >= 64 needed for accurate full-resolution runs ntheta = nphi - Nx = 50 # cartesian bricks but note that we are not modelling the cubic geometry! + Nx = 40 # cartesian bricks but note that we are not modelling the cubic geometry! Ny = Nx Nz = Nx coff = 0.1 # PM grid starts offset ~ 10 cm from the plasma surface -poff = 0.5 # PM grid end offset ~ 15 cm from the plasma surface +poff = 0.25 # PM grid end offset ~ 15 cm from the plasma surface input_name = 'input.circular_tokamak' # Read in the plas/ma equilibrium file diff --git a/src/simsopt/geo/permanent_magnet_grid.py b/src/simsopt/geo/permanent_magnet_grid.py index 3f84add39..c569d5250 100644 --- a/src/simsopt/geo/permanent_magnet_grid.py +++ b/src/simsopt/geo/permanent_magnet_grid.py @@ -1034,12 +1034,16 @@ def _optimization_setup(self): self.b_obj = - self.Bn.reshape(self.nphi * self.ntheta) # Compute geometric factor with the C++ routine - self.A_obj = cub.Acube( + import time + t1 = time.time() + self.A_obj = sopp.Acube( np.ascontiguousarray(self.plasma_boundary.gamma().reshape(-1, 3)), np.ascontiguousarray(self.pm_grid_xyz), np.ascontiguousarray(self.plasma_boundary.unitnormal().reshape(-1, 3)), self.dims, self.phiThetas ) + t2 = time.time() + print('Acube took ', t2 - t1, ' seconds') print('# points = ',len(np.ascontiguousarray(self.plasma_boundary.gamma().reshape(-1, 3)))) print('# mag points = ', len(np.ascontiguousarray(self.pm_grid_xyz))) diff --git a/src/simsoptpp/Bcube_nonVec.cpp b/src/simsoptpp/Bcube_nonVec.cpp index 93bd57020..209a6f421 100644 --- a/src/simsoptpp/Bcube_nonVec.cpp +++ b/src/simsoptpp/Bcube_nonVec.cpp @@ -32,7 +32,7 @@ Array Pd(double phi, double theta) { return P; } -Array iterate_over_corners(Array corner, Array x, Array y, Array z) { +Array iterate_over_corners(Array& corner, Array& x, Array& y, Array& z) { int i = corner[0], j = corner[1], k = corner[2]; double summa = std::pow(-1, i+j+k); double rijk = std::sqrt(x[i]*x[i] + y[j]*y[j] + z[k]*z[k]); @@ -57,7 +57,7 @@ Array iterate_over_corners(Array corner, Array x, Array y, Array z) { return summa * h; } -Array Hd_i_prime(Array r, Array dims) { +Array Hd_i_prime(Array& r, Array& dims) { Array H = xt::zeros({3, 3}); @@ -89,18 +89,22 @@ Array Hd_i_prime(Array r, Array dims) { } -Array B_direct(Array points, Array magPos, Array M, Array dims, Array phiThetas) { +Array B_direct(Array& points, Array& magPos, Array& M, Array& dims, Array& phiThetas) { int N = points.shape(0); int D = M.shape(0); Array B = xt::zeros({N, 3}); + #pragma omp parallel for for (int n = 0; n < N; ++n) { + double x = points(n, 0); + double y = points(n, 1); + double z = points(n, 2); for (int d = 0; d < D; ++d) { Array P = Pd(phiThetas(d,0), phiThetas(d,1)); Array r = xt::zeros({3}); - r[0] = points(n,0) - magPos(d,0); - r[1] = points(n,1) - magPos(d,1); - r[2] = points(n,2) - magPos(d,2); + r[0] = x - magPos(d,0); + r[1] = y - magPos(d,1); + r[2] = z - magPos(d,2); Array r_loc = xt::zeros({3}); Array M_loc = xt::zeros({3}); for (int i = 0; i < 3; ++i) { @@ -112,20 +116,20 @@ Array B_direct(Array points, Array magPos, Array M, Array dims, Array phiThetas) Array H = Hd_i_prime(r_loc, dims); - double tx = heaviside(dims[0]/2 - std::abs(r_loc[0]),0.5); - double ty = heaviside(dims[1]/2 - std::abs(r_loc[1]),0.5); - double tz = heaviside(dims[2]/2 - std::abs(r_loc[2]),0.5); + double tx = heaviside(dims[0]/2 - std::abs(r_loc[0]), 0.5); + double ty = heaviside(dims[1]/2 - std::abs(r_loc[1]), 0.5); + double tz = heaviside(dims[2]/2 - std::abs(r_loc[2]), 0.5); double tm = 2*tx*ty*tz; Array B_loc = xt::zeros({3}); for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { - B_loc(n,i) += mu0 * (H(i,j) * M_loc[j] + tm * M_loc[i]); + B_loc(n, i) += mu0 * (H(i,j) * M_loc[j] + tm * M_loc[i]); } } for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { - B(n,i) += P(j,i) * B_loc[j]; + B(n, i) += P(j,i) * B_loc[j]; } } } @@ -133,12 +137,13 @@ Array B_direct(Array points, Array magPos, Array M, Array dims, Array phiThetas) return B; } -Array Bn_direct(Array points, Array magPos, Array M, Array norms, Array dims, Array phiThetas) { +Array Bn_direct(Array& points, Array& magPos, Array& M, Array& norms, Array& dims, Array& phiThetas) { int N = points.shape(0); Array B = B_direct(points, magPos, M, dims, phiThetas); Array Bn = xt::zeros({N}); + #pragma omp parallel for for (int n = 0; n < N; ++n) { for (int i = 0; i < 3; ++ i) { Bn[n] += B(n,i) * norms(n,i); @@ -148,7 +153,7 @@ Array Bn_direct(Array points, Array magPos, Array M, Array norms, Array dims, Ar } -Array gd_i(Array r_loc, Array n_i_loc, Array dims) { +Array gd_i(Array& r_loc, Array& n_i_loc, Array& dims) { double tx = heaviside(dims[0]/2 - std::abs(r_loc[0]),0.5); double ty = heaviside(dims[1]/2 - std::abs(r_loc[1]),0.5); double tz = heaviside(dims[2]/2 - std::abs(r_loc[2]),0.5); @@ -170,19 +175,23 @@ Array gd_i(Array r_loc, Array n_i_loc, Array dims) { return mu0 * g_loc; } -Array Acube(Array points, Array magPos, Array norms, Array dims, Array phiThetas) { +Array Acube(Array& points, Array& magPos, Array& norms, Array& dims, Array& phiThetas) { int N = points.shape(0); int D = magPos.shape(0); double magVol = dims[0] * dims[1] * dims[2]; - Array A = xt::zeros({N,3*D}); + Array A = xt::zeros({N, 3*D}); + #pragma omp parallel for for (int n = 0; n < N; ++n) { + double x = points(n, 0); + double y = points(n, 1); + double z = points(n, 2); for (int d = 0; d < D; ++d) { Array P = Pd(phiThetas(d,0), phiThetas(d,1)); Array r = xt::zeros({3}); - r[0] = points(n,0) - magPos(d,0); - r[1] = points(n,1) - magPos(d,1); - r[2] = points(n,2) - magPos(d,2); + // double rx = x - magPos(d,0); + // double ry = y - magPos(d,1); + // double rz = z - magPos(d,2); Array r_loc = xt::zeros({3}); Array n_loc = xt::zeros({3}); for (int i = 0; i < 3; ++i) { @@ -191,6 +200,9 @@ Array Acube(Array points, Array magPos, Array norms, Array dims, Array phiThetas n_loc[i] += P(i,j) * norms(n,j); } } + // double rlocx = P(0, 0) * rx + P(0, 1) * ry + P(0, 2) * rz; + // double rlocy = P(1, 0) * rx + P(1, 1) * ry + P(1, 2) * rz; + // double rlocz = P(2, 0) * rx + P(2, 1) * ry + P(2, 2) * rz; Array g_loc = gd_i(r_loc, n_loc, dims); Array g = xt::zeros({3}); // double magVol = dims(d,0) * dims(d,1) * dims(d,2); @@ -199,25 +211,27 @@ Array Acube(Array points, Array magPos, Array norms, Array dims, Array phiThetas g[i] += (P(j,i) * g_loc[j]) / magVol; } } - A(n,3*d) = g[0]; - A(n,3*d + 1) = g[1]; - A(n,3*d + 2) = g[2]; + A(n, 3*d) = g[0]; + A(n, 3*d + 1) = g[1]; + A(n, 3*d + 2) = g[2]; } } return A; } -Array Bn_fromMat(Array points, Array magPos, Array M, Array norms, Array dims, Array phiThetas) { +Array Bn_fromMat(Array& points, Array& magPos, Array& M, Array& norms, Array& dims, Array& phiThetas) { int N = points.shape(0); int D = M.shape(0); Array A = Acube(points, magPos, norms, dims, phiThetas); Array Ms = xt::zeros({3*D}); + #pragma omp parallel for for (int d = 0; d < D; ++d) { for (int i = 0; i < 3; ++i) { Ms(3*d + i) = M(d,i); } } Array Bn = xt::zeros({N}); + #pragma omp parallel for for (int n = 0; n < N; ++n) { for (int d = 0; d < 3*D; ++d) { Bn[n] += A(n,d) * Ms[d]; diff --git a/src/simsoptpp/Bcube_nonVec.h b/src/simsoptpp/Bcube_nonVec.h index fc555c2f3..00a5d9522 100644 --- a/src/simsoptpp/Bcube_nonVec.h +++ b/src/simsoptpp/Bcube_nonVec.h @@ -1,3 +1,4 @@ +#pragma once #include #include @@ -8,16 +9,16 @@ double heaviside(double x1, double x2); Array Pd(double phi, double theta); -Array iterate_over_corners(Array corner, Array x, Array y, Array z); +Array iterate_over_corners(Array& corner, Array& x, Array& y, Array& z); -Array Hd_i_prime(Array r, Array dims); +Array Hd_i_prime(Array& r, Array& dims); -Array B_direct(Array points, Array magPos, Array M, Array dims, Array phiThetas); +Array B_direct(Array& points, Array& magPos, Array& M, Array& dims, Array& phiThetas); -Array Bn_direct(Array point, Array magPos, Array M, Array norms, Array dims, Array phiThetas); +Array Bn_direct(Array& point, Array& magPos, Array& M, Array& norms, Array& dims, Array& phiThetas); -Array gd_i(Array r_loc, Array n_i_loc, Array dims); +Array gd_i(Array& r_loc, Array& n_i_loc, Array& dims); -Array Acube(Array points, Array magPos, Array norms, Array dims, Array phithetas); +Array Acube(Array& points, Array& magPos, Array& norms, Array& dims, Array& phithetas); -Array Bn_fromMat(Array points, Array magPos, Array M, Array norms, Array dims, Array phiThetas); \ No newline at end of file +Array Bn_fromMat(Array& points, Array& magPos, Array& M, Array& norms, Array& dims, Array& phiThetas); \ No newline at end of file