Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pfloat #87

Closed
wants to merge 11 commits into from
Closed
151 changes: 99 additions & 52 deletions include/linAlg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,75 +50,97 @@ class linAlg_t {
/*********************/

// o_a[n] = alpha
void set(const dlong N, const dfloat alpha, deviceMemory<dfloat> o_a);
template<typename T>
void set(const dlong N, const T alpha, deviceMemory<T> o_a);

// o_a[n] *= alpha
void scale(const dlong N, const dfloat alpha, deviceMemory<dfloat> o_a);
template<typename T>
void scale(const dlong N, const T alpha, deviceMemory<T> o_a);

// o_a[n] += alpha
void add(const dlong N, const dfloat alpha, deviceMemory<dfloat> o_a);
template<typename T>
void add(const dlong N, const T alpha, deviceMemory<T> o_a);

// o_y[n] = beta*o_y[n] + alpha*o_x[n]
void axpy(const dlong N, const dfloat alpha, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y);
template<typename T>
void axpy(const dlong N, const T alpha, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y);

// o_z[n] = beta*o_y[n] + alpha*o_x[n]
void zaxpy(const dlong N, const dfloat alpha, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y,
deviceMemory<dfloat> o_z);
template<typename T>
void zaxpy(const dlong N, const T alpha, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y,
deviceMemory<T> o_z);

// o_x[n] = alpha*o_a[n]*o_x[n]
void amx(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x);
template<typename T>
void amx(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x);

// o_y[n] = alpha*o_a[n]*o_x[n] + beta*o_y[n]
void amxpy(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y);
template<typename T>
void amxpy(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y);

// o_z[n] = alpha*o_a[n]*o_x[n] + beta*o_y[n]
void zamxpy(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y, deviceMemory<dfloat> o_z);
template<typename T>
void zamxpy(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y, deviceMemory<T> o_z);

// o_x[n] = alpha*o_x[n]/o_a[n]
void adx(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x);
template<typename T>
void adx(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x);

// o_y[n] = alpha*o_x[n]/o_a[n] + beta*o_y[n]
void adxpy(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y);
template<typename T>
void adxpy(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y);

// o_z[n] = alpha*o_x[n]/o_a[n] + beta*o_y[n]
void zadxpy(const dlong N, const dfloat alpha,
deviceMemory<dfloat> o_a, deviceMemory<dfloat> o_x,
const dfloat beta, deviceMemory<dfloat> o_y, deviceMemory<dfloat> o_z);
template<typename T>
void zadxpy(const dlong N, const T alpha,
deviceMemory<T> o_a, deviceMemory<T> o_x,
const T beta, deviceMemory<T> o_y, deviceMemory<T> o_z);

// \min o_a
dfloat min(const dlong N, deviceMemory<dfloat> o_a, comm_t comm);
template<typename T>
T min(const dlong N, deviceMemory<T> o_a, comm_t comm);

// \max o_a
dfloat max(const dlong N, deviceMemory<dfloat> o_a, comm_t comm);
template<typename T>
T max(const dlong N, deviceMemory<T> o_a, comm_t comm);

// \sum o_a
dfloat sum(const dlong N, deviceMemory<dfloat> o_a, comm_t comm);
template<typename T>
T sum(const dlong N, deviceMemory<T> o_a, comm_t comm);

// ||o_a||_2
dfloat norm2(const dlong N, deviceMemory<dfloat> o_a, comm_t comm);
template<typename T>
T norm2(const dlong N, deviceMemory<T> o_a, comm_t comm);

// o_x.o_y
dfloat innerProd(const dlong N, deviceMemory<dfloat> o_x, deviceMemory<dfloat> o_y,
template<typename T>
T innerProd(const dlong N, deviceMemory<T> o_x, deviceMemory<T> o_y,
comm_t comm);

// ||o_a||_w2
dfloat weightedNorm2(const dlong N, deviceMemory<dfloat> o_w, deviceMemory<dfloat> o_a,
template<typename T>
T weightedNorm2(const dlong N, deviceMemory<T> o_w, deviceMemory<T> o_a,
comm_t comm);

// o_w.o_x.o_y
dfloat weightedInnerProd(const dlong N, deviceMemory<dfloat> o_w, deviceMemory<dfloat> o_x,
deviceMemory<dfloat> o_y, comm_t comm);

template<typename T>
T weightedInnerProd(const dlong N, deviceMemory<T> o_w, deviceMemory<T> o_x,
deviceMemory<T> o_y, comm_t comm);

// p=d
void p2d(const dlong N, deviceMemory<pfloat> o_p, deviceMemory<dfloat> o_d);
void d2p(const dlong N, deviceMemory<dfloat> o_d, deviceMemory<pfloat> o_p);

static void matrixRightSolve(const int NrowsA, const int NcolsA, const memory<double> A,
const int NrowsB, const int NcolsB, const memory<double> B,
memory<double> C);
Expand Down Expand Up @@ -174,30 +196,55 @@ class linAlg_t {

private:
platform_t *platform;
properties_t kernelInfo;
properties_t kernelInfoFloat;
properties_t kernelInfoDouble;

static constexpr int blocksize = 256;

kernel_t setKernel;
kernel_t addKernel;
kernel_t scaleKernel;
kernel_t axpyKernel;
kernel_t zaxpyKernel;
kernel_t amxKernel;
kernel_t amxpyKernel;
kernel_t zamxpyKernel;
kernel_t adxKernel;
kernel_t adxpyKernel;
kernel_t zadxpyKernel;
kernel_t minKernel;
kernel_t maxKernel;
kernel_t sumKernel;
kernel_t norm2Kernel;
kernel_t weightedNorm2Kernel;
kernel_t innerProdKernel;
kernel_t weightedInnerProdKernel;
kernel_t setKernelFloat;
kernel_t addKernelFloat;
kernel_t scaleKernelFloat;
kernel_t axpyKernelFloat;
kernel_t zaxpyKernelFloat;
kernel_t amxKernelFloat;
kernel_t amxpyKernelFloat;
kernel_t zamxpyKernelFloat;
kernel_t adxKernelFloat;
kernel_t adxpyKernelFloat;
kernel_t zadxpyKernelFloat;
kernel_t minKernelFloat;
kernel_t maxKernelFloat;
kernel_t sumKernelFloat;
kernel_t norm2KernelFloat;
kernel_t weightedNorm2KernelFloat;
kernel_t innerProdKernelFloat;
kernel_t weightedInnerProdKernelFloat;

kernel_t setKernelDouble;
kernel_t addKernelDouble;
kernel_t scaleKernelDouble;
kernel_t axpyKernelDouble;
kernel_t zaxpyKernelDouble;
kernel_t amxKernelDouble;
kernel_t amxpyKernelDouble;
kernel_t zamxpyKernelDouble;
kernel_t adxKernelDouble;
kernel_t adxpyKernelDouble;
kernel_t zadxpyKernelDouble;
kernel_t minKernelDouble;
kernel_t maxKernelDouble;
kernel_t sumKernelDouble;
kernel_t norm2KernelDouble;
kernel_t weightedNorm2KernelDouble;
kernel_t innerProdKernelDouble;
kernel_t weightedInnerProdKernelDouble;

kernel_t p2dKernel;
kernel_t d2pKernel;

};


} //namespace libp

#endif
24 changes: 21 additions & 3 deletions include/mesh.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,17 +163,27 @@ class mesh_t {
memory<dfloat> Dr, Ds, Dt; // collocation differentiation matrices
memory<dfloat> D;
deviceMemory<dfloat> o_D;
memory<dfloat> MM, invMM; // reference mass matrix
deviceMemory<pfloat> o_pfloat_D;

memory<dfloat> invMM;
memory<pfloat> pfloat_invMM;

memory<dfloat> MM; // reference mass matrix
deviceMemory<dfloat> o_MM;
deviceMemory<pfloat> o_pfloat_MM;

memory<dfloat> LIFT; // lift matrix
deviceMemory<dfloat> o_LIFT;
deviceMemory<pfloat> o_pfloat_LIFT;

memory<dfloat> sM; // surface mass (MM*LIFT)^T
deviceMemory<dfloat> o_sM;
memory<dfloat> Srr, Srs, Srt; //element stiffness matrices
memory<dfloat> Ssr, Sss, Sst;
memory<dfloat> Str, Sts, Stt;
memory<dfloat> S;
deviceMemory<dfloat> o_S;
deviceMemory<pfloat> o_pfloat_S;

/*************************/
/* Cubature */
Expand Down Expand Up @@ -230,18 +240,25 @@ class mesh_t {
// Jacobian
memory<dfloat> wJ;
deviceMemory<dfloat> o_wJ;
deviceMemory<pfloat> o_pfloat_wJ;

// volumeGeometricFactors;
dlong Nvgeo;
memory<dfloat> vgeo;
deviceMemory<dfloat> o_vgeo;
deviceMemory<pfloat> o_pfloat_vgeo;

// surfaceGeometricFactors;
dlong Nsgeo;
memory<dfloat> sgeo;
deviceMemory<dfloat> o_sgeo;
deviceMemory<pfloat> o_pfloat_sgeo;

// second order volume geometric factors
dlong Nggeo;
memory<dfloat> ggeo;
deviceMemory<dfloat> o_ggeo;
deviceMemory<pfloat> o_pfloat_ggeo;

memory<dfloat> cubx, cuby, cubz; // coordinates of physical nodes
deviceMemory<dfloat> o_cubx, o_cuby, o_cubz;
Expand Down Expand Up @@ -332,8 +349,9 @@ class mesh_t {
memory<int> FEMEToV;
memory<dfloat> rFEM, sFEM, tFEM;
memory<dfloat> SEMFEMInterp;
deviceMemory<dfloat> o_SEMFEMInterp;
deviceMemory<dfloat> o_SEMFEMAnterp;
memory<dfloat> SEMFEMAnterp;
deviceMemory<pfloat> o_pfloat_SEMFEMInterp;
deviceMemory<pfloat> o_pfloat_SEMFEMAnterp;

kernel_t MassMatrixKernel;

Expand Down
2 changes: 2 additions & 0 deletions include/ogs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ typedef enum { Float, Double, Int32, Int64} Type;

constexpr Type Dfloat = (std::is_same<double, dfloat>::value)
? Double : Float;
constexpr Type Pfloat = (std::is_same<double, pfloat>::value)
? Double : Float;
// constexpr Type Pfloat = (std::is_same<double, pfloat>::value)
// ? Double : Float;
constexpr Type Dlong = (std::is_same<int32_t, dlong>::value)
Expand Down
7 changes: 5 additions & 2 deletions include/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ namespace libp {
//basic operator
class operator_t {
public:
virtual void Operator(deviceMemory<dfloat> &o_r, deviceMemory<dfloat> &o_Mr) {
LIBP_FORCE_ABORT("Operator not implemented in this object");
virtual void Operator(deviceMemory<double> &o_r, deviceMemory<double> &o_Mr) {
LIBP_FORCE_ABORT("Operator not implemented in this object: double");
};
virtual void Operator(deviceMemory<float> &o_r, deviceMemory<float> &o_Mr) {
LIBP_FORCE_ABORT("Operator not implemented in this object: float");
};
};

Expand Down
Loading