From 9fc64e2153068f3a80ad8017dcd111919ec61bde Mon Sep 17 00:00:00 2001 From: Alwin Date: Mon, 10 Oct 2022 13:54:38 -0700 Subject: [PATCH 1/8] merge and add new container build_tests.yml to main --- .github/workflows/build_tests.yml | 275 ++++++++++++------------------ 1 file changed, 110 insertions(+), 165 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index 8fb88c7ba..b183b3188 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -1,165 +1,110 @@ -name: Cholla Compile - -on: - pull_request: -# branches: [ CAAR ] - schedule: - - cron: "37 07 * * 1" # run every Monday at 07:37UTC. Crontab computed with crontab.guru - workflow_dispatch: - -jobs: - Build: - name: > - Build - API:${{ matrix.gpu-api }} - Make-Type:${{ matrix.make-type }} - Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} - GCC:v${{ matrix.gcc-version }} - ROCm:v${{ matrix.rocm-version }} - # if: ${{ false }} # If uncommented this line will disable this job - - # Choose OS/Runner - runs-on: ubuntu-latest - - # Matrix for different make types - strategy: - fail-fast: false - matrix: - make-type: [hydro, gravity, disk, particles, cosmology, mhd] - gpu-api: [HIP, CUDA] - # NOTE: if more than one parameter is in any of these three variables - # you need to manually exclude it for the GPU API that doesn't use it. - # An example exclude is shown below but commented out. Uncomment and - # tweak it to fit your needs - # CUDA uses the cuda-toolkit-version and gcc-version - # HIP uses the clang-version - cuda-toolkit-version: ['11.2.2'] - gcc-version: [9] - rocm-version: ['5.1.0'] - mpi: ['openmpi'] #Can use mpich and/or openmpi - # exclude: - # - gpu-api: HIP - # make-type: hydro - - # Setup environment variables - env: - CHOLLA_MACHINE: github - CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} - CUDA_ROOT: /usr/local/cuda - HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial - MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi - - # Run the job itself - steps: - - # Install required Tools - - uses: actions/checkout@v2 - - name: Setup MPI - uses: mpi4py/setup-mpi@v1 - with: - mpi: ${{ matrix.mpi }} - - name: Show MPI version - run: mpirun --version - - name: Install HDF5 Serial - run: sudo apt-get install libhdf5-serial-dev - - name: Show HDF5 config - run: | - h5cc -showconfig - - # Install CUDA and dependencies if this is a CUDA build - - uses: Jimver/cuda-toolkit@v0.2.8 - if: matrix.gpu-api == 'CUDA' - id: cuda-toolkit - with: - cuda: ${{ matrix.cuda-toolkit-version }} - - name: NVCC version & set CUDA_ROOT - if: matrix.gpu-api == 'CUDA' - run: | - nvcc -V - - name: Set up GCC - if: matrix.gpu-api == 'CUDA' - uses: egor-tensin/setup-gcc@v1 - with: - version: ${{ matrix.gcc-version }} - - name: GCC Version - if: matrix.gpu-api == 'CUDA' - run: | - cc --version - c++ --version - - # Install HIP and dependencies if this is a HIP build - - name: Setup ROCm - if: matrix.gpu-api == 'HIP' - run: | - # Download and install the installation script - sudo apt-get update - wget https://repo.radeon.com/amdgpu-install/22.20.1/ubuntu/focal/amdgpu-install_22.20.50201-1_all.deb - sudo apt-get install -y ./amdgpu-install_22.20.50201-1_all.deb - - # Get names correct by stripping out the last ".0" if it exists - ROCM_VERSION=${{ matrix.rocm-version }} - if [ "${ROCM_VERSION:0-1}" = "0" ] - then - # If the last character is a "0" then trim the last ".0" - ROCM_REPO_VERSION="${ROCM_VERSION:0:3}" - else - ROCM_REPO_VERSION=$ROCM_VERSION - fi - - # Add the repo for the version of ROCm that we want - echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION} ubuntu main" | sudo tee /etc/apt/sources.list.d/rocm.list - sudo apt update - - name: Install ROCm - if: matrix.gpu-api == 'HIP' - run: | - # Install ROCm - sudo amdgpu-install -y --usecase=rocm --rocmrelease=${{ matrix.rocm-version }} - - name: Install hipFFT and RocFFT - if: matrix.gpu-api == 'HIP' - run: | - sudo apt install -y hipfft${{ matrix.rocm-version }} rocfft${{ matrix.rocm-version }} - - name: Verify HIP install - if: matrix.gpu-api == 'HIP' - run: | - hipconfig --full - - name: Set Environment Variables and Files - if: matrix.gpu-api == 'HIP' - run: | - echo "HIPCONFIG=$(hipconfig -C)" >> $GITHUB_ENV - echo "ROCM_PATH=$(hipconfig -R)" >> $GITHUB_ENV - echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst # trick ROCm into thinking there's a GPU - - name: Echo Environment Variables and Files - if: matrix.gpu-api == 'HIP' - run: | - echo "HIPCONFIG = ${HIPCONFIG}" - echo "ROCM_PATH = ${ROCM_PATH}" - echo "HIPFFT_PATH = ${HIPFFT_PATH}" - - echo "The contents of $(hipconfig -R)/bin/target.lst are:" - sudo cat $(hipconfig -R)/bin/target.lst - - # Perform Build - - name: Cholla setup - run: | - source builds/run_tests.sh - setupTests -c gcc - echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" - echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}" - - echo "CHOLLA_ROOT=${CHOLLA_ROOT}" >> $GITHUB_ENV - echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV - echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV - echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV - - name: Build GoogleTest - run: | - source builds/run_tests.sh - buildGoogleTest - echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV - - name: Build Cholla - run: | - source builds/run_tests.sh - buildCholla - - name: Build Tests - run: | - source builds/run_tests.sh - buildChollaTests +name: Cholla Compile + +on: + pull_request: + schedule: + - cron: "37 07 * * 1" # run every Monday at 07:37UTC. Crontab computed with crontab.guru + workflow_dispatch: + +jobs: + Build: + name: > + Build + ${{ matrix.container.name }} + TYPE=${{ matrix.make-type }} + + # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} + # GCC:v${{ matrix.gcc-version }} + # ROCm:v${{ matrix.rocm-version }} + # if: ${{ false }} # If uncommented this line will disable this job + + # Choose OS/Runner + runs-on: ubuntu-latest + container: + image: ${{matrix.container.link}} + defaults: + run: + shell: bash + # Matrix for different make types + strategy: + fail-fast: false + matrix: + make-type: [hydro, gravity, disk, particles, cosmology, mhd] + container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] + # gpu-api: [CUDA] + # NOTE: if more than one parameter is in any of these three variables + # you need to manually exclude it for the GPU API that doesn't use it. + # An example exclude is shown below but commented out. Uncomment and + # tweak it to fit your needs + # CUDA uses the cuda-toolkit-version and gcc-version + # HIP uses the clang-version + # cuda-toolkit-version: ['11.2.2'] + # gcc-version: [9] + # rocm-version: ['5.1.0'] + mpi: ['openmpi'] #Can use mpich and/or openmpi + # exclude: + # - gpu-api: HIP + # make-type: hydro + + # Setup environment variables + env: + CHOLLA_MACHINE: github + CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} + CUDA_ROOT: /usr/local/cuda + HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial + MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi + + # Run the job itself + steps: + + # Install required Tools + - uses: actions/checkout@v2 + + # Show versions + - name: Show MPI version + run: mpirun --version + - name: Show HDF5 config + run: | + h5cc -showconfig + - name: Git Safe Directory + run: | + git --version + git config --global --add safe.directory /__w/cholla/cholla + git config --global --add safe.directory '*' + - name: Show CUDA and gcc version + if: matrix.container.name == 'CUDA' + run: | + cc --version + c++ --version + nvcc -V + - name: Show HIP and hipcc version + if: matrix.container.name == 'HIP' + run: | + hipcc --version + hipconfig --full + + + # Perform Build + - name: Cholla setup + run: | + source builds/run_tests.sh + setupTests -c gcc + echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" + echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}" + echo "CHOLLA_ROOT=${CHOLLA_ROOT}" >> $GITHUB_ENV + echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV + echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV + echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV + - name: Build GoogleTest + run: | + source builds/run_tests.sh + buildGoogleTest + echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV + - name: Build Cholla + run: | + source builds/run_tests.sh + buildCholla OPTIMIZE + - name: Build Tests + run: | + source builds/run_tests.sh + buildChollaTests From 92e9a807c2ca51b36cc83bc324cd9adc15d24b82 Mon Sep 17 00:00:00 2001 From: alwinm Date: Mon, 10 Oct 2022 16:15:28 -0700 Subject: [PATCH 2/8] Update build_tests.yml Use checkout v3 instead of v2 in preparation for github deprecating node 12 for node 16 --- .github/workflows/build_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index b183b3188..a21b7c950 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -58,7 +58,7 @@ jobs: steps: # Install required Tools - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Show versions - name: Show MPI version From 83360cb8b085e1eb028553e727bea622a7107604 Mon Sep 17 00:00:00 2001 From: alwinm Date: Tue, 11 Oct 2022 07:38:56 -0700 Subject: [PATCH 3/8] Update build_tests.yml --- .github/workflows/build_tests.yml | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index a21b7c950..19bdabb44 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -12,10 +12,6 @@ jobs: Build ${{ matrix.container.name }} TYPE=${{ matrix.make-type }} - - # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }} - # GCC:v${{ matrix.gcc-version }} - # ROCm:v${{ matrix.rocm-version }} # if: ${{ false }} # If uncommented this line will disable this job # Choose OS/Runner @@ -31,20 +27,6 @@ jobs: matrix: make-type: [hydro, gravity, disk, particles, cosmology, mhd] container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] - # gpu-api: [CUDA] - # NOTE: if more than one parameter is in any of these three variables - # you need to manually exclude it for the GPU API that doesn't use it. - # An example exclude is shown below but commented out. Uncomment and - # tweak it to fit your needs - # CUDA uses the cuda-toolkit-version and gcc-version - # HIP uses the clang-version - # cuda-toolkit-version: ['11.2.2'] - # gcc-version: [9] - # rocm-version: ['5.1.0'] - mpi: ['openmpi'] #Can use mpich and/or openmpi - # exclude: - # - gpu-api: HIP - # make-type: hydro # Setup environment variables env: From fcc63deb8b7ad59e5f23f964532cd859df94871b Mon Sep 17 00:00:00 2001 From: Alwin Date: Sun, 2 Oct 2022 01:38:50 -0700 Subject: [PATCH 4/8] add viz --- src/global/global.cpp | 6 +- src/global/global.h | 2 + src/io/io.cpp | 7 ++- src/io/io.h | 7 ++- src/io/viz.cu | 124 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 src/io/viz.cu diff --git a/src/global/global.cpp b/src/global/global.cpp index a99c1360e..d84a04a68 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -223,6 +223,10 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->n_rotated_projection = atoi(value); else if (strcmp(name, "n_slice")==0) parms->n_slice = atoi(value); + else if (strcmp(name, "n_outviz")==0) + parms->n_outviz = atoi(value); + else if (strcmp(name, "outviz_density")==0) + parms->outviz_density = atoi(value); else if (strcmp(name, "xmin")==0) parms->xmin = atof(value); else if (strcmp(name, "ymin")==0) @@ -366,7 +370,7 @@ void parse_param(char *name,char *value, struct parameters *parms){ #ifdef CHEMISTRY_GPU else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); -#endif +#endif #ifdef COOLING_GRACKLE else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); diff --git a/src/global/global.h b/src/global/global.h index 560ddb5f4..0fc1ff47b 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -204,6 +204,8 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; + int n_outviz=0; + int outviz_density=0; Real xmin; Real ymin; Real zmin; diff --git a/src/io/io.cpp b/src/io/io.cpp index 46ee71916..7d51c030c 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -72,7 +72,7 @@ void Write_Message_To_Log_File( const char* message ){ out_file.close(); } -/* Write the initial conditions */ +/* Write Cholla Output Data */ void WriteData(Grid3D &G, struct parameters P, int nfile) { @@ -109,6 +109,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) if (nfile % P.n_hydro == 0) OutputData(G,P,nfile); #endif + // This function does other checks to make sure it is valid (3D only) + #ifdef HDF5 + if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); + #endif + #ifdef PROJECTION if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); #endif /*PROJECTION*/ diff --git a/src/io/io.h b/src/io/io.h index deccec5ce..2e07bf083 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,5 +1,4 @@ -#ifndef IO_CHOLLA_H -#define IO_CHOLLA_H +#pragma once #include "../global/global.h" #include "../grid/grid3D.h" @@ -29,4 +28,6 @@ void Create_Log_File( struct parameters P ); void Write_Message_To_Log_File( const char* message ); void write_debug ( Real *Value, const char *fname, int nValues, int iProc ); -#endif /*IO_CHOLLA_H*/ + +// From io/viz.cu +void OutputViz3D(Grid3D &G, struct parameters P, int nfile); diff --git a/src/io/viz.cu b/src/io/viz.cu new file mode 100644 index 000000000..bfbf549e2 --- /dev/null +++ b/src/io/viz.cu @@ -0,0 +1,124 @@ +// Require HDF5 +#ifdef HDF5 + +#include + +#include "../grid/grid3D.h" + +#include "../io/io.h" // To provide io.h with OutputViz3D + + +void CopyReal3D_CPU(Real* source, Real* destination, Header H) +{ + int i,j,k,id,buf_id; + + for (k=0; k 0) { + WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density"); + } + /* + // Just an example of extending this function to include other fields. + // Not implemented yet + if (P.outviz_energy > 0) { + WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); + } + */ + + + free(dataset_buffer); + herr_t status = H5Sclose(dataspace_id); +} + + + +void OutputViz3D(Grid3D &G, struct parameters P, int nfile) +{ + Header H = G.H; + // Do nothing in 1-D and 2-D case + if (H.ny == 1) { + return; + } + if (H.nz == 1) { + return; + } + // Do nothing if nfile is not multiple of n_outviz + if (nfile % P.n_outviz != 0) { + return; + } + + char filename[MAXLEN]; + char timestep[20]; + + // create the filename + sprintf(timestep, "%d", nfile); + strcpy(filename, P.outdir); + strcat(filename, timestep); + strcat(filename, ".viz3d.h5"); + #ifdef MPI_CHOLLA + sprintf(filename,"%s.%d",filename,procID); + #endif + + // create hdf5 file + hid_t file_id; /* file identifier */ + herr_t status; + + // Create a new file using default properties. + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write the header (file attributes) + G.Write_Header_HDF5(file_id); + + // write the conserved variables to the output file + WriteViz3D(G, P, file_id); + + // close the file + status = H5Fclose(file_id); + + if (status < 0) {printf("File write failed.\n"); exit(-1); } + +} + +#endif From c4be450bf440fb1866b1ec95c852e51d3822a13e Mon Sep 17 00:00:00 2001 From: Alwin Date: Thu, 6 Oct 2022 03:41:31 -0400 Subject: [PATCH 5/8] hdf5 output refactor passes regression tests --- src/io/io.cpp | 404 ++++++++++---------------------------------------- src/io/io.h | 5 + src/io/viz.cu | 133 ++++++++++++----- 3 files changed, 182 insertions(+), 360 deletions(-) diff --git a/src/io/io.cpp b/src/io/io.cpp index 7d51c030c..d9ea04548 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -113,7 +113,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) #ifdef HDF5 if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); #endif - + #ifdef PROJECTION if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); #endif /*PROJECTION*/ @@ -1098,6 +1098,42 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + int id = H.n_ghost; + memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); + + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); +} + +void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + int i,j,id,buf_id; + // Copy the density array to the memory buffer + for (j=0; j 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + id = H.n_ghost; memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); @@ -1234,20 +1224,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); + } #endif //SCALAR #ifdef DE - // Copy the internal energy array to the memory buffer - id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.GasEnergy[id]), H.nx_real*sizeof(Real)); - - // Create a dataset id for internal energy - dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the internal energy array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif //DE // Free the dataspace id @@ -1268,85 +1250,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); - // Copy the density array to the memory buffer - for (j=0; j 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + // Copy the scalar array to the memory buffer for (j=0; j device_buffer, then copy device_buffer -> buffer, then write HDF5 field +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); diff --git a/src/io/viz.cu b/src/io/viz.cu index bfbf549e2..a8cbaf0a3 100644 --- a/src/io/viz.cu +++ b/src/io/viz.cu @@ -7,69 +7,136 @@ #include "../io/io.h" // To provide io.h with OutputViz3D +// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) +{ + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; + + k = id/(nx_real*ny_real); + j = (id - k*nx_real*ny_real)/nx_real; + i = id - j*nx_real - k*nx_real*ny_real; + + if (k >= nz_real) { + return; + } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (double) source[source_id]; +} -void CopyReal3D_CPU(Real* source, Real* destination, Header H) +// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) { - int i,j,k,id,buf_id; + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; - for (k=0; k= nz_real) { + return; } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (float) source[source_id]; } -void WriteVizField(Header H, hid_t file_id, hid_t dataspace_id, Real* buffer, Real* source, const char* name) +// When buffer is double, automatically use the double version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) { hid_t dataset_id; herr_t status; - // Copy non-ghost parts of source to buffer - CopyReal3D_CPU(source, buffer, H); + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); // Create a dataset id dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); // Write the buffer to file status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + // Free the dataset id and dataspace id + status = H5Dclose(dataset_id); + status = H5Sclose(dataspace_id); +} + + +// When buffer is float, automatically use the float version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) +{ + + hid_t dataset_id; + herr_t status; + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); + + // Create a dataset id + dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the buffer to file + status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + // Free the dataset id and dataspace id + status = H5Dclose(dataset_id); + status = H5Sclose(dataspace_id); } + void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id) { Header H = G.H; - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[3]; - // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; - dims[2] = nz_dset; - hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - - - Real* dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); + int nx_real = H.nx_real; + int ny_real = H.ny_real; + int nz_real = H.nz_real; + int n_ghost = H.n_ghost; + int nx = H.nx; + int ny = H.ny; + float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); + float* device_buffer; + CudaSafeCall(cudaMalloc(&device_buffer, nx_real*ny_real*nz_real*sizeof(float))); if (P.outviz_density > 0) { - WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density"); + WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density"); } + + + /* // Just an example of extending this function to include other fields. // Not implemented yet if (P.outviz_energy > 0) { - WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); + WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); } */ - + CudaSafeCall(cudaFree(device_buffer)); free(dataset_buffer); - herr_t status = H5Sclose(dataspace_id); + } @@ -78,10 +145,10 @@ void OutputViz3D(Grid3D &G, struct parameters P, int nfile) { Header H = G.H; // Do nothing in 1-D and 2-D case - if (H.ny == 1) { + if (H.ny_real == 1) { return; } - if (H.nz == 1) { + if (H.nz_real == 1) { return; } // Do nothing if nfile is not multiple of n_outviz From f68e794620ec1410b3d2f35c2b7e709a5fc00306 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 11 Oct 2022 12:18:16 -0400 Subject: [PATCH 6/8] hdf5 refactor part 2 (this part also passes tests) --- examples/3D/float32_sound_wave.txt | 59 ++++ src/global/global.cpp | 28 +- src/global/global.h | 16 +- src/io/io.cpp | 437 ++++++++++++++--------------- src/io/io.h | 12 +- src/io/{viz.cu => io_gpu.cu} | 125 ++------- 6 files changed, 346 insertions(+), 331 deletions(-) create mode 100644 examples/3D/float32_sound_wave.txt rename src/io/{viz.cu => io_gpu.cu} (50%) diff --git a/examples/3D/float32_sound_wave.txt b/examples/3D/float32_sound_wave.txt new file mode 100644 index 000000000..68b3d4952 --- /dev/null +++ b/examples/3D/float32_sound_wave.txt @@ -0,0 +1,59 @@ +# +# Parameter File for sound wave test with float32 output +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.05 +# time interval for output +outstep=0.05 +# name of initial conditions +init=Sound_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=4.0 +ylen=4.0 +zlen=4.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +# Enable float32 output +# Enable float32 density field +n_out_float32=1 +out_float32_density=1 + +# Uncomment this to enable momentum_x +# out_float32_momentum_x=1 + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# amplitude of perturbing oscillations +A=1e-4 +# value of gamma +gamma=1.666666666666667 + diff --git a/src/global/global.cpp b/src/global/global.cpp index d84a04a68..1f6a5cbfa 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -223,10 +223,30 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->n_rotated_projection = atoi(value); else if (strcmp(name, "n_slice")==0) parms->n_slice = atoi(value); - else if (strcmp(name, "n_outviz")==0) - parms->n_outviz = atoi(value); - else if (strcmp(name, "outviz_density")==0) - parms->outviz_density = atoi(value); + else if (strcmp(name, "n_out_float32")==0) + parms->n_out_float32 = atoi(value); + else if (strcmp(name, "out_float32_density")==0) + parms->out_float32_density = atoi(value); + else if (strcmp(name, "out_float32_momentum_x")==0) + parms->out_float32_momentum_x = atoi(value); + else if (strcmp(name, "out_float32_momentum_y")==0) + parms->out_float32_momentum_y = atoi(value); + else if (strcmp(name, "out_float32_momentum_z")==0) + parms->out_float32_momentum_z = atoi(value); + else if (strcmp(name, "out_float32_Energy")==0) + parms->out_float32_Energy = atoi(value); +#ifdef DE + else if (strcmp(name, "out_float32_GasEnergy")==0) + parms->out_float32_GasEnergy = atoi(value); +#endif // DE +#ifdef MHD + else if (strcmp(name, "out_float32_magnetic_x")==0) + parms->out_float32_magnetic_x = atoi(value); + else if (strcmp(name, "out_float32_magnetic_y")==0) + parms->out_float32_magnetic_y = atoi(value); + else if (strcmp(name, "out_float32_magnetic_z")==0) + parms->out_float32_magnetic_z = atoi(value); +#endif // MHD else if (strcmp(name, "xmin")==0) parms->xmin = atof(value); else if (strcmp(name, "ymin")==0) diff --git a/src/global/global.h b/src/global/global.h index 0fc1ff47b..4e6d8eeb9 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -204,8 +204,20 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; - int n_outviz=0; - int outviz_density=0; + int n_out_float32=0; + int out_float32_density=0; + int out_float32_momentum_x=0; + int out_float32_momentum_y=0; + int out_float32_momentum_z=0; + int out_float32_Energy=0; +#ifdef DE + int out_float32_GasEnergy=0; +#endif +#ifdef MHD + int out_float32_magnetic_x=0; + int out_float32_magnetic_y=0; + int out_float32_magnetic_z=0; +#endif Real xmin; Real ymin; Real zmin; diff --git a/src/io/io.cpp b/src/io/io.cpp index d9ea04548..fc6c52eb7 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -16,6 +16,7 @@ #include "../mpi/mpi_routines.h" #endif //MPI_CHOLLA #include "../utils/error_handling.h" +#include "../utils/DeviceVector.h" #ifdef COSMOLOGY #include "../cosmology/cosmology.h" @@ -111,7 +112,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) // This function does other checks to make sure it is valid (3D only) #ifdef HDF5 - if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile); + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile); #endif #ifdef PROJECTION @@ -232,6 +233,92 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #endif } +void OutputFloat32(Grid3D &G, struct parameters P, int nfile) +{ + + Header H = G.H; + // Do nothing in 1-D and 2-D case + if (H.ny_real == 1) { + return; + } + if (H.nz_real == 1) { + return; + } + // Do nothing if nfile is not multiple of n_out_float32 + if (nfile % P.n_out_float32 != 0) { + return; + } + + char filename[MAXLEN]; + char timestep[20]; + + // create the filename + sprintf(timestep, "%d", nfile); + strcpy(filename, P.outdir); + strcat(filename, timestep); + strcat(filename, ".float32.h5"); + #ifdef MPI_CHOLLA + sprintf(filename,"%s.%d",filename,procID); + #endif + + // create hdf5 file + hid_t file_id; /* file identifier */ + herr_t status; + + // Create a new file using default properties. + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write the header (file attributes) + G.Write_Header_HDF5(file_id); + + // write the conserved variables to the output file + + // 3-D Case + if (H.nx>1 && H.ny>1 && H.nz>1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + size_t buffer_size; + // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. + // If the buffer is too large, it does not cause bugs (Oct 6 2022) +#ifdef MHD + buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); +#else + buffer_size = nx_dset*ny_dset*nz_dset; +#endif + + // Using static DeviceVector here automatically allocates the buffer the first time it is needed + // It persists until program exit, and then calls Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + float* device_dataset_buffer = device_dataset_vector.data(); + float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float)); + + if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); + if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); + if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); + if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); + if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); +#ifdef DE + if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); +#endif //DE +#ifdef MHD + if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); +#endif + + + free(dataset_buffer); + + if (status < 0) {printf("File write failed.\n"); exit(-1); } + } // 3-D case + + // close the file + status = H5Fclose(file_id); + + +} + /* Output a projection of the grid data to file. */ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) @@ -1099,23 +1186,54 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 -void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +// Helper function which uses the correct HDF5 arguments based on the type of dataset_buffer to avoid writing garbage +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name) { - int id = H.n_ghost; - memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); - // Create a dataset id for density hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the density array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! + // Write the density array to file herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); + return status; +} + +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name) +{ + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + return status; +} + + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + // Copy non-ghost source to Buffer + int id = H.n_ghost; + memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); + // Buffer write to HDF5 Dataset + herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); +} + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name) +{ + // Copy non-ghost source to Buffer with conversion from double to float + int i; + for (i=0; i 1 this substitution can be attempted. + // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - + id = H.n_ghost; memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); - - // Create a dataset id for the scalar - dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the scalar array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - + // dataset here is just a name + status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } + #endif //SCALAR #ifdef DE @@ -1265,7 +1388,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) sprintf(number, "%d", s); strcat(dataset,number); - // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. + // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); // Copy the scalar array to the memory buffer @@ -1276,12 +1399,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dataset_buffer[buf_id] = C.scalar[id+s*H.n_cells]; } } - // Create a dataset id for the scalar - dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the scalar array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + + status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } #endif //SCALAR @@ -1303,17 +1422,27 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) hsize_t dims[3]; hsize_t dims_full[3]; - double* device_dataset_buffer; - CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); + size_t buffer_size; + // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. + // If the buffer is too large, it does not cause bugs (Oct 6 2022) + #ifdef MHD + buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); + #else + buffer_size = nx_dset*ny_dset*nz_dset; + #endif + // Using static DeviceVector here automatically allocates the buffer the first time it is needed + // It persists until program exit, and then calls Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + double* device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real)); + //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); - // Create the data space for the datasets + // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one) dims[0] = nx_dset; dims[1] = ny_dset; dims[2] = nz_dset; dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); if ( output_momentum || H.Output_Complete_Data ) { WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); @@ -1325,7 +1454,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); } - #ifdef SCALAR #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle for (int s=0; s device_buffer, then copy device_buffer -> buffer, then write HDF5 field void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); +#endif diff --git a/src/io/viz.cu b/src/io/io_gpu.cu similarity index 50% rename from src/io/viz.cu rename to src/io/io_gpu.cu index a8cbaf0a3..c6cab6e8a 100644 --- a/src/io/viz.cu +++ b/src/io/io_gpu.cu @@ -7,13 +7,17 @@ #include "../io/io.h" // To provide io.h with OutputViz3D +// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz +// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz +// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. + // Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) { int dest_id,source_id,id,i,j,k; id = threadIdx.x + blockIdx.x * blockDim.x; - + k = id/(nx_real*ny_real); j = (id - k*nx_real*ny_real)/nx_real; i = id - j*nx_real - k*nx_real*ny_real; @@ -35,7 +39,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int dest_id,source_id,id,i,j,k; id = threadIdx.x + blockIdx.x * blockDim.x; - + k = id/(nx_real*ny_real); j = (id - k*nx_real*ny_real)/nx_real; i = id - j*nx_real - k*nx_real*ny_real; @@ -54,27 +58,26 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, // When buffer is double, automatically use the double version of everything using function overloading void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) { - hid_t dataset_id; herr_t status; hsize_t dims[3]; dims[0] = nx_real; dims[1] = ny_real; dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - + //Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); - dim3 dim1dBlock(TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); - // Create a dataset id - dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the buffer to file - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id and dataspace id - status = H5Dclose(dataset_id); - status = H5Sclose(dataspace_id); + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} + + } @@ -82,110 +85,26 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) { - hid_t dataset_id; herr_t status; hsize_t dims[3]; dims[0] = nx_real; dims[1] = ny_real; dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - + //Copy non-ghost parts of source to buffer dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); - dim3 dim1dBlock(TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); - // Create a dataset id - dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the buffer to file - status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); - // Free the dataset id and dataspace id - status = H5Dclose(dataset_id); - status = H5Sclose(dataspace_id); -} - - -void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id) -{ - Header H = G.H; - int nx_real = H.nx_real; - int ny_real = H.ny_real; - int nz_real = H.nz_real; - int n_ghost = H.n_ghost; - int nx = H.nx; - int ny = H.ny; - - float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real)); - float* device_buffer; - CudaSafeCall(cudaMalloc(&device_buffer, nx_real*ny_real*nz_real*sizeof(float))); - - if (P.outviz_density > 0) { - WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density"); - } - + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); - - /* - // Just an example of extending this function to include other fields. - // Not implemented yet - if (P.outviz_energy > 0) { - WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy"); - } - */ - CudaSafeCall(cudaFree(device_buffer)); - - free(dataset_buffer); + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} } - -void OutputViz3D(Grid3D &G, struct parameters P, int nfile) -{ - Header H = G.H; - // Do nothing in 1-D and 2-D case - if (H.ny_real == 1) { - return; - } - if (H.nz_real == 1) { - return; - } - // Do nothing if nfile is not multiple of n_outviz - if (nfile % P.n_outviz != 0) { - return; - } - - char filename[MAXLEN]; - char timestep[20]; - - // create the filename - sprintf(timestep, "%d", nfile); - strcpy(filename, P.outdir); - strcat(filename, timestep); - strcat(filename, ".viz3d.h5"); - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif - - // create hdf5 file - hid_t file_id; /* file identifier */ - herr_t status; - - // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - - // Write the header (file attributes) - G.Write_Header_HDF5(file_id); - - // write the conserved variables to the output file - WriteViz3D(G, P, file_id); - - // close the file - status = H5Fclose(file_id); - - if (status < 0) {printf("File write failed.\n"); exit(-1); } - -} - -#endif +#endif //HDF5 From 03b34829556aa7a8a5c8a2b34b5c6d29c60f63a9 Mon Sep 17 00:00:00 2001 From: Alwin Date: Tue, 18 Oct 2022 02:10:33 -0400 Subject: [PATCH 7/8] static_grav renamed/fixed, light side fixes --- builds/make.type.rot_proj | 31 ++++++++++++++++++ builds/make.type.static_grav | 32 +++++++++++++++++++ src/global/global_cuda.h | 16 ---------- src/gravity/gravity_cuda.h | 18 ----------- .../{gravity_cuda.cu => static_grav.h} | 15 ++++----- src/hydro/hydro_cuda.cu | 2 +- src/io/io.cpp | 23 ++++++++----- 7 files changed, 86 insertions(+), 51 deletions(-) create mode 100644 builds/make.type.rot_proj create mode 100644 builds/make.type.static_grav delete mode 100644 src/gravity/gravity_cuda.h rename src/gravity/{gravity_cuda.cu => static_grav.h} (90%) diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj new file mode 100644 index 000000000..e6faa7514 --- /dev/null +++ b/builds/make.type.rot_proj @@ -0,0 +1,31 @@ +#-- Default hydro only build with rotated projection + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + +DFLAGS += -DROTATED_PROJECTION \ No newline at end of file diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav new file mode 100644 index 000000000..ffa15c4ee --- /dev/null +++ b/builds/make.type.static_grav @@ -0,0 +1,32 @@ +#-- Default hydro only build with static_grav + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +DFLAGS += -DSTATIC_GRAV + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 7a5beca55..35c0c355f 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -92,17 +92,6 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) } } - - -/*! \fn Real minof3(Real a, Real b, Real c) - * \brief Returns the minimum of three floating point numbers. */ -__device__ inline Real minof3(Real a, Real b, Real c) -{ - return fmin(a, fmin(b,c)); -} - - - /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) @@ -111,11 +100,6 @@ __device__ inline int sgn_CUDA(Real x) else return 1; } - -__global__ void test_function(); - - - #endif //GLOBAL_CUDA_H #endif //CUDA diff --git a/src/gravity/gravity_cuda.h b/src/gravity/gravity_cuda.h deleted file mode 100644 index b4d885262..000000000 --- a/src/gravity/gravity_cuda.h +++ /dev/null @@ -1,18 +0,0 @@ -/*! \file gravity_cuda.h - * \brief Declarations of functions used to calculate gravitational accelerations. */ - -#ifdef CUDA -#ifndef GRAVITY_CUDA_H -#define GRAVITY_CUDA_H - -#include "../global/global.h" - - -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx); - -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy); - -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); - -#endif // GRAVITY_CUDA_H -#endif // CUDA diff --git a/src/gravity/gravity_cuda.cu b/src/gravity/static_grav.h similarity index 90% rename from src/gravity/gravity_cuda.cu rename to src/gravity/static_grav.h index 0137c44f1..3ddbb86be 100644 --- a/src/gravity/gravity_cuda.cu +++ b/src/gravity/static_grav.h @@ -4,17 +4,16 @@ functions in hydro_cuda.cu. */ #ifdef CUDA +#pragma once + #include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../gravity/gravity_cuda.h" +#include // provides sqrt log cos sin atan etc. +#include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x*x; } -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; @@ -52,7 +51,7 @@ __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, } -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; // use the subgrid offset and global boundaries to calculate absolute positions on the grid @@ -108,7 +107,7 @@ __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, R } -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; // use the subgrid offset and global boundaries to calculate absolute positions on the grid diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index bf385d25f..ee033e334 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -10,7 +10,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" -#include "../gravity/gravity_cuda.h" +#include "../gravity/static_grav.h" #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" diff --git a/src/io/io.cpp b/src/io/io.cpp index fc6c52eb7..be0a1b9fa 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1820,6 +1820,11 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) int nx_dset = R.nx; int nz_dset = R.nz; + if (R.nx * R.nz == 0) { + chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n"); + return; + } + // set the projected dataset size for this process to capture // this piece of the simulation volume // min and max values were set in the header write @@ -1920,15 +1925,17 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) // Free the dataspace id status = H5Sclose(dataspace_xzr_id); + //free the data + free(dataset_buffer_dxzr); + free(dataset_buffer_Txzr); + free(dataset_buffer_vxxzr); + free(dataset_buffer_vyxzr); + free(dataset_buffer_vzxzr); + } - else printf("Rotated projection write only implemented for 3D data.\n"); - - //free the data - free(dataset_buffer_dxzr); - free(dataset_buffer_Txzr); - free(dataset_buffer_vxxzr); - free(dataset_buffer_vyxzr); - free(dataset_buffer_vzxzr); + else chprintf("Rotated projection write only implemented for 3D data.\n"); + + } #endif //HDF5 From 342c59f135c26d7cc6d052ae9ff41386f72647a6 Mon Sep 17 00:00:00 2001 From: bcaddy <41171425+bcaddy@users.noreply.github.com> Date: Thu, 28 Sep 2023 09:30:42 -0400 Subject: [PATCH 8/8] Fix typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b372f6b34..c9d731b89 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ https://user-images.githubusercontent.com/3432028/188235319-e5eb4e5e-00c6-435f-a Getting started ---------------- -This is the stable branch of the *Cholla* hydrodyamics code. +This is the stable branch of the *Cholla* hydrodynamics code. *Cholla* is designed to be run using (AMD or NVIDIA) GPUs, and can be run in serial mode using one GPU or with MPI for multiple GPUs.