From 9fc64e2153068f3a80ad8017dcd111919ec61bde Mon Sep 17 00:00:00 2001
From: Alwin <alwin.mao@pitt.edu>
Date: Mon, 10 Oct 2022 13:54:38 -0700
Subject: [PATCH 1/8] merge and add new container build_tests.yml to main

---
 .github/workflows/build_tests.yml | 275 ++++++++++++------------------
 1 file changed, 110 insertions(+), 165 deletions(-)

diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
index 8fb88c7ba..b183b3188 100644
--- a/.github/workflows/build_tests.yml
+++ b/.github/workflows/build_tests.yml
@@ -1,165 +1,110 @@
-name: Cholla Compile
-
-on:
-  pull_request:
-#    branches: [ CAAR ]
-  schedule:
-    - cron: "37 07 * * 1"  # run every Monday at 07:37UTC. Crontab computed with crontab.guru
-  workflow_dispatch:
-
-jobs:
-  Build:
-    name: >
-      Build
-      API:${{ matrix.gpu-api }}
-      Make-Type:${{ matrix.make-type }}
-      Cuda-toolkit:v${{ matrix.cuda-toolkit-version }}
-      GCC:v${{ matrix.gcc-version }}
-      ROCm:v${{ matrix.rocm-version }}
-    # if: ${{ false }}  # If uncommented this line will disable this job
-
-    # Choose OS/Runner
-    runs-on: ubuntu-latest
-
-    # Matrix for different make types
-    strategy:
-      fail-fast: false
-      matrix:
-        make-type: [hydro, gravity, disk, particles, cosmology, mhd]
-        gpu-api: [HIP, CUDA]
-        # NOTE: if more than one parameter is in any of these three variables
-        # you need to manually exclude it for the GPU API that doesn't use it.
-        # An example exclude is shown below but commented out. Uncomment and
-        # tweak it to fit your needs
-        # CUDA uses the cuda-toolkit-version and gcc-version
-        # HIP uses the clang-version
-        cuda-toolkit-version: ['11.2.2']
-        gcc-version: [9]
-        rocm-version: ['5.1.0']
-        mpi: ['openmpi'] #Can use mpich and/or openmpi
-        # exclude:
-        #   - gpu-api: HIP
-        #     make-type: hydro
-
-    # Setup environment variables
-    env:
-      CHOLLA_MACHINE: github
-      CHOLLA_MAKE_TYPE: ${{ matrix.make-type }}
-      CUDA_ROOT: /usr/local/cuda
-      HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial
-      MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi
-
-    # Run the job itself
-    steps:
-
-    # Install required Tools
-    - uses: actions/checkout@v2
-    - name: Setup MPI
-      uses: mpi4py/setup-mpi@v1
-      with:
-        mpi: ${{ matrix.mpi }}
-    - name: Show MPI version
-      run: mpirun --version
-    - name: Install HDF5 Serial
-      run: sudo apt-get install libhdf5-serial-dev
-    - name: Show HDF5 config
-      run: |
-        h5cc -showconfig
-
-    # Install CUDA and dependencies if this is a CUDA build
-    - uses: Jimver/cuda-toolkit@v0.2.8
-      if: matrix.gpu-api == 'CUDA'
-      id: cuda-toolkit
-      with:
-        cuda: ${{ matrix.cuda-toolkit-version }}
-    - name: NVCC version & set CUDA_ROOT
-      if: matrix.gpu-api == 'CUDA'
-      run: |
-        nvcc -V
-    - name: Set up GCC
-      if: matrix.gpu-api == 'CUDA'
-      uses: egor-tensin/setup-gcc@v1
-      with:
-        version: ${{ matrix.gcc-version }}
-    - name: GCC Version
-      if: matrix.gpu-api == 'CUDA'
-      run: |
-        cc --version
-        c++ --version
-
-    # Install HIP and dependencies if this is a HIP build
-    - name: Setup ROCm
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        # Download and install the installation script
-        sudo apt-get update
-        wget https://repo.radeon.com/amdgpu-install/22.20.1/ubuntu/focal/amdgpu-install_22.20.50201-1_all.deb
-        sudo apt-get install -y ./amdgpu-install_22.20.50201-1_all.deb
-
-        # Get names correct by stripping out the last ".0" if it exists
-        ROCM_VERSION=${{ matrix.rocm-version }}
-        if [ "${ROCM_VERSION:0-1}" = "0" ]
-        then
-            # If the last character is a "0" then trim the last ".0"
-            ROCM_REPO_VERSION="${ROCM_VERSION:0:3}"
-        else
-            ROCM_REPO_VERSION=$ROCM_VERSION
-        fi
-
-        # Add the repo for the version of ROCm that we want
-        echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${ROCM_REPO_VERSION} ubuntu main" | sudo tee /etc/apt/sources.list.d/rocm.list
-        sudo apt update
-    - name: Install ROCm
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        # Install ROCm
-        sudo amdgpu-install -y --usecase=rocm --rocmrelease=${{ matrix.rocm-version }}
-    - name: Install hipFFT and RocFFT
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        sudo apt install -y hipfft${{ matrix.rocm-version }} rocfft${{ matrix.rocm-version }}
-    - name: Verify HIP install
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        hipconfig --full
-    - name: Set Environment Variables and Files
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        echo "HIPCONFIG=$(hipconfig -C)" >> $GITHUB_ENV
-        echo "ROCM_PATH=$(hipconfig -R)" >> $GITHUB_ENV
-        echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst  # trick ROCm into thinking there's a GPU
-    - name: Echo Environment Variables and Files
-      if: matrix.gpu-api == 'HIP'
-      run: |
-        echo "HIPCONFIG = ${HIPCONFIG}"
-        echo "ROCM_PATH = ${ROCM_PATH}"
-        echo "HIPFFT_PATH = ${HIPFFT_PATH}"
-
-        echo "The contents of $(hipconfig -R)/bin/target.lst are:"
-        sudo cat $(hipconfig -R)/bin/target.lst
-
-    # Perform Build
-    - name: Cholla setup
-      run: |
-        source builds/run_tests.sh
-        setupTests -c gcc
-        echo "CHOLLA_ROOT           = ${CHOLLA_ROOT}"
-        echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}"
-
-        echo "CHOLLA_ROOT=${CHOLLA_ROOT}"                     >> $GITHUB_ENV
-        echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV
-        echo "F_OFFLOAD=${F_OFFLOAD}                          >> $GITHUB_ENV
-        echo "CHOLLA_ENVSET=${CHOLLA_ENVSET}                  >> $GITHUB_ENV
-    - name: Build GoogleTest
-      run: |
-        source builds/run_tests.sh
-        buildGoogleTest
-        echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV
-    - name: Build Cholla
-      run: |
-        source builds/run_tests.sh
-        buildCholla
-    - name: Build Tests
-      run: |
-        source builds/run_tests.sh
-        buildChollaTests
+name: Cholla Compile
+
+on:
+  pull_request:
+  schedule:
+    - cron: "37 07 * * 1"  # run every Monday at 07:37UTC. Crontab computed with crontab.guru
+  workflow_dispatch:
+
+jobs:
+  Build:
+    name: >
+      Build
+      ${{ matrix.container.name }}
+      TYPE=${{ matrix.make-type }}
+
+    # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }}
+    # GCC:v${{ matrix.gcc-version }}
+    # ROCm:v${{ matrix.rocm-version }}
+    # if: ${{ false }}  # If uncommented this line will disable this job
+
+    # Choose OS/Runner
+    runs-on: ubuntu-latest
+    container: 
+      image: ${{matrix.container.link}} 
+    defaults:
+      run:
+        shell: bash
+    # Matrix for different make types
+    strategy:
+      fail-fast: false
+      matrix:
+        make-type: [hydro, gravity, disk, particles, cosmology, mhd]
+        container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},]
+        # gpu-api: [CUDA]
+        # NOTE: if more than one parameter is in any of these three variables
+        # you need to manually exclude it for the GPU API that doesn't use it.
+        # An example exclude is shown below but commented out. Uncomment and
+        # tweak it to fit your needs
+        # CUDA uses the cuda-toolkit-version and gcc-version
+        # HIP uses the clang-version
+        # cuda-toolkit-version: ['11.2.2']
+        # gcc-version: [9]
+        # rocm-version: ['5.1.0']
+        mpi: ['openmpi'] #Can use mpich and/or openmpi
+        # exclude:
+        #   - gpu-api: HIP
+        #     make-type: hydro
+
+    # Setup environment variables
+    env:
+      CHOLLA_MACHINE: github
+      CHOLLA_MAKE_TYPE: ${{ matrix.make-type }}
+      CUDA_ROOT: /usr/local/cuda
+      HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial
+      MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi
+
+    # Run the job itself
+    steps:
+
+    # Install required Tools
+    - uses: actions/checkout@v2
+
+    # Show versions
+    - name: Show MPI version
+      run: mpirun --version
+    - name: Show HDF5 config
+      run: |
+        h5cc -showconfig
+    - name: Git Safe Directory
+      run: |
+        git --version
+        git config --global --add safe.directory /__w/cholla/cholla
+        git config --global --add safe.directory '*'
+    - name: Show CUDA and gcc version
+      if: matrix.container.name == 'CUDA'
+      run: |
+        cc --version
+        c++ --version
+        nvcc -V
+    - name: Show HIP and hipcc version
+      if: matrix.container.name == 'HIP'
+      run: |
+        hipcc --version
+        hipconfig --full
+    
+
+    # Perform Build
+    - name: Cholla setup
+      run: |
+        source builds/run_tests.sh
+        setupTests -c gcc
+        echo "CHOLLA_ROOT           = ${CHOLLA_ROOT}"
+        echo "CHOLLA_LAUNCH_COMMAND = ${CHOLLA_LAUNCH_COMMAND}"
+        echo "CHOLLA_ROOT=${CHOLLA_ROOT}"                     >> $GITHUB_ENV
+        echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV
+        echo "F_OFFLOAD=${F_OFFLOAD}                          >> $GITHUB_ENV
+        echo "CHOLLA_ENVSET=${CHOLLA_ENVSET}                  >> $GITHUB_ENV
+    - name: Build GoogleTest
+      run: |
+        source builds/run_tests.sh
+        buildGoogleTest
+        echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV
+    - name: Build Cholla
+      run: |
+        source builds/run_tests.sh
+        buildCholla OPTIMIZE
+    - name: Build Tests
+      run: |
+        source builds/run_tests.sh
+        buildChollaTests

From 92e9a807c2ca51b36cc83bc324cd9adc15d24b82 Mon Sep 17 00:00:00 2001
From: alwinm <alwin.mao@gmail.com>
Date: Mon, 10 Oct 2022 16:15:28 -0700
Subject: [PATCH 2/8] Update build_tests.yml

Use checkout v3 instead of v2 in preparation for github deprecating node 12 for node 16
---
 .github/workflows/build_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
index b183b3188..a21b7c950 100644
--- a/.github/workflows/build_tests.yml
+++ b/.github/workflows/build_tests.yml
@@ -58,7 +58,7 @@ jobs:
     steps:
 
     # Install required Tools
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     # Show versions
     - name: Show MPI version

From 83360cb8b085e1eb028553e727bea622a7107604 Mon Sep 17 00:00:00 2001
From: alwinm <alwin.mao@gmail.com>
Date: Tue, 11 Oct 2022 07:38:56 -0700
Subject: [PATCH 3/8] Update build_tests.yml

---
 .github/workflows/build_tests.yml | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
index a21b7c950..19bdabb44 100644
--- a/.github/workflows/build_tests.yml
+++ b/.github/workflows/build_tests.yml
@@ -12,10 +12,6 @@ jobs:
       Build
       ${{ matrix.container.name }}
       TYPE=${{ matrix.make-type }}
-
-    # Cuda-toolkit:v${{ matrix.cuda-toolkit-version }}
-    # GCC:v${{ matrix.gcc-version }}
-    # ROCm:v${{ matrix.rocm-version }}
     # if: ${{ false }}  # If uncommented this line will disable this job
 
     # Choose OS/Runner
@@ -31,20 +27,6 @@ jobs:
       matrix:
         make-type: [hydro, gravity, disk, particles, cosmology, mhd]
         container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},]
-        # gpu-api: [CUDA]
-        # NOTE: if more than one parameter is in any of these three variables
-        # you need to manually exclude it for the GPU API that doesn't use it.
-        # An example exclude is shown below but commented out. Uncomment and
-        # tweak it to fit your needs
-        # CUDA uses the cuda-toolkit-version and gcc-version
-        # HIP uses the clang-version
-        # cuda-toolkit-version: ['11.2.2']
-        # gcc-version: [9]
-        # rocm-version: ['5.1.0']
-        mpi: ['openmpi'] #Can use mpich and/or openmpi
-        # exclude:
-        #   - gpu-api: HIP
-        #     make-type: hydro
 
     # Setup environment variables
     env:

From fcc63deb8b7ad59e5f23f964532cd859df94871b Mon Sep 17 00:00:00 2001
From: Alwin <alwin.mao@pitt.edu>
Date: Sun, 2 Oct 2022 01:38:50 -0700
Subject: [PATCH 4/8] add viz

---
 src/global/global.cpp |   6 +-
 src/global/global.h   |   2 +
 src/io/io.cpp         |   7 ++-
 src/io/io.h           |   7 ++-
 src/io/viz.cu         | 124 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 5 deletions(-)
 create mode 100644 src/io/viz.cu

diff --git a/src/global/global.cpp b/src/global/global.cpp
index a99c1360e..d84a04a68 100644
--- a/src/global/global.cpp
+++ b/src/global/global.cpp
@@ -223,6 +223,10 @@ void parse_param(char *name,char *value, struct parameters *parms){
     parms->n_rotated_projection = atoi(value);
   else if (strcmp(name, "n_slice")==0)
     parms->n_slice = atoi(value);
+  else if (strcmp(name, "n_outviz")==0)
+    parms->n_outviz = atoi(value);
+  else if (strcmp(name, "outviz_density")==0)
+    parms->outviz_density = atoi(value);
   else if (strcmp(name, "xmin")==0)
     parms->xmin = atof(value);
   else if (strcmp(name, "ymin")==0)
@@ -366,7 +370,7 @@ void parse_param(char *name,char *value, struct parameters *parms){
 #ifdef CHEMISTRY_GPU
     else if (strcmp(name, "UVB_rates_file")==0)
       strncpy (parms->UVB_rates_file, value, MAXLEN);
-#endif  
+#endif
 #ifdef COOLING_GRACKLE
   else if (strcmp(name, "UVB_rates_file")==0)
     strncpy (parms->UVB_rates_file, value, MAXLEN);
diff --git a/src/global/global.h b/src/global/global.h
index 560ddb5f4..0fc1ff47b 100644
--- a/src/global/global.h
+++ b/src/global/global.h
@@ -204,6 +204,8 @@ struct parameters
   int n_projection;
   int n_rotated_projection;
   int n_slice;
+  int n_outviz=0;
+  int outviz_density=0;
   Real xmin;
   Real ymin;
   Real zmin;
diff --git a/src/io/io.cpp b/src/io/io.cpp
index 46ee71916..7d51c030c 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -72,7 +72,7 @@ void Write_Message_To_Log_File( const char* message ){
     out_file.close();
 }
 
-/* Write the initial conditions */
+/* Write Cholla Output Data */
 void WriteData(Grid3D &G, struct parameters P, int nfile)
 {
 
@@ -109,6 +109,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile)
   if (nfile % P.n_hydro == 0) OutputData(G,P,nfile);
   #endif
 
+  // This function does other checks to make sure it is valid (3D only)
+  #ifdef HDF5
+  if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile);
+  #endif
+  
   #ifdef PROJECTION
   if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile);
   #endif /*PROJECTION*/
diff --git a/src/io/io.h b/src/io/io.h
index deccec5ce..2e07bf083 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -1,5 +1,4 @@
-#ifndef IO_CHOLLA_H
-#define IO_CHOLLA_H
+#pragma once
 
 #include "../global/global.h"
 #include "../grid/grid3D.h"
@@ -29,4 +28,6 @@ void Create_Log_File( struct parameters P );
 void Write_Message_To_Log_File( const char* message );
 
 void write_debug ( Real *Value, const char *fname, int nValues, int iProc );
-#endif /*IO_CHOLLA_H*/
+
+// From io/viz.cu
+void OutputViz3D(Grid3D &G, struct parameters P, int nfile);
diff --git a/src/io/viz.cu b/src/io/viz.cu
new file mode 100644
index 000000000..bfbf549e2
--- /dev/null
+++ b/src/io/viz.cu
@@ -0,0 +1,124 @@
+// Require HDF5
+#ifdef HDF5
+
+#include <hdf5.h>
+
+#include "../grid/grid3D.h"
+
+#include "../io/io.h" // To provide io.h with OutputViz3D
+
+
+void CopyReal3D_CPU(Real* source, Real* destination, Header H)
+{
+  int i,j,k,id,buf_id;
+  
+  for (k=0; k<H.nz_real; k++) {
+    for (j=0; j<H.ny_real; j++) {
+      for (i=0; i<H.nx_real; i++) {
+	id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+	buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
+	destination[buf_id] = source[id];
+      }
+    }
+  }
+}
+
+void WriteVizField(Header H, hid_t file_id, hid_t dataspace_id, Real* buffer, Real* source, const char* name)
+{
+  hid_t dataset_id;
+  herr_t status;
+  // Copy non-ghost parts of source to buffer
+  CopyReal3D_CPU(source, buffer, H);
+
+  // Create a dataset id 
+  dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  // Write the buffer to file
+  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
+  // Free the dataset id
+  status = H5Dclose(dataset_id); 
+  
+}
+
+void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id)
+{
+  Header H = G.H;
+  int       nx_dset = H.nx_real;
+  int       ny_dset = H.ny_real;
+  int       nz_dset = H.nz_real;
+  hsize_t   dims[3];
+  // Create the data space for the datasets
+  dims[0] = nx_dset;
+  dims[1] = ny_dset;
+  dims[2] = nz_dset;
+  hid_t dataspace_id = H5Screate_simple(3, dims, NULL);  
+
+  
+  Real* dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
+
+
+  if (P.outviz_density > 0) {
+    WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density");
+  }
+  /* 
+  // Just an example of extending this function to include other fields. 
+  // Not implemented yet
+  if (P.outviz_energy > 0) {
+    WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy");
+  } 
+  */ 
+  
+  
+  free(dataset_buffer);
+  herr_t status = H5Sclose(dataspace_id);  
+}
+
+
+
+void OutputViz3D(Grid3D &G, struct parameters P, int nfile)
+{
+  Header H = G.H;  
+  // Do nothing in 1-D and 2-D case
+  if (H.ny == 1) {
+    return;
+  }
+  if (H.nz == 1) {
+    return;
+  }
+  // Do nothing if nfile is not multiple of n_outviz
+  if (nfile % P.n_outviz != 0) {
+    return;
+  }
+  
+  char filename[MAXLEN];
+  char timestep[20];
+
+  // create the filename
+  sprintf(timestep, "%d", nfile);  
+  strcpy(filename, P.outdir);
+  strcat(filename, timestep);
+  strcat(filename, ".viz3d.h5");
+  #ifdef MPI_CHOLLA
+  sprintf(filename,"%s.%d",filename,procID);
+  #endif
+
+  // create hdf5 file
+  hid_t   file_id; /* file identifier */
+  herr_t  status;
+
+  // Create a new file using default properties.
+  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+  // Write the header (file attributes)
+  G.Write_Header_HDF5(file_id);
+
+  // write the conserved variables to the output file
+  WriteViz3D(G, P, file_id);
+
+  // close the file
+  status = H5Fclose(file_id);
+
+  if (status < 0) {printf("File write failed.\n"); exit(-1); }
+
+}
+
+#endif

From c4be450bf440fb1866b1ec95c852e51d3822a13e Mon Sep 17 00:00:00 2001
From: Alwin <sam505@login0b.htc.sam.pitt.edu>
Date: Thu, 6 Oct 2022 03:41:31 -0400
Subject: [PATCH 5/8] hdf5 output refactor passes regression tests

---
 src/io/io.cpp | 404 ++++++++++----------------------------------------
 src/io/io.h   |   5 +
 src/io/viz.cu | 133 ++++++++++++-----
 3 files changed, 182 insertions(+), 360 deletions(-)

diff --git a/src/io/io.cpp b/src/io/io.cpp
index 7d51c030c..d9ea04548 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -113,7 +113,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile)
   #ifdef HDF5
   if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile);
   #endif
-  
+
   #ifdef PROJECTION
   if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile);
   #endif /*PROJECTION*/
@@ -1098,6 +1098,42 @@ void Grid3D::Write_Grid_Binary(FILE *fp)
 
 
 #ifdef HDF5
+
+void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+{
+  int id = H.n_ghost;
+  memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real));
+
+  // Create a dataset id for density
+  hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+  herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  // Free the dataset id
+  status = H5Dclose(dataset_id);
+}
+
+void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+{
+  int i,j,id,buf_id;
+  // Copy the density array to the memory buffer
+  for (j=0; j<H.ny_real; j++) {
+    for (i=0; i<H.nx_real; i++) {
+      id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
+      buf_id = j + i*H.ny_real;
+      dataset_buffer[buf_id] = source[id];
+    }
+  }
+
+  // Create a dataset id for density
+  hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+  herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  // Free the dataset id
+  status = H5Dclose(dataset_id);
+}
+
+
+
 /*! \fn void Write_Grid_HDF5(hid_t file_id)
  *  \brief Write the grid to a file, at the current simulation time. */
 void Grid3D::Write_Grid_HDF5(hid_t file_id)
@@ -1160,61 +1196,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     dims[0] = nx_dset;
     dataspace_id = H5Screate_simple(1, dims, NULL);
 
-    // Copy the density array to the memory buffer
-    // (Remapping to play nicely with python)
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.density[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for density
-    dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the x momentum array to the memory buffer
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.momentum_x[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for x momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the x momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the y momentum array to the memory buffer
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.momentum_y[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for y momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the y momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the z momentum array to the memory buffer
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.momentum_z[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for z momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the z momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the Energy array to the memory buffer
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.Energy[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for Energy
-    dataset_id = H5Dcreate(file_id, "/Energy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the Energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density");
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x");
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y");
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z");
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy");
 
     #ifdef SCALAR
     for (int s=0; s<NSCALARS; s++) {
@@ -1225,6 +1211,10 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       sprintf(number, "%d", s);
       strcat(dataset,number);
       // Copy the scalar array to the memory buffer
+
+      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. 
+      // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
+      
       id = H.n_ghost;
       memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real));
 
@@ -1234,20 +1224,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
       // Free the dataset id
       status = H5Dclose(dataset_id);
+
     }
     #endif  //SCALAR
 
     #ifdef DE
-    // Copy the internal energy array to the memory buffer
-    id = H.n_ghost;
-    memcpy(&dataset_buffer[0], &(C.GasEnergy[id]), H.nx_real*sizeof(Real));
-
-    // Create a dataset id for internal energy
-    dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the internal energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy");
     #endif  //DE
 
     // Free the dataspace id
@@ -1268,85 +1250,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     dims[1] = ny_dset;
     dataspace_id = H5Screate_simple(2, dims, NULL);
 
-    // Copy the density array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.density[id];
-      }
-    }
-
-    // Create a dataset id for density
-    dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the x momentum array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.momentum_x[id];
-      }
-    }
-    // Create a dataset id for x momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the x momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the y momentum array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.momentum_y[id];
-      }
-    }
-
-    // Create a dataset id for y momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the y momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the z momentum array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.momentum_z[id];
-      }
-    }
-
-    // Create a dataset id for z momentum
-    dataset_id = H5Dcreate(file_id, "/momentum_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the z momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the Energy array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.Energy[id];
-      }
-    }
-
-    // Create a dataset id for Energy
-    dataset_id = H5Dcreate(file_id, "/Energy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the Energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density");
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x");
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y");
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z");
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy");
 
     #ifdef SCALAR
     for (int s=0; s<NSCALARS; s++) {
@@ -1356,6 +1264,10 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       strcpy(dataset, "/scalar");
       sprintf(number, "%d", s);
       strcat(dataset,number);
+
+      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. 
+      // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
+
       // Copy the scalar array to the memory buffer
       for (j=0; j<H.ny_real; j++) {
         for (i=0; i<H.nx_real; i++) {
@@ -1375,21 +1287,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
 
 
     #ifdef DE
-    // Copy the internal energy array to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        dataset_buffer[buf_id] = C.GasEnergy[id];
-      }
-    }
-
-    // Create a dataset id for internal energy
-    dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the internal energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy");
     #endif  //DE
 
     // Free the dataspace id
@@ -1405,6 +1303,9 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     hsize_t   dims[3];
     hsize_t   dims_full[3];
 
+    double* device_dataset_buffer;
+    CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double)));
+
     dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
 
     // Create the data space for the datasets
@@ -1413,101 +1314,18 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     dims[2] = nz_dset;
     dataspace_id = H5Screate_simple(3, dims, NULL);
 
-    // Copy the density array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.density[id];
-        }
-      }
-    }
-
-    // Create a dataset id for density
-    dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-
-    // Copy the x momentum array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.momentum_x[id];
-        }
-      }
-    }
-    if ( output_momentum || H.Output_Complete_Data ){
-      // Create a dataset id for x momentum
-      dataset_id = H5Dcreate(file_id, "/momentum_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the x momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-    }
-    // Copy the y momentum array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.momentum_y[id];
-        }
-      }
-    }
-    if ( output_momentum || H.Output_Complete_Data ){
-      // Create a dataset id for y momentum
-      dataset_id = H5Dcreate(file_id, "/momentum_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the y momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
+    WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density");
+    if ( output_momentum || H.Output_Complete_Data ) {
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x");
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y");
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z");
     }
 
-    // Copy the z momentum array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.momentum_z[id];
-        }
-      }
-    }
-    if ( output_momentum || H.Output_Complete_Data ){
-      // Create a dataset id for z momentum
-      dataset_id = H5Dcreate(file_id, "/momentum_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the z momentum array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-    }
-
-
-    // Copy the energy array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.Energy[id];
-        }
-      }
-    }
     if ( output_energy || H.Output_Complete_Data ){
-      // Create a dataset id for Energy
-      dataset_id = H5Dcreate(file_id, "/Energy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the Energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy");
     }
 
+
     #ifdef SCALAR
     #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle
     for (int s=0; s<NSCALARS; s++) {
@@ -1704,23 +1522,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     #endif //SCALAR
 
     #ifdef DE
-    // Copy the internal energy array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = C.GasEnergy[id];
-        }
-      }
-    }
     if ( output_energy || H.Output_Complete_Data ){
-      // Create a dataset id for internal energy
-      dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the internal energy array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
+    WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy");
     }
     #endif  //DE
 
@@ -1751,75 +1554,22 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       free(dataset_buffer);
       dataset_buffer = (Real *) malloc((H.nx_real+1)*(H.ny_real+1)*(H.nz_real+1)*sizeof(Real));
 
-      // Create the data space for the datasets
-      dims[0]++;
-      dims[1]++;
-      dims[2]++;
-      dataspace_id = H5Screate_simple(3, dims, NULL);
-
-      // Copy the x magnetic field array to the memory buffer
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            dataset_buffer[buf_id] = C.magnetic_x[id];
-          }
-        }
-      }
-      if ( H.Output_Complete_Data ){
-        // Create a dataset id for x magnetic field
-        dataset_id = H5Dcreate(file_id, "/magnetic_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-        // Write the x magnetic field array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-        status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-        // Free the dataset id
-        status = H5Dclose(dataset_id);
-      }
-
-      // Copy the y magnetic field array to the memory buffer
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            dataset_buffer[buf_id] = C.magnetic_y[id];
-          }
-        }
-      }
-      if ( H.Output_Complete_Data ){
-        // Create a dataset id for y magnetic field
-        dataset_id = H5Dcreate(file_id, "/magnetic_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-        // Write the y magnetic field array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-        status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-        // Free the dataset id
-        status = H5Dclose(dataset_id);
-      }
-
-      // Copy the z magnetic field array to the memory buffer
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            dataset_buffer[buf_id] = C.magnetic_z[id];
-          }
-        }
-      }
-      if ( H.Output_Complete_Data ){
-        // Create a dataset id for z magnetic field
-        dataset_id = H5Dcreate(file_id, "/magnetic_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-        // Write the z magnetic field array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-        status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-        // Free the dataset id
-        status = H5Dclose(dataset_id);
+      if (H.Output_Complete_Data) {
+	// Note: for this function, use the left side n_ghost
+	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x");
+	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y");
+	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z");
       }
     #endif  //MHD
 
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
+    CudaSafeCall(cudaFree(device_dataset_buffer));
   }
   free(dataset_buffer);
 
+
+
 }
 #endif //HDF5
 
diff --git a/src/io/io.h b/src/io/io.h
index 2e07bf083..ff1f4c06c 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -30,4 +30,9 @@ void Write_Message_To_Log_File( const char* message );
 void write_debug ( Real *Value, const char *fname, int nValues, int iProc );
 
 // From io/viz.cu
+
 void OutputViz3D(Grid3D &G, struct parameters P, int nfile);
+
+// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, then write HDF5 field 
+void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name);
+void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name);  
diff --git a/src/io/viz.cu b/src/io/viz.cu
index bfbf549e2..a8cbaf0a3 100644
--- a/src/io/viz.cu
+++ b/src/io/viz.cu
@@ -7,69 +7,136 @@
 
 #include "../io/io.h" // To provide io.h with OutputViz3D
 
+// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision)
+__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source)
+{
+
+  int dest_id,source_id,id,i,j,k;
+  id = threadIdx.x + blockIdx.x * blockDim.x;
+  
+  k = id/(nx_real*ny_real);
+  j = (id - k*nx_real*ny_real)/nx_real;
+  i = id - j*nx_real - k*nx_real*ny_real;
+
+  if (k >= nz_real) {
+    return;
+  }
+
+  // This converts into HDF5 indexing that plays well with Python
+  dest_id = k + j*nz_real + i*ny_real*nz_real;
+  source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny;
+
+  destination[dest_id] = (double) source[source_id];
+}
 
-void CopyReal3D_CPU(Real* source, Real* destination, Header H)
+// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision)
+__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source)
 {
-  int i,j,k,id,buf_id;
+
+  int dest_id,source_id,id,i,j,k;
+  id = threadIdx.x + blockIdx.x * blockDim.x;
   
-  for (k=0; k<H.nz_real; k++) {
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-	id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-	buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-	destination[buf_id] = source[id];
-      }
-    }
+  k = id/(nx_real*ny_real);
+  j = (id - k*nx_real*ny_real)/nx_real;
+  i = id - j*nx_real - k*nx_real*ny_real;
+
+  if (k >= nz_real) {
+    return;
   }
+
+  // This converts into HDF5 indexing that plays well with Python
+  dest_id = k + j*nz_real + i*ny_real*nz_real;
+  source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny;
+
+  destination[dest_id] = (float) source[source_id];
 }
 
-void WriteVizField(Header H, hid_t file_id, hid_t dataspace_id, Real* buffer, Real* source, const char* name)
+// When buffer is double, automatically use the double version of everything using function overloading
+void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name)
 {
   hid_t dataset_id;
   herr_t status;
-  // Copy non-ghost parts of source to buffer
-  CopyReal3D_CPU(source, buffer, H);
+  hsize_t dims[3];
+  dims[0] = nx_real;
+  dims[1] = ny_real;
+  dims[2] = nz_real;
+  hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
+  
+  //Copy non-ghost parts of source to buffer
+  dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1); 
+  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
+  CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost));
 
   // Create a dataset id 
   dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
   // Write the buffer to file
   status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
-  // Free the dataset id
-  status = H5Dclose(dataset_id); 
+  // Free the dataset id and dataspace id
+  status = H5Dclose(dataset_id);
+  status = H5Sclose(dataspace_id);                                                                                                                                                                           
+}
+
+
+// When buffer is float, automatically use the float version of everything using function overloading
+void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name)
+{
+
+  hid_t dataset_id;
+  herr_t status;
+  hsize_t dims[3];
+  dims[0] = nx_real;
+  dims[1] = ny_real;
+  dims[2] = nz_real;
+  hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
   
+  //Copy non-ghost parts of source to buffer
+  dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1); 
+  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
+  CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost));
+
+  // Create a dataset id 
+  dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  // Write the buffer to file
+  status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
+  // Free the dataset id and dataspace id
+  status = H5Dclose(dataset_id);
+  status = H5Sclose(dataspace_id);                                                                                                                                                                           
 }
 
+
 void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id)
 {
   Header H = G.H;
-  int       nx_dset = H.nx_real;
-  int       ny_dset = H.ny_real;
-  int       nz_dset = H.nz_real;
-  hsize_t   dims[3];
-  // Create the data space for the datasets
-  dims[0] = nx_dset;
-  dims[1] = ny_dset;
-  dims[2] = nz_dset;
-  hid_t dataspace_id = H5Screate_simple(3, dims, NULL);  
-
-  
-  Real* dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
+  int nx_real = H.nx_real;
+  int ny_real = H.ny_real;
+  int nz_real = H.nz_real;
+  int n_ghost = H.n_ghost;
+  int nx = H.nx;
+  int ny = H.ny;
 
+  float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
+  float* device_buffer;
+  CudaSafeCall(cudaMalloc(&device_buffer,  nx_real*ny_real*nz_real*sizeof(float)));
 
   if (P.outviz_density > 0) {
-    WriteVizField(H, file_id, dataspace_id, dataset_buffer, G.C.density, "/density");
+    WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density");
   }
+
+
+  
   /* 
   // Just an example of extending this function to include other fields. 
   // Not implemented yet
   if (P.outviz_energy > 0) {
-    WriteVizField(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy");
+    WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy");
   } 
   */ 
-  
+  CudaSafeCall(cudaFree(device_buffer));  
   
   free(dataset_buffer);
-  herr_t status = H5Sclose(dataspace_id);  
+
 }
 
 
@@ -78,10 +145,10 @@ void OutputViz3D(Grid3D &G, struct parameters P, int nfile)
 {
   Header H = G.H;  
   // Do nothing in 1-D and 2-D case
-  if (H.ny == 1) {
+  if (H.ny_real == 1) {
     return;
   }
-  if (H.nz == 1) {
+  if (H.nz_real == 1) {
     return;
   }
   // Do nothing if nfile is not multiple of n_outviz

From f68e794620ec1410b3d2f35c2b7e709a5fc00306 Mon Sep 17 00:00:00 2001
From: Alwin <sam505@login0b.htc.sam.pitt.edu>
Date: Tue, 11 Oct 2022 12:18:16 -0400
Subject: [PATCH 6/8] hdf5 refactor part 2 (this part also passes tests)

---
 examples/3D/float32_sound_wave.txt |  59 ++++
 src/global/global.cpp              |  28 +-
 src/global/global.h                |  16 +-
 src/io/io.cpp                      | 437 ++++++++++++++---------------
 src/io/io.h                        |  12 +-
 src/io/{viz.cu => io_gpu.cu}       | 125 ++-------
 6 files changed, 346 insertions(+), 331 deletions(-)
 create mode 100644 examples/3D/float32_sound_wave.txt
 rename src/io/{viz.cu => io_gpu.cu} (50%)

diff --git a/examples/3D/float32_sound_wave.txt b/examples/3D/float32_sound_wave.txt
new file mode 100644
index 000000000..68b3d4952
--- /dev/null
+++ b/examples/3D/float32_sound_wave.txt
@@ -0,0 +1,59 @@
+#
+# Parameter File for sound wave test with float32 output
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=0.05
+# time interval for output
+outstep=0.05
+# name of initial conditions
+init=Sound_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=4.0
+ylen=4.0
+zlen=4.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+# Enable float32 output
+# Enable float32 density field
+n_out_float32=1
+out_float32_density=1
+
+# Uncomment this to enable momentum_x
+# out_float32_momentum_x=1
+
+#################################################
+# Parameters for linear wave problems
+# initial density 
+rho=1.0
+# velocity in the x direction 
+vx=0
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure 
+P=0.6
+# amplitude of perturbing oscillations
+A=1e-4
+# value of gamma
+gamma=1.666666666666667
+
diff --git a/src/global/global.cpp b/src/global/global.cpp
index d84a04a68..1f6a5cbfa 100644
--- a/src/global/global.cpp
+++ b/src/global/global.cpp
@@ -223,10 +223,30 @@ void parse_param(char *name,char *value, struct parameters *parms){
     parms->n_rotated_projection = atoi(value);
   else if (strcmp(name, "n_slice")==0)
     parms->n_slice = atoi(value);
-  else if (strcmp(name, "n_outviz")==0)
-    parms->n_outviz = atoi(value);
-  else if (strcmp(name, "outviz_density")==0)
-    parms->outviz_density = atoi(value);
+  else if (strcmp(name, "n_out_float32")==0)
+    parms->n_out_float32 = atoi(value);
+  else if (strcmp(name, "out_float32_density")==0)
+    parms->out_float32_density = atoi(value);
+  else if (strcmp(name, "out_float32_momentum_x")==0)
+    parms->out_float32_momentum_x = atoi(value);
+  else if (strcmp(name, "out_float32_momentum_y")==0)
+    parms->out_float32_momentum_y = atoi(value);
+  else if (strcmp(name, "out_float32_momentum_z")==0)
+    parms->out_float32_momentum_z = atoi(value);
+  else if (strcmp(name, "out_float32_Energy")==0)
+    parms->out_float32_Energy = atoi(value);
+#ifdef DE
+  else if (strcmp(name, "out_float32_GasEnergy")==0)
+    parms->out_float32_GasEnergy = atoi(value);
+#endif // DE
+#ifdef MHD
+  else if (strcmp(name, "out_float32_magnetic_x")==0)
+    parms->out_float32_magnetic_x = atoi(value);
+  else if (strcmp(name, "out_float32_magnetic_y")==0)
+    parms->out_float32_magnetic_y = atoi(value);
+  else if (strcmp(name, "out_float32_magnetic_z")==0)
+    parms->out_float32_magnetic_z = atoi(value);
+#endif // MHD
   else if (strcmp(name, "xmin")==0)
     parms->xmin = atof(value);
   else if (strcmp(name, "ymin")==0)
diff --git a/src/global/global.h b/src/global/global.h
index 0fc1ff47b..4e6d8eeb9 100644
--- a/src/global/global.h
+++ b/src/global/global.h
@@ -204,8 +204,20 @@ struct parameters
   int n_projection;
   int n_rotated_projection;
   int n_slice;
-  int n_outviz=0;
-  int outviz_density=0;
+  int n_out_float32=0;
+  int out_float32_density=0;
+  int out_float32_momentum_x=0;
+  int out_float32_momentum_y=0;
+  int out_float32_momentum_z=0;
+  int out_float32_Energy=0;
+#ifdef DE
+  int out_float32_GasEnergy=0;
+#endif
+#ifdef MHD
+  int out_float32_magnetic_x=0;
+  int out_float32_magnetic_y=0;
+  int out_float32_magnetic_z=0;
+#endif
   Real xmin;
   Real ymin;
   Real zmin;
diff --git a/src/io/io.cpp b/src/io/io.cpp
index d9ea04548..fc6c52eb7 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -16,6 +16,7 @@
 #include "../mpi/mpi_routines.h"
 #endif  //MPI_CHOLLA
 #include "../utils/error_handling.h"
+#include "../utils/DeviceVector.h"
 
 #ifdef COSMOLOGY
 #include "../cosmology/cosmology.h"
@@ -111,7 +112,7 @@ void WriteData(Grid3D &G, struct parameters P, int nfile)
 
   // This function does other checks to make sure it is valid (3D only)
   #ifdef HDF5
-  if (P.n_outviz && nfile % P.n_outviz == 0) OutputViz3D(G,P,nfile);
+  if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile);
   #endif
 
   #ifdef PROJECTION
@@ -232,6 +233,92 @@ void OutputData(Grid3D &G, struct parameters P, int nfile)
   #endif
 }
 
+void OutputFloat32(Grid3D &G, struct parameters P, int nfile)
+{
+  
+  Header H = G.H;
+  // Do nothing in 1-D and 2-D case
+  if (H.ny_real == 1) {
+    return;
+  }
+  if (H.nz_real == 1) {
+    return;
+  }
+  // Do nothing if nfile is not multiple of n_out_float32
+  if (nfile % P.n_out_float32 != 0) {
+    return;
+  }
+
+  char filename[MAXLEN];
+  char timestep[20];
+
+  // create the filename
+  sprintf(timestep, "%d", nfile);
+  strcpy(filename, P.outdir);
+  strcat(filename, timestep);
+  strcat(filename, ".float32.h5");
+  #ifdef MPI_CHOLLA
+  sprintf(filename,"%s.%d",filename,procID);
+  #endif
+
+  // create hdf5 file
+  hid_t   file_id; /* file identifier */
+  herr_t  status;
+
+  // Create a new file using default properties.
+  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+  // Write the header (file attributes)
+  G.Write_Header_HDF5(file_id);
+
+  // write the conserved variables to the output file
+
+  // 3-D Case
+  if (H.nx>1 && H.ny>1 && H.nz>1) {
+    int nx_dset = H.nx_real;
+    int ny_dset = H.ny_real;
+    int nz_dset = H.nz_real;
+    size_t buffer_size;
+    // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes.
+    // If the buffer is too large, it does not cause bugs (Oct 6 2022)
+#ifdef MHD
+    buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1);
+#else
+    buffer_size = nx_dset*ny_dset*nz_dset;
+#endif
+
+    // Using static DeviceVector here automatically allocates the buffer the first time it is needed
+    // It persists until program exit, and then calls Free upon destruction
+    cuda_utilities::DeviceVector<float> static device_dataset_vector{buffer_size};
+    float* device_dataset_buffer = device_dataset_vector.data();
+    float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float));
+
+    if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density");
+    if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x");
+    if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y");
+    if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z");
+    if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy");
+#ifdef DE
+    if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy");
+#endif  //DE
+#ifdef MHD
+    if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x");
+    if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y");
+    if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z");
+#endif
+
+
+    free(dataset_buffer);
+
+    if (status < 0) {printf("File write failed.\n"); exit(-1); }
+  } // 3-D case
+
+    // close the file
+  status = H5Fclose(file_id);
+
+
+}
+
 
 /* Output a projection of the grid data to file. */
 void OutputProjectedData(Grid3D &G, struct parameters P, int nfile)
@@ -1099,23 +1186,54 @@ void Grid3D::Write_Grid_Binary(FILE *fp)
 
 #ifdef HDF5
 
-void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+// Helper function which uses the correct HDF5 arguments based on the type of dataset_buffer to avoid writing garbage
+herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name)
 {
-  int id = H.n_ghost;
-  memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real));
-
   // Create a dataset id for density
   hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+  // Write the density array to file
   herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
   // Free the dataset id
   status = H5Dclose(dataset_id);
+  return status;
+}
+
+herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name)
+{
+  // Create a dataset id for density
+  hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  // Write the density array to file
+  herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  // Free the dataset id
+  status = H5Dclose(dataset_id);
+  return status;
+}
+
+
+void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+{
+  // Copy non-ghost source to Buffer
+  int id = H.n_ghost;
+  memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real));
+  // Buffer write to HDF5 Dataset
+  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+}
+
+void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name)
+{
+  // Copy non-ghost source to Buffer with conversion from double to float
+  int i;
+  for (i=0; i<H.nx_real; i++) {
+    dataset_buffer[i] = (float) source[i+H.n_ghost];
+  }
+  // Buffer write to HDF5 Dataset
+  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
 }
 
 void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
 {
   int i,j,id,buf_id;
-  // Copy the density array to the memory buffer
+  // Copy non-ghost source to Buffer
   for (j=0; j<H.ny_real; j++) {
     for (i=0; i<H.nx_real; i++) {
       id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
@@ -1123,16 +1241,25 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real*
       dataset_buffer[buf_id] = source[id];
     }
   }
-
-  // Create a dataset id for density
-  hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-  herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-  // Free the dataset id
-  status = H5Dclose(dataset_id);
+  // Buffer write to HDF5 Dataset
+  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
 }
 
-
+// Convert double to float if necessary
+void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name)
+{
+  int i,j,id,buf_id;
+  // Copy non-ghost source to Buffer with conversion to float
+  for (j=0; j<H.ny_real; j++) {
+    for (i=0; i<H.nx_real; i++) {
+      id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
+      buf_id = j + i*H.ny_real;
+      dataset_buffer[buf_id] = (float) source[id];
+    }
+  }
+  // Buffer write to HDF5 Dataset
+  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+}
 
 /*! \fn void Write_Grid_HDF5(hid_t file_id)
  *  \brief Write the grid to a file, at the current simulation time. */
@@ -1147,6 +1274,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
   bool output_energy;
   bool output_momentum;
 
+
   #ifdef OUTPUT_ENERGY
   output_energy = true;
   #else  // not OUTPUT_ENERGY
@@ -1212,20 +1340,15 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       strcat(dataset,number);
       // Copy the scalar array to the memory buffer
 
-      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. 
+      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted.
       // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
-      
+
       id = H.n_ghost;
       memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real));
-
-      // Create a dataset id for the scalar
-      dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the scalar array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
+      // dataset here is just a name
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
     }
+
     #endif  //SCALAR
 
     #ifdef DE
@@ -1265,7 +1388,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       sprintf(number, "%d", s);
       strcat(dataset,number);
 
-      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted. 
+      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted.
       // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
 
       // Copy the scalar array to the memory buffer
@@ -1276,12 +1399,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
           dataset_buffer[buf_id] = C.scalar[id+s*H.n_cells];
         }
       }
-      // Create a dataset id for the scalar
-      dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the scalar array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
+
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
     }
     #endif  //SCALAR
 
@@ -1303,17 +1422,27 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
     hsize_t   dims[3];
     hsize_t   dims_full[3];
 
-    double* device_dataset_buffer;
-    CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double)));
+    size_t buffer_size;
+    // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes.
+    // If the buffer is too large, it does not cause bugs (Oct 6 2022)
+    #ifdef MHD
+    buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1);
+    #else
+    buffer_size = nx_dset*ny_dset*nz_dset;
+    #endif
+    // Using static DeviceVector here automatically allocates the buffer the first time it is needed
+    // It persists until program exit, and then calls Free upon destruction
+    cuda_utilities::DeviceVector<double> static device_dataset_vector{buffer_size};
+    double* device_dataset_buffer = device_dataset_vector.data();
+    dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real));
+    //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double)));
 
-    dataset_buffer = (Real *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
 
-    // Create the data space for the datasets
+    // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one)
     dims[0] = nx_dset;
     dims[1] = ny_dset;
     dims[2] = nz_dset;
     dataspace_id = H5Screate_simple(3, dims, NULL);
-
     WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density");
     if ( output_momentum || H.Output_Complete_Data ) {
       WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x");
@@ -1325,7 +1454,6 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy");
     }
 
-
     #ifdef SCALAR
     #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle
     for (int s=0; s<NSCALARS; s++) {
@@ -1345,12 +1473,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
           }
         }
       }
-      // Create a dataset id for the scalar
-      dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      // Write the scalar array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
     }
     #else // COOLING_GRACKLE or CHEMISTRY_GPU. Write Chemistry when using GRACKLE
     #ifdef OUTPUT_CHEMISTRY
@@ -1368,9 +1491,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
         }
       }
     }
-    dataset_id = H5Dcreate(file_id, "/HI_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HI_density");
 
     for (k=0; k<H.nz_real; k++) {
       for (j=0; j<H.ny_real; j++) {
@@ -1388,9 +1509,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       }
     }
     if ( output_full_ionization || H.Output_Complete_Data ){
-      dataset_id = H5Dcreate(file_id, "/HII_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HII_density");
     }
 
     for (k=0; k<H.nz_real; k++) {
@@ -1408,9 +1527,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       }
     }
     if ( output_full_ionization || H.Output_Complete_Data ){
-      dataset_id = H5Dcreate(file_id, "/HeI_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeI_density");
     }
     for (k=0; k<H.nz_real; k++) {
       for (j=0; j<H.ny_real; j++) {
@@ -1426,9 +1543,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
         }
       }
     }
-    dataset_id = H5Dcreate(file_id, "/HeII_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeII_density");
 
     for (k=0; k<H.nz_real; k++) {
       for (j=0; j<H.ny_real; j++) {
@@ -1444,9 +1559,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
         }
       }
     }
-    dataset_id = H5Dcreate(file_id, "/HeIII_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeIII_density");
 
     for (k=0; k<H.nz_real; k++) {
       for (j=0; j<H.ny_real; j++) {
@@ -1463,9 +1576,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       }
     }
     if ( output_electrons || H.Output_Complete_Data ){
-      dataset_id = H5Dcreate(file_id, "/e_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/e_density");
     }
 
 
@@ -1480,9 +1591,7 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
       }
     }
     if ( output_metals || H.Output_Complete_Data ){
-      dataset_id = H5Dcreate(file_id, "/metal_density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-      status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
+      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/metal_density");
     }
     #endif //GRACKLE_METALS
 
@@ -1509,12 +1618,8 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
         }
       }
     }
-    // Create a dataset id for density
-    dataset_id = H5Dcreate(file_id, "/temperature", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/temperature");
 
     #endif //OUTPUT_TEMPERATURE
 
@@ -1540,36 +1645,23 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
         }
       }
     }
-    // Create a dataset id for density
-    dataset_id = H5Dcreate(file_id, "/grav_potential", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential");
     #endif//GRAVITY and OUTPUT_POTENTIAL
 
     #ifdef  MHD
-      // Start by creating a dataspace and buffer that is large enough for the
-      // magnetic field since it's one larger than the rest
-      free(dataset_buffer);
-      dataset_buffer = (Real *) malloc((H.nx_real+1)*(H.ny_real+1)*(H.nz_real+1)*sizeof(Real));
-
-      if (H.Output_Complete_Data) {
-	// Note: for this function, use the left side n_ghost
-	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x");
-	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y");
-	WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z");
-      }
+    if (H.Output_Complete_Data) {
+      // Note: for WriteHDF5Field3D, use the left side n_ghost
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x");
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y");
+      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z");
+    }
     #endif  //MHD
 
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
-    CudaSafeCall(cudaFree(device_dataset_buffer));
+    //CudaSafeCall(cudaFree(device_dataset_buffer));// No longer needed because devicevector frees when it should
   }
   free(dataset_buffer);
-
-
-
 }
 #endif //HDF5
 
@@ -1671,34 +1763,11 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id)
       }
     }
 
-
-    // Create a dataset id for projected xy density
-    dataset_id = H5Dcreate(file_id, "/d_xy", H5T_IEEE_F64BE, dataspace_xy_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_dxy);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/d_xz", H5T_IEEE_F64BE, dataspace_xz_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_dxz);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xy temperature
-    dataset_id = H5Dcreate(file_id, "/T_xy", H5T_IEEE_F64BE, dataspace_xy_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected temperature array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_Txy);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/T_xz", H5T_IEEE_F64BE, dataspace_xz_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected temperature array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_Txz);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    // Write the projected density and temperature arrays to file
+    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy");
+    status = HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz");
+    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy");
+    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz");
 
     // Free the dataspace ids
     status = H5Sclose(dataspace_xz_id);
@@ -1841,41 +1910,12 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
       }
     }
 
-
-    // Create a dataset id for projected xy density
-    dataset_id = H5Dcreate(file_id, "/d_xzr", H5T_IEEE_F64BE, dataspace_xzr_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_dxzr);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/T_xzr", H5T_IEEE_F64BE, dataspace_xzr_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_Txzr);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/vx_xzr", H5T_IEEE_F64BE, dataspace_xzr_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vxxzr);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/vy_xzr", H5T_IEEE_F64BE, dataspace_xzr_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vyxzr);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Create a dataset id for projected xz density
-    dataset_id = H5Dcreate(file_id, "/vz_xzr", H5T_IEEE_F64BE, dataspace_xzr_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    // Write the projected density array to file  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vzxzr);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+    // Write projected d,T,vx,vy,vz
+    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr");
+    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr");
+    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr");
+    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr");
+    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr");
 
     // Free the dataspace id
     status = H5Sclose(dataspace_xzr_id);
@@ -1998,30 +2038,16 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     }
 
     // Write out the xy datasets for each variable
-    dataset_id = H5Dcreate(file_id, "/d_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_d);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mx_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mx);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/my_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_my);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mz_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mz);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/E_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_E);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy");
     #ifdef DE
-    dataset_id = H5Dcreate(file_id, "/GE_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_GE);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy");
     #endif
     #ifdef SCALAR
-    dataset_id = H5Dcreate(file_id, "/scalar_xy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_scalar);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xy");
     #endif
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
@@ -2105,31 +2131,18 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     }
 
     // Write out the xz datasets for each variable
-    dataset_id = H5Dcreate(file_id, "/d_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_d);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mx_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mx);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/my_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_my);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mz_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mz);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/E_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_E);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz");
     #ifdef DE
-    dataset_id = H5Dcreate(file_id, "/GE_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_GE);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz");
     #endif
     #ifdef SCALAR
-    dataset_id = H5Dcreate(file_id, "/scalar_xz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_scalar);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xz");
     #endif
+
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
 
@@ -2211,31 +2224,18 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
       }
     }
 
+
     // Write out the yz datasets for each variable
-    dataset_id = H5Dcreate(file_id, "/d_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_d);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mx_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mx);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/my_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_my);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/mz_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_mz);
-    status = H5Dclose(dataset_id);
-    dataset_id = H5Dcreate(file_id, "/E_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_E);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz");
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz");
     #ifdef DE
-    dataset_id = H5Dcreate(file_id, "/GE_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_GE);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz");
     #endif
     #ifdef SCALAR
-    dataset_id = H5Dcreate(file_id, "/scalar_yz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_scalar);
-    status = H5Dclose(dataset_id);
+    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_yz");
     #endif
 
     // Free the dataspace id
@@ -2495,7 +2495,6 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P)
     // need a dataset buffer to remap fastest index
     dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real));
 
-
     // Open the density dataset
     dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT);
     // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
diff --git a/src/io/io.h b/src/io/io.h
index ff1f4c06c..f7dfe6eb7 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -11,6 +11,9 @@ void WriteData(Grid3D &G, struct parameters P, int nfile);
 /* Output the grid data to file. */
 void OutputData(Grid3D &G, struct parameters P, int nfile);
 
+/* Output the grid data to file as 32-bit floats. */
+void OutputFloat32(Grid3D &G, struct parameters P, int nfile);
+
 /* Output a projection of the grid data to file. */
 void OutputProjectedData(Grid3D &G, struct parameters P, int nfile);
 
@@ -29,10 +32,13 @@ void Write_Message_To_Log_File( const char* message );
 
 void write_debug ( Real *Value, const char *fname, int nValues, int iProc );
 
-// From io/viz.cu
-
-void OutputViz3D(Grid3D &G, struct parameters P, int nfile);
+#ifdef HDF5
+// From io/io.cpp
+herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name);
+herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name);
 
+// From io/io_gpu.cu
 // Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, then write HDF5 field 
 void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name);
 void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name);  
+#endif
diff --git a/src/io/viz.cu b/src/io/io_gpu.cu
similarity index 50%
rename from src/io/viz.cu
rename to src/io/io_gpu.cu
index a8cbaf0a3..c6cab6e8a 100644
--- a/src/io/viz.cu
+++ b/src/io/io_gpu.cu
@@ -7,13 +7,17 @@
 
 #include "../io/io.h" // To provide io.h with OutputViz3D
 
+// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz
+// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz
+// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs.
+
 // Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision)
 __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source)
 {
 
   int dest_id,source_id,id,i,j,k;
   id = threadIdx.x + blockIdx.x * blockDim.x;
-  
+
   k = id/(nx_real*ny_real);
   j = (id - k*nx_real*ny_real)/nx_real;
   i = id - j*nx_real - k*nx_real*ny_real;
@@ -35,7 +39,7 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real,
 
   int dest_id,source_id,id,i,j,k;
   id = threadIdx.x + blockIdx.x * blockDim.x;
-  
+
   k = id/(nx_real*ny_real);
   j = (id - k*nx_real*ny_real)/nx_real;
   i = id - j*nx_real - k*nx_real*ny_real;
@@ -54,27 +58,26 @@ __global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real,
 // When buffer is double, automatically use the double version of everything using function overloading
 void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name)
 {
-  hid_t dataset_id;
   herr_t status;
   hsize_t dims[3];
   dims[0] = nx_real;
   dims[1] = ny_real;
   dims[2] = nz_real;
   hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
-  
+
   //Copy non-ghost parts of source to buffer
   dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
-  dim3 dim1dBlock(TPB, 1, 1); 
+  dim3 dim1dBlock(TPB, 1, 1);
   hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
   CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost));
 
-  // Create a dataset id 
-  dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the buffer to file
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
-  // Free the dataset id and dataspace id
-  status = H5Dclose(dataset_id);
-  status = H5Sclose(dataspace_id);                                                                                                                                                                           
+  // Write Buffer to HDF5
+  status = HDF5_Dataset(file_id, dataspace_id, buffer, name);
+
+  status = H5Sclose(dataspace_id);
+  if (status < 0) {printf("File write failed.\n");}
+
+
 }
 
 
@@ -82,110 +85,26 @@ void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int
 void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name)
 {
 
-  hid_t dataset_id;
   herr_t status;
   hsize_t dims[3];
   dims[0] = nx_real;
   dims[1] = ny_real;
   dims[2] = nz_real;
   hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
-  
+
   //Copy non-ghost parts of source to buffer
   dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
-  dim3 dim1dBlock(TPB, 1, 1); 
+  dim3 dim1dBlock(TPB, 1, 1);
   hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
   CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost));
 
-  // Create a dataset id 
-  dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the buffer to file
-  status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer);
-  // Free the dataset id and dataspace id
-  status = H5Dclose(dataset_id);
-  status = H5Sclose(dataspace_id);                                                                                                                                                                           
-}
-
-
-void WriteViz3D(Grid3D &G, struct parameters P, hid_t file_id)
-{
-  Header H = G.H;
-  int nx_real = H.nx_real;
-  int ny_real = H.ny_real;
-  int nz_real = H.nz_real;
-  int n_ghost = H.n_ghost;
-  int nx = H.nx;
-  int ny = H.ny;
-
-  float* dataset_buffer = (float *) malloc(H.nx_real*H.ny_real*H.nz_real*sizeof(Real));
-  float* device_buffer;
-  CudaSafeCall(cudaMalloc(&device_buffer,  nx_real*ny_real*nz_real*sizeof(float)));
-
-  if (P.outviz_density > 0) {
-    WriteHDF5Field3D(nx, ny, nx_real, ny_real, nz_real, n_ghost, file_id, dataset_buffer, device_buffer, G.C.d_density, "/density");
-  }
-
+  // Write Buffer to HDF5
+  status = HDF5_Dataset(file_id, dataspace_id, buffer, name);
 
-  
-  /* 
-  // Just an example of extending this function to include other fields. 
-  // Not implemented yet
-  if (P.outviz_energy > 0) {
-    WriteHDF5Field(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/energy");
-  } 
-  */ 
-  CudaSafeCall(cudaFree(device_buffer));  
-  
-  free(dataset_buffer);
+  status = H5Sclose(dataspace_id);
+  if (status < 0) {printf("File write failed.\n");}
 
 }
 
 
-
-void OutputViz3D(Grid3D &G, struct parameters P, int nfile)
-{
-  Header H = G.H;  
-  // Do nothing in 1-D and 2-D case
-  if (H.ny_real == 1) {
-    return;
-  }
-  if (H.nz_real == 1) {
-    return;
-  }
-  // Do nothing if nfile is not multiple of n_outviz
-  if (nfile % P.n_outviz != 0) {
-    return;
-  }
-  
-  char filename[MAXLEN];
-  char timestep[20];
-
-  // create the filename
-  sprintf(timestep, "%d", nfile);  
-  strcpy(filename, P.outdir);
-  strcat(filename, timestep);
-  strcat(filename, ".viz3d.h5");
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif
-
-  // create hdf5 file
-  hid_t   file_id; /* file identifier */
-  herr_t  status;
-
-  // Create a new file using default properties.
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
-
-  // Write the header (file attributes)
-  G.Write_Header_HDF5(file_id);
-
-  // write the conserved variables to the output file
-  WriteViz3D(G, P, file_id);
-
-  // close the file
-  status = H5Fclose(file_id);
-
-  if (status < 0) {printf("File write failed.\n"); exit(-1); }
-
-}
-
-#endif
+#endif //HDF5

From 03b34829556aa7a8a5c8a2b34b5c6d29c60f63a9 Mon Sep 17 00:00:00 2001
From: Alwin <sam505@login0b.htc.sam.pitt.edu>
Date: Tue, 18 Oct 2022 02:10:33 -0400
Subject: [PATCH 7/8] static_grav renamed/fixed, light side fixes

---
 builds/make.type.rot_proj                     | 31 ++++++++++++++++++
 builds/make.type.static_grav                  | 32 +++++++++++++++++++
 src/global/global_cuda.h                      | 16 ----------
 src/gravity/gravity_cuda.h                    | 18 -----------
 .../{gravity_cuda.cu => static_grav.h}        | 15 ++++-----
 src/hydro/hydro_cuda.cu                       |  2 +-
 src/io/io.cpp                                 | 23 ++++++++-----
 7 files changed, 86 insertions(+), 51 deletions(-)
 create mode 100644 builds/make.type.rot_proj
 create mode 100644 builds/make.type.static_grav
 delete mode 100644 src/gravity/gravity_cuda.h
 rename src/gravity/{gravity_cuda.cu => static_grav.h} (90%)

diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj
new file mode 100644
index 000000000..e6faa7514
--- /dev/null
+++ b/builds/make.type.rot_proj
@@ -0,0 +1,31 @@
+#-- Default hydro only build with rotated projection
+
+DFLAGS    += -DCUDA
+DFLAGS    += -DMPI_CHOLLA
+DFLAGS    += -DPRECISION=2
+DFLAGS    += -DPPMC
+DFLAGS    += -DHLLC
+
+# Integrator
+DFLAGS    += -DSIMPLE
+#DFLAGS    += -DVL
+
+# Apply a density and temperature floor
+DFLAGS    += -DDENSITY_FLOOR
+DFLAGS    += -DTEMPERATURE_FLOOR
+
+# Solve the Gas Internal Energy usisng a Dual Energy Formalism
+#DFLAGS    += -DDE
+
+# Apply cooling on the GPU from precomputed tables
+#DFLAGS    += -DCOOLING_GPU
+
+# Measure the Timing of the different stages
+#DFLAGS    += -DCPU_TIME
+
+# Select output format
+# Can also add -DSLICES and -DPROJECTIONS
+OUTPUT    ?=  -DOUTPUT -DHDF5
+DFLAGS    += $(OUTPUT)
+
+DFLAGS    += -DROTATED_PROJECTION
\ No newline at end of file
diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav
new file mode 100644
index 000000000..ffa15c4ee
--- /dev/null
+++ b/builds/make.type.static_grav
@@ -0,0 +1,32 @@
+#-- Default hydro only build with static_grav
+
+DFLAGS    += -DCUDA
+DFLAGS    += -DMPI_CHOLLA
+DFLAGS    += -DPRECISION=2
+DFLAGS    += -DPPMC
+DFLAGS    += -DHLLC
+
+# Integrator
+DFLAGS    += -DSIMPLE
+#DFLAGS    += -DVL
+
+# Apply a density and temperature floor
+DFLAGS    += -DDENSITY_FLOOR
+DFLAGS    += -DTEMPERATURE_FLOOR
+
+# Solve the Gas Internal Energy usisng a Dual Energy Formalism
+#DFLAGS    += -DDE
+
+DFLAGS    += -DSTATIC_GRAV
+
+# Apply cooling on the GPU from precomputed tables
+#DFLAGS    += -DCOOLING_GPU
+
+# Measure the Timing of the different stages
+#DFLAGS    += -DCPU_TIME
+
+# Select output format
+# Can also add -DSLICES and -DPROJECTIONS
+OUTPUT    ?=  -DOUTPUT -DHDF5
+DFLAGS    += $(OUTPUT)
+
diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h
index 7a5beca55..35c0c355f 100644
--- a/src/global/global_cuda.h
+++ b/src/global/global_cuda.h
@@ -92,17 +92,6 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
    }
 }
 
-
-
-/*! \fn Real minof3(Real a, Real b, Real c)
- *  \brief Returns the minimum of three floating point numbers. */
-__device__ inline Real minof3(Real a, Real b, Real c)
-{
-  return fmin(a, fmin(b,c));
-}
-
-
-
 /*! \fn int sgn_CUDA
  *  \brief Mathematical sign function. Returns sign of x. */
 __device__ inline int sgn_CUDA(Real x)
@@ -111,11 +100,6 @@ __device__ inline int sgn_CUDA(Real x)
   else return 1;
 }
 
-
-__global__ void test_function();
-
-
-
 #endif //GLOBAL_CUDA_H
 
 #endif //CUDA
diff --git a/src/gravity/gravity_cuda.h b/src/gravity/gravity_cuda.h
deleted file mode 100644
index b4d885262..000000000
--- a/src/gravity/gravity_cuda.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*! \file gravity_cuda.h
- *  \brief Declarations of functions used to calculate gravitational accelerations. */
-
-#ifdef CUDA
-#ifndef GRAVITY_CUDA_H
-#define GRAVITY_CUDA_H
-
-#include "../global/global.h"
-
-
-__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx);
-
-__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy);
-
-__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz);
-
-#endif // GRAVITY_CUDA_H
-#endif // CUDA
diff --git a/src/gravity/gravity_cuda.cu b/src/gravity/static_grav.h
similarity index 90%
rename from src/gravity/gravity_cuda.cu
rename to src/gravity/static_grav.h
index 0137c44f1..3ddbb86be 100644
--- a/src/gravity/gravity_cuda.cu
+++ b/src/gravity/static_grav.h
@@ -4,17 +4,16 @@
            functions in hydro_cuda.cu. */
 #ifdef CUDA
 
+#pragma once
+
 #include <stdio.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../gravity/gravity_cuda.h"
+#include <math.h> // provides sqrt log cos sin atan etc.
+#include "../global/global.h" // provides GN etc.
 
 // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5
 static inline __device__ Real pow2(const Real x) { return x*x; }
 
-__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx)
+inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx)
 {
   Real x_pos, r_disk, r_halo;
   x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
@@ -52,7 +51,7 @@ __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound,
 }
 
 
-__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy)
+inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy)
 {
   Real x_pos, y_pos, r, phi;
   // use the subgrid offset and global boundaries to calculate absolute positions on the grid
@@ -108,7 +107,7 @@ __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, R
 }
 
 
-__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz)
+inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz)
 {
   Real x_pos, y_pos, z_pos, r_disk, r_halo;
   // use the subgrid offset and global boundaries to calculate absolute positions on the grid
diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu
index bf385d25f..ee033e334 100644
--- a/src/hydro/hydro_cuda.cu
+++ b/src/hydro/hydro_cuda.cu
@@ -10,7 +10,7 @@
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../hydro/hydro_cuda.h"
-#include "../gravity/gravity_cuda.h"
+#include "../gravity/static_grav.h"
 #include "../utils/hydro_utilities.h"
 #include "../utils/cuda_utilities.h"
 #include "../utils/reduction_utilities.h"
diff --git a/src/io/io.cpp b/src/io/io.cpp
index fc6c52eb7..be0a1b9fa 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -1820,6 +1820,11 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
     int nx_dset = R.nx;
     int nz_dset = R.nz;
 
+    if (R.nx * R.nz == 0) {
+      chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n");
+      return;
+    }
+
     // set the projected dataset size for this process to capture
     // this piece of the simulation volume
     // min and max values were set in the header write
@@ -1920,15 +1925,17 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
     // Free the dataspace id
     status = H5Sclose(dataspace_xzr_id);
 
+    //free the data
+    free(dataset_buffer_dxzr);
+    free(dataset_buffer_Txzr);
+    free(dataset_buffer_vxxzr);
+    free(dataset_buffer_vyxzr);
+    free(dataset_buffer_vzxzr);
+
   }
-  else printf("Rotated projection write only implemented for 3D data.\n");
-
-  //free the data
-  free(dataset_buffer_dxzr);
-  free(dataset_buffer_Txzr);
-  free(dataset_buffer_vxxzr);
-  free(dataset_buffer_vyxzr);
-  free(dataset_buffer_vzxzr);
+  else chprintf("Rotated projection write only implemented for 3D data.\n");
+
+
 
 }
 #endif //HDF5

From 342c59f135c26d7cc6d052ae9ff41386f72647a6 Mon Sep 17 00:00:00 2001
From: bcaddy <41171425+bcaddy@users.noreply.github.com>
Date: Thu, 28 Sep 2023 09:30:42 -0400
Subject: [PATCH 8/8] Fix typo in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b372f6b34..c9d731b89 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ https://user-images.githubusercontent.com/3432028/188235319-e5eb4e5e-00c6-435f-a
 
 Getting started
 ----------------
-This is the stable branch of the *Cholla* hydrodyamics code.
+This is the stable branch of the *Cholla* hydrodynamics code.
 
 *Cholla* is designed to be run using (AMD or NVIDIA) GPUs, and can be run in serial mode using one GPU
 or with MPI for multiple GPUs.